# Automated Resume Review Agent with CrewAI & Open‑Source LLM


In [None]:
!pip install --quiet python-docx PyPDF2 pandas scikit-learn imapclient crewai transformers accelerate sentencepiece


In [None]:
import os

# Working directories under /content: one for downloaded resumes, one for logs.
RESUME_DIR, LOG_DIR = '/content/resumes', '/content/logs'

# Create both directories; exist_ok keeps this cell safe to re-run.
for _workdir in (RESUME_DIR, LOG_DIR):
    os.makedirs(_workdir, exist_ok=True)

# Echo the locations so the reader can see where files will land.
for _label, _workdir in (("Resumes →", RESUME_DIR), ("Logs    →", LOG_DIR)):
    print(_label, _workdir)



Resumes → /content/resumes
Logs    → /content/logs


## Upload a JSON file named `secrets.json` to `/content/` containing your mail and Hugging Face credentials.

In [None]:
import getpass
# Load credentials from an external JSON file and immediately delete it
import json, os

# Upload 'secrets.json' via the Colab Files sidebar before running this cell.
# 'secrets.json' should contain:
# {
#   "email": "your.name@gmail.com",
#   "app_password": "your_app_password",
#   "hf_model_id": "tiiuae/falcon-7b",
#   "hf_token": "your_hf_api_token"
# }

# Location of the uploaded credentials file and the keys it must provide.
SECRETS_PATH = '/content/secrets.json'
REQUIRED_SECRET_KEYS = ('email', 'app_password', 'hf_model_id', 'hf_token')

try:
    with open(SECRETS_PATH, 'r') as f:
        creds = json.load(f)
finally:
    # Remove the file even when reading/parsing fails, so plaintext secrets
    # never linger on disk after this cell has run.
    if os.path.exists(SECRETS_PATH):
        os.remove(SECRETS_PATH)

# Fail with one clear message instead of a bare KeyError on the first lookup.
missing_keys = [key for key in REQUIRED_SECRET_KEYS if key not in creds]
if missing_keys:
    raise KeyError(f"secrets.json is missing required key(s): {', '.join(missing_keys)}")

EMAIL_USER  = creds['email']
EMAIL_PASS  = creds['app_password']
HF_MODEL_ID = creds['hf_model_id']
HF_TOKEN    = creds['hf_token']

# Confirmation only; the secret values themselves are never printed.
print("Credentials loaded; secrets.json removed.")



Credentials loaded; secrets.json removed.


In [None]:
import os
# The Hugging Face libraries look for the access token in the `HF_TOKEN`
# environment variable; the model-loading cell's output reports that no
# `HF_TOKEN` was found when only the custom name below is set.
os.environ["HF_TOKEN"] = HF_TOKEN
# Kept for backward compatibility with anything that reads the original name.
os.environ["HUGGINGFACE_API_TOKEN"] = HF_TOKEN

import smtplib
from datetime import datetime
from email.message import EmailMessage

import imapclient, email, re
import docx
import pandas as pd
import torch
from crewai import Agent, Task, Crew
from PyPDF2 import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


In [None]:
# 1) Fetch resumes from email → RESUME_DIR
def fetch_resumes():
    """Save PDF/DOCX attachments from unread INBOX messages into RESUME_DIR.

    Logs in to Gmail over IMAP with EMAIL_USER / EMAIL_PASS, walks every
    UNSEEN message, writes each attachment whose filename ends in `.pdf` or
    `.docx` to RESUME_DIR, and flags the message as seen.

    Returns:
        None. The result is the set of files written to RESUME_DIR.
    """
    imap = imapclient.IMAPClient('imap.gmail.com', ssl=True)
    try:
        imap.login(EMAIL_USER, EMAIL_PASS)
        imap.select_folder('INBOX')
        for uid in imap.search(['UNSEEN']):
            raw = imap.fetch([uid], ['RFC822'])[uid][b'RFC822']
            msg = email.message_from_bytes(raw)
            for part in msg.walk():
                if part.get_content_maintype() == 'multipart':
                    continue
                if not part.get('Content-Disposition'):
                    continue
                # The filename comes from the sender: keep only its final
                # component so it cannot point outside RESUME_DIR.
                fn = os.path.basename((part.get_filename() or '').replace('\\', '/'))
                if not fn.lower().endswith(('.pdf', '.docx')):
                    continue
                payload = part.get_payload(decode=True)
                if payload is None:
                    # Nothing decodable in this part; skip instead of crashing on write.
                    continue
                with open(os.path.join(RESUME_DIR, fn), 'wb') as f:
                    f.write(payload)
            imap.add_flags(uid, [imapclient.SEEN])
    finally:
        # Always close the session, even if a message fails to download or parse.
        imap.logout()

# 2) Parse & mask PII
def parse_and_mask(path:str) -> str:
    """Extract the text of a resume file and redact e-mail addresses.

    Args:
        path: File path. A name ending in `.pdf` (case-insensitive) is read
            with PdfReader; anything else is opened as a DOCX document.

    Returns:
        The extracted text with every e-mail-like token replaced by `[EMAIL]`.
        Only e-mail addresses are masked; other personal data is left as-is.
    """
    if path.lower().endswith('.pdf'):
        reader = PdfReader(path)
        # Join pages with a newline (as the DOCX branch does for paragraphs) so
        # the last word of one page is not fused with the first word of the next.
        text = "\n".join(p.extract_text() or "" for p in reader.pages)
    else:
        doc = docx.Document(path)
        text = "\n".join(p.text for p in doc.paragraphs)
    return re.sub(r'[\w\.-]+@[\w\.-]+', '[EMAIL]', text)

# 3) Scoring heuristics
# Placeholder job description; replace the text before scoring resumes against it.
GENERIC_JD = "<PASTE YOUR JOB DESCRIPTION HERE>"

def score_exp(txt:str)->float:
    """Estimate years of experience from the span of 20xx years in the text.

    Args:
        txt: Resume text.

    Returns:
        Latest year minus earliest year among the standalone four-digit years
        starting with "20", capped at 10. Returns 0 when fewer than two such
        years are present.
    """
    # The lookarounds reject year-like digits embedded in longer numbers
    # (phone numbers, IDs), which would otherwise stretch the span.
    years = [int(y) for y in re.findall(r'(?<!\d)20\d{2}(?!\d)', txt)]
    if len(years) < 2:
        return 0
    return min(max(years) - min(years), 10)

def score_edu(txt:str)->float:
    """Score the highest education level mentioned in the text.

    Args:
        txt: Resume text (matched case-insensitively).

    Returns:
        10 for a doctorate, 8 for a master's, 6 for a bachelor's, otherwise 4.
    """
    t = txt.lower()
    # Besides the plain substrings, accept the punctuated spellings "Ph.D." and
    # "Ph. D". The word boundaries keep names such as "Joseph D." from matching.
    if re.search(r'\bph\.?\s?d\b', t) or 'phd' in t or 'doctor' in t:
        return 10
    if 'master' in t:
        return 8
    if 'bachelor' in t:
        return 6
    return 4

# Phrases whose presence (case-insensitive substring) counts toward the keyword score.
AI_KW = [
    'machine learning',
    'deep learning',
    'python',
    'tensorflow',
    'pytorch',
    'nlp',
    'data science',
]

def score_kw(txt:str)->float:
    """Count how many AI_KW phrases occur in the text, capped at 10."""
    lowered = txt.lower()
    hits = 0
    for phrase in AI_KW:
        if phrase in lowered:
            hits += 1
    return hits if hits < 10 else 10

def score_jd(txt:str, jd:str=None)->float:
    """Score how closely the resume text matches a job description.

    Both texts are vectorised together with TF-IDF (English stop words
    removed) and compared by cosine similarity.

    Args:
        txt: Resume text.
        jd: Job description to compare against. Defaults to GENERIC_JD.

    Returns:
        Cosine similarity scaled to 0-10. Returns 0.0 when either text is
        blank or when no usable vocabulary remains after stop-word removal.
    """
    reference = GENERIC_JD if jd is None else jd
    if not txt.strip() or not reference.strip():
        return 0.0
    try:
        vect = TfidfVectorizer(stop_words='english').fit_transform([txt, reference])
    except ValueError:
        # Raised when both documents contain only stop words (empty vocabulary).
        return 0.0
    return float(cosine_similarity(vect[0:1], vect[1:2])[0][0]) * 10

def score_fmt(txt:str)->float:
    """Award 3 points per standard section heading found in the text, capped at 10."""
    lowered = txt.lower()
    found = [hdr for hdr in ('experience', 'education', 'skills') if hdr in lowered]
    return min(3 * len(found), 10)


In [None]:
# Use the GPU when one is attached; otherwise fall back to CPU instead of
# crashing on a hard-coded "cuda" device.
_use_cuda = torch.cuda.is_available()

# Pass the token explicitly so gated/private checkpoints can be downloaded;
# an empty token is sent as None (anonymous access).
tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_ID, use_fast=True, token=HF_TOKEN or None)
# NOTE(review): trust_remote_code=True executes Python shipped in the model
# repository (see the download warnings in this cell's output). Pin a
# `revision` or drop the flag if the installed transformers supports the model natively.
model     = AutoModelForCausalLM.from_pretrained(
                HF_MODEL_ID,
                trust_remote_code=True,
                token=HF_TOKEN or None,
                # Half precision on GPU: the default float32 load needs roughly
                # twice the memory of the ~14 GB of downloaded shards.
                torch_dtype=torch.float16 if _use_cuda else torch.float32,
            ).to("cuda" if _use_cuda else "cpu")
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if _use_cuda else -1)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

configuration_falcon.py:   0%|          | 0.00/7.16k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b:
- configuration_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.



modeling_falcon.py:   0%|          | 0.00/56.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b:
- modeling_falcon.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
def generate_feedback(name:str, excerpt:str, scores:dict) -> str:
    """Draft a feedback e-mail for a candidate with the text-generation pipeline.

    Args:
        name: Candidate name shown in the prompt.
        excerpt: Resume text excerpt given to the model as context.
        scores: Mapping with the keys 'exp', 'edu', 'kw', 'jd' and 'fmt'.

    Returns:
        The newly generated text only (the prompt is not echoed back),
        stripped of surrounding whitespace.

    Raises:
        KeyError: If `scores` lacks one of the expected keys.
    """
    prompt = f"""
You are an expert AI resume coach.
Candidate: {name}

Resume excerpt:
{excerpt}

Scores:
- Experience: {scores['exp']}/10
- Education: {scores['edu']}/10
- AI Keywords: {scores['kw']}/10
- JD Match: {scores['jd']}/10
- Formatting: {scores['fmt']}/10

Write a concise feedback email (include subject) summarizing strengths, improvements, and overall score out of 100.
"""
    outputs = generator(
        prompt,
        # Budget for generated tokens only; max_length would also count the prompt.
        max_new_tokens=512,
        # temperature only takes effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        # Return just the continuation so the prompt is not mailed to the candidate.
        return_full_text=False,
    )
    return outputs[0]['generated_text'].strip()


In [None]:
# Cell X: Define Tasks with expected_output and instantiate CrewAI

from crewai import Task, Crew


def _score_resume(path: str) -> dict:
    """Parse the resume at `path` and run every heuristic scorer on its text."""
    txt = parse_and_mask(path)
    return {
        "exp": score_exp(txt),
        "edu": score_edu(txt),
        "kw":  score_kw(txt),
        "jd":  score_jd(txt),
        "fmt": score_fmt(txt),
    }


def _draft_feedback(path: str, scores: dict) -> str:
    """Generate the feedback text for the resume at `path`.

    The candidate name is the part of the file name before the first "_".
    """
    return generate_feedback(
        name=path.split("/")[-1].split("_", 1)[0],
        excerpt=parse_and_mask(path)[:2000],
        scores=scores,
    )


def _send_feedback(path: str, fb: str):
    """E-mail the feedback text `fb` to the address encoded in the file name.

    The recipient is the part of the file name after the first "_", without
    the extension (e.g. ".../Ann_ann@example.com.pdf").
    NOTE(review): fetch_resumes saves attachments under their original names,
    so confirm that incoming files actually follow this convention.

    Returns:
        The dict of refused recipients returned by smtplib (empty on success).

    Raises:
        ValueError: If the file name contains no "_" separator.
    """
    filename = path.split("/")[-1]
    name_parts = filename.split("_", 1)
    if len(name_parts) < 2:
        raise ValueError(f"Cannot derive a recipient address from file name: {filename!r}")
    recipient = name_parts[1].rsplit(".", 1)[0]

    # A structured message carries proper headers and handles non-ASCII text,
    # which a bare string passed to sendmail() does not.
    message = EmailMessage()
    message["From"] = EMAIL_USER
    message["To"] = recipient
    message["Subject"] = "Feedback on your resume"
    message.set_content(fb)

    # Authenticate before sending, and close the connection when done.
    with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
        smtp.login(EMAIL_USER, EMAIL_PASS)
        return smtp.send_message(message)


# NOTE(review): fetch_agent, parse_agent, score_agent, feedback_agent and
# email_agent are not defined in any cell visible here; they must be created
# (crewai.Agent) before this cell runs on a fresh kernel.
# NOTE(review): `func=` is kept from the original code; confirm that the
# installed CrewAI version's Task accepts it.
tasks = [
    Task(
        description="Fetch resumes from email",
        func=fetch_resumes,
        expected_output="Resume files saved in /content/resumes",
        agent=fetch_agent
    ),
    Task(
        description="Parse and mask resumes",
        func=parse_and_mask,
        expected_output="Extracted text with email addresses redacted",
        agent=parse_agent
    ),
    Task(
        description="Score resume text",
        func=_score_resume,
        expected_output="Dictionary of heuristic scores",
        agent=score_agent
    ),
    Task(
        description="Generate feedback email",
        func=_draft_feedback,
        expected_output="Personalized feedback email content",
        agent=feedback_agent
    ),
    Task(
        description="Send feedback email",
        func=_send_feedback,
        expected_output="Feedback email delivered to candidate",
        agent=email_agent
    ),
]

# Instantiate the Crew with keyword args. It was previously wrapped in a string
# literal, so `crew` was never bound and the later crew.kickoff() cell could not run.
crew = Crew(
    agents=[fetch_agent, parse_agent, score_agent, feedback_agent, email_agent],
    tasks=tasks,
    verbose=True
)
print("CrewAI pipeline configured successfully. Now run crew.kickoff().")


CrewAI pipeline configured successfully. Now run crew.kickoff().


In [None]:
# Ensure log file exists
# Path of the processing log; a header-only CSV is written the first time so
# later cells can read it unconditionally.
log_path = f"{LOG_DIR}/process_log.csv"
log_already_there = os.path.exists(log_path)
if not log_already_there:
    empty_log = pd.DataFrame(columns=['file','email','time','status'])
    empty_log.to_csv(log_path, index=False)


In [None]:
# Run the configured crew and show whatever it returns.
result = crew.kickoff()
print("CrewAI result:", result)

# Append statuses to log
# NOTE(review): this assumes the kickoff result exposes `.steps`, and that each
# step has a dict-like `.input` and a `.status`. Confirm against the installed
# CrewAI version.
logs = pd.read_csv(log_path)
new_rows = [
    {
        'file' : step.input.get('path', step.input),
        'email': step.input.get('email', ''),
        'time' : datetime.now(),
        'status': step.status,
    }
    for step in result.steps
]
if new_rows:
    # DataFrame.append was removed in pandas 2.0; build the new rows once and
    # concatenate them in a single call.
    logs = pd.concat([logs, pd.DataFrame(new_rows)], ignore_index=True)
logs.to_csv(log_path, index=False)
print("Logs updated at", log_path)
