In [9]:
# =========================
# AUTOMATED RESUME ANALYZER
# =========================

import re
from PyPDF2 import PdfReader
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sentence_transformers import SentenceTransformer, util
def evaluate_resume_only(clean_resume_text):
    """
    Score a resume against a fixed technical-support skill baseline.

    Used when no job description is available. The baseline keywords are
    handed to ``extract_skills`` together with the resume text; the score is
    the number of skills it returns divided by the number of baseline
    entries, expressed as a percentage.

    Args:
        clean_resume_text: Resume text, passed unchanged to ``extract_skills``.

    Returns:
        dict with the keys
            "baseline_score"  - coverage percentage rounded to 2 decimals
            "readiness_level" - "Strong ..." (>= 70), "Moderate ..." (>= 50)
                                or "Basic ..." Technical Support Readiness
            "skills_found"    - the value returned by ``extract_skills``
            "suggestions"     - advice strings for selected missing skills
    """
    # Note: "rca" and "root cause analysis" are separate baseline entries,
    # so each one counts on its own toward the coverage percentage.
    baseline_keywords = [
        "linux", "unix", "shell", "bash",
        "sql", "data analysis",
        "xml", "json", "csv", "api",
        "troubleshooting", "technical support", "support",
        "rca", "root cause analysis",
        "log analysis", "automation",
        "communication", "networking", "b2b",
    ]

    found = extract_skills(clean_resume_text, baseline_keywords)
    coverage_pct = (len(found) / len(baseline_keywords)) * 100

    # (skill to look for, advice emitted when that skill is absent).
    # The last entry fires whenever the long form is missing, even if the
    # abbreviation "rca" was detected.
    advice_when_missing = (
        ("communication",
         "Consider highlighting communication skills explicitly."),
        ("data analysis",
         "Adding data analysis experience may improve ATS performance."),
        ("root cause analysis",
         "Spell out 'root cause analysis (RCA)' at least once."),
    )
    tips = [
        message
        for skill, message in advice_when_missing
        if skill not in found
    ]

    # Highest threshold first; fall back to the lowest tier.
    readiness = "Basic Technical Support Readiness"
    for floor, label in (
        (70, "Strong Technical Support Readiness"),
        (50, "Moderate Technical Support Readiness"),
    ):
        if coverage_pct >= floor:
            readiness = label
            break

    return {
        "baseline_score": round(coverage_pct, 2),
        "readiness_level": readiness,
        "skills_found": found,
        "suggestions": tips,
    }

def run_resume_analyzer(
    resume_path="resumes/Kshitij_Gawankar.pdf",
    jd_path=None,
    verbose=True
):
    """
    Analyze a PDF resume, optionally against a job description file.

    The resume is always read via ``extract_pdf_text`` and normalised with
    ``clean_text`` first. What happens next depends on ``jd_path``:

    * falsy ``jd_path``  -> resume-only mode: returns the dict produced by
      ``evaluate_resume_only`` with an added ``"mode": "Resume-only"`` entry.
    * truthy ``jd_path`` -> JD-based mode: the file is read as UTF-8 text,
      skills are extracted from both texts using the notebook-level
      ``SKILLS`` list, and both texts are embedded with the notebook-level
      ``model`` to compute a cosine similarity.

    Args:
        resume_path: Path of the resume PDF.
        jd_path: Path of a plain-text job description, or None.
        verbose: Accepted for callers but not read anywhere in this function.

    Returns:
        Resume-only mode: see ``evaluate_resume_only``, plus "mode".
        JD-based mode: dict with "mode", "ats_score" (cosine similarity
        times 100, rounded to 2 decimals), "matched_skills" (sorted skills
        present in both texts) and "missing_skills" (sorted skills found in
        the JD but not in the resume).
    """
    raw_resume = extract_pdf_text(resume_path)
    resume_clean = clean_text(raw_resume)

    # ---- NO JD: baseline evaluation only ----
    if not jd_path:
        report = evaluate_resume_only(resume_clean)
        report.update(mode="Resume-only")
        return report

    # ---- JD PROVIDED ----
    with open(jd_path, "r", encoding="utf-8") as jd_file:
        raw_jd = jd_file.read()
    jd_clean = clean_text(raw_jd)

    skills_in_resume = extract_skills(resume_clean, SKILLS)
    skills_in_jd = extract_skills(jd_clean, SKILLS)

    # Embed resume first, then JD; the score is their cosine similarity
    # scaled to a percentage.
    resume_vec = model.encode(resume_clean, convert_to_tensor=True)
    jd_vec = model.encode(jd_clean, convert_to_tensor=True)
    similarity_pct = util.cos_sim(resume_vec, jd_vec).item() * 100

    resume_set = set(skills_in_resume)
    jd_set = set(skills_in_jd)

    return {
        "mode": "JD-based",
        "ats_score": round(similarity_pct, 2),
        "matched_skills": sorted(resume_set.intersection(jd_set)),
        "missing_skills": sorted(jd_set.difference(resume_set)),
    }


In [12]:
# Resume-only mode: no job description path is supplied.
run_resume_analyzer(resume_path="resumes/Kshitij_Gawankar.pdf")

{'baseline_score': 70.0,
 'readiness_level': 'Strong Technical Support Readiness',
 'skills_found': ['api',
  'automation',
  'communication',
  'csv',
  'json',
  'linux',
  'log analysis',
  'rca',
  'shell',
  'sql',
  'support',
  'technical support',
  'troubleshooting',
  'xml'],
 'suggestions': ['Adding data analysis experience may improve ATS performance.',
  "Spell out 'root cause analysis (RCA)' at least once."],
 'mode': 'Resume-only'}

In [11]:
# JD-based mode: the resume is compared against a plain-text job description.
run_resume_analyzer(
    resume_path="resumes/Kshitij_Gawankar.pdf",
    jd_path="job_descriptions/jd.txt",
)

{'mode': 'JD-based',
 'ats_score': 74.79,
 'matched_skills': ['api',
  'automation',
  'communication',
  'csv',
  'linux',
  'python',
  'shell',
  'support',
  'technical support',
  'xml'],
 'missing_skills': ['data analysis',
  'networking',
  'root cause analysis',
  'unix']}