# In [None]:
import fitz  # PyMuPDF
import spacy

# ---------- STEP 1: Extract text from PDF resume ----------

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output, in the
        order the pages are iterated.  No separator is inserted between
        pages.

    Raises:
        Whatever ``fitz.open`` or ``page.get_text`` raise; nothing is caught
        here.
    """
    # The context manager closes the document even when extracting a page
    # fails part-way through, so the file handle is never leaked.
    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string with += on every page.
        return "".join(page.get_text() for page in doc)

# ---------- STEP 2: Define required job skills ----------

# Skills the job requires.  Entries are lowercase because the resume text is
# lowercased before it is compared against this set.
job_skills = {
    "data analysis",
    "data preprocessing",
    "deep learning",
    "django",
    "flask",
    "keras",
    "machine learning",
    "nlp",
    "opencv",
    "python",
    "sql",
    "tensorflow",
    "wordpress",
}

# ---------- STEP 3: Process resume text using spaCy ----------

# Load the "en_core_web_sm" spaCy pipeline once, at module level, so that
# extract_skills_from_text can reuse it instead of reloading it per call.
nlp = spacy.load("en_core_web_sm")

def extract_skills_from_text(text):
    """Return the entries of ``job_skills`` that are mentioned in ``text``.

    The text is lowercased and tokenized with the module-level ``nlp``
    pipeline.  Every run of consecutive tokens is then compared, with its
    whitespace collapsed to single spaces, against the lowercased skill
    names.  Compared with matching only whole noun chunks and single tokens,
    this also finds a multi-word skill when it sits inside a longer phrase
    ("strong machine learning skills") or is split by a line break, which is
    common in text extracted from PDFs.

    Args:
        text: Resume text to search.

    Returns:
        A set holding the matched skills, spelled exactly as they appear in
        ``job_skills``.  The set is empty when nothing matches or when
        ``job_skills`` is empty.
    """
    # Normalized spelling -> spelling used in job_skills, so a skill stored
    # with capitals or irregular spacing can still match and is returned in
    # its original form.
    wanted = {}
    for skill in job_skills:
        key = " ".join(skill.lower().split())
        if key:
            wanted[key] = skill
    if not wanted:
        return set()

    # No candidate longer than the longest skill name can ever match; this
    # bounds the inner loop below.
    longest = max(map(len, wanted))

    doc = nlp(text.lower())  # Lowercase for consistency
    tokens = list(doc)
    found = set()

    for start, first in enumerate(tokens):
        if first.is_space:
            # A skill never begins with a whitespace-only token.
            continue
        window = ""
        for end in range(start, len(tokens)):
            # text_with_ws keeps the original spacing, so tokens that were
            # adjacent in the text (no space between them) stay adjacent.
            window += tokens[end].text_with_ws
            candidate = " ".join(window.split())
            if len(candidate) > longest:
                break
            skill = wanted.get(candidate)
            if skill is not None:
                found.add(skill)

    return found

# ---------- STEP 4: Compare and Score Resume ----------

def match_resume_to_job(pdf_path):
    """Print how well the resume at ``pdf_path`` covers ``job_skills``.

    The resume text is extracted with ``extract_text_from_pdf`` and the
    skills in it are found with ``extract_skills_from_text``.  The function
    then prints the extracted skills, the required skills that were found,
    the required skills that are missing, and the share of required skills
    found as a percentage with two decimals.

    Args:
        pdf_path: Path of the resume PDF.

    Returns:
        None.  The result is only printed.
    """
    resume_skills = extract_skills_from_text(extract_text_from_pdf(pdf_path))

    matched = resume_skills & job_skills
    unmatched = job_skills - resume_skills
    # With no required skills there is nothing to divide by; report 0 %
    # instead of raising ZeroDivisionError.
    if job_skills:
        match_score = len(matched) / len(job_skills) * 100
    else:
        match_score = 0.0

    print(f"\n🔍 Extracted Resume Skills: {resume_skills}")
    print(f"✅ Matched Skills: {matched}")
    print(f"❌ Missing Skills: {unmatched}")
    print(f"\n📊 Resume Match Score: {match_score:.2f}%")

# ---------- RUN ----------

# Replace with your resume file path
# NOTE: these two statements sit at module top level, so the report is
# printed whenever this file is executed (or imported).
pdf_file = "sample_resume.pdf"
match_resume_to_job(pdf_file)
