# InterviewMate - Resume vs Job Description (Fit vs Missing each one)

In [51]:
# %pip install spacy
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     --------------------------------------- 0.0/12.8 MB 245.8 kB/s eta 0:00:52
     --------------------------------------- 0.1/12.8 MB 595.3 kB/s eta 0:00:22
     - -------------------------------------- 0.4/12.8 MB 1.6 MB/s eta 0:00:08
     -- ------------------------------------- 0.9/12.8 MB 3.4 MB/s eta 0:00:04
     ------ --------------------------------- 2.2/12.8 MB 7.0 MB/s eta 0:00:02
     ----------- ---------------------------- 3.8/12.8 MB 10.6 MB/s eta 0:00:01
     ----------------- ---------------------- 5.6/12.8 MB 13.8 MB/s eta 0:00:01
     ------------------ --------------------- 5.9/1


[notice] A new release of pip is available: 24.0 -> 25.3
[notice] To update, run: C:\Users\nguye\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [52]:
import pdfplumber
import re
import nltk
import spacy
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt')


nlp = spacy.load("en_core_web_sm")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\nguye\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


### 1Ô∏è‚É£ PDF ‚Üí Text

In [53]:
def extract_text_from_pdf(pdf_path):
    text = ""
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
    return text

### 2Ô∏è‚É£ Cleaning

In [54]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^\w\s]', ' ', text)
    return text.strip()

### 3Ô∏è‚É£ Auto Skill Extraction (Smart)

In [55]:
COMMON_TECH_SKILLS = [
    "python","java","c++","c#","go","javascript","typescript",
    "react","angular","node","sql","mysql","postgres","mongodb",
    "aws","gcp","azure","docker","kubernetes","microservices",
    "machine learning","ml","nlp","ai","tensorflow","pytorch",
    "git","devops","rest","graphql","spark","hadoop","redis"
]

def extract_skills(text):
    doc = nlp(text)
    keywords = set()

    # Named entities & noun chunks
    for chunk in doc.noun_chunks:
        keywords.add(chunk.text.lower())

    for ent in doc.ents:
        keywords.add(ent.text.lower())

    # Match against known tech terms
    for skill in COMMON_TECH_SKILLS:
        if skill.lower() in text.lower():
            keywords.add(skill.lower())

    # Cleanup
    filtered = set([k.strip() for k in keywords if len(k.strip()) > 2])
    return list(filtered)


### 4Ô∏è‚É£ Fit vs Missing

In [56]:
def normalize_skill(s):
    s = s.lower().strip()
    s = re.sub(r'[^a-z0-9+.# ]', ' ', s)
    s = re.sub(r'\s+', ' ', s)
    return s

STOP_PHRASES = {
    "strong experience",
    "a plus",
    "knowledge",
    "experience",
    "software engineer",
}

def clean_skill_list(skills):
    final = set()

    for s in skills:
        s = normalize_skill(s)

        # skip empty / meaningless
        if len(s) < 2:
            continue
        
        # remove stop phrases
        if s in STOP_PHRASES:
            continue
        
        # split combined strings
        parts = re.split(r'[ ,/|]+', s)

        for p in parts:
            p = p.strip()
            if len(p) < 2:
                continue
            final.add(p)

    return list(final)

def match_skills(resume_text, jd_text):
    resume = resume_text.lower()
    jd = jd_text.lower()

    jd_skills_raw = extract_skills(jd_text)
    resume_skills_raw = extract_skills(resume_text)

    jd_skills = clean_skill_list(jd_skills_raw)
    resume_skills = clean_skill_list(resume_skills_raw)

    jd_set = set(jd_skills)
    resume_set = set(resume_skills)

    fit = sorted(list(jd_set & resume_set))
    missing = sorted(list(jd_set - resume_set))

    return jd_skills, fit, missing, resume_skills


### Load Embedding Model

In [57]:
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

### üì• Input Section

In [58]:
resume_test = 'test_resume.pdf'
resume_text = extract_text_from_pdf(resume_test)
resume_clean = clean_text(resume_text)
print("Resume extracted successfully!")
print(resume_clean)

Resume extracted successfully!
benjamin shah address  123 anywhere st   any city phone  123 456 7890 email  hello reallygreatsite com www reallygreatsite com website  summary results oriented mechanical and mechatronics engineer seeking a challenging position to apply expertise in designing and implementing innovative solutions for complex engineering challenges  proven track record of success in project management  problem solving  and cross functional collaboration  adept at utilising cutting edge technologies to optimise processes and enhance overall efficiency  work experience mechatronics engineer  borcelle technologies jan 2023   present led development of an advanced automation system  achieving a 15  increase in operational efficiency  streamlined manufacturing processes  reducing production costs by 10   implemented preventive maintenance strategies  resulting in a 20  decrease in equipment downtime  system engineer  arrowai industries feb 2021   dec 2022 designed and optimise

In [59]:
jd_text = """
We are seeking a Mechatronics / Automation Engineer to design, develop, and optimize advanced automation and robotic systems. The ideal candidate will have strong experience in mechatronics engineering, manufacturing system improvement, automation project lifecycle management, and implementation of preventive maintenance strategies.

Responsibilities:
- Lead development and optimisation of automation and robotic control systems
- Improve manufacturing processes to increase performance, reduce downtime, and optimise operational efficiency
- Conduct system validation, testing, and compliance with industry standards
- Execute feasibility studies and risk assessments for engineering projects
- Work cross-functionally with engineering teams, clients, and stakeholders to deliver innovative technical solutions
- Implement preventive maintenance and reliability improvement initiatives
- Support continuous improvement, quality, and safety initiatives

Required Skills:
- Mechatronics System Integration
- Robotics and Automation
- Mechanical and Mechatronics Engineering
- Project Management
- Manufacturing Process Improvement
- CAD for Mechatronics
- Knowledge of automotive / industrial systems
- Strong problem-solving, analytical and collaboration skills

"""
jd_clean = clean_text(jd_text)

print("JD Loaded Successfully!")
print(jd_clean)

JD Loaded Successfully!
we are seeking a mechatronics   automation engineer to design  develop  and optimize advanced automation and robotic systems  the ideal candidate will have strong experience in mechatronics engineering  manufacturing system improvement  automation project lifecycle management  and implementation of preventive maintenance strategies  responsibilities    lead development and optimisation of automation and robotic control systems   improve manufacturing processes to increase performance  reduce downtime  and optimise operational efficiency   conduct system validation  testing  and compliance with industry standards   execute feasibility studies and risk assessments for engineering projects   work cross functionally with engineering teams  clients  and stakeholders to deliver innovative technical solutions   implement preventive maintenance and reliability improvement initiatives   support continuous improvement  quality  and safety initiatives required skills    me

### üîç Embedding + Similarity Score

In [60]:
resume_vec = model.encode([resume_clean])
jd_vec = model.encode([jd_clean])

similarity_score = float(cosine_similarity(resume_vec, jd_vec)[0][0])
match_percentage = round(similarity_score * 100, 2)

print("Resume Match Score:", match_percentage, "%")


Resume Match Score: 74.52 %


In [61]:
jd_skills, fit_skills, missing_skills, resume_skills= match_skills(resume_clean, jd_clean)

print("JD Extracted Skills:", jd_skills)
print("Resume Extracted Skills", resume_skills)
print("Matched Skills:", fit_skills)
print("Missing Skills:", missing_skills)


JD Extracted Skills: ['development', 'initiatives', 'stakeholders', 'improvement', 'risk', 'analytical', 'control', 'skills', 'optimisation', 'manufacturing', 'feasibility', 'maintenance', 'processes', 'performance', 'testing', 'problem', 'cad', 'responsibilities', 'downtime', 'project', 'efficiency', 'validation', 'systems', 'ideal', 'robotics', 'system', 'engineer', 'conduct', 'industry', 'the', 'studies', 'reliability', 'industrial', 'standards', 'safety', 'collaboration', 'solutions', 'and', 'mechatronics', 'strategies', 'projects', 'work', 'innovative', 'teams', 'continuous', 'knowledge', 'strong', 'automation', 'engineering', 'technical', 'process', 'advanced', 'integration', 'robotic', 'clients', 'operational', 'compliance', 'management', 'preventive', 'assessments', 'implementation', 'automotive', 'quality', 'candidate']
Resume Extracted Skills ['development', 'design', 'inspiring', 'record', 'production', 'benjamin', 'summary', '25', 'aug', 'testing', 'cad', 'optimise', 'equip

In [62]:
import pandas as pd

summary_df = pd.DataFrame({
    "Metric": [
        "Overall Match Score",
        "Result Category",
        "Total JD Skills",
        "Skills Matched",
        "Skills Missing"
    ],
    "Value": [
        f"{match_percentage}%",
        "‚≠ê Strong Match ‚Äì Great Fit" if match_percentage >= 80 
        else "üëç Moderate Match ‚Äì Trainable Fit" if match_percentage >= 60
        else "‚ö†Ô∏è Weak Match ‚Äì Needs Improvement",
        len(jd_skills),
        len(fit_skills),
        len(missing_skills)
    ]
})

summary_df


Unnamed: 0,Metric,Value
0,Overall Match Score,74.52%
1,Result Category,üëç Moderate Match ‚Äì Trainable Fit
2,Total JD Skills,64
3,Skills Matched,42
4,Skills Missing,22


In [63]:
# Safely create DataFrame when lists have different lengths by using Series
skills_compare_df = pd.DataFrame({
    "Fit Skill": pd.Series(fit_skills),
    "Missing Skill": pd.Series(missing_skills)
})

skills_compare_df


Unnamed: 0,Fit Skill,Missing Skill
0,advanced,analytical
1,and,candidate
2,assessments,collaboration
3,automation,conduct
4,automotive,continuous
5,cad,ideal
6,clients,implementation
7,compliance,industrial
8,control,initiatives
9,development,knowledge
