In [7]:
import importlib

required_libraries = [
    "pdfplumber", "pandas", "numpy", "nltk",
    "sentence_transformers", "sklearn"
]

for lib in required_libraries:
    try:
        importlib.import_module(lib)
        print(f"{lib} ✅ Installed")
    except ImportError:
        print(f"{lib} ❌ Not Installed")


pdfplumber ✅ Installed
pandas ✅ Installed
numpy ✅ Installed
nltk ✅ Installed

sentence_transformers ✅ Installed
sklearn ✅ Installed


In [13]:
# Resume Analyzer & Job Matcher with Skill Suggestions using NLP in Jupyter Notebook

# Step 1: Install required packages
!pip install sentence-transformers pdfplumber pandas scikit-learn nltk

# Step 2: Import libraries
import pdfplumber
import pandas as pd
import numpy as np
import nltk
from sentence_transformers import SentenceTransformer, util

nltk.download('punkt')

# Step 3: Define utility functions

def extract_text_from_pdf(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        text = ''
        for page in pdf.pages:
            text += page.extract_text()
    return text

def extract_skills(text, skill_list):
    text = text.lower()
    return [skill for skill in skill_list if skill in text]

def load_job_data(csv_path):
    return pd.read_csv(csv_path)

def match_jobs(resume_text, job_df, model, top_n=5):
    resume_embedding = model.encode(resume_text, convert_to_tensor=True)
    job_embeddings = model.encode(job_df["JobDescription"].tolist(), convert_to_tensor=True)
    similarities = util.cos_sim(resume_embedding, job_embeddings)[0].cpu().numpy()
    job_df["MatchScore"] = similarities
    return job_df.sort_values("MatchScore", ascending=False).head(top_n)

def detect_skill_gap(resume_skills, job_skills):
    return set(job_skills) - set(resume_skills)

def suggest_skills(top_jobs):
    all_required = set(skill for skills in top_jobs["RequiredSkills"] for skill in skills)
    all_gaps = set(skill for gaps in top_jobs["SkillGap"] for skill in gaps)
    recommended = all_required.intersection(all_gaps)
    return sorted(recommended)

def run_resume_matcher(resume_pdf_path, job_csv_path, skill_list):
    resume_text = extract_text_from_pdf(resume_pdf_path)
    job_df = load_job_data(job_csv_path)
    model = SentenceTransformer('all-MiniLM-L6-v2')

    top_jobs = match_jobs(resume_text, job_df, model, top_n=5)
    resume_skills = extract_skills(resume_text, skill_list)
    top_jobs["RequiredSkills"] = top_jobs["JobDescription"].apply(lambda x: extract_skills(x, skill_list))
    top_jobs["SkillGap"] = top_jobs["RequiredSkills"].apply(lambda x: list(detect_skill_gap(resume_skills, x)))

    suggested_skills = suggest_skills(top_jobs)

    return top_jobs[["JobTitle", "MatchScore", "RequiredSkills", "SkillGap"]], suggested_skills

# Step 4: Define a broad skill list for matching
skills = [
    'python', 'java', 'c++', 'sql', 'nosql', 'excel', 'tableau', 'power bi', 'pandas', 'numpy',
    'matplotlib', 'seaborn', 'scikit-learn', 'tensorflow', 'keras', 'pytorch', 'nlp',
    'natural language processing', 'transformers', 'huggingface', 'bert', 'gpt',
    'data visualization', 'data preprocessing', 'data wrangling', 'data analysis',
    'deep learning', 'machine learning', 'cloud computing', 'aws', 'azure', 'gcp',
    'linux', 'git', 'docker', 'kubernetes', 'communication', 'teamwork', 'problem solving',
    'critical thinking', 'neural networks', 'classification', 'regression',
    'clustering', 'recommendation systems', 'text classification', 'named entity recognition',
    'time series', 'feature engineering', 'statistical analysis', 'model evaluation'
]

# Step 5: Run the matcher with your actual files
matched_jobs, recommended_skills = run_resume_matcher("resume.pdf", "job_descriptions.csv", skills)

print("\nTop Matching Jobs:\n")
display(matched_jobs)

print("\n💡 Suggested Skills to Learn (Skill Gap Across Top Jobs):\n")
print(", ".join(recommended_skills))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\singa\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
  return forward_call(*args, **kwargs)


Unnamed: 0,JobTitle,MatchScore,RequiredSkills,SkillGap
0,Data Scientist,0.470481,"[python, sql, pandas, data visualization, mach...",[data visualization]
2,NLP Engineer,0.3378,"[python, nlp, transformers, classification, te...","[text classification, named entity recognition..."
3,Business Analyst,0.310052,"[sql, excel, power bi, communication]","[power bi, excel]"
4,AI Researcher,0.295483,"[python, deep learning, neural networks]",[neural networks]
1,Machine Learning Engineer,0.283514,"[python, tensorflow, keras, deep learning]",[]
