In [1]:
!pip install spacy nltk pdfplumber docx2txt scikit-learn torch transformers faiss-cpu requests gradio
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     -------------- ------------------------- 4.7/12.8 MB 25.9 MB/s eta 0:00:01
     --------------------------------------  12.6/12.8 MB 41.5 MB/s eta 0:00:01
     --------------------------------------  12.6/12.8 MB 41.5 MB/s eta 0:00:01
     --------------------------------------- 12.8/12.8 MB 18.7 MB/s eta 0:00:00
[38;5;2m[+] Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [3]:
import os
import json
import pdfplumber
import docx2txt
import requests
import spacy
import gradio as gr
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sentence_transformers import SentenceTransformer
from nltk.tokenize import word_tokenize


In [4]:
import nltk
# Fetch the NLTK resources used by this notebook (tokenizer models and stopword lists).
for _nltk_resource in ("punkt_tab", "punkt", "stopwords"):
    nltk.download(_nltk_resource)

# Shared spaCy pipeline used by the extraction helpers defined in later cells.
nlp = spacy.load("en_core_web_sm")


[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\GAD\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\GAD\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\GAD\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
from openai import OpenAI
 
# Name of the model served by the local Ollama instance.
MODEL = "llama3.2"

# Settings for talking to Ollama's native chat endpoint directly over HTTP.
HEADERS = {"Content-Type": "application/json"}
OLLAMA_API = "http://localhost:11434/api/chat"

# OpenAI-compatible client pointed at the local Ollama server.
# The api_key value is a placeholder string, not a real credential.
ollama_via_openai = OpenAI(
    api_key="ollama",
    base_url="http://localhost:11434/v1",
)

In [8]:
# Define job roles and required skills.
# Maps each job-role name to the set of skill names that calculate_match_score()
# treats as required for that role. The keys are also used as the choices of the
# job-role dropdown in the Gradio UI.
JOB_ROLES = {
    "Data Scientist": {"Python", "SQL", "Machine Learning", "Deep Learning", "NLP", "Statistics", "Pandas", "Scikit-Learn"},
    "Software Engineer": {"Python", "Java", "C++", "Git", "OOP", "Algorithms"},
    "Cloud Engineer": {"AWS", "Azure", "Docker", "Kubernetes", "Terraform", "Networking"},
    "Cybersecurity Analyst": {"Cybersecurity", "Ethical Hacking", "Network Security", "Penetration Testing"},
    "AI Engineer": {"Python", "TensorFlow", "PyTorch", "Machine Learning", "Deep Learning", "AI"}
}

# Predefined common skills.
# Skill names that extract_skills() looks for in the resume text.
# NOTE(review): this set does not contain every skill listed in JOB_ROLES
# (e.g. "Git", "OOP", "Terraform", "Penetration Testing", "AI" are absent).
COMMON_SKILLS = {
    "Python", "Java", "C++", "SQL", "Machine Learning", "Deep Learning", "NLP", "Pandas", "Scikit-Learn",
    "TensorFlow", "PyTorch", "Data Analysis", "Cybersecurity", "Ethical Hacking", "AWS", "Azure", "Docker",
    "Kubernetes", "Flask", "Django", "Linux", "JavaScript", "React", "Node.js", "Computer Vision", "Statistics",
    "Mathematics", "Tableau", "Power BI", "Time Management", "Problem Solving", "Communication", "Teamwork"
}

# Learning recommendations for missing skills.
# Maps a skill name to a course URL. calculate_match_score() looks up each
# missing skill here and falls back to 'No course available' when a skill has
# no entry.
LEARNING_RESOURCES = {
    "Python": "https://www.udemy.com/course/python-for-data-science-and-machine-learning-bootcamp/",
    "SQL": "https://www.coursera.org/learn/sql-for-data-science",
    "Machine Learning": "https://www.coursera.org/learn/machine-learning",
    "Deep Learning": "https://www.udemy.com/course/deep-learning-a-z/",
    "NLP": "https://www.udemy.com/course/nlp-natural-language-processing-with-python/",
    "Statistics": "https://www.khanacademy.org/math/statistics-probability",
    "AWS": "https://www.udemy.com/course/aws-certified-solutions-architect-associate/",
    "Cybersecurity": "https://www.udemy.com/course/the-complete-cyber-security-course-hacker-exposed/",
}

def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file.

    Args:
        pdf_file: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        The text of all pages, each page followed by a newline. Pages that
        yield no text are skipped. If no text is extracted at all, the string
        "Error extracting text from PDF" is returned instead.
    """
    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            # A page without a text layer can yield None or "", so it is
            # skipped instead of concatenated (None + "\n" raises TypeError).
            if page_text:
                page_texts.append(page_text + "\n")

    text = "".join(page_texts)
    return text if text else "Error extracting text from PDF"

def extract_skills(text):
    """Extract known skills from resume text.

    A skill is reported when its name occurs in the text as a whole term
    (case-insensitive), or when spaCy tags an entity whose text is exactly a
    known skill. The vocabulary is COMMON_SKILLS plus every skill required by
    any role in JOB_ROLES, so that role requirements missing from
    COMMON_SKILLS can still be detected.

    Args:
        text: Raw resume text.

    Returns:
        Alphabetically sorted list of the skill names found; empty for empty
        input.
    """
    import re  # local import: `re` is only imported by a later cell

    if not text:
        return []

    vocabulary = COMMON_SKILLS.union(*JOB_ROLES.values())
    text_lower = text.lower()
    extracted_skills = set()

    # Match predefined skills as whole terms. Lookarounds are used instead of
    # \b so names ending in punctuation ("C++") still match, and re.escape
    # keeps "+" and "." literal. This stops "Java" matching inside "JavaScript".
    for skill in vocabulary:
        pattern = rf"(?<!\w){re.escape(skill.lower())}(?!\w)"
        if re.search(pattern, text_lower):
            extracted_skills.add(skill)

    # Use spaCy for Named Entity Recognition (NER)
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ in ["ORG", "PERSON", "GPE", "FACILITY", "EVENT"]:  # Avoid extracting non-skills
            continue
        if ent.text in vocabulary:  # Extract only valid skills
            extracted_skills.add(ent.text)

    # Sorted so the result is stable from run to run.
    return sorted(extracted_skills)

def extract_summary(text):
    """Return the first sentence of the resume that has more than five words.

    Sentences come from the shared spaCy pipeline; word count is measured by
    whitespace splitting. If no sentence is long enough, the string
    "Summary not found." is returned.
    """
    long_sentences = (
        sentence.text
        for sentence in nlp(text).sents
        if len(sentence.text.split()) > 5
    )
    return next(long_sentences, "Summary not found.")

def calculate_match_score(resume_skills, job_role):
    """Score a resume's skills against the requirements of a job role.

    Args:
        resume_skills: Iterable of skill names found in the resume.
        job_role: Key into JOB_ROLES.

    Returns:
        A tuple ``(percentage, matched, missing, links)``: the share of
        required skills present (rounded to two decimals), the set of matched
        skills, the set of missing skills, and one "skill: url" string per
        missing skill. An unknown role gives ``(0.0, set(), set(), [])``.
    """
    required = JOB_ROLES.get(job_role, set())
    if len(required) == 0:
        return 0.0, set(), set(), []

    found = set(resume_skills) & required
    absent = required - found

    # One recommendation line per absent skill, with a fallback text when no
    # course is registered for it.
    recommendations = []
    for skill in absent:
        course = LEARNING_RESOURCES.get(skill, 'No course available')
        recommendations.append(f"{skill}: {course}")

    percentage = (len(found) / len(required)) * 100
    return round(percentage, 2), found, absent, recommendations

def process_resume(file, job_role):
    """Process resume, extract skills, summary, and calculate match score.

    Args:
        file: Upload value from the Gradio File component. Either an object
            exposing the file path as ``.name`` or the path itself; ``None``
            when nothing was uploaded.
        job_role: Role name selected in the dropdown (a key of JOB_ROLES).

    Returns:
        A formatted multi-line report string, or a short message when no file
        was uploaded or no text could be extracted from it.
    """
    if file is None:
        return "Please upload a resume (PDF) before checking the match score."

    # Accept both an upload object with `.name` and a plain path.
    pdf_path = getattr(file, "name", file)
    resume_text = extract_text_from_pdf(pdf_path)

    # extract_text_from_pdf signals failure with this sentinel string; report
    # it as-is rather than scoring the error message as if it were a resume.
    if resume_text == "Error extracting text from PDF":
        return resume_text

    extracted_skills = extract_skills(resume_text)
    summary = extract_summary(resume_text)
    match_score, matched_skills, missing_skills, learning_links = calculate_match_score(extracted_skills, job_role)

    def _listing(skills):
        # Sorted so that sets render in a stable order.
        return ', '.join(sorted(skills)) if skills else 'None'

    recommendations = "\n".join(learning_links) if learning_links else 'No recommendations'

    return (
        f" **Match Score:** {match_score}%\n\n"
        f" **Resume Summary:** {summary}\n\n"
        f" **Extracted Skills:** {_listing(extracted_skills)}\n\n"
        f" **Matched Skills:** {_listing(matched_skills)}\n\n"
        f" **Missing Skills:** {_listing(missing_skills)}\n\n"
        f" **Learning Recommendations:**\n{recommendations}"
    )

# Gradio UI
# Components are created inside the Blocks context in the order they should
# appear: two Markdown headers, the inputs, the result box, then the button.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Resume Skill Matcher & Learning Recommendations")
    gr.Markdown("Upload your resume and select a job role to check your match score and get learning recommendations.")

    # Inputs: the resume upload and the target role (one choice per JOB_ROLES key).
    file_input = gr.File(label="Upload Resume (PDF)")
    job_dropdown = gr.Dropdown(choices=list(JOB_ROLES.keys()), label="Select Job Role")
    # Read-only box that receives the string returned by process_resume.
    # NOTE(review): that string contains `**...**` markers; presumably a
    # Textbox shows them verbatim rather than as bold text — confirm.
    output_text = gr.Textbox(label="Result", interactive=False)

    # Clicking the button calls process_resume(file, job_role) and writes the
    # returned string into output_text.
    submit_button = gr.Button("Check Match Score")
    submit_button.click(fn=process_resume, inputs=[file_input, job_dropdown], outputs=output_text)

# Run the Gradio app
# NOTE(review): share=True asks Gradio for a public share link in addition to
# the local URL. Uploaded resumes can contain personal data, so confirm that a
# publicly reachable endpoint is intended.
demo.launch(share=True)


### Version 2: Using an LLM (LLaMA 3.2 via Ollama) for the resume summary

In [11]:
import threading

In [14]:
import re


# Job roles and their required skills. In this cell the sets are used both as
# the skill vocabulary scanned by extract_skills_fast() and as the per-role
# requirements in calculate_match_score().
# NOTE(review): same contents as the JOB_ROLES defined in the earlier cell;
# consider keeping a single definition.
JOB_ROLES = {
    "Data Scientist": {"Python", "SQL", "Machine Learning", "Deep Learning", "NLP", "Statistics", "Pandas", "Scikit-Learn"},
    "Software Engineer": {"Python", "Java", "C++", "Git", "OOP", "Algorithms"},
    "Cloud Engineer": {"AWS", "Azure", "Docker", "Kubernetes", "Terraform", "Networking"},
    "Cybersecurity Analyst": {"Cybersecurity", "Ethical Hacking", "Network Security", "Penetration Testing"},
    "AI Engineer": {"Python", "TensorFlow", "PyTorch", "Machine Learning", "Deep Learning", "AI"}
}

# Learning resources for missing skills.
# Maps a skill name to a course URL; calculate_match_score() falls back to
# 'No course available' for skills without an entry.
# NOTE(review): same contents as the LEARNING_RESOURCES defined in the earlier
# cell; consider keeping a single definition.
LEARNING_RESOURCES = {
    "Python": "https://www.udemy.com/course/python-for-data-science-and-machine-learning-bootcamp/",
    "SQL": "https://www.coursera.org/learn/sql-for-data-science",
    "Machine Learning": "https://www.coursera.org/learn/machine-learning",
    "Deep Learning": "https://www.udemy.com/course/deep-learning-a-z/",
    "NLP": "https://www.udemy.com/course/nlp-natural-language-processing-with-python/",
    "Statistics": "https://www.khanacademy.org/math/statistics-probability",
    "AWS": "https://www.udemy.com/course/aws-certified-solutions-architect-associate/",
    "Cybersecurity": "https://www.udemy.com/course/the-complete-cyber-security-course-hacker-exposed/",
}

def extract_text_from_pdf(pdf_file):
    """Return the text of a PDF, one newline-terminated chunk per page.

    Pages for which no text is extracted are left out. When nothing is
    extracted from any page, the string "Error extracting text from PDF" is
    returned instead.
    """
    with pdfplumber.open(pdf_file) as document:
        page_texts = [page.extract_text() for page in document.pages]

    combined = "".join(f"{chunk}\n" for chunk in page_texts if chunk)
    if not combined:
        return "Error extracting text from PDF"
    return combined

def extract_summary_with_llama(text):
    """Generate a professional summary using LLaMA 3.2.

    Sends the resume text in a single user message to the model named by
    MODEL through the OpenAI-compatible ``ollama_via_openai`` client.

    Args:
        text: Resume text to summarize.

    Returns:
        The model's reply with surrounding whitespace removed, or
        "Summary not found." when the reply carries no text.
    """
    response = ollama_via_openai.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": f"Summarize this resume in a professional tone: {text}"}]
    )

    # The reply's content can be absent (None); guard before calling .strip()
    # so a missing reply does not raise AttributeError.
    choices = response.choices
    content = choices[0].message.content if choices else None
    summary = (content or "").strip()
    return summary if summary else "Summary not found."

def extract_skills_fast(text):
    """Fast skill extraction using regex-based keyword matching.

    The vocabulary is every skill required by any role in JOB_ROLES. A skill
    is reported when its name occurs in the text as a whole term,
    case-insensitively.

    Args:
        text: Raw resume text.

    Returns:
        Alphabetically sorted list of the skill names found; empty for empty
        input.
    """
    if not text:
        return []

    lowercase_text = text.lower()
    vocabulary = set().union(*JOB_ROLES.values())

    # re.escape keeps regex metacharacters in skill names literal ("C++",
    # "Node.js"). Lookarounds replace \b because \b cannot match after a
    # trailing "+"; they still reject matches inside longer words, so "Java"
    # does not match "javascript".
    found = {
        skill
        for skill in vocabulary
        if re.search(rf"(?<!\w){re.escape(skill.lower())}(?!\w)", lowercase_text)
    }

    # Sorted so the result is stable from run to run.
    return sorted(found)

def calculate_match_score(resume_skills, job_role):
    """Calculate match score and identify matched/missing skills.

    Args:
        resume_skills: Iterable of skill names found in the resume; ``None``
            is treated as no skills.
        job_role: Key into JOB_ROLES.

    Returns:
        A tuple ``(percentage, matched, missing, links)``: the share of
        required skills present (rounded to two decimals), the set of matched
        skills, the set of missing skills, and one "skill: url" string per
        missing skill in alphabetical order. An unknown role gives
        ``(0.0, set(), set(), [])``.
    """
    required_skills = JOB_ROLES.get(job_role, set())
    if not required_skills:
        return 0.0, set(), set(), []

    matched_skills = set(resume_skills or ()) & required_skills
    missing_skills = required_skills - matched_skills
    # required_skills is non-empty here (guarded above), so the division is safe.
    match_percentage = (len(matched_skills) / len(required_skills)) * 100

    # Generate learning recommendations; sorted so the list order does not
    # depend on set iteration order.
    learning_links = [
        f"{skill}: {LEARNING_RESOURCES.get(skill, 'No course available')}"
        for skill in sorted(missing_skills)
    ]

    return round(match_percentage, 2), matched_skills, missing_skills, learning_links

def process_resume(file, job_role):
    """Process resume, extract skills (fast), summary (LLaMA), and calculate match score.

    Args:
        file: Upload value from the Gradio File component. Either an object
            exposing the file path as ``.name`` or the path itself; ``None``
            when nothing was uploaded.
        job_role: Role name selected in the dropdown (a key of JOB_ROLES).

    Returns:
        A formatted multi-line report string, or a short message when no file
        was uploaded or no text could be extracted from it.
    """
    if file is None:
        return "Please upload a resume (PDF) before checking the match score."

    # Accept both an upload object with `.name` and a plain path.
    pdf_path = getattr(file, "name", file)
    resume_text = extract_text_from_pdf(pdf_path)

    # extract_text_from_pdf signals failure with this sentinel string; stop
    # here so the error message is not sent to the model to be summarized.
    if resume_text == "Error extracting text from PDF":
        return resume_text

    # Use LLaMA only for summary
    summary = extract_summary_with_llama(resume_text)

    # Use fast skill extraction (regex)
    extracted_skills = extract_skills_fast(resume_text)

    match_score, matched_skills, missing_skills, learning_links = calculate_match_score(extracted_skills, job_role)

    def _listing(skills):
        # Sorted so that sets render in a stable order.
        return ', '.join(sorted(skills)) if skills else 'None'

    recommendations = "\n".join(learning_links) if learning_links else 'No recommendations'

    return (
        f" **Match Score:** {match_score}%\n\n"
        f" **Resume Summary:** {summary}\n\n"
        f" **Extracted Skills:** {_listing(extracted_skills)}\n\n"
        f" **Matched Skills:** {_listing(matched_skills)}\n\n"
        f" **Missing Skills:** {_listing(missing_skills)}\n\n"
        f" **Learning Recommendations:**\n{recommendations}"
    )

# Gradio UI
# Components are created inside the Blocks context in the order they should
# appear: two Markdown headers, the inputs, the result box, then the button.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Resume Skill Matcher & Learning Recommendations (Optimized)")
    gr.Markdown("Upload your resume and select a job role to check your match score and get learning recommendations.")

    # Inputs: the resume upload and the target role (one choice per JOB_ROLES key).
    file_input = gr.File(label="Upload Resume (PDF)")
    job_dropdown = gr.Dropdown(choices=list(JOB_ROLES.keys()), label="Select Job Role")
    # Read-only box that receives the string returned by process_resume.
    # NOTE(review): that string contains `**...**` markers; presumably a
    # Textbox shows them verbatim rather than as bold text — confirm.
    output_text = gr.Textbox(label="Result", interactive=False)

    # Clicking the button calls process_resume(file, job_role) and writes the
    # returned string into output_text.
    submit_button = gr.Button("Check Match Score")
    submit_button.click(fn=process_resume, inputs=[file_input, job_dropdown], outputs=output_text)

# Run the Gradio app
# NOTE(review): share=True asks Gradio for a public share link in addition to
# the local URL. Uploaded resumes can contain personal data, so confirm that a
# publicly reachable endpoint is intended.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7878
* Running on public URL: https://880fd94b9621361725.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


