In [1]:
!pip install spacy nltk pdfplumber docx2txt scikit-learn torch transformers faiss-cpu requests gradio
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m30.7 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [2]:
import os
import json
import pdfplumber
import docx2txt
import requests
import spacy
import gradio as gr
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sentence_transformers import SentenceTransformer
from nltk.tokenize import word_tokenize


In [3]:
import nltk
nltk.download('punkt_tab')
nltk.download("punkt")
nltk.download("stopwords")
nlp = spacy.load("en_core_web_sm")


[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/nitish/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punkt to /Users/nitish/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/nitish/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
import gradio as gr
import pdfplumber
import spacy
import json
import google.generativeai as genai

# Initialize spaCy and Gemini API
nlp = spacy.load("en_core_web_sm")
genai.configure(api_key="AIzaSyDS0SXbtLKaawt2IdjTezO8HzsaSoM6RJM")
model = genai.GenerativeModel('gemini-1.5-flash')

# Define job roles and required skills, and job descriptions
JOB_ROLES = {
    "Data Scientist": {
        "skills": {"Python", "SQL", "Machine Learning", "Deep Learning", "NLP", "Statistics", "Pandas", "Scikit-Learn"},
        "description": """We are seeking a Data Scientist to analyze complex datasets, develop machine learning models, and provide actionable insights. The ideal candidate should have strong skills in Python, SQL, and various machine learning techniques."""
    },
    "Software Engineer": {
        "skills": {"Python", "Java", "C++", "Git", "OOP", "Algorithms"},
        "description": """We are looking for a Software Engineer to develop and maintain high-quality software applications. The candidate should be proficient in Python, Java, or C++, and have a good understanding of object-oriented programming and algorithms."""
    },
    "Cloud Engineer": {
        "skills": {"AWS", "Azure", "Docker", "Kubernetes", "Terraform", "Networking"},
        "description": """We need a Cloud Engineer to manage and optimize our cloud infrastructure. The candidate should have experience with AWS or Azure, Docker, Kubernetes, and Terraform."""
    },
    "Cybersecurity Analyst": {
        "skills": {"Cybersecurity", "Ethical Hacking", "Network Security", "Penetration Testing"},
        "description": """We are hiring a Cybersecurity Analyst to protect our systems from cyber threats. The candidate should have expertise in ethical hacking, network security, and penetration testing."""
    },
    "AI Engineer": {
        "skills": {"Python", "TensorFlow", "PyTorch", "Machine Learning", "Deep Learning", "AI"},
        "description": """We're looking for an AI Engineer to build and deploy advanced AI models. Proficiency in Python, TensorFlow, and PyTorch is essential."""
    }
}

# Predefined common skills
COMMON_SKILLS = {
    "Python", "Java", "C++", "SQL", "Machine Learning", "Deep Learning", "NLP", "Pandas", "Scikit-Learn",
    "TensorFlow", "PyTorch", "Data Analysis", "Cybersecurity", "Ethical Hacking", "AWS", "Azure", "Docker",
    "Kubernetes", "Flask", "Django", "Linux", "JavaScript", "React", "Node.js", "Computer Vision", "Statistics",
    "Mathematics", "Tableau", "Power BI", "Time Management", "Problem Solving", "Communication", "Teamwork"
}

def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file."""
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text += page.extract_text() + "\n"
    return text if text else "Error extracting text from PDF"

def extract_resume_data(text):
    """Extract resume data as JSON, including skills and summary."""
    extracted_skills = set()
    text_lower = text.lower()

    # Match predefined skills
    for skill in COMMON_SKILLS:
        if skill.lower() in text_lower:
            extracted_skills.add(skill)

    # Use spaCy for Named Entity Recognition (NER)
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ in ["ORG", "PERSON", "GPE", "FACILITY", "EVENT"]:  # Avoid extracting non-skills
            continue
        if ent.text in COMMON_SKILLS:  # Extract only valid skills
            extracted_skills.add(ent.text)

    # Extract summary
    summary = "Summary not found."
    for sent in doc.sents:
        if len(sent.text.split()) > 5:
            summary = sent.text
            break

    resume_data = {
        "skills": list(extracted_skills),
        "summary": summary
    }
    return json.dumps(resume_data, indent=4)

def calculate_match_score_with_llm(resume_json, job_description, job_skills):
    """Calculate match score using Gemini LLM and provide detailed analysis."""
    prompt = f"""
    Given the following resume data in JSON format:
    {resume_json}

    And the following job description:
    {job_description}

    And the following job required skills:
    {job_skills}

    Analyze the resume data and determine the match score as a percentage.
    List the skills that matched and the skills that did not match.
    Provide a brief explanation of why you gave the score.
    Return the score, matched skills, unmatched skills, and explanation in a JSON format.
    """

    response = model.generate_content(prompt)
    try:
        response_text = response.text
        start_index = response_text.find('{')
        end_index = response_text.rfind('}') + 1
        json_output = json.loads(response_text[start_index:end_index])
        return json_output
    except (json.JSONDecodeError, ValueError):
        return {"score": 0, "matched_skills": [], "unmatched_skills": [], "explanation": "Failed to calculate match score."}

def process_resume(file, job_role):
    """Process resume, extract data, and calculate match score using LLM."""
    resume_text = extract_text_from_pdf(file.name)
    resume_json = extract_resume_data(resume_text)
    job_description = JOB_ROLES[job_role]["description"]
    job_skills = JOB_ROLES[job_role]["skills"]
    match_result = calculate_match_score_with_llm(resume_json, job_description, job_skills)

    return f" **Resume Data (JSON):**\n{resume_json}\n\n" \
           f" **Match Score:** {match_result.get('score', 0)}%\n\n" \
           f" **Matched Skills:** {', '.join(match_result.get('matched_skills', []))}\n\n" \
           f" **Unmatched Skills:** {', '.join(match_result.get('unmatched_skills', []))}\n\n" \
           f" **Explanation:** {match_result.get('explanation', 'No explanation available.')}"

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Resume Matcher using LLM")
    gr.Markdown("Upload your resume and select a job role to check your match score using LLM.")

    file_input = gr.File(label="Upload Resume (PDF)")
    job_dropdown = gr.Dropdown(choices=list(JOB_ROLES.keys()), label="Select Job Role")
    output_text = gr.Textbox(label="Result", interactive=False)

    submit_button = gr.Button("Check Match Score")
    submit_button.click(fn=process_resume, inputs=[file_input, job_dropdown], outputs=output_text)

# Run the Gradio app
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://99ac9879a5737a7a70.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [5]:
import gradio as gr
import pdfplumber
import spacy
import json
import google.generativeai as genai

# Initialize spaCy and Gemini API
nlp = spacy.load("en_core_web_sm")
genai.configure(api_key="AIzaSyDS0SXbtLKaawt2IdjTezO8HzsaSoM6RJM")
model = genai.GenerativeModel('gemini-1.5-flash')

# Define job roles and required skills, and job descriptions
JOB_ROLES = {
    "Data Scientist": {
        "skills": {"Python", "SQL", "Machine Learning", "Deep Learning", "NLP", "Statistics", "Pandas", "Scikit-Learn"},
        "description": """We are seeking a Data Scientist to analyze complex datasets, develop machine learning models, and provide actionable insights. The ideal candidate should have strong skills in Python, SQL, and various machine learning techniques."""
    },
    "Software Engineer": {
        "skills": {"Python", "Java", "C++", "Git", "OOP", "Algorithms"},
        "description": """We are looking for a Software Engineer to develop and maintain high-quality software applications. The candidate should be proficient in Python, Java, or C++, and have a good understanding of object-oriented programming and algorithms."""
    },
    "Cloud Engineer": {
        "skills": {"AWS", "Azure", "Docker", "Kubernetes", "Terraform", "Networking"},
        "description": """We need a Cloud Engineer to manage and optimize our cloud infrastructure. The candidate should have experience with AWS or Azure, Docker, Kubernetes, and Terraform."""
    },
    "Cybersecurity Analyst": {
        "skills": {"Cybersecurity", "Ethical Hacking", "Network Security", "Penetration Testing"},
        "description": """We are hiring a Cybersecurity Analyst to protect our systems from cyber threats. The candidate should have expertise in ethical hacking, network security, and penetration testing."""
    },
    "AI Engineer": {
        "skills": {"Python", "TensorFlow", "PyTorch", "Machine Learning", "Deep Learning", "AI"},
        "description": """We're looking for an AI Engineer to build and deploy advanced AI models. Proficiency in Python, TensorFlow, and PyTorch is essential."""
    }
}

# Predefined common skills
COMMON_SKILLS = {
    "Python", "Java", "C++", "SQL", "Machine Learning", "Deep Learning", "NLP", "Pandas", "Scikit-Learn",
    "TensorFlow", "PyTorch", "Data Analysis", "Cybersecurity", "Ethical Hacking", "AWS", "Azure", "Docker",
    "Kubernetes", "Flask", "Django", "Linux", "JavaScript", "React", "Node.js", "Computer Vision", "Statistics",
    "Mathematics", "Tableau", "Power BI", "Time Management", "Problem Solving", "Communication", "Teamwork"
}

def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file."""
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text += page.extract_text() + "\n"
    return text if text else "Error extracting text from PDF"

def extract_resume_data(text):
    """Extract resume data as JSON, including skills and summary."""
    extracted_skills = set()
    text_lower = text.lower()

    # Match predefined skills
    for skill in COMMON_SKILLS:
        if skill.lower() in text_lower:
            extracted_skills.add(skill)

    # Use spaCy for Named Entity Recognition (NER)
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ in ["ORG", "PERSON", "GPE", "FACILITY", "EVENT"]:  # Avoid extracting non-skills
            continue
        if ent.text in COMMON_SKILLS:  # Extract only valid skills
            extracted_skills.add(ent.text)

    # Extract summary
    summary = "Summary not found."
    for sent in doc.sents:
        if len(sent.text.split()) > 5:
            summary = sent.text
            break

    resume_data = {
        "skills": list(extracted_skills),
        "summary": summary
    }
    return json.dumps(resume_data, indent=4)

def calculate_match_score_with_llm(resume_json, job_description, job_skills):
    """Calculate match score using Gemini LLM and provide detailed analysis and improvement tips."""
    prompt = f"""
    Given the following resume data in JSON format:
    {resume_json}

    And the following job description:
    {job_description}

    And the following job required skills:
    {job_skills}

    Analyze the resume data and determine the match score as a percentage.
    List the skills that matched and the skills that did not match.
    Provide a detailed explanation of why you gave the score, and provide advice to the candidate on how to improve the missing skills to match the job role.
    Return the score, matched skills, unmatched skills, and explanation and improvement tips in a JSON format.
    """

    response = model.generate_content(prompt)
    try:
        response_text = response.text
        start_index = response_text.find('{')
        end_index = response_text.rfind('}') + 1
        json_output = json.loads(response_text[start_index:end_index])
        return json_output
    except (json.JSONDecodeError, ValueError):
        return {"score": 0, "matched_skills": [], "unmatched_skills": [], "explanation": "Failed to calculate match score.", "improvement_tips": "Failed to provide improvement tips."}

def process_resume(file, job_role):
    """Process resume, extract data, and calculate match score using LLM."""
    resume_text = extract_text_from_pdf(file.name)
    resume_json = extract_resume_data(resume_text)
    job_description = JOB_ROLES[job_role]["description"]
    job_skills = JOB_ROLES[job_role]["skills"]
    match_result = calculate_match_score_with_llm(resume_json, job_description, job_skills)
    matched_skills = match_result.get('matched_skills', [])
    unmatched_skills = match_result.get('unmatched_skills', [])
    job_skills_set = JOB_ROLES[job_role]['skills']
    calculated_score = 0
    if len(job_skills_set) > 0 :
        calculated_score = (len(set(matched_skills).intersection(job_skills_set)) / len(job_skills_set)) * 100

    return f" **Resume Data (JSON):**\n{resume_json}\n\n" \
           f" **Match Score:** {calculated_score}%\n\n" \
           f" **Matched Skills:** {', '.join(matched_skills)}\n\n" \
           f" **Unmatched Skills:** {', '.join(unmatched_skills)}\n\n" \
           f" **Explanation:** {match_result.get('explanation', 'No explanation available.')}\n\n" \
           f" **Improvement Tips:** {match_result.get('improvement_tips', 'No improvement tips available.')}"

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Resume Matcher using LLM")
    gr.Markdown("Upload your resume and select a job role to check your match score using LLM.")

    file_input = gr.File(label="Upload Resume (PDF)")
    job_dropdown = gr.Dropdown(choices=list(JOB_ROLES.keys()), label="Select Job Role")
    output_text = gr.Textbox(label="Result", interactive=False)

    submit_button = gr.Button("Check Match Score")
    submit_button.click(fn=process_resume, inputs=[file_input, job_dropdown], outputs=output_text)

# Run the Gradio app
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://17e6cadcd6363529d4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [6]:
import gradio as gr
import pdfplumber
import spacy
import json
import google.generativeai as genai
import re

# Initialize spaCy and Gemini API
nlp = spacy.load("en_core_web_sm")
genai.configure(api_key="AIzaSyDS0SXbtLKaawt2IdjTezO8HzsaSoM6RJM")
model = genai.GenerativeModel('gemini-1.5-flash')

# Define job roles and required skills, and job descriptions
JOB_ROLES = {
    "Data Scientist": {
        "skills": {"Python", "SQL", "Machine Learning", "Deep Learning", "NLP", "Statistics", "Pandas", "Scikit-Learn"},
        "description": """We are seeking a Data Scientist to analyze complex datasets, develop machine learning models, and provide actionable insights. The ideal candidate should have strong skills in Python, SQL, and various machine learning techniques."""
    },
    "Software Engineer": {
        "skills": {"Python", "Java", "C++", "Git", "OOP", "Algorithms"},
        "description": """We are looking for a Software Engineer to develop and maintain high-quality software applications. The candidate should be proficient in Python, Java, or C++, and have a good understanding of object-oriented programming and algorithms."""
    },
    "Cloud Engineer": {
        "skills": {"AWS", "Azure", "Docker", "Kubernetes", "Terraform", "Networking"},
        "description": """We need a Cloud Engineer to manage and optimize our cloud infrastructure. The candidate should have experience with AWS or Azure, Docker, Kubernetes, and Terraform."""
    },
    "Cybersecurity Analyst": {
        "skills": {"Cybersecurity", "Ethical Hacking", "Network Security", "Penetration Testing"},
        "description": """We are hiring a Cybersecurity Analyst to protect our systems from cyber threats. The candidate should have expertise in ethical hacking, network security, and penetration testing."""
    },
    "AI Engineer": {
        "skills": {"Python", "TensorFlow", "PyTorch", "Machine Learning", "Deep Learning", "AI"},
        "description": """We're looking for an AI Engineer to build and deploy advanced AI models. Proficiency in Python, TensorFlow, and PyTorch is essential."""
    }
}

# Predefined common skills
COMMON_SKILLS = {
    "Python", "Java", "C++", "SQL", "Machine Learning", "Deep Learning", "NLP", "Pandas", "Scikit-Learn",
    "TensorFlow", "PyTorch", "Data Analysis", "Cybersecurity", "Ethical Hacking", "AWS", "Azure", "Docker",
    "Kubernetes", "Flask", "Django", "Linux", "JavaScript", "React", "Node.js", "Computer Vision", "Statistics",
    "Mathematics", "Tableau", "Power BI", "Time Management", "Problem Solving", "Communication", "Teamwork"
}

def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file."""
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text += page.extract_text() + "\n"
    return text if text else "Error extracting text from PDF"

def extract_resume_data(text):
    """Extract detailed resume data as JSON."""
    doc = nlp(text)
    data = {
        "personal": {
            "name": "",
            "phone": "",
            "email": "",
            "address": "",
            "gender": "",
            "date_of_birth": "",
            "social": [],
            "about": ""
        },
        "skills": [],
        "education": [],
        "experience": [],
        "overall_experience": ""
    }

    # Personal Information Extraction
    for ent in doc.ents:
        if ent.label_ == "PERSON" and not data["personal"]["name"]:
            data["personal"]["name"] = ent.text
        elif ent.label_ == "GPE" and not data["personal"]["address"]:
            data["personal"]["address"] = ent.text
        elif ent.label_ == "DATE" and not data["personal"]["date_of_birth"]:
            data["personal"]["date_of_birth"] = ent.text

    phone_match = re.search(r'(\+\d{1,3}\s?)?(\(\d{3}\)\s?)?(\d{3}[-\.\s]??\d{4}|\d{10})', text)
    if phone_match:
        data["personal"]["phone"] = phone_match.group(0)

    email_match = re.search(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+", text)
    if email_match:
        data[".
        personal"]["email"] = email_match.group(0)

    social_matches = re.findall(r"(https?://[^\s]+)", text)
    data["personal"]["social"] = social_matches

    # Skills Extraction
    text_lower = text.lower()
    for skill in COMMON_SKILLS:
        if skill.lower() in text_lower:
            data["skills"].append({"name": skill, "type": "technical", "experience": "1"})

    # Education and Experience Extraction (Simplified)
    education_matches = re.findall(r"(Bachelor|Master|Ph\.D)\s+of\s+([\w\s]+)\s+from\s+([\w\s]+)\s+(?:(\d{4}-\d{4}|\d{4}-\w+))", text, re.IGNORECASE)
    for degree, field, institution, dates in education_matches:
        data["education"].append({"degree": f"{degree} of {field}", "institution": institution, "dates": dates})

    experience_matches = re.findall(r"([\w\s]+)\s+at\s+([\w\s]+)\s+(?:(\d{4}-\d{4}|\d{4}-\w+))\s+([\w\s,]+)", text)
    for title, company, dates, location in experience_matches:
        data["experience"].append({"title": title, "company": company, "dates": dates, "location": location})

    # Summary/About
    for sent in doc.sents:
        if len(sent.text.split()) > 10:
            data['personal']['about'] += sent.text

    return json.dumps(data, indent=4)

def calculate_match_score_with_llm(resume_json, job_description, job_skills):
    """Calculate match score using Gemini LLM and provide detailed analysis and improvement tips."""
    prompt = f"""
    Given the following resume data in JSON format:
    {resume_json}

    And the following job description:
    {job_description}

    And the following job required skills:
    {job_skills}

    Analyze the resume data and determine the match score as a percentage.
    List the skills that matched and the skills that did not match.
    Provide a detailed explanation of why you gave the score, and provide advice to the candidate on how to improve the missing skills to match the job role.
    Return the score, matched skills, unmatched skills, and explanation and improvement tips in a JSON format.
    """

    response = model.generate_content(prompt)
    try:
        response_text = response.text
        start_index = response_text.find('{')
        end_index = response_text.rfind('}') + 1
        json_output = json.loads(response_text[start_index:end_index])
        return json_output
    except (json.JSONDecodeError, ValueError):
        return {"score": 0, "matched_skills": [], "unmatched_skills": [], "explanation": "Failed to calculate match score.", "improvement_tips": "Failed to provide improvement tips."}

def process_resume(file, job_role):
    """Process resume, extract data, and calculate match score using LLM."""
    resume_text = extract_text_from_pdf(file.name)
    resume_json = extract_resume_data(resume_text)
    job_description = JOB_ROLES[job_role]["description"]
    job_skills = JOB_ROLES[job_role]["skills"]
    match_result = calculate_match_score_with_llm(resume_json, job_description, job_skills)
    matched_skills = match_result.get('matched_skills', [])
    unmatched_skills = match_result.get('unmatched_skills', [])
    job_skills_set = JOB_ROLES[job_role]['skills']
    calculated_score = 0
    if len(job_skills_set) > 0:
        calculated_score = len(matched_skills) / len(job_skills_set) * 100
    return {
        "match_score": calculated_score,
        "matched_skills": matched_skills,
        "unmatched_skills": unmatched_skills,
        "job_description": job_description,
        "resume_data": resume_json
    }

# Create the Gradio interface for file upload and job role selection
def gradio_interface():
    job_roles = list(JOB_ROLES.keys())
    
    def gradio_function(resume_file, job_role):
        return process_resume(resume_file, job_role)

    iface = gr.Interface(
        fn=gradio_function,
        inputs=[gr.File(label="Upload Resume"), gr.Dropdown(choices=job_roles, label="Job Role", value=job_roles[0])],
        outputs="json"
    )

    iface.launch()

# Start Gradio interface
gradio_interface()


SyntaxError: unterminated string literal (detected at line 88) (430324283.py, line 88)

In [7]:
import gradio as gr
import pdfplumber
import spacy
import json
import google.generativeai as genai
import re

# Initialize spaCy and Gemini API
nlp = spacy.load("en_core_web_sm")
genai.configure(api_key="AIzaSyDS0SXbtLKaawt2IdjTezO8HzsaSoM6RJM")
model = genai.GenerativeModel('gemini-1.5-flash')

# Define job roles and required skills, and job descriptions
JOB_ROLES = {
    "Data Scientist": {
        "skills": {"Python", "SQL", "Machine Learning", "Deep Learning", "NLP", "Statistics", "Pandas", "Scikit-Learn"},
        "description": """We are seeking a Data Scientist to analyze complex datasets, develop machine learning models, and provide actionable insights. The ideal candidate should have strong skills in Python, SQL, and various machine learning techniques."""
    },
    "Software Engineer": {
        "skills": {"Python", "Java", "C++", "Git", "OOP", "Algorithms"},
        "description": """We are looking for a Software Engineer to develop and maintain high-quality software applications. The candidate should be proficient in Python, Java, or C++, and have a good understanding of object-oriented programming and algorithms."""
    },
    "Cloud Engineer": {
        "skills": {"AWS", "Azure", "Docker", "Kubernetes", "Terraform", "Networking"},
        "description": """We need a Cloud Engineer to manage and optimize our cloud infrastructure. The candidate should have experience with AWS or Azure, Docker, Kubernetes, and Terraform."""
    },
    "Cybersecurity Analyst": {
        "skills": {"Cybersecurity", "Ethical Hacking", "Network Security", "Penetration Testing"},
        "description": """We are hiring a Cybersecurity Analyst to protect our systems from cyber threats. The candidate should have expertise in ethical hacking, network security, and penetration testing."""
    },
    "AI Engineer": {
        "skills": {"Python", "TensorFlow", "PyTorch", "Machine Learning", "Deep Learning", "AI"},
        "description": """We're looking for an AI Engineer to build and deploy advanced AI models. Proficiency in Python, TensorFlow, and PyTorch is essential."""
    }
}

# Predefined common skills
COMMON_SKILLS = {
    "Python", "Java", "C++", "SQL", "Machine Learning", "Deep Learning", "NLP", "Pandas", "Scikit-Learn",
    "TensorFlow", "PyTorch", "Data Analysis", "Cybersecurity", "Ethical Hacking", "AWS", "Azure", "Docker",
    "Kubernetes", "Flask", "Django", "Linux", "JavaScript", "React", "Node.js", "Computer Vision", "Statistics",
    "Mathematics", "Tableau", "Power BI", "Time Management", "Problem Solving", "Communication", "Teamwork"
}

def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file."""
    text = ""
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            text += page.extract_text() + "\n"
    return text if text else "Error extracting text from PDF"

def extract_resume_data(text):
    """Extract detailed resume data as JSON."""
    doc = nlp(text)
    data = {
        "personal": {
            "name": "",
            "phone": "",
            "email": "",
            "address": "",
            "gender": "",
            "date_of_birth": "",
            "social": [],
            "about": ""
        },
        "skills": [],
        "education": [],
        "experience": [],
        "overall_experience": "missing"
    }

    # Personal Information Extraction
    for ent in doc.ents:
        if ent.label_ == "PERSON" and not data["personal"]["name"]:
            data["personal"]["name"] = ent.text
        elif ent.label_ == "GPE" and not data["personal"]["address"]:
            data["personal"]["address"] = ent.text
        elif ent.label_ == "DATE" and not data["personal"]["date_of_birth"]:
            data["personal"]["date_of_birth"] = ent.text

    phone_match = re.search(r'(\+\d{1,3}\s?)?(\(\d{3}\)\s?)?(\d{3}[-\.\s]??\d{4}|\d{10})', text)
    if phone_match:
        data["personal"]["phone"] = phone_match.group(0)

    email_match = re.search(r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+", text)
    if email_match:
        data["personal"]["email"] = email_match.group(0)

    social_matches = re.findall(r"(https?://[^\s]+)", text)
    data["personal"]["social"] = social_matches

    # Skills Extraction
    text_lower = text.lower()
    for skill in COMMON_SKILLS:
        if skill.lower() in text_lower:
            data["skills"].append({"name": skill, "type": "technical", "experience": "1"})

    # Education and Experience Extraction (Simplified)
    education_matches = re.findall(r"(Bachelor|Master|Ph\.D)\s+of\s+([\w\s]+)\s+from\s+([\w\s]+)\s+(?:(\d{4}-\d{4}|\d{4}-\w+))", text, re.IGNORECASE)
    for degree, field, institution, dates in education_matches:
        data["education"].append({"degree": f"{degree} of {field}", "institution": institution, "dates": dates})

    experience_matches = re.findall(r"([\w\s]+)\s+at\s+([\w\s]+)\s+(?:(\d{4}-\d{4}|\d{4}-\w+))\s+([\w\s,]+)", text)
    for title, company, dates, location in experience_matches:
        data["experience"].append({"title": title, "company": company, "dates": dates, "location": location})

    # Summary/About
    for sent in doc.sents:
        if len(sent.text.split()) > 10:
            data['personal']['about'] += sent.text

    # Handle missing values after extraction
    for key, value in data["personal"].items():
        if not value and key != "social":  # social can be an empty list
            data["personal"][key] = "missing"
    if not data["skills"]:
        data["skills"] = []
    if not data["education"]:
        data["education"] = []
    if not data["experience"]:
        data["experience"] = []
    if not data["overall_experience"]:
        data["overall_experience"] = "missing"

    return json.dumps(data, indent=4)

def calculate_match_score_with_llm(resume_json, job_description, job_skills):
    """Calculate match score using Gemini LLM and provide detailed analysis and improvement tips."""
    prompt = f"""
    Given the following resume data in JSON format:
    {resume_json}

    And the following job description:
    {job_description}

    And the following job required skills:
    {job_skills}

    Analyze the resume data and determine the match score as a percentage.
    List the skills that matched and the skills that did not match.
    Provide a detailed explanation of why you gave the score, and provide advice to the candidate on how to improve the missing skills to match the job role.
    Return the score, matched skills, unmatched skills, and explanation and improvement tips in a JSON format.
    """

    response = model.generate_content(prompt)
    try:
        response_text = response.text
        start_index = response_text.find('{')
        end_index = response_text.rfind('}') + 1
        json_output = json.loads(response_text[start_index:end_index])
        return json_output
    except (json.JSONDecodeError, ValueError):
        return {"score": 0, "matched_skills": [], "unmatched_skills": [], "explanation": "Failed to calculate match score.", "improvement_tips": "Failed to provide improvement tips."}

def process_resume(file, job_role):
    """Process resume, extract data, and calculate match score using LLM."""
    resume_text = extract_text_from_pdf(file.name)
    resume_json = extract_resume_data(resume_text)
    job_description = JOB_ROLES[job_role]["description"]
    job_skills = JOB_ROLES[job_role]["skills"]
    match_result = calculate_match_score_with_llm(resume_json, job_description, job_skills)
    matched_skills = match_result.get('matched_skills', [])
    unmatched_skills = match_result.get('unmatched_skills', [])
    job_skills_set = JOB_ROLES[job_role]['skills']
    calculated_score = 0
    if len(job_skills_set) > 0:
        calculated_score = len(matched_skills) / len(job_skills_set) * 100
    return {
        "match_score": calculated_score,
        "matched_skills": matched_skills,
        "unmatched_skills": unmatched_skills,
        "job_description": job_description,
        "resume_data": resume_json
    }

# Create the Gradio interface for file upload and job role selection
def gradio_interface():
    job_roles = list(JOB_ROLES.keys())

    def gradio_function(resume_file, job_role):
        return process_resume(resume_file, job_role)

    iface = gr.Interface(
        fn=gradio_function,
        inputs=[gr.File(label="Upload Resume"), gr.Dropdown(choices=job_roles, label="Job Role", value=job_roles[0])],
        outputs="json"
    )

    iface.launch()

# Start Gradio interface
gradio_interface()


* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.
