# Import Libraries

In [5]:
import re
import PyPDF2
from docx import Document
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import gradio as gr

# Fetch the NLTK data packages this notebook relies on: tokenizer data
# ('punkt', 'punkt_tab') for word_tokenize and the 'stopwords' corpus used for
# stop-word filtering. Each call logs its progress to the cell output.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('punkt_tab')

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to C:\Users\Nishtha
[nltk_data]     Singla\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package stopwords to C:\Users\Nishtha
[nltk_data]     Singla\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.
[nltk_data] Downloading package punkt_tab to C:\Users\Nishtha
[nltk_data]     Singla\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


True

# Resume Parsing Functions

In [19]:
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        str: The page texts joined in page order with no separator. A page
        for which the reader yields no text contributes an empty string.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # extract_text() can come back as None/"" for a page without a text
        # layer; coalesce to '' so the join never receives a non-string.
        return ''.join(page.extract_text() or '' for page in reader.pages)

def extract_text_from_docx(docx_path):
    """Return the text of a .docx file, one paragraph per line.

    Args:
        docx_path: Filesystem path of the Word document to read.

    Returns:
        str: Every paragraph's text followed by a newline (the last
        paragraph included); an empty string when there are no paragraphs.
    """
    paragraphs = Document(docx_path).paragraphs
    # Each paragraph carries its own trailing '\n', so the result ends with one.
    return ''.join(para.text + '\n' for para in paragraphs)

def extract_text(file_path):
    """Extract the text of a resume file, dispatching on its extension.

    Args:
        file_path: Path of the file to read. The extension is matched
            case-insensitively, so 'CV.PDF' and 'Resume.Docx' are accepted.

    Returns:
        str: Whatever the PDF or DOCX extractor returns for the file.

    Raises:
        ValueError: If the path ends in neither '.pdf' nor '.docx'.
    """
    # Only the comparison uses the lower-cased copy; the path handed to the
    # extractors is the original one.
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return extract_text_from_pdf(file_path)
    if lowered.endswith('.docx'):
        return extract_text_from_docx(file_path)
    raise ValueError("Unsupported file format. Use PDF or DOCX.")

# Text Preprocessing

In [20]:
def preprocess_text(text):
    """Normalise free text into a list of lower-case word tokens.

    Steps: lower-case the text, turn every character that is neither an
    ASCII letter nor whitespace into a space, tokenize with word_tokenize,
    then drop tokens found in NLTK's English stop-word list.

    Args:
        text: The raw text to normalise.

    Returns:
        list[str]: The remaining tokens in their original order
        (duplicates are kept).
    """
    text = text.lower()
    # Substitute a space rather than deleting: words separated only by
    # punctuation ("python/django", "html,css") must stay separate tokens,
    # otherwise they fuse into one token that never matches a keyword.
    text = re.sub(r'[^a-zA-Z\s]', ' ', text)
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    return [word for word in tokens if word not in stop_words]

#  Keyword Extraction

In [21]:
def extract_keywords(job_description):
    """Return the distinct preprocessed tokens of a job description.

    Args:
        job_description: Free-text description of the job.

    Returns:
        set[str]: The tokens produced by preprocess_text, de-duplicated.
    """
    return {token for token in preprocess_text(job_description)}

# Calculate ATS Score

In [22]:
def calculate_score(resume_text, job_keywords):
    """Score a resume as the percentage of job keywords it contains.

    Args:
        resume_text: Raw resume text; it is run through preprocess_text.
        job_keywords: Collection of keyword tokens to look for.

    Returns:
        The share of entries in job_keywords that occur among the resume's
        tokens, as a percentage between 0 and 100. Returns 0 when
        job_keywords is empty.
    """
    # Nothing to match against: report 0 instead of dividing by zero.
    if not job_keywords:
        return 0
    # A set makes each membership test constant-time.
    resume_vocab = set(preprocess_text(resume_text))
    # Each keyword counts at most once, however often the resume repeats it,
    # so the ratio can never exceed 100 and needs no clamping.
    match_count = sum(1 for keyword in job_keywords if keyword in resume_vocab)
    return (match_count / len(job_keywords)) * 100

# ATS Scanner Function

In [24]:
def ats_scanner(resume_path, job_description):
    """Compute the ATS score of a resume file against a job description.

    Args:
        resume_path: Path of the resume file, passed to extract_text.
        job_description: Free-text job description to draw keywords from.

    Returns:
        The value produced by calculate_score for the resume text and the
        job description's keyword set.
    """
    # Arguments are evaluated left to right: the resume is read first, then
    # the keywords are extracted.
    return calculate_score(
        extract_text(resume_path),
        extract_keywords(job_description),
    )

# Add Default Jobs and Job Descriptions

In [25]:
# Built-in job descriptions, keyed by the job title used as the lookup key.
# Each value is the free-text description passed to the scanner as-is; the
# triple-quoted strings keep their leading newline and indentation.
default_jobs = {
    "Python Developer": """
    We are looking for a Python developer with experience in data analysis, machine learning, and web development.
    Skills required: Python, Pandas, NumPy, Flask, Django, SQL.
    """,
    "Data Scientist": """
    We are hiring a Data Scientist with expertise in machine learning, statistical analysis, and data visualization.
    Skills required: Python, R, TensorFlow, Scikit-learn, SQL, Tableau.
    """,
    "Frontend Developer": """
    We need a Frontend Developer proficient in HTML, CSS, JavaScript, and modern frameworks like React or Angular.
    Skills required: HTML, CSS, JavaScript, React, Angular, Git.
    """
}

# Create a Gradio Interface

In [26]:
def ats_scanner_interface(resume_file, selected_job, custom_job_description):
    """Gradio callback: score the uploaded resume against the chosen job.

    Args:
        resume_file: The uploaded file as handed over by the File component,
            either an object exposing the path as ``.name`` or the path
            string itself. None when nothing was uploaded.
        selected_job: A key of default_jobs, the string "Custom", or None
            when no choice was made.
        custom_job_description: Description text used when "Custom" is chosen.

    Returns:
        str: "ATS Score: NN.NN%" on success, otherwise a short message
        telling the user which input is missing or invalid.
    """
    # Validate the inputs up front so a missing one yields a readable message
    # instead of an AttributeError / KeyError surfacing in the UI.
    if resume_file is None:
        return "Please upload a resume (PDF or DOCX)."
    if not selected_job:
        return "Please select a job or choose 'Custom'."

    # Use the selected job description or custom job description
    if selected_job != "Custom":
        job_description = default_jobs[selected_job]
    else:
        job_description = custom_job_description
    if not job_description or not job_description.strip():
        return "Please enter a custom job description."

    # Accept both an upload object carrying the temp path in `.name` and a
    # bare path string.
    resume_path = getattr(resume_file, "name", resume_file)

    try:
        score = ats_scanner(resume_path, job_description)
    except ValueError as exc:
        # e.g. the "Unsupported file format" error for a non-PDF/DOCX upload.
        return str(exc)
    return f"ATS Score: {score:.2f}%"

# Create Gradio interface
# The three inputs are listed in the same order as the parameters of
# ats_scanner_interface (resume_file, selected_job, custom_job_description).
interface = gr.Interface(
    fn=ats_scanner_interface,
    inputs=[
        gr.components.File(label="Upload Resume (PDF or DOCX)"),
        # Dropdown offers every built-in job title plus a "Custom" entry.
        gr.components.Dropdown(choices=list(default_jobs.keys()) + ["Custom"], label="Select Job"),
        gr.components.Textbox(label="Custom Job Description (if 'Custom' is selected)")
    ],
    # The callback's return value is shown as plain text.
    outputs="text",
    title="ATS Resume Scanner",
    description="Upload your resume, select a job, or enter a custom job description to get an ATS score."
)

# Launch the interface (the cell output below reports the local URL it serves on)
interface.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Created dataset file at: .gradio\flagged\dataset1.csv


# Feedback and Gemini Integration


In [None]:
import re
import PyPDF2
from docx import Document
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import gradio as gr
import google.generativeai as genai

# Download NLTK resources
# 'punkt_tab' is fetched too, as in the first cell of this notebook, so that
# word_tokenize also works on NLTK releases that look for that package.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('punkt_tab')

# Read the Google Gemini API key from the environment instead of embedding it
# in the notebook: a key committed with the source is visible to every reader.
# NOTE: the key that was previously hard-coded here is exposed and should be
# revoked and replaced.
import os

_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export your Google Gemini API key in the "
        "environment before running this cell."
    )
genai.configure(api_key=_gemini_api_key)

# Resume parsing functions
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        str: The page texts joined in page order with no separator; a page
        whose extracted text is empty or None contributes nothing.
    """
    with open(pdf_path, 'rb') as file:
        pages = PyPDF2.PdfReader(file).pages
        # `or ''` turns a falsy extraction result into an empty string.
        chunks = [page.extract_text() or '' for page in pages]
        return ''.join(chunks)

def extract_text_from_docx(docx_path):
    """Return the text of a .docx file with paragraphs separated by newlines.

    Args:
        docx_path: Filesystem path of the Word document to read.

    Returns:
        str: The paragraph texts joined by a single newline, with no
        trailing newline; an empty string when there are no paragraphs.
    """
    document = Document(docx_path)
    lines = []
    for para in document.paragraphs:
        lines.append(para.text)
    return '\n'.join(lines)

def extract_text(file_path):
    """Extract the text of a resume file, dispatching on its extension.

    Args:
        file_path: Path of the file to read. The extension is matched
            case-insensitively, so 'CV.PDF' and 'Resume.Docx' are accepted.

    Returns:
        str: Whatever the PDF or DOCX extractor returns for the file.

    Raises:
        ValueError: If the path ends in neither '.pdf' nor '.docx'.
    """
    # Only the comparison uses the lower-cased copy; the path handed to the
    # extractors is the original one.
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return extract_text_from_pdf(file_path)
    if lowered.endswith('.docx'):
        return extract_text_from_docx(file_path)
    raise ValueError("Unsupported file format. Use PDF or DOCX.")

# Text preprocessing
def preprocess_text(text):
    """Normalise free text into a list of lower-case word tokens.

    Steps: lower-case the text, turn every character that is neither an
    ASCII letter nor whitespace into a space, tokenize with word_tokenize,
    then drop tokens found in NLTK's English stop-word list.

    Args:
        text: The raw text to normalise.

    Returns:
        list[str]: The remaining tokens in their original order
        (duplicates are kept).
    """
    text = text.lower()
    # Substitute a space rather than deleting: words separated only by
    # punctuation ("python/django", "html,css") must stay separate tokens,
    # otherwise they fuse into one token that never matches a keyword.
    text = re.sub(r'[^a-zA-Z\s]', ' ', text)
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    return [word for word in tokens if word not in stop_words]

# Keyword extraction
def extract_keywords(job_description):
    """Return the distinct preprocessed tokens of a job description.

    Args:
        job_description: Free-text description of the job.

    Returns:
        set[str]: The tokens produced by preprocess_text, de-duplicated.
    """
    return {token for token in preprocess_text(job_description)}

# Calculate ATS score
def calculate_score(resume_text, job_keywords):
    """Score a resume as the percentage of job keywords it contains.

    Args:
        resume_text: Raw resume text; it is run through preprocess_text.
        job_keywords: Collection of keyword tokens to look for.

    Returns:
        The share of entries in job_keywords that occur among the resume's
        tokens, as a percentage; 0 when job_keywords is empty.
    """
    resume_tokens = preprocess_text(resume_text)
    # Guard against dividing by zero when there is nothing to match.
    if not job_keywords:
        return 0
    hits = [keyword for keyword in job_keywords if keyword in resume_tokens]
    return (len(hits) / len(job_keywords)) * 100

# ATS scanner function
def ats_scanner(resume_path, job_description):
    """Score a resume file against a job description and return its text.

    Args:
        resume_path: Path of the resume file, passed to extract_text.
        job_description: Free-text job description to draw keywords from.

    Returns:
        tuple: (score, resume_text) where score comes from calculate_score
        and resume_text is the text extracted from the file.
    """
    resume_text = extract_text(resume_path)
    score = calculate_score(resume_text, extract_keywords(job_description))
    return score, resume_text

# AI feedback function using Gemini
def get_ai_feedback(resume_text, job_description, score, model_name='gemini-1.5-pro-latest'):
    """Ask a Gemini model to explain the ATS score and suggest improvements.

    The job description, the full resume text and the score are embedded in
    a prompt that is sent to the model through ``genai``.

    Args:
        resume_text: Text extracted from the resume.
        job_description: The job description the resume was scored against.
        score: The ATS score; formatted with two decimals in the prompt.
        model_name: Name of the generative model to use. Defaults to the
            model this notebook originally hard-coded.

    Returns:
        str: The model's reply with Markdown bold (two asterisks on each
        side) converted to <mark> tags, or a short HTML notice when the
        reply contains no text.

    NOTE(review): the returned string is displayed through an HTML output
    component and is not sanitised here.
    """
    prompt = f"""
    Job Description:
    {job_description}

    Resume Text:
    {resume_text}

    The resume received an ATS score of {score:.2f}% for this job. Please provide feedback on why the score is what it is and suggest specific improvements to increase the score. Focus on missing keywords, skills, or experiences that are relevant to the job description.
    Format the output using HTML to highlight important parts instead of using Markdown-style bold (**).
    """
    model = genai.GenerativeModel(model_name)
    response = model.generate_content(prompt)
    try:
        feedback = response.text
    except ValueError:
        # Accessing `.text` raises ValueError when the response holds no text
        # part (for instance a blocked or empty candidate). Report that to
        # the user instead of failing the whole request.
        return "<p>No AI feedback was returned for this resume (the response was empty or blocked). Please try again.</p>"

    # Replace Markdown bold **text** with HTML <mark>text</mark>
    feedback = re.sub(r'\*\*(.*?)\*\*', r'<mark>\1</mark>', feedback)

    return feedback

# Gradio interface
def ats_scanner_interface(resume_file, selected_job, custom_job_description):
    """Gradio callback: score the uploaded resume and fetch AI feedback.

    Args:
        resume_file: The uploaded file as handed over by the File component,
            either an object exposing the path as ``.name`` or the path
            string itself. None when nothing was uploaded.
        selected_job: A key of default_jobs, the string "Custom", or None
            when no choice was made.
        custom_job_description: Description text used when "Custom" is chosen.

    Returns:
        tuple[str, str]: ("ATS Score: NN.NN%", feedback) on success. When an
        input is missing or invalid, a short message in the first slot and
        an empty feedback string; no scoring or AI call is made in that case.
    """
    # Validate the inputs up front so a missing one yields a readable message
    # instead of an AttributeError / KeyError surfacing in the UI.
    if resume_file is None:
        return "Please upload a resume (PDF or DOCX).", ""
    if not selected_job:
        return "Please select a job or choose 'Custom'.", ""

    if selected_job != "Custom":
        job_description = default_jobs[selected_job]
    else:
        job_description = custom_job_description
    if not job_description or not job_description.strip():
        return "Please enter a custom job description.", ""

    # Accept both an upload object carrying the temp path in `.name` and a
    # bare path string.
    resume_path = getattr(resume_file, "name", resume_file)

    try:
        score, resume_text = ats_scanner(resume_path, job_description)
    except ValueError as exc:
        # e.g. the "Unsupported file format" error for a non-PDF/DOCX upload.
        return str(exc), ""
    feedback = get_ai_feedback(resume_text, job_description, score)
    return f"ATS Score: {score:.2f}%", feedback

# Default jobs and descriptions
# Keyed by job title; ats_scanner_interface looks the selected title up here.
# Each value is the free-text description used as-is; the triple-quoted
# strings keep their leading newline and indentation.
default_jobs = {
    "Python Developer": """
    We are looking for a Python developer with experience in data analysis, machine learning, and web development.
    Skills required: Python, Pandas, NumPy, Flask, Django, SQL.
    """,
    "Data Scientist": """
    We are hiring a Data Scientist with expertise in machine learning, statistical analysis, and data visualization.
    Skills required: Python, R, TensorFlow, Scikit-learn, SQL, Tableau.
    """,
    "Frontend Developer": """
    We need a Frontend Developer proficient in HTML, CSS, JavaScript, and modern frameworks like React or Angular.
    Skills required: HTML, CSS, JavaScript, React, Angular, Git.
    """
}

# Create Gradio interface
# Inputs are listed in the order of ats_scanner_interface's parameters
# (resume_file, selected_job, custom_job_description); the two outputs match
# the (score text, feedback) pair that the callback returns.
interface = gr.Interface(
    fn=ats_scanner_interface,
    inputs=[
        gr.File(label="Upload Resume (PDF or DOCX)"),
        # Dropdown offers every built-in job title plus a "Custom" entry.
        gr.Dropdown(choices=list(default_jobs.keys()) + ["Custom"], label="Select Job"),
        gr.Textbox(label="Custom Job Description (if 'Custom' is selected)")
    ],
    outputs=[
        gr.Textbox(label="ATS Score"),
        gr.HTML(label="AI Feedback")  # HTML component so the <mark> tags in the feedback are rendered
    ],
    title="AI-Powered ATS Resume Scanner (Gemini)",
    description="Upload your resume, select a job, or enter a custom job description to get an ATS score and AI feedback using Google Gemini."
)

# Launch the interface (the cell output below reports the local URL it serves on)
interface.launch()



[nltk_data] Downloading package punkt to C:\Users\Nishtha
[nltk_data]     Singla\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Nishtha
[nltk_data]     Singla\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


* Running on local URL:  http://127.0.0.1:7870

To create a public link, set `share=True` in `launch()`.




In [43]:
import os

import google.generativeai as genai

# Configure the API key from the environment rather than embedding it in the
# notebook: a key committed with the source is visible to every reader.
# NOTE: the key that was previously hard-coded here is exposed and should be
# revoked and replaced.
_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export your Google Gemini API key in the "
        "environment before running this cell."
    )
genai.configure(api_key=_gemini_api_key)

# List available models (one model name per output line)
for model in genai.list_models():
    print(model.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.5-pro-exp-03-25
models/gemini-2.5-pro-preview-03-25
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01