<a href="https://colab.research.google.com/github/Harsh6959Pathak/Resume-tester/blob/main/Resume_Ranker_Clean.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install gradio

import gradio as gr
import pandas as pd
import re
import numpy as np
import nltk
import torch
import time
import traceback
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Download stopwords
# Fetches the NLTK 'stopwords' corpus when this cell runs; clean_text() reads
# the English word list from it through nltk.corpus.stopwords.
nltk.download('stopwords')

# Function to clean text
# Stop-word sets keyed by language, filled on first use so that
# stopwords.words() is called once per process instead of once per token.
_STOPWORD_CACHE = {}


def _english_stopwords():
    """Return the NLTK English stop words as a cached frozenset."""
    if 'english' not in _STOPWORD_CACHE:
        _STOPWORD_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOPWORD_CACHE['english']


def clean_text(text):
    """Normalise a free-text cell for vectorisation.

    The value is lower-cased, every character other than ASCII letters,
    digits and spaces is removed, and English stop words are dropped.

    Args:
        text: The raw cell value. Missing values (None / NaN) are accepted,
            and non-string scalars are converted with ``str()``.

    Returns:
        The cleaned text with tokens separated by single spaces, or an empty
        string when the input is missing or nothing survives cleaning.
    """
    if pd.isna(text):
        return ""
    # str() keeps numeric cells (e.g. a column pandas parsed as int) from
    # failing on .lower().
    text = str(text).lower()
    text = re.sub(r'[^a-zA-Z0-9 ]', '', text)
    tokens = text.split()
    if not tokens:
        return ""
    # Set membership is O(1); the original rebuilt and scanned the stop-word
    # list for every single token.
    stop_words = _english_stopwords()
    return ' '.join(word for word in tokens if word not in stop_words)

# Resume Processing Function
# Columns that make up a resume, in a fixed order. Iterating a set of strings
# yields a different order from run to run, which changed the concatenated
# text (and so the previews and embedding scores) between sessions.
_RESUME_COLUMNS = (
    'skills',
    'career_objective',
    'degree_names',
    'major_field_of_studies',
    'positions',
    'responsibilities',
)

_BERT_MODEL_NAME = 'sentence-transformers/all-mpnet-base-v2'

# Loaded models keyed by device string, so repeated requests reuse the model
# instead of constructing it again on every click.
_BERT_MODELS = {}


def _get_bert_model(device):
    """Return the sentence-embedding model for *device*, loading it once."""
    key = str(device)
    if key not in _BERT_MODELS:
        _BERT_MODELS[key] = SentenceTransformer(_BERT_MODEL_NAME).to(device)
    return _BERT_MODELS[key]


def process_resumes(file, job_description):
    """Rank the resumes in a CSV against a job description.

    Each resume is the concatenation of the cleaned ``_RESUME_COLUMNS``.
    It is scored by the mean of two cosine similarities to the cleaned job
    description: one over TF-IDF vectors and one over sentence embeddings.

    Args:
        file: The uploaded CSV, in any form ``pandas.read_csv`` accepts
            (path or file-like object), or None when nothing was uploaded.
        job_description: Free-text job description.

    Returns:
        A Markdown string: a table of the top three resumes with their final
        scores, or a warning / error message starting with "⚠️". Exceptions
        raised during processing are caught and reported in the returned
        string together with the traceback.
    """
    start_time = time.time()  # Start timer

    if file is None:
        return "⚠️ Please upload a valid CSV file!"
    # Without any job-description text every score is meaningless, so stop
    # before doing the expensive work.
    if job_description is None or not str(job_description).strip():
        return "⚠️ Please enter a job description!"

    try:
        df = pd.read_csv(file)

        required_columns = set(_RESUME_COLUMNS)
        if not required_columns.issubset(set(df.columns)):
            return f"⚠️ Missing required columns! Expected: {required_columns}, Found: {set(df.columns)}"

        if df.empty:
            return "⚠️ The uploaded CSV contains no resumes!"

        # Clean text columns
        for col in _RESUME_COLUMNS:
            df[col] = df[col].apply(clean_text)

        # Combine resume text (fixed column order keeps results reproducible)
        df['resume_text'] = df[list(_RESUME_COLUMNS)].agg(' '.join, axis=1)

        # Clean job description
        jd_cleaned = clean_text(job_description)

        # TF-IDF Vectorization: vocabulary is fitted on the resumes only, the
        # job description is projected into that space.
        tfidf = TfidfVectorizer()
        resume_tfidf = tfidf.fit_transform(df['resume_text'])
        jd_tfidf = tfidf.transform([jd_cleaned])

        # Compute Cosine Similarity
        df['similarity_score'] = cosine_similarity(jd_tfidf, resume_tfidf)[0]

        # Load BERT Model (cached across calls)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        bert_model = _get_bert_model(device)

        resume_embeddings = bert_model.encode(
            df['resume_text'].tolist(), convert_to_tensor=True, device=device
        )
        jd_embedding = bert_model.encode(
            [jd_cleaned], convert_to_tensor=True, device=device
        )

        # Compute BERT Cosine Similarity
        df['bert_score'] = cosine_similarity(
            jd_embedding.cpu().numpy(), resume_embeddings.cpu().numpy()
        )[0]

        # Final Score Calculation
        df['final_score'] = 0.5 * df['similarity_score'] + 0.5 * df['bert_score']

        # Get Top 3 Candidates
        top_candidates = df.nlargest(3, 'final_score')[['resume_text', 'final_score']]

        # Stop timer
        elapsed_time = round(time.time() - start_time, 2)

        # Format output as Markdown
        lines = [
            f"### 📌 Processed in {elapsed_time} sec\n\n",
            "| Rank | Resume Preview | Final Score |\n|------|---------------|-------------|\n",
        ]
        # iterrows() yields the original row labels, not positions, so the
        # rank has to come from enumerate().
        for rank, (_, row) in enumerate(top_candidates.iterrows(), start=1):
            preview_text = row['resume_text'][:100] + "..."  # Limit preview length
            lines.append(f"| {rank} | {preview_text} | {row['final_score']:.4f} |\n")

        return "".join(lines)

    except Exception as e:
        # UI boundary: report the failure in the output pane instead of
        # letting the callback raise.
        return f"⚠️ Error: {str(e)}\n\n```\n{traceback.format_exc()}\n```"

# Gradio UI
# Page layout, top to bottom: a title, one row with the CSV upload next to the
# job-description box, the submit button, and a Markdown pane for the result.
with gr.Blocks() as interface:
    gr.Markdown(value="# 📌 Resume Ranking App")

    with gr.Row():
        file_input = gr.File(label="📂 Upload CSV File")
        job_desc_input = gr.Textbox(
            lines=5,
            placeholder="Enter job description here...",
            label="📋 Job Description",
        )

    submit_button = gr.Button(value="🔍 Rank Resumes")

    output_md = gr.Markdown()

    # Clicking the button feeds both inputs to process_resumes and renders the
    # Markdown string it returns.
    submit_button.click(
        fn=process_resumes,
        inputs=[file_input, job_desc_input],
        outputs=output_md,
    )

# Launch Web App
# share=True asks Gradio for a public share link in addition to the local one.
interface.launch(share=True)


Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5f78af53c27c42c3ab.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


