In [1]:
!pip install spacy gradio transformers pandas torch
!python -m spacy download en_core_web_sm


Collecting gradio
  Downloading gradio-4.42.0-py3-none-any.whl.metadata (15 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.6.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting tomlkit==0.12.0 (from gradio)
  Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)
Collecting urllib3~=2.0 (from gradio)
  Downloading urllib3-2.2.2-py3-none-any.whl.metadata (6.4 kB)
Downloading gradio-4.42.0-py3-none-any.whl (16.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m78.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gradio_client-1.3.0-py3-none-any.whl (318 

In [2]:
import spacy
import pandas as pd
import torch
import gradio as gr
from transformers import BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F


In [3]:
# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load the BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

# Move the model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [4]:
# Load resumes from CSV file
# Replace this path with the correct file path to your CSV
resume_df = pd.read_csv('/kaggle/input/resume-dataset/UpdatedResumeDataSet.csv')

# Print the first few resumes to ensure loading is correct
print(resume_df.head())


       Category                                             Resume
0  Data Science  Skills * Programming Languages: Python (pandas...
1  Data Science  Education Details \r\nMay 2013 to May 2017 B.E...
2  Data Science  Areas of Interest Deep Learning, Control Syste...
3  Data Science  Skills â¢ R â¢ Python â¢ SAP HANA â¢ Table...
4  Data Science  Education Details \r\n MCA   YMCAUST,  Faridab...


In [5]:
def parse_resume(resume_text):
    doc = nlp(resume_text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities

def calculate_match_score(resume_text, job_description_text):
    # Tokenize the text for BERT
    inputs = tokenizer(resume_text, job_description_text, return_tensors='pt', max_length=512, truncation=True, padding='max_length')
    
    # Move inputs to the same device as the model
    inputs = {key: value.to(device) for key, value in inputs.items()}
    
    # Get model outputs (logits)
    outputs = model(**inputs)
    logits = outputs.logits
    
    # Apply softmax to get probabilities
    probabilities = F.softmax(logits, dim=1)
    
    # Assuming the second class represents the match score
    match_score = probabilities[0][1].item() * 100
    
    return match_score


In [6]:
def get_top_resumes(job_description_text, num_top_resumes):
    # List to store resumes and their match scores
    top_resumes = []
    
    # Iterate over all resumes in the dataframe
    for i, resume_text in enumerate(resume_df['Resume']):
        # Calculate match score for each resume
        match_score = calculate_match_score(resume_text, job_description_text)
        top_resumes.append((resume_text, match_score))
    
    # Sort the resumes by match score in descending order and get the top N resumes
    top_resumes = sorted(top_resumes, key=lambda x: x[1], reverse=True)[:num_top_resumes]
    
    # Format the results for display
    results = "\n\n".join([f"Resume {i+1}:\n{resume[:500]}...\nScore: {score:.2f}%" for i, (resume, score) in enumerate(top_resumes)])
    
    return results


In [7]:
import gradio as gr

# Gradio Interface with improved layout
interface = gr.Interface(
    fn=get_top_resumes,
    inputs=[
        gr.Textbox(label="Enter Job Description", placeholder="Type the job description here...", lines=5),
        gr.Textbox(label="Number of Top Resumes to Display", placeholder="Enter a number")  # Textbox to enter any number
    ],
    outputs="text",
    title="Resume Matcher",
    description="Enter a job description and select the number of top matching resumes to display."
)

# Launch the interface
interface.launch(share=True)




Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://cc6bd4a1e64d982fe0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


