In [1]:
!pip install pandas scikit-learn nltk


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


Step 2: Import Libraries

In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
# Fetch NLTK's 'punkt' package into the local nltk_data directory.
# NOTE(review): none of the visible cells call an NLTK tokenizer (the scoring
# cells use TfidfVectorizer directly) — confirm this download is still needed.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/divya/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

Step 3: Input Data
For simplicity, assume we are comparing a single job description against multiple resumes.

In [5]:
# Job posting text that every resume will be scored against
job_description = """
We are seeking a Python Developer with experience in data analysis, machine learning, and Flask development.
The candidate should have strong skills in Pandas, NumPy, and SQL.
"""

# Candidate label -> raw resume text
resumes = dict(
    Resume_1="Experienced Python developer skilled in Flask, SQL, and data pipelines.",
    Resume_2="Expert in JavaScript and React, with knowledge of HTML and CSS.",
    Resume_3="Skilled in machine learning, Pandas, and statistical analysis using Python.",
)

# One row per candidate: the label in 'Candidate', the text in 'Resume_Text'
df = pd.DataFrame(
    {
        'Candidate': list(resumes.keys()),
        'Resume_Text': list(resumes.values()),
    }
)


 Step 4: TF-IDF Vectorization and Similarity Score

In [7]:
# Put the job description first so that row 0 of the TF-IDF matrix is the
# query and rows 1..n line up positionally with the rows of `df`.
documents = [job_description] + df['Resume_Text'].tolist()

# Fit one TF-IDF vocabulary over all documents (English stop words removed)
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(documents)

# Cosine similarity of the job description (row 0) against every resume row
cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

# Attach scores to the dataframe as percentages rounded to 2 decimals
df['Match_Score (%)'] = np.round(cosine_sim * 100, 2)

# Sort best match first. A stable sort keeps tied candidates in their input
# order, so the "top" row is deterministic even when scores are equal.
df = df.sort_values(by='Match_Score (%)', ascending=False, kind='stable')
df


Unnamed: 0,Candidate,Resume_Text,Match_Score (%)
0,Resume_1,"Experienced Python developer skilled in Flask,...",35.02
2,Resume_3,"Skilled in machine learning, Pandas, and stati...",35.02
1,Resume_2,"Expert in JavaScript and React, with knowledge...",0.0


 Step 5: Export or Display Results

In [9]:
# Optional: persist the scored table to disk, leaving the index column out
df.to_csv("resume_match_scores.csv", index=False)

# Print a heading followed by the first row of `df`
# (the best match, given the descending sort applied in the scoring cell)
print("Top Matching Resume:", df.iloc[0], sep="\n")


Top Matching Resume:
Candidate                                                   Resume_1
Resume_Text        Experienced Python developer skilled in Flask,...
Match_Score (%)                                                35.02
Name: 0, dtype: object


Step 6: Add Summary Output Table

In [11]:
# Compact summary: only the candidate label and its score, under a heading
print("All Candidate Match Scores:", df[['Candidate', 'Match_Score (%)']], sep="\n")


All Candidate Match Scores:
  Candidate  Match_Score (%)
0  Resume_1            35.02
2  Resume_3            35.02
1  Resume_2             0.00


Step 7: Wrap in Functions (Optional but Cleaner)

In [13]:
def match_resumes(job_desc, resumes_dict):
    """Rank resumes by TF-IDF cosine similarity to a job description.

    The job description and all resume texts are vectorized together with a
    single ``TfidfVectorizer`` (English stop words removed); each resume is
    then scored by its cosine similarity to the job description.

    Parameters
    ----------
    job_desc : str
        Text of the job description.
    resumes_dict : dict
        Mapping of candidate label -> resume text.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Candidate'``, ``'Resume_Text'`` and ``'Match_Score (%)'``
        (similarity * 100, rounded to 2 decimals), sorted by score in
        descending order. Candidates with equal scores keep their input
        order. If ``resumes_dict`` is empty, an empty frame with the same
        three columns is returned.
    """
    import pandas as pd

    # Create DataFrame from resumes (one row per candidate)
    df = pd.DataFrame(list(resumes_dict.items()), columns=['Candidate', 'Resume_Text'])

    # Nothing to score: return an empty, correctly-shaped result instead of
    # letting cosine_similarity fail on a zero-row matrix.
    if df.empty:
        df['Match_Score (%)'] = pd.Series(dtype='float64')
        return df

    # Imported after the guard so the empty-input path does not need scikit-learn.
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    # Job description goes first so row 0 of the matrix is the query and
    # rows 1..n line up positionally with the rows of `df`.
    documents = [job_desc] + df['Resume_Text'].tolist()

    # Vectorize the text
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(documents)

    # Calculate cosine similarity between job (row 0) and every resume row
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

    # Add match score to dataframe as a percentage
    df['Match_Score (%)'] = (cosine_sim * 100).round(2)

    # Stable sort: tied candidates stay in input order, so the result is deterministic
    return df.sort_values(by='Match_Score (%)', ascending=False, kind='stable')


In [14]:
# Score the sample resumes against the sample job description and print the table
results = match_resumes(job_desc=job_description, resumes_dict=resumes)
print(results)


  Candidate                                        Resume_Text  \
0  Resume_1  Experienced Python developer skilled in Flask,...   
2  Resume_3  Skilled in machine learning, Pandas, and stati...   
1  Resume_2  Expert in JavaScript and React, with knowledge...   

   Match_Score (%)  
0            35.02  
2            35.02  
1             0.00  
