In [1]:
import os
import spacy
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np

# Make sure the NLTK 'punkt' tokenizer models are available.
# Look for an installed copy first and only call the downloader when the
# lookup fails, so re-running the notebook does not need network access
# once the data is present.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

print("✅ All libraries loaded.")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\delld\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


✅ All libraries loaded.


In [2]:
# Placeholder resumes, one free-text string per candidate.
# Swap these for real resume text later; list order defines "Resume 1..N".
resumes = [
    (
        "Experienced data scientist with a strong background in machine learning, "
        "Python, and data analysis. "
        "Worked on predictive modeling and customer segmentation projects."
    ),
    (
        "AI/ML engineer skilled in Python, TensorFlow, and computer vision. "
        "Developed deep learning models for image classification and object detection."
    ),
    (
        "Entry-level data analyst familiar with SQL, Excel, Power BI, and basic statistics. "
        "Completed internship in sales data visualization and reporting."
    ),
]

# Placeholder job postings, one free-text string per opening.
# Swap these for real job posts later; list order defines "Job 1..N".
job_descriptions = [
    (
        "Looking for a data scientist with experience in Python, machine learning, "
        "and predictive analytics to work on customer churn models."
    ),
    (
        "Hiring an AI engineer skilled in deep learning, computer vision, "
        "and TensorFlow for image processing tasks."
    ),
    (
        "Need a junior data analyst with skills in Excel, Power BI, "
        "and data visualization to support our sales team."
    ),
]

print("✅ Sample resumes and job descriptions loaded.")


✅ Sample resumes and job descriptions loaded.


In [3]:
# Build a single corpus (resumes first, then job descriptions) so that one
# TF-IDF vectorizer is fitted over both kinds of text and they share a
# common vocabulary.
all_texts = resumes + job_descriptions
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(all_texts)

# Rows follow corpus order: the first len(resumes) rows are the resumes,
# everything after that is the job descriptions.
resume_count = len(resumes)
resume_vectors, job_vectors = tfidf_matrix[:resume_count], tfidf_matrix[resume_count:]

# Pairwise cosine similarity: one row per resume, one column per job.
similarity_matrix = cosine_similarity(resume_vectors, job_vectors)

# Wrap the scores in a labelled DataFrame (1-based labels) for display.
resume_labels = [f"Resume {number}" for number in range(1, len(resumes) + 1)]
job_labels = [f"Job {number}" for number in range(1, len(job_descriptions) + 1)]
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=resume_labels,
    columns=job_labels,
)

print("✅ Similarity Matrix:")
similarity_df


✅ Similarity Matrix:


Unnamed: 0,Job 1,Job 2,Job 3
Resume 1,0.434128,0.07493,0.16961
Resume 2,0.202421,0.571704,0.045118
Resume 3,0.119118,0.045698,0.44824


In [5]:
# Find the best job match for each resume.
# Working along axis=1 (across the job columns of each resume row):
#   - idxmax gives the column label of the highest similarity score,
#   - max gives that highest score itself.
best_matches = similarity_df.idxmax(axis=1)
best_scores = similarity_df.max(axis=1)

# Read each resume's label from the index instead of rebuilding it from the
# row position, so the printout stays correct if similarity_df is relabelled
# (e.g. with real file names). Both Series share similarity_df's index, so
# zipping them keeps label, match and score aligned row by row.
for resume_label, job_label, score in zip(best_matches.index, best_matches, best_scores):
    print(f"🧾 {resume_label} best matches with {job_label} (Score: {score:.2f})")



🧾 Resume 1 best matches with Job 1 (Score: 0.43)
🧾 Resume 2 best matches with Job 2 (Score: 0.57)
🧾 Resume 3 best matches with Job 3 (Score: 0.45)


In [6]:
# Collect the best match per resume into a flat results table.
# The 'Resume' column is taken from best_matches' index rather than being
# regenerated from row positions, so the exported labels always agree with
# the labels used in similarity_df. Plain lists/arrays are passed (not
# Series) so the table gets a fresh 0..N-1 index with no index alignment.
results_df = pd.DataFrame({
    'Resume': best_matches.index.to_list(),
    'Best Matching Job': best_matches.to_numpy(),
    'Similarity Score': best_scores.to_numpy(),
})

# Save to CSV in the current working directory; index=False omits the
# 0..N-1 row index from the file.
results_df.to_csv('resume_job_matching_results.csv', index=False)
print("✅ Results saved to 'resume_job_matching_results.csv'")


✅ Results saved to 'resume_job_matching_results.csv'
