# Intelligent Resume Screening and Job Matching Analytics

This notebook provides analysis and insights into the resume screening system.

In [1]:
import os
# Display the kernel's current working directory; the relative "../data/..." paths
# used by later cells are resolved against it.
os.getcwd()

'c:\\Users\\dell\\Desktop\\resume_project\\Intelligent-resume-checker-and-job-matching-analytics-system\\notebooks'

In [1]:
import pandas as pd

# Load the raw resume and job-description tables. Both paths are relative to the
# kernel's working directory.
resume_csv = "../data/raw/resume_dataset.csv"
job_csv = "../data/raw/job_description_dataset.csv"

resumes, jobs = (pd.read_csv(csv_path) for csv_path in (resume_csv, job_csv))

# Preview the first rows of the resume table.
resumes.head()

Unnamed: 0,resume_id,candidate_name,email,phone,location,education,experience_years,current_role,target_role,skills,certifications,expected_salary_lpa,resume_summary
0,1,Jerry Thomas,scannon@example.net,5815367803,Bangalore,M.Tech,9,Data Analyst,AI Engineer,"AWS, Python, Kubernetes, React, C++, Git, GCP,...",,27,9 years experienced professional skilled in AW...
1,2,Mr. Zachary Miller DVM,spearsmiranda@example.net,(787)477-4212,Delhi,MBA,5,Backend Developer,Cloud Engineer,"Linux, Communication, Leadership, Power BI, Ja...",,3,5 years experienced professional skilled in Li...
2,3,Leah Johnson,brandon52@example.net,535.685.0007x731,Delhi,M.Tech,5,Full Stack Developer,Full Stack Developer,"CSS, Power BI, MongoDB, Teamwork, C, GCP, Seab...",AWS Certified,13,5 years experienced professional skilled in CS...
3,4,Evan Flores,merrittwilliam@example.org,(308)377-9563x2228,Delhi,M.Sc,1,Frontend Developer,Frontend Developer,"Kubernetes, Machine Learning, GitHub, Python, ...",Google Data Engineer,30,1 years experienced professional skilled in Ku...
4,5,Tammy Freeman,james27@example.org,2154384207,Bangalore,M.Sc,7,Full Stack Developer,Frontend Developer,"Linux, MySQL, Communication, PostgreSQL, R, Gi...",AWS Certified,28,7 years experienced professional skilled in Li...


In [4]:
import re

def clean_text(text):
    """Lowercase ``text`` and delete every character that is not an ASCII letter or a space.

    Digits, punctuation and whitespace other than the plain space character are
    removed outright (not replaced by a space), so words separated only by such
    characters end up joined together.

    Parameters
    ----------
    text : str or None
        Raw text. Any non-string value (for example ``None`` or the float ``NaN``
        pandas uses for a missing cell) is treated as empty text.

    Returns
    -------
    str
        The cleaned, lowercase text, or ``""`` for non-string input.
    """
    # Missing cells arrive here as NaN/None when this function is used with
    # Series.apply; they have no .lower(), so map them to an empty document
    # instead of raising AttributeError.
    if not isinstance(text, str):
        return ""
    lowered = text.lower()
    return re.sub(r'[^a-zA-Z ]', '', lowered)

def _text_or_empty(column):
    """Return ``column`` converted to strings, with missing cells as "" rather than "nan"."""
    return column.astype(str).where(column.notna(), "")


# Build one free-text document per resume from the skills list, the years of
# experience and the summary. Missing cells become "" so they neither break the
# concatenation nor show up in the text as the literal token "nan".
# Note: clean_text() deletes digits, so the experience_years value itself does not
# survive into the 'clean_text' column.
resumes['combined'] = (
    _text_or_empty(resumes['skills']) + " " +
    _text_or_empty(resumes['experience_years']) + " " +
    _text_or_empty(resumes['resume_summary'])
)
resumes['clean_text'] = resumes['combined'].apply(clean_text)

# Same treatment for the job postings: without it, a missing value in either column
# makes the concatenated cell NaN, which is not a string when it reaches clean_text().
jobs['clean_text'] = (
    _text_or_empty(jobs['required_skills']) + " " +
    _text_or_empty(jobs['job_description'])
).apply(clean_text)

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Learn the TF-IDF vocabulary and weights from the resume texts only, dropping
# English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
vectorizer.fit(resumes['clean_text'])

# Project both corpora into the feature space fitted on the resumes.
resume_vec = vectorizer.transform(resumes['clean_text'])
job_vec = vectorizer.transform(jobs['clean_text'])

In [6]:
!pip install scikit-learn

'pip' is not recognized as an internal or external command,
operable program or batch file.


In [8]:
from sklearn.metrics.pairwise import cosine_similarity

# Compare the first job posting (row 0 of job_vec) with every resume vector.
first_job_vec = job_vec[0]
# cosine_similarity returns one row per job passed in; keep that single row.
scores = cosine_similarity(first_job_vec, resume_vec)[0]

# Store the similarity, scaled by 100, next to each resume.
resumes['match_score'] = scores * 100

# Five highest-scoring candidates.
ranked = resumes[['candidate_name', 'match_score']].sort_values(by='match_score', ascending=False)
ranked.head(5)

Unnamed: 0,candidate_name,match_score
1594,Michelle King,73.275992
8530,Christopher Nolan,73.266198
2993,Alicia Sullivan,73.235941
9886,Kimberly Newton,70.874127
5660,Kelli Smith,69.454058


In [None]:
import os

# Make sure the folder for processed data exists; exist_ok avoids an error when it
# is already there.
processed_dir = "../data/processed"
os.makedirs(processed_dir, exist_ok=True)

OSError: Cannot save file into a non-existent directory: 'data\processed'

In [None]:
# Analyze match predictions
# NOTE(review): the working directory printed at the top of this notebook is the
# `notebooks/` folder; this import presumably needs the project root on sys.path — confirm.
from src.ml.predict import predict_match

# Use the frames loaded at the top of the notebook (`resumes`, `jobs`) and the resume
# columns they actually contain (resume_id, candidate_name, resume_summary).
# NOTE(review): `resume_summary` is passed as the resume text — confirm this is the
# input predict_match expects.
# NOTE(review): the columns of the jobs table are not displayed anywhere in this
# notebook; 'id' and 'title' are assumed to exist — confirm against the CSV.

# With no job postings there is nothing to pair with (and `i % 0` would raise).
n_samples = min(10, len(resumes)) if len(jobs) else 0

# Sample predictions
sample_matches = []
for i in range(n_samples):
    resume = resumes.iloc[i]
    # Cycle through the jobs when there are fewer jobs than sampled resumes.
    job = jobs.iloc[i % len(jobs)]

    score = predict_match(resume['resume_summary'], job['job_description'])
    sample_matches.append({
        'resume_id': resume['resume_id'],
        'job_id': job['id'],
        'match_score': score,
        'resume_title': resume['candidate_name'],
        'job_title': job['title']
    })

# Passing the column list keeps the frame well-formed (and sortable) even with zero rows.
matches_df = pd.DataFrame(
    sample_matches,
    columns=['resume_id', 'job_id', 'match_score', 'resume_title', 'job_title'],
)
print("Sample Match Predictions:")
print(matches_df.sort_values('match_score', ascending=False))

In [None]:
# Visualize match score distribution
# NOTE(review): `plt` is not imported in any cell visible in this notebook — presumably
# `import matplotlib.pyplot as plt` has to run before this cell; confirm.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(matches_df['match_score'], bins=20, edgecolor='black')
ax.set_title('Distribution of Match Scores')
ax.set_xlabel('Match Score')
ax.set_ylabel('Frequency')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Correlation analysis
# NOTE(review): `resumes_df`, `jobs_df`, `plt` and `sns` are not defined or imported in
# any cell visible in this notebook — confirm they exist before this cell runs.

# Ordinal rank for each experience label; labels not listed here fall back to rank 1.
level_rank = {'Entry Level': 1, 'Mid Level': 2, 'Senior Level': 3, 'Lead': 4, 'Principal': 5}

correlation_data = []
for _, match in matches_df.iterrows():
    # First row whose id equals the id recorded for this match.
    resume_row = resumes_df.loc[resumes_df['id'] == match['resume_id']].iloc[0]
    job_row = jobs_df.loc[jobs_df['id'] == match['job_id']].iloc[0]

    resume_rank = level_rank.get(resume_row['experience_level'], 1)
    job_rank = level_rank.get(job_row['experience_level'], 1)

    record = {
        'match_score': match['match_score'],
        # 1 when the resume's rank is at least the job's rank, otherwise 0.
        'experience_match': int(resume_rank >= job_rank),
        'salary': job_row['salary'],
    }
    correlation_data.append(record)

# Pairwise correlations between match score, experience flag and salary.
corr_df = pd.DataFrame(correlation_data)
correlation_matrix = corr_df.corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Matrix')
plt.show()

## Summary

This notebook provides a comprehensive analysis of the resume screening system, including:
- Dataset exploration and statistics
- Skills analysis
- Model performance evaluation
- Match prediction analysis
- Correlation insights

Key findings:
- **TODO:** Add key insights here based on the analysis.