In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
import os

# Step 1: Load data
# Raw string literal: the Windows path is full of backslashes ("\O", "\D",
# "\g", ...) that a normal string literal parses as escape sequences. They
# are invalid escapes, which newer Python versions warn about and will
# eventually reject. The resulting path text is unchanged.
data_path = r"B:\OneDrive - Amity University\Desktop\Intern\Infosys\Assignment-6\glove_word2vec_embss_bert_embedss_EDA_dataset.xlsx"  # Update this to your file path
data = pd.read_excel(data_path)



# Step 3: TF-IDF Vectorization
# Copy the two text columns under new names, replacing missing values with
# empty strings so the vectorizer only ever receives str input.
data['Job Description'] = data['job_description'].fillna('')
data['Resume'] = data['resume'].fillna('')

# The vocabulary and IDF weights are learned from the job descriptions only;
# the resumes are then projected into that same feature space.
vectorizer = TfidfVectorizer()
job_desc_vectors = vectorizer.fit_transform(data['Job Description'])
resume_vectors = vectorizer.transform(data['Resume'])

# Save the vectorizer
with open('tfidf_vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

# Step 4: Calculate similarity scores
# Each resume is compared only with the job description on the same row.
similarity_scores = []
for row_idx in range(len(data)):
    pair_matrix = cosine_similarity(resume_vectors[row_idx], job_desc_vectors[row_idx])
    # Two single-row inputs give a 1x1 result; pull out the scalar.
    similarity_scores.append(pair_matrix[0][0])
data['resume_job_similarity'] = similarity_scores

# Step 5: Define thresholds based on roles
# Keys MUST be lowercase: decide() lowercases the role before the lookup, so
# a mixed-case key (e.g. 'Data Engineer') would never match and the role
# would silently fall back to the 0.0 default.
role_thresholds = {
    'software engineer': 0.4,
    'data scientist': 0.2,
    'ui engineer': 0.0,  # No threshold for UI Engineer
    'data engineer': 0.3,
    'product manager': 0.35,
}

# Apply thresholds to make decisions
def decide(row):
    """Return 'Select' or 'Reject' for one candidate row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            'role' and 'resume_job_similarity'.

    Returns:
        'Select' when the similarity is greater than or equal to the
        threshold configured for the row's role, otherwise 'Reject'.
        Roles without a configured threshold use 0.0.
    """
    role = row['role']
    # A missing role (NaN/None) is not a string; treat it like an unknown
    # role instead of crashing on .lower().
    role_key = role.strip().lower() if isinstance(role, str) else ''
    threshold = role_thresholds.get(role_key, 0.0)
    return 'Select' if row['resume_job_similarity'] >= threshold else 'Reject'

# Call decide() once per row (axis=1) and store the returned
# 'Select'/'Reject' label in a new 'decision' column.
data['decision'] = data.apply(decide, axis=1)

# Step 6: Categorize similarity into bins
bins = [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0]
labels = ['0-0.1', '0.1-0.2', '0.2-0.4', '0.4-0.6', '0.6-0.8', '0.8-1.0']
# Floating-point rounding can yield a cosine similarity a hair above 1.0
# (e.g. 1.0000000000000002 for identical texts). pd.cut maps out-of-range
# values to NaN, which would drop the row from the grouped counts, so the
# scores are clamped to the bin range first. The stored
# 'resume_job_similarity' column itself is left untouched.
# include_lowest=True makes the first bin closed on the left so that an
# exact 0 lands in '0-0.1'.
data['similarity_bin'] = pd.cut(
    data['resume_job_similarity'].clip(lower=0.0, upper=1.0),
    bins=bins,
    labels=labels,
    include_lowest=True,
)

# Step 7: Group and analyze results
# 'similarity_bin' is categorical. observed=False is passed explicitly so
# that empty bins keep appearing (with zero counts) in the table; relying on
# the implicit default raises a pandas FutureWarning because that default is
# changing. unstack() pivots the innermost key ('role') into columns.
results = (
    data.groupby(['similarity_bin', 'decision', 'role'], observed=False)
    .size()
    .unstack(fill_value=0)
)
print(results)

# Save the results to an Excel file
# Note: this writes the full row-level `data` frame (including the new
# similarity, decision and bin columns), not the aggregated `results` table.
output_file = "resume_screening_results.xlsx"
data.to_excel(output_file, index=False)

  results = data.groupby(['similarity_bin', 'decision', 'role']).size().unstack(fill_value=0)


role                     AI Engineer  Business Analyst  Cloud Architect  \
similarity_bin decision                                                   
0-0.1          Reject              0                 0                0   
               Select              0                 0                0   
0.1-0.2        Reject              0                 0                0   
               Select              0                 0                0   
0.2-0.4        Reject              0                 0                0   
               Select              0                 0                0   
0.4-0.6        Reject              0                 0                0   
               Select              3                 2                2   
0.6-0.8        Reject              0                 0                0   
               Select             10                 9                6   
0.8-1.0        Reject              0                 0                0   
               Select    