# Create a resume scanner using keyword extraction (TF-IDF weighting and cosine similarity)

In [16]:
import pandas as pd  
from sklearn.feature_extraction.text import TfidfVectorizer  
from sklearn.metrics.pairwise import cosine_similarity  

# The job posting that every resume will be compared against.
job_description = "Looking for data scientist skilled in python, machine learning, SQL, and data analysis."

# Toy corpus of candidate resumes: one numeric id per free-text resume.
data = dict(
    resume_id=[1, 2, 3],
    resume_text=[
        "Experienced data scientist with skills in python, machine learning and data analysis.",
        "Software developer with expertise in java, cloud computing, and project management.",
        "Data analyst with proficiency in SQL, python and data visualization.",
    ],
)

# Tabular view of the resumes (one row per resume), echoed for inspection.
df = pd.DataFrame.from_dict(data)
print("Resume:\n", df)

# Corpus for TF-IDF: every resume in DataFrame order, with the job
# description placed last so it is always the final row of the matrix.
documents = [*df['resume_text'].tolist(), job_description]

# Fit the vectorizer on the whole corpus (English stop words removed) and
# transform it in one step; each row of the result is one document.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(documents)

# Compare the job description (last row) against all resumes (remaining
# rows), then flatten the result into a 1-D array of scores.
similarity_scores = cosine_similarity(
    tfidf_matrix[-1],
    tfidf_matrix[:-1],
).flatten()

# Store each score next to its resume and report them.
df['similarity_score'] = similarity_scores
print('\nResume Similarity Scores:\n', df.loc[:, ['resume_id', 'similarity_score']])

# Minimum similarity score a resume needs to count as a match; tune as needed.
threshold = 0.2

# Keep only the rows whose score is at or above the threshold.
matching_resumes = df.loc[df['similarity_score'].ge(threshold)]
print(
    "\nResumes matching the job requirements:\n",
    matching_resumes.loc[:, ['resume_id', 'similarity_score']],
)

Resume:
    resume_id                                        resume_text
0          1  Experienced data scientist with skills in pyth...
1          2  Software developer with expertise in java, clo...
2          3  Data analyst with proficiency in SQL, python a...

Resume Similarity Scores:
    resume_id  similarity_score
0          1          0.662564
1          2          0.000000
2          3          0.418121

Resumes matching the job requirements:
    resume_id  similarity_score
0          1          0.662564
2          3          0.418121
