In [1]:
import nltk

In [2]:
!pip install sklearn



In [3]:
from functools import lru_cache

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

@lru_cache(maxsize=None)
def _english_stop_words():
    """Return the NLTK English stop-word list as a frozenset (loaded once, then cached)."""
    return frozenset(stopwords.words("english"))


@lru_cache(maxsize=None)
def _shared_lemmatizer():
    """Return a single WordNetLemmatizer instance that is reused across calls."""
    return WordNetLemmatizer()


def preprocess_text(text):
    """Normalise raw text before it is vectorised.

    The text is lower-cased and split with ``word_tokenize``; English stop
    words are dropped; every remaining token is passed through
    ``WordNetLemmatizer.lemmatize`` and the results are joined with single
    spaces. No other filtering is applied here (punctuation tokens emitted by
    the tokenizer, for instance, are not removed).

    Args:
        text (str): Raw document text.

    Returns:
        str: The surviving, lemmatised tokens separated by single spaces.

    Raises:
        LookupError: Raised by NLTK when the data it needs (tokenizer models,
            the stop-word corpus, WordNet) has not been installed, e.g. via
            ``nltk.download(...)``.
    """
    # The stop-word set and the lemmatizer do not depend on `text`, so they are
    # fetched from cached helpers instead of being rebuilt for every document.
    stop_words = _english_stop_words()
    lemmatize = _shared_lemmatizer().lemmatize

    return " ".join(
        lemmatize(token)
        for token in word_tokenize(text.lower())
        if token not in stop_words
    )

def calculate_similarity(job_description, resumes):
    """Score every resume against a job description using TF-IDF cosine similarity.

    Both the job description and the resumes are run through
    ``preprocess_text`` and then vectorised together by one
    ``TfidfVectorizer``.

    Args:
        job_description (str): Text of the job posting.
        resumes (Iterable[str]): Candidate resume texts.

    Returns:
        list: One similarity score per resume, in the order the resumes were
        given. An empty list is returned when there are no resumes.

    Raises:
        ValueError: Raised by ``TfidfVectorizer`` when it cannot build a
            vocabulary (e.g. no document contains a usable token).
    """
    preprocessed_job_description = preprocess_text(job_description)
    preprocessed_resumes = [preprocess_text(resume) for resume in resumes]

    if not preprocessed_resumes:
        # Nothing to rank.
        return []

    # Fit on the job description AND the resumes. Fitting on the job
    # description alone makes every IDF weight identical and drops all terms
    # that only occur in resumes, so a resume's unrelated content would never
    # lower its score.
    vectorizer = TfidfVectorizer()
    tfidf = vectorizer.fit_transform(
        [preprocessed_job_description] + preprocessed_resumes
    )

    # Slices (not integer indexing) keep both operands two-dimensional.
    job_vec = tfidf[:1]
    resume_vecs = tfidf[1:]

    # TfidfVectorizer L2-normalises each row by default, so the dot product of
    # two rows is their cosine similarity. `@` is always a matrix product,
    # whereas `*` depends on the sparse container type.
    scores = (resume_vecs @ job_vec.T).toarray().ravel()

    return list(scores)

# Demo: rank three short resumes against one job posting
job_description = "We are looking for a highly motivated candidate with strong programming and communication skills."
candidate_resumes = [
    "I have excellent programming and communication skills.",
    "I am a hardworking individual with experience in programming.",
    "My communication skills are top-notch, and I enjoy working in a team.",
]

# One score per resume, in the same order as candidate_resumes
similarity_scores = calculate_similarity(job_description, candidate_resumes)

# Position of the highest score (the earliest position wins a tie)
best_candidate_index = max(
    range(len(similarity_scores)),
    key=similarity_scores.__getitem__,
)
best_candidate = candidate_resumes[best_candidate_index]

print("Best candidate: ", best_candidate)

Best candidate:  I have excellent programming and communication skills.
