# In [None]:
import PyPDF2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Path of the PDF to read; the file is opened in binary mode.

    Returns:
        One string holding the extracted text of all pages, in page order,
        with no separator inserted between pages.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Keep extraction inside the `with` block so the file is still open
        # while the pages are being read.
        # `or ''` keeps a page that yields no text from breaking the join.
        # NOTE(review): confirm whether the installed PyPDF2 version can
        # actually return None here; the guard is harmless either way.
        return ''.join(page.extract_text() or '' for page in reader.pages)

def preprocess_text(text):
    """Normalise text for comparison.

    The input is lowercased, then every character that is neither a word
    character (letter, digit, underscore) nor whitespace is deleted.

    Args:
        text: The string to normalise.

    Returns:
        The lowercased string with punctuation and other symbols removed.
    """
    lowered = text.lower()
    # Symbols are deleted outright rather than replaced by a space, so
    # "problem-solving" becomes the single token "problemsolving".
    return re.sub(r'[^\w\s]', '', lowered)

def calculate_similarity(text1, text2):
    """Return the TF-IDF cosine similarity between two texts.

    Both texts are vectorised together with a fresh ``TfidfVectorizer`` and
    the cosine similarity of the two resulting rows is returned.

    Args:
        text1: First document as a string.
        text2: Second document as a string.

    Returns:
        The similarity as a plain Python ``float``. ``0.0`` is returned when
        no vocabulary can be built from the two texts.
    """
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([text1, text2])
    except ValueError:
        # scikit-learn raises ValueError when the vocabulary is empty (for
        # example both texts are blank or contain no usable tokens). Two
        # documents with no terms share nothing, so report zero similarity
        # instead of crashing the caller.
        return 0.0
    # Rows 0 and 1 are text1 and text2; the result is a 1x1 matrix.
    # float() turns the NumPy scalar into a built-in float.
    return float(cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0])

def is_resume_good_fit(resume_path, job_description, threshold=0.3):
    """Decide whether a PDF resume matches a job description.

    The resume text is extracted from the PDF, both texts are normalised with
    ``preprocess_text``, and their similarity from ``calculate_similarity`` is
    compared against ``threshold``.

    Args:
        resume_path: Path to the resume PDF.
        job_description: Job description as a string.
        threshold: Minimum similarity score (inclusive) for the resume to
            count as a good fit. Defaults to 0.3.

    Returns:
        A ``(is_good_fit, similarity_score)`` tuple of a built-in ``bool``
        and a built-in ``float``.
    """
    resume_text = extract_text_from_pdf(resume_path)

    similarity_score = calculate_similarity(
        preprocess_text(resume_text),
        preprocess_text(job_description),
    )

    # Comparing a NumPy scalar yields numpy.bool_, which is not a real bool
    # (it fails `is True` checks and JSON serialisation). Convert both results
    # to built-in types before handing them to callers.
    is_good_fit = bool(similarity_score >= threshold)

    return is_good_fit, float(similarity_score)

# Example usage: score one sample resume against a sample job posting and
# print the verdict together with the raw similarity score.
job_description = """
We are looking for a Python developer with experience in web development,
data analysis, and machine learning. The ideal candidate should have strong
problem-solving skills and be familiar with frameworks like Django or Flask.
"""
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
resume_path = '/Users/sakshigupta/Desktop/FYP/AI-RecruitmentTool/back-end/resumes/sample.pdf'

# The default threshold of is_resume_good_fit is used here.
is_fit, score = is_resume_good_fit(
    resume_path=resume_path,
    job_description=job_description,
)
print(f"Is the resume a good fit? {'Yes' if is_fit else 'No'}")
print(f"Similarity score: {score:.2f}")