In [4]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from openai import OpenAI
from datetime import datetime
import os

# Download required NLTK data (the call reports "already up-to-date" and does
# nothing further when the corpus is already installed)
nltk.download('stopwords')
nltk.download('wordnet')

# Initialize OpenAI client.
# Prefer the OPENAI_API_KEY environment variable so the secret does not have to
# live in the source; fall back to the inline placeholder, which must be
# replaced with a real key before any API call can succeed.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "<open-ai key>"))

# Initialize lemmatizer and stopwords
lemmatizer = WordNetLemmatizer()
# Stored as a set: preprocess() tests every word for membership, and a set
# lookup is constant-time where the original list was a linear scan.
stop_words = set(stopwords.words('english'))

# Define base path (directory holding the input CSVs and receiving the output)
BASE_PATH = '/Users/raghav/Desktop/Apexon/JD Optimization'

def preprocess(text):
    """Normalize text for matching: lowercase, drop stopwords, lemmatize.

    Args:
        text: Value to normalize. Non-string values are converted with
            ``str``. ``None`` and float NaN are treated as empty text.

    Returns:
        The remaining lemmatized words joined by single spaces; an empty
        string when nothing remains.
    """
    # A missing value would otherwise be stringified into the literal token
    # "none" / "nan" and end up in the bag of words like a real term.
    # (`text != text` is true only for NaN.)
    if text is None or (isinstance(text, float) and text != text):
        return ''

    words = str(text).lower().split()
    return ' '.join(
        lemmatizer.lemmatize(word) for word in words if word not in stop_words
    )

def compute_similarity(job_desc, resume_df):
    """Compute similarity scores between one job description and all resumes.

    Args:
        job_desc: Row/mapping providing 'Skills' and 'Tools' entries.
        resume_df: DataFrame with 'Skills', 'Tools' and 'Certifications'
            columns. A 'Processed' column holding the combined preprocessed
            text is added (or overwritten) in place, as before.

    Returns:
        1-D float array with one cosine-similarity score per row of
        ``resume_df``, in row order. The array is empty when ``resume_df`` has
        no rows and all zeros when the job description yields no usable terms.
    """
    def _clean(value):
        # Missing cells (None/NaN) would otherwise be stringified to "nan" and
        # make a job and a resume that both lack a field look similar.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ''
        return preprocess(str(value))

    # Nothing to score; also avoids vectorizing an empty collection.
    if len(resume_df) == 0:
        return np.zeros(0)

    # Combine and preprocess job description fields
    job_desc_processed = _clean(job_desc['Skills']) + ' ' + _clean(job_desc['Tools'])

    # Preprocess resume data
    resume_df['Processed'] = (resume_df['Skills'].apply(_clean) + ' ' +
                              resume_df['Tools'].apply(_clean) + ' ' +
                              resume_df['Certifications'].apply(_clean))

    # TF-IDF Vectorization. The vocabulary is learned from the job description
    # only, so resume terms that do not occur in it are ignored.
    vectorizer = TfidfVectorizer()
    try:
        job_desc_matrix = vectorizer.fit_transform([job_desc_processed])
    except ValueError:
        # Raised when the job text produces an empty vocabulary (blank or
        # stopword-only); no resume can match, so every score is zero.
        return np.zeros(len(resume_df))
    resume_matrix = vectorizer.transform(resume_df['Processed'])

    # Calculate cosine similarity (row 0 = the single job description)
    return cosine_similarity(job_desc_matrix, resume_matrix)[0]

def generate_openai_insights(job_desc, resume, *, model="gpt-3.5-turbo",
                             max_tokens=150, temperature=0.7):
    """Ask the OpenAI chat API why a resume fits a job description.

    Args:
        job_desc: Row/mapping providing 'Skills' and 'Tools'.
        resume: Row/mapping providing 'Skills', 'Tools' and 'Certifications'.
        model: Chat model name passed to the API.
        max_tokens: Upper bound on the length of the generated answer.
        temperature: Sampling temperature passed to the API.

    Returns:
        The model's answer with surrounding whitespace removed (an empty
        string if the API returns no text), or a string starting with
        "Error generating insights:" when the request fails. Failures are
        reported in the return value rather than raised, so one failed call
        does not abort a batch run.
    """
    prompt = f"""
    You are an expert in talent evaluation. Analyze the following job description and resume:
    - Job Description:
        Skills: {job_desc['Skills']}
        Tools: {job_desc['Tools']}
    - Resume:
        Skills: {resume['Skills']}
        Tools: {resume['Tools']}
        Certifications: {resume['Certifications']}
    
    Provide:
    1. Why the candidate is a good fit for the job.
    2. What are the strengths of this candidate's profile?
    """

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an expert in HR and talent evaluation."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=max_tokens,
            temperature=temperature
        )
        # `content` may be None; treat that as an empty answer instead of
        # relying on the AttributeError from None.strip() being caught below.
        content = response.choices[0].message.content
        return (content or "").strip()
    except Exception as e:
        # Deliberate best-effort: surface the failure in the results table.
        return f"Error generating insights: {e}"

def rank_resumes_and_save(top_n=3):
    """Rank resumes against every job description and save the results to CSV.

    Reads 'job_descriptions_analysis_output.csv' and
    'resumes_analysis_output.csv' from BASE_PATH, scores every resume against
    each job with compute_similarity(), asks generate_openai_insights() for a
    short assessment of each of the best matches, and writes one row per
    (job, resume) pair to a timestamped CSV in BASE_PATH.

    Args:
        top_n: Number of best-matching resumes to keep per job description.

    Returns:
        DataFrame of the rows that were written to the CSV.

    Raises:
        ValueError: If ``top_n`` is smaller than 1.
    """
    # A non-positive count would turn the slice below into `[-0:]`, which
    # selects every resume instead of none.
    if top_n < 1:
        raise ValueError("top_n must be at least 1")

    # Define input and output file paths. Join bare file names: an absolute
    # second argument makes os.path.join discard BASE_PATH entirely.
    job_descriptions_file = os.path.join(BASE_PATH, 'job_descriptions_analysis_output.csv')
    resumes_file = os.path.join(BASE_PATH, 'resumes_analysis_output.csv')

    # Generate output filename with timestamp so earlier runs are not overwritten
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = os.path.join(BASE_PATH, f'resume_ranking_results_{timestamp}.csv')

    # Load data
    job_df = pd.read_csv(job_descriptions_file)
    resume_df = pd.read_csv(resumes_file)

    # Create empty list to store results
    all_results = []

    # Process each job description
    for _, job_desc in job_df.iterrows():
        # Compute similarity scores (one per resume row)
        similarity_scores = compute_similarity(job_desc, resume_df)

        # Indices of the top_n highest scores, best first
        top_indices = np.argsort(similarity_scores)[-top_n:][::-1]

        # Process each top resume
        for rank, idx in enumerate(top_indices, 1):
            resume = resume_df.iloc[idx]
            score = similarity_scores[idx]

            # Generate insights
            insights = generate_openai_insights(job_desc, resume)

            # Store results
            all_results.append({
                'Job Description': job_desc['File Name'],
                'Rank': rank,
                'Resume ID': resume['File Name'],
                'Match Score': score,
                'Skills Match': resume['Skills'],
                'Tools Match': resume['Tools'],
                'Certifications': resume['Certifications'],
                'AI Insights': insights
            })

    # Create results DataFrame
    results_df = pd.DataFrame(all_results)

    # Save results to CSV
    results_df.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

    return results_df

# Entry point: run the full ranking pipeline and bind the returned DataFrame
# to `results`.
if __name__ == "__main__":
    results = rank_resumes_and_save()


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/raghav/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/raghav/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Results saved to /Users/raghav/Desktop/Apexon/JD Optimization/resume_ranking_results_20250127_155107.csv
