# load dependencies


In [None]:
import _resume_eval_import_helper

In [None]:
import os 
import tiktoken 
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv, find_dotenv
from uuid import uuid4
import pandas as pd
import json
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI 
from langchain_anthropic import ChatAnthropic
from langchain_ollama import ChatOllama

from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from prompts.resume_eval import RESUME_EVALUATION_PROMPT


import logging
import time
from datetime import datetime 
from typing import Dict, Any, Union

from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.auto import tqdm

In [None]:
# Load environment variables from the .env file that find_dotenv locates.
# NOTE(review): presumably this supplies the API keys for the chat models
# configured further down — confirm against the .env contents.
load_dotenv(find_dotenv("../../.env"))

# helper functions


In [None]:
# estimating the cost 
def count_tokens(input_string: str) -> int:
    """
    Count how many ``cl100k_base`` tokens ``input_string`` encodes to.

    :param input_string: Text to tokenize
    :return: Number of tokens produced by the tiktoken encoder
    """
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(input_string))

def calculate_cost(input_string: str, cost_per_million_tokens: float=5) -> float:
    """
    Estimate the price of sending ``input_string`` to a model.

    :param input_string: Text whose token count is billed
    :param cost_per_million_tokens: Price charged per one million tokens
    :return: Estimated cost, in the same currency unit as the price
    """
    millions_of_tokens = count_tokens(input_string) / 1_000_000
    return millions_of_tokens * cost_per_million_tokens

# import anthropic

# client = anthropic.Client()
# token_count = client.count_tokens(complete_template)
# print(token_count)
# generate unique id
def generate_unique_id(_):
    """
    Produce a fresh random UUID4 rendered as a string.

    The single positional argument is ignored; it exists so the function can
    be handed directly to element-wise helpers such as ``Series.apply``.

    :return: A new UUID4 string on every call
    """
    fresh_id = uuid4()
    return str(fresh_id)

# compare model outputs
def compare_model_outputs(model_results):
    """
    Compare original and recalibrated scores from different models in one table.

    Each row describes one model. For every score type there is an
    ``original_<type>`` and a ``recalibrated_<type>`` column, followed by a
    final ``suitability`` column. Values missing from a model's output are
    reported as ``"N/A"``.

    :param model_results: Dict with model names as keys and JSON outputs as
        values (either already-parsed dicts or JSON strings)
    :return: pandas DataFrame with one row per model; an empty
        ``model_results`` yields an empty DataFrame that still has all columns
    """
    score_types = [
        "technical_skills",
        "soft_skills",
        "required_experience",
        "qualifications"
    ]

    # Final column layout: Model, then original/recalibrated pairs, then suitability.
    column_order = ["Model"] + [f"{prefix}_{score_type}" for score_type in score_types for prefix in ["original", "recalibrated"]] + ["suitability"]

    comparison_data = []

    for model, result in model_results.items():
        # Ensure result is a dictionary
        if isinstance(result, str):
            result = json.loads(result)

        model_data = {"Model": model}

        # Original scores are nested under "resume_evaluation" ...
        original_scores = result.get("resume_evaluation", {}).get("original_scores", {})

        # ... while recalibrated scores are read from the top level.
        # NOTE(review): confirm this asymmetry matches the prompt's JSON schema.
        recalibrated_scores = result.get("recalibrated_scores", {})

        for score_type in score_types:
            model_data[f"original_{score_type}"] = original_scores.get(score_type, "N/A")
            model_data[f"recalibrated_{score_type}"] = recalibrated_scores.get(score_type, "N/A")

        # Add suitability
        model_data["suitability"] = result.get("assessment", {}).get("suitability", "N/A")

        comparison_data.append(model_data)

    # Passing columns= both orders the columns and keeps them present when
    # there are no rows; indexing a column-less frame would raise KeyError.
    return pd.DataFrame(comparison_data, columns=column_order)

# evaluate resume 
def evaluate_resume(job_description: str, resume: str, job_id: str, cv_id: str, output_dir: str = "./output/") -> Union[pd.DataFrame, None]:
    """
    Grade one resume against one job description with every configured model.

    Each grader (``llama3_grader``, ``gpt_grader``, ``anthropic_grader``) is
    invoked in turn. Every successful result is written to
    ``<output_dir>/<job_id>_<cv_id>_<model>.json``, and the side-by-side
    comparison of all successful models is written to
    ``<output_dir>/<job_id>_<cv_id>.csv``. A failure of one model is logged
    and printed but does not stop the remaining models.

    :param job_description: Text of the job description
    :param resume: Text of the resume
    :param job_id: Identifier of the job, used in output file names
    :param cv_id: Identifier of the resume, used in output file names
    :param output_dir: Directory that receives the JSON and CSV files; it is
        created if it does not exist
    :return: Comparison DataFrame on success; ``None`` if the output directory
        cannot be created, every model fails, or the comparison cannot be
        built or saved
    """
    # The per-model JSON files are written inside the loop below, so the
    # directory has to exist before the first model runs; otherwise every
    # model would be reported as failed because of the missing directory.
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        error_msg = f"Error in processing or saving results for job_id: {job_id}, cv_id: {cv_id}. Error: {e}."
        logging.error(error_msg)
        print(error_msg)
        return None

    model_results = {}

    for model_name, grader in [("llama3", llama3_grader), ("gpt", gpt_grader), ("anthropic", anthropic_grader)]:
        try:
            result = grader.invoke({"job_description": job_description, "resume": resume})
            model_results[model_name] = result

            # save model result
            json_file = os.path.join(output_dir, f"{job_id}_{cv_id}_{model_name}.json")
            with open(json_file, "w") as f:
                json.dump(result, f, indent=4)

            time.sleep(1)  # Add a small delay to avoid rate limiting
        except Exception as e:
            error_msg = f"Error with {model_name} for job_id: {job_id}, cv_id: {cv_id}. Error: {str(e)}"
            logging.error(error_msg)
            print(error_msg)

    if not model_results:
        error_msg = f"All models failed for job_id: {job_id}, cv_id: {cv_id}."
        logging.error(error_msg)
        print(error_msg)
        return None

    try:
        comparison_df = compare_model_outputs(model_results)

        # save comparison result
        csv_file = os.path.join(output_dir, f"{job_id}_{cv_id}.csv")
        comparison_df.to_csv(csv_file, index=False)

        logging.info(f"Successfully processed and saved results for job_id: {job_id}, cv_id: {cv_id}.")

    except Exception as e:
        # Report the exception that was actually raised here, not whatever
        # message a previous model failure left behind.
        error_msg = f"Error in processing or saving results for job_id: {job_id}, cv_id: {cv_id}. Error: {e}."
        logging.error(error_msg)
        print(error_msg)
        return None

    # Hand the table back so success is distinguishable from the None
    # returned on every failure path, as the signature promises.
    return comparison_df




# global variables


In [None]:
# Sampling settings handed to every chat model.
temperature = 0
max_tokens = 2048

# Each run writes into its own folder, named after the start time
# (minute resolution).
current_time = datetime.now().strftime("%Y%m%d_%H%M")
output_dir = f"./output_{current_time}/"
os.makedirs(output_dir, exist_ok=True)

# Send INFO-and-above log records to a file inside the run folder.
log_file = os.path.join(output_dir, "evaluation_log.txt")
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# import dataset


In [None]:
# Folders holding the supplementary (real) and main (synthetic) data.
data_path = "../repos/Resume-Screening-RAG-Pipeline/data/supplementary-data/"
synthetic_data_path = "../repos/Resume-Screening-RAG-Pipeline/data/main-data/"

# Full paths of the three CSV files used in this notebook.
job_description_path = f"{data_path}job_title_des.csv"
cleaned_resume_path = f"{data_path}cleaned_resume.csv"
synthetic_resume_path = f"{synthetic_data_path}synthetic-resumes.csv"

# Load each CSV into its own DataFrame.
job_description = pd.read_csv(job_description_path)
cleaned_resume = pd.read_csv(cleaned_resume_path)
synthetic_resumes = pd.read_csv(synthetic_resume_path)

In [None]:
# Row counts of the three loaded tables.
len(cleaned_resume), len(synthetic_resumes), len(job_description)

In [None]:
# Number of resumes in each "Category" value.
cleaned_resume["Category"].value_counts()

In [None]:
# Column labels of each of the three tables.
cleaned_resume.columns, synthetic_resumes.columns, job_description.columns

In [None]:
# Give every job posting its own random UUID (the title itself is not used
# to derive the id).
job_description["Job ID"] = [
    generate_unique_id(title) for title in job_description["Job Title"]
]

## data preparation


In [None]:
# Split the resume categories into technical and non-technical groups.
# The job pool can be sampled afterwards to reduce the evaluation cost.
tech_jobs = [
    'Data Science',
    'Java Developer',
    'Business Analyst',
    'SAP Developer',
    'Python Developer',
    'DevOps Engineer',
    'Network Security Engineer',
    'Database',
    'Hadoop',
    'ETL Developer',
    'DotNet Developer',
    'Blockchain',
]

non_tech_jobs = [
    'HR',
    'Advocate',
    'Arts',
    'Web Designing',
    'Mechanical Engineer',
    'Sales',
    'Health and fitness',
    'Civil Engineer',
    'Automation Testing',
    'Electrical Engineering',
    'Operations Manager',
    'PMO',
]

# Resumes whose category belongs to each group
# (author's notes on the original lines: 88 and 67).
resume_category = cleaned_resume["Category"]
tech_pool = cleaned_resume[resume_category.isin(tech_jobs)]
non_tech_pool = cleaned_resume[resume_category.isin(non_tech_jobs)]

In [None]:
# Rebuild the two category pools (same filters as the previous cell).
tech_pool = cleaned_resume[cleaned_resume["Category"].isin(tech_jobs)]
non_tech_pool = cleaned_resume[cleaned_resume["Category"].isin(non_tech_jobs)]


def _sample_half(group):
    """Return a seeded 50% sample of one category's rows."""
    return group.sample(frac=0.5, random_state=42)


# Keep half of every technical category.
reduced_tech_pool = (
    tech_pool.groupby("Category")
    .apply(_sample_half)
    .reset_index(drop=True)
)

# Keep three resumes drawn from the HR / Advocate / Arts categories.
in_selected_non_tech = non_tech_pool["Category"].isin(["HR", "Advocate", "Arts"])
reduced_non_tech_pool = non_tech_pool[in_selected_non_tech].sample(3, random_state=42)

# Merge both samples and shuffle the rows with a fixed seed.
talent_pool = (
    pd.concat([reduced_tech_pool, reduced_non_tech_pool])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [None]:
# Pick one posting per distinct job title, using a fixed seed so the choice
# is reproducible. The one-row samples are collected first and concatenated
# once: growing a DataFrame with pd.concat inside the loop re-copies every
# accumulated row on each iteration.
sampled_jobs = [
    job_description[job_description["Job Title"] == title].sample(1, random_state=42)
    for title in job_description["Job Title"].unique()
]

# pd.concat raises on an empty list, so fall back to an empty frame when
# there are no titles at all; ignore_index gives a clean 0..n-1 index.
jobs = pd.concat(sampled_jobs, ignore_index=True) if sampled_jobs else pd.DataFrame()

## saving dataset


In [None]:
# Snapshot the datasets used by this run into the run's output folder.
for frame, file_name in (
    (job_description, "job_description.csv"),
    (talent_pool, "filtered_talent_pool.csv"),
    (jobs, "filtered_job_description.csv"),
):
    frame.to_csv(os.path.join(output_dir, file_name), index=False)

# evaluate the resume


sanity check


In [None]:
# jd = job_description[job_description["Job Title"]=="Machine Learning"]["Job Description"].values[0]
# jd_id = job_description[job_description["Job Title"]=="Machine Learning"]["Job ID"].values[0]
# cv = cleaned_resume.iloc[77]["Resume"]
# cv_id = cleaned_resume.iloc[77]["ID"]

In [None]:
# temperature = 0
# max_tokens = 2048

# # groq_llm = ChatGroq(model="llama3-70b-8192", temperature=temperature, max_tokens=max_tokens)
# gpt_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens)
# anthropic_llm = ChatAnthropic(model="claude-3-5-sonnet-20240620", temperature=temperature, max_tokens=max_tokens)
# llama3_llm = ChatOllama(model="llama3", temperature=temperature, max_tokens=max_tokens)

# resume_eval_prompt = PromptTemplate(
#     input_variables=["job_description", "resume"],
#     template=RESUME_EVALUATION_PROMPT
#     )

# # llama3
# groq_grader = resume_eval_prompt | groq_llm | JsonOutputParser()
# groq_result = groq_grader.invoke({"job_description": jd, "resume": cv})

# # gpt
# gpt_grader = resume_eval_prompt | gpt_llm | JsonOutputParser()
# gpt_result = gpt_grader.invoke({"job_description": jd, "resume": cv}) 

# # claude
# anthropic_grader = resume_eval_prompt | anthropic_llm | JsonOutputParser()
# anthropic_result = anthropic_grader.invoke({"job_description": jd, "resume": cv})

# llama3_grader = resume_eval_prompt | llama3_llm | JsonOutputParser()
# llama3_result = llama3_grader.invoke({"job_description": jd, "resume": cv})

# model_results = {
#     "groq": groq_result,
#     "anthropic": anthropic_result,
#     "gpt": gpt_result
# }

# comparison_df = compare_model_outputs(model_results)

# # Display the pretty table
# pd.set_option('display.max_columns', None)
# pd.set_option('display.width', None)
# print(comparison_df.to_string(index=False))

# # If you want to save it to a CSV file:
# # comparison_df.to_csv("model_comparison.csv", index=False)

In [None]:
# comparison_df

## set up the models


In [None]:
# Chat models under comparison; all share the same temperature and
# max_tokens settings. The Groq-hosted model is currently disabled:
# groq_llm = ChatGroq(model="llama3-70b-8192", temperature=temperature, max_tokens=max_tokens)
gpt_llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=temperature,
    max_tokens=max_tokens,
)
anthropic_llm = ChatAnthropic(
    model="claude-3-5-sonnet-20240620",
    temperature=temperature,
    max_tokens=max_tokens,
)
# NOTE(review): confirm that ChatOllama honours a `max_tokens` argument.
llama3_llm = ChatOllama(
    model="llama3",
    temperature=temperature,
    max_tokens=max_tokens,
)

# Prompt that takes the job description and the resume text.
resume_eval_prompt = PromptTemplate(
    template=RESUME_EVALUATION_PROMPT,
    input_variables=["job_description", "resume"],
)


def _make_grader(llm):
    """Chain the evaluation prompt, the given model, and a fresh JSON parser."""
    return resume_eval_prompt | llm | JsonOutputParser()


# One grader chain per enabled model.
gpt_grader = _make_grader(gpt_llm)
# groq_grader = _make_grader(groq_llm)
anthropic_grader = _make_grader(anthropic_llm)
llama3_grader = _make_grader(llama3_llm)

In [None]:
def process_job_cv_pair(jod_data, cv_data):
    """
    Evaluate a single job/resume pair and store the results in the run folder.

    :param jod_data: Two-item sequence ``(job_id, job_description_text)``.
        The name is a typo for "job_data"; it is kept so existing keyword
        callers keep working.
    :param cv_data: Two-item sequence ``(cv_id, resume_text)``
    :return: Whatever ``evaluate_resume`` returns for this pair
    """
    # Named job_text (not job_description) to avoid shadowing the
    # module-level job_description DataFrame.
    job_id, job_text = jod_data
    cv_id, cv = cv_data
    # Pass the run's output_dir explicitly so the results land next to the
    # log file and dataset snapshots instead of evaluate_resume's
    # "./output/" default.
    return evaluate_resume(job_text, cv, job_id, cv_id, output_dir=output_dir)

def process_all_pairs():
    """
    Evaluate every combination of sampled job and sampled resume.

    Pairs are submitted to a pool of four worker threads and a progress bar
    advances as each one finishes. An exception raised by a pair is printed
    and does not interrupt the remaining pairs. Nothing is returned.
    """
    job_rows = jobs[["Job ID", "Job Description"]].values
    cv_rows = talent_pool[["ID", "Resume"]].values
    pair_count = len(job_rows) * len(cv_rows)

    with ThreadPoolExecutor(max_workers=4) as pool:
        # Queue one task per (job, resume) combination.
        pending = [
            pool.submit(process_job_cv_pair, job_row, cv_row)
            for job_row in job_rows
            for cv_row in cv_rows
        ]

        progress = tqdm(as_completed(pending), total=pair_count, desc="Processing job-cv pairs")
        for finished in progress:
            try:
                # result() re-raises anything the worker raised.
                finished.result()
            except Exception as e:
                print(f"Error: {e}")


In [None]:
# Run the evaluation over every job/resume pair.
process_all_pairs()