## 1. Setup Environment

In [1]:
%pip install voyageai

Collecting voyageai
  Downloading voyageai-0.3.5-py3-none-any.whl.metadata (2.9 kB)
Collecting aiolimiter (from voyageai)
  Downloading aiolimiter-1.2.1-py3-none-any.whl.metadata (4.5 kB)
Downloading voyageai-0.3.5-py3-none-any.whl (28 kB)
Downloading aiolimiter-1.2.1-py3-none-any.whl (6.7 kB)
Installing collected packages: aiolimiter, voyageai
Successfully installed aiolimiter-1.2.1 voyageai-0.3.5


In [2]:
# Import required libraries
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
import torch.nn.functional as F
import openai
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
import voyageai
import json
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support

print("All libraries imported successfully!")

All libraries imported successfully!


In [3]:
# Configure API keys
from google.colab import userdata

# Get API keys from environment variables
openai.api_key = userdata.get('OPENAI_API_KEY')
voyage_api_key = userdata.get('VOYAGE_API_KEY')

# Set up Voyage API client if key exists
if voyage_api_key:
    voyageai.api_key = voyage_api_key
    print("‚úÖ Voyage API key configured.")
else:
    print("‚ö†Ô∏è  Voyage API key not found.")

# Configure Hugging Face token for remote LLaMA inference
hf_token = userdata.get('HF_TOKEN')
if hf_token:
    print("‚úÖ Hugging Face token configured.")
else:
    print("‚ö†Ô∏è  Hugging Face token not found.")

‚úÖ Voyage API key configured.
‚úÖ Hugging Face token configured.


## 2. Data Preparation

Load the dataset and prepare a balanced sample for testing. We'll also set up model configuration flags.

In [4]:
# Define model configuration
model_config = {
    'bge': {'enabled': True},
    'openai': {'enabled': False},
    'bge_m3': {'enabled': False},
    'careerbert': {'enabled': False},
    'confit': {'enabled': False},
    'voyage': {'enabled': False},
    'llama_remote': {'enabled': False}
}

print("Model configuration initialized.")

Model configuration initialized.


In [5]:
# Load the dataset
from google.colab import drive
drive.mount('/content/drive')
DATASET_PATH='/content/drive/MyDrive/AI-ML Self Learning/next_horizon/resume_job_recommendation/model-shashu2325-resume-job-matcher-lora'

# Load and sample the dataset
print("Loading dataset...")
df = pd.read_csv(f'{DATASET_PATH}/dataset.csv')

# Display dataset info
print(f"\nDataset Info:")
print(f"Total records: {len(df)}")
print(f"Columns: {list(df.columns)}")

# Create balanced sample
select_df = df[df['Decision'] == 'select']
reject_df = df[df['Decision'] == 'reject']

total_samples = 100
num_select = min(len(select_df), total_samples // 2)
num_reject = min(len(reject_df), total_samples - num_select)

# Adjust if needed
if num_select + num_reject < total_samples:
    if len(select_df) > len(reject_df):
        num_select = min(len(select_df), total_samples - num_reject)
    else:
        num_reject = min(len(reject_df), total_samples - num_select)

# Sample and combine, preserving original indices
sampled_select_df = select_df.sample(n=num_select, random_state=42)
sampled_reject_df = reject_df.sample(n=num_reject, random_state=42)
sampled_df = pd.concat([sampled_select_df, sampled_reject_df])
sampled_df = sampled_df.sample(frac=1, random_state=42)  # Shuffle but keep original index

print(f"\n‚úÖ Final sample size: {len(sampled_df)} records")
print(f"Sample distribution: {sampled_df['Decision'].value_counts().to_dict()}")

Mounted at /content/drive
Loading dataset...

Dataset Info:
Total records: 10174
Columns: ['Role', 'Resume', 'Decision', 'Reason_for_decision', 'Job_Description']

‚úÖ Final sample size: 100 records
Sample distribution: {'reject': 50, 'select': 50}


## 3. Model-Specific Operations

In [None]:
# Utility function for cosine similarity
def cosine_similarity(a, b):
    """Calculate cosine similarity between two vectors"""
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    na = np.linalg.norm(a)
    nb = np.linalg.norm(b)
    if na == 0 or nb == 0:
        return 0.0
    return float(a.dot(b) / (na * nb))

In [6]:
# BGE Model
if model_config['bge']['enabled']:
    print("Loading BGE model...")
    try:
        model = SentenceTransformer("BAAI/bge-large-en-v1.5")
        # Load and apply LoRA adapter
        # Note: Since we're using SentenceTransformer, we'll handle the embedding differently

        def calculate_bge_similarity(resume_text, job_text):
            resume_embedding = model.encode(resume_text, normalize_embeddings=True)
            job_embedding = model.encode(job_text, normalize_embeddings=True)
            return cosine_similarity(resume_embedding, job_embedding)

        print("‚úÖ BGE model loaded and similarity function defined.")
    except Exception as e:
        print(f"‚ùå Error loading BGE model: {e}")
        model_config['bge']['enabled'] = False

Loading BGE model...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

‚úÖ BGE model loaded and similarity function defined.


In [7]:
# OpenAI Model
if model_config['openai']['enabled'] and openai.api_key:
    print("Setting up OpenAI model...")
    try:
        def get_openai_embedding(text, model="text-embedding-3-small"):
            text = text.replace("\n", " ")
            return openai.embeddings.create(input=[text], model=model).data[0].embedding

        def calculate_openai_similarity(resume_text, job_text):
            resume_embedding = get_openai_embedding(resume_text)
            job_embedding = get_openai_embedding(job_text)
            return cosine_similarity(resume_embedding, job_embedding)

        print("‚úÖ OpenAI embedding function defined.")
    except Exception as e:
        print(f"‚ùå Error setting up OpenAI: {e}")
        model_config['openai']['enabled'] = False
else:
    print("‚ö†Ô∏è OpenAI model disabled (no API key)")
    model_config['openai']['enabled'] = False

‚ö†Ô∏è OpenAI model disabled (no API key)


In [8]:
# Voyage Model
if model_config['voyage']['enabled'] and voyage_api_key:
    print("Setting up Voyage model...")
    try:
        voyageai.api_key = voyage_api_key

        def calculate_voyage_similarity(resume_text, job_text, model="voyage-3-large"):
            try:
                # Create a new client instance for each request
                vo = voyageai.Client()
                # Clean and normalize texts
                resume_text = ' '.join(resume_text.split())  # Normalize whitespace
                job_text = ' '.join(job_text.split())
                # Get embeddings
                resume_embedding = vo.embed([resume_text], model=model).embeddings[0]
                job_embedding = vo.embed([job_text], model=model).embeddings[0]
                return cosine_similarity(resume_embedding, job_embedding)
            except Exception as e:
                print(f"Error in Voyage embedding calculation: {str(e)}")
                return None

        print("‚úÖ Voyage embedding function defined.")
    except Exception as e:
        print(f"‚ùå Error setting up Voyage: {e}")
        model_config['voyage']['enabled'] = False
else:
    print("‚ö†Ô∏è Voyage model disabled (no API key)")
    model_config['voyage']['enabled'] = False

‚ö†Ô∏è Voyage model disabled (no API key)


In [9]:
# Remote LLaMA Model
if model_config['llama_remote']['enabled'] and hf_token:
    print("Setting up remote LLaMA inference...")
    try:
        from openai import OpenAI
        client = OpenAI(
            base_url="https://router.huggingface.co/v1",
            api_key=hf_token,
        )

        def clean_text(text):
            """Clean and prepare text for LLaMA processing"""
            # Remove any problematic characters and normalize whitespace
            text = text.replace('\n', ' ').replace('\r', ' ')
            text = ' '.join(text.split())  # Normalize whitespace
            # Truncate long texts to prevent context overflow
            max_chars = 1000
            if len(text) > max_chars:
                text = text[:max_chars] + "..."
            return text

        def calculate_llama_remote_similarity(resume_text, job_text):
            # Clean and prepare the texts
            resume_text = clean_text(resume_text)
            job_text = clean_text(job_text)

            messages = [
                {
                    "role": "system",
                    "content": """You are a CV-job matcher. Analyze the match and return ONLY a JSON with {score: <number>}.
The score should be between 0-100, indicating compatibility. Do not include any other fields or explanations."""
                },
                {
                    "role": "user",
                    "content": f"CV: {resume_text}\nJob: {job_text}"
                }
            ]

            try:
                completion = client.chat.completions.create(
                    model="meta-llama/Llama-3.1-8B-Instruct",
                    messages=messages,
                    max_tokens=32,  # Very short response needed
                    response_format={ "type": "json_object" },
                    temperature=0.1  # Lower temperature for more consistent outputs
                )
                response_text = completion.choices[0].message.content.strip()

                try:
                    # Try to parse the JSON response
                    response = json.loads(response_text)
                    score = response.get('score', 0)
                except json.JSONDecodeError:
                    # If JSON parsing fails, try to extract just the score using regex
                    import re
                    # Look for any number pattern after "score"
                    score_match = re.search(r'"score"\s*:\s*(\d+)', response_text)
                    if score_match:
                        score = int(score_match.group(1))
                    else:
                        # Try to find any number between 0-100 in the response
                        number_match = re.search(r'\b([0-9]{1,2}|100)\b', response_text)
                        if number_match:
                            score = int(number_match.group(1))
                        else:
                            print(f"Could not find score in response: {response_text}")
                            return None

                # Ensure score is within valid range
                score = max(0, min(100, int(score)))
                return score / 100.0  # Normalize to 0-1

            except Exception as e:
                print(f"Error in LLaMA remote inference: {str(e)}")
                return None

        print("‚úÖ Remote LLaMA inference function defined.")
    except Exception as e:
        print(f"‚ùå Error setting up remote LLaMA: {e}")
        model_config['llama_remote']['enabled'] = False
else:
    print("‚ö†Ô∏è Remote LLaMA model disabled (no HF token)")
    model_config['llama_remote']['enabled'] = False

Setting up remote LLaMA inference...
‚úÖ Remote LLaMA inference function defined.


In [10]:
# BGE-M3 Model
if model_config['bge_m3']['enabled']:
    print("Loading BGE-M3 model...")
    try:
        bge_m3_model = SentenceTransformer("BAAI/bge-m3")

        def calculate_bge_m3_similarity(resume_text, job_text):
            # Clean and normalize texts
            resume_text = ' '.join(resume_text.split())
            job_text = ' '.join(job_text.split())

            # Get embeddings with BGE-M3
            resume_embedding = bge_m3_model.encode(resume_text, normalize_embeddings=True)
            job_embedding = bge_m3_model.encode(job_text, normalize_embeddings=True)
            return cosine_similarity(resume_embedding, job_embedding)

        print("‚úÖ BGE-M3 model loaded and similarity function defined.")
    except Exception as e:
        print(f"‚ùå Error loading BGE-M3 model: {e}")
        model_config['bge_m3']['enabled'] = False
else:
    print("‚ö†Ô∏è BGE-M3 model disabled")

‚ö†Ô∏è BGE-M3 model disabled


In [11]:
# CareerBERT Model
if model_config['careerbert']['enabled']:
    print("Loading CareerBERT model...")
    try:
        careerbert_model = SentenceTransformer("yxchar/career-bert-base")

        def calculate_careerbert_similarity(resume_text, job_text):
            # Clean and normalize texts
            resume_text = ' '.join(resume_text.split())
            job_text = ' '.join(job_text.split())

            # Get embeddings with CareerBERT
            resume_embedding = careerbert_model.encode(resume_text, normalize_embeddings=True)
            job_embedding = careerbert_model.encode(job_text, normalize_embeddings=True)
            return cosine_similarity(resume_embedding, job_embedding)

        print("‚úÖ CareerBERT model loaded and similarity function defined.")
    except Exception as e:
        print(f"‚ùå Error loading CareerBERT model: {e}")
        model_config['careerbert']['enabled'] = False
else:
    print("‚ö†Ô∏è CareerBERT model disabled")

‚ö†Ô∏è CareerBERT model disabled


In [12]:
# ConFit Model
if model_config['confit']['enabled']:
    print("Loading ConFit model...")
    try:
        confit_model = SentenceTransformer("jamescalam/resume-job-confit-base")

        def calculate_confit_similarity(resume_text, job_text):
            # Clean and normalize texts
            resume_text = ' '.join(resume_text.split())
            job_text = ' '.join(job_text.split())

            # Get embeddings with ConFit
            resume_embedding = confit_model.encode(resume_text, normalize_embeddings=True)
            job_embedding = confit_model.encode(job_text, normalize_embeddings=True)
            return cosine_similarity(resume_embedding, job_embedding)

        print("‚úÖ ConFit model loaded and similarity function defined.")
    except Exception as e:
        print(f"‚ùå Error loading ConFit model: {e}")
        model_config['confit']['enabled'] = False
else:
    print("‚ö†Ô∏è ConFit model disabled")

‚ö†Ô∏è ConFit model disabled


## 4. Generate Predictions and Classifications

Process the dataset through all enabled models and create a comparative table with role indices.

In [13]:
# Initialize score dictionary for each model
scores = {
    'bge': [],
    'openai': [],
    'bge_m3': [],
    'careerbert': [],
    'confit': [],
    'voyage': [],
    'llama_remote': []
}

# Process each resume-job pair
print("üöÄ Calculating similarity scores for all enabled models...")
for index, row in sampled_df.iterrows():
    resume_text = str(row['Resume']) if pd.notna(row['Resume']) else ""
    job_text = str(row['Job_Description']) if pd.notna(row['Job_Description']) else ""

    print(f"\nüìã Processing pair {index+1}/{len(sampled_df)}")

    # Calculate scores for each enabled model
    if model_config['bge']['enabled']:
        try:
            scores['bge'].append(calculate_bge_similarity(resume_text, job_text))
        except Exception as e:
            scores['bge'].append(None)
            print(f"‚ùå BGE error: {e}")
    else:
        scores['bge'].append(None)

    if model_config['openai']['enabled']:
        try:
            scores['openai'].append(calculate_openai_similarity(resume_text, job_text))
        except Exception as e:
            scores['openai'].append(None)
            print(f"‚ùå OpenAI error: {e}")
    else:
        scores['openai'].append(None)

    if model_config['bge_m3']['enabled']:
        try:
            scores['bge_m3'].append(calculate_bge_m3_similarity(resume_text, job_text))
        except Exception as e:
            scores['bge_m3'].append(None)
            print(f"‚ùå BGE-M3 error: {e}")
    else:
        scores['bge_m3'].append(None)

    if model_config['careerbert']['enabled']:
        try:
            scores['careerbert'].append(calculate_careerbert_similarity(resume_text, job_text))
        except Exception as e:
            scores['careerbert'].append(None)
            print(f"‚ùå CareerBERT error: {e}")
    else:
        scores['careerbert'].append(None)

    if model_config['confit']['enabled']:
        try:
            scores['confit'].append(calculate_confit_similarity(resume_text, job_text))
        except Exception as e:
            scores['confit'].append(None)
            print(f"‚ùå ConFit error: {e}")
    else:
        scores['confit'].append(None)

    if model_config['voyage']['enabled']:
        try:
            scores['voyage'].append(calculate_voyage_similarity(resume_text, job_text))
        except Exception as e:
            scores['voyage'].append(None)
            print(f"‚ùå Voyage error: {e}")
    else:
        scores['voyage'].append(None)

    if model_config['llama_remote']['enabled']:
        try:
            scores['llama_remote'].append(calculate_llama_remote_similarity(resume_text, job_text))
        except Exception as e:
            scores['llama_remote'].append(None)
            print(f"‚ùå LLaMA Remote error: {e}")
    else:
        scores['llama_remote'].append(None)

print("\n‚úÖ Score calculation complete!")

üöÄ Calculating similarity scores for all enabled models...

üìã Processing pair 7548/100

üìã Processing pair 2644/100

üìã Processing pair 4814/100

üìã Processing pair 2877/100

üìã Processing pair 1746/100

üìã Processing pair 1176/100

üìã Processing pair 6368/100

üìã Processing pair 4668/100

üìã Processing pair 3673/100

üìã Processing pair 6143/100

üìã Processing pair 7891/100

üìã Processing pair 5265/100

üìã Processing pair 1774/100

üìã Processing pair 4468/100

üìã Processing pair 1524/100

üìã Processing pair 4507/100
Error in LLaMA remote inference: Error code: 402 - {'error': 'You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits.'}

üìã Processing pair 1458/100
Error in LLaMA remote inference: Error code: 402 - {'error': 'You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits.'}

üìã Processing pai

In [14]:
# Add scores to DataFrame and generate predictions
threshold = 0.5
print(f"Using classification threshold: {threshold}")

# Function to make predictions
def make_prediction(score, threshold=0.5):
    if score is None or pd.isna(score):
        return 'unknown'
    return 'select' if score > threshold else 'reject'

# Add scores and predictions to DataFrame
for model in scores.keys():
    if model_config[model]['enabled']:
        sampled_df[f'{model}_similarity'] = scores[model]
        sampled_df[f'{model}_prediction'] = [make_prediction(s, threshold) for s in scores[model]]

# Create display columns, moving Reason_for_decision to the end
display_columns = ['Role', 'Decision']
for model in scores.keys():
    if model_config[model]['enabled']:
        display_columns.extend([f'{model}_similarity', f'{model}_prediction'])
display_columns.append('Reason_for_decision')  # Add Reason_for_decision at the end

# Create comparative table with original index
comparison_table = sampled_df[display_columns]

print("\nüìä Comparative Table of Similarity Scores and Predictions (with original dataset indices):")
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.float_format', lambda x: '%.4f' % x)
print(comparison_table.to_string())

Using classification threshold: 0.5

üìä Comparative Table of Similarity Scores and Predictions (with original dataset indices):
                             Role Decision  bge_similarity bge_prediction  llama_remote_similarity llama_remote_prediction                                                                                                                                                                                Reason_for_decision
7547            Robotics Engineer   reject          0.5283         select                   0.9200                  select                                                                                                                                                    Lacked leadership skills for a senior position.
2643        E-commerce Specialist   reject          0.6009         select                   0.8500                  select                                                                                                                

In [15]:
# Calculate performance metrics for each enabled model
print("\nüìà Model Performance Metrics:")
print("=" * 80)

for model in scores.keys():
    if model_config[model]['enabled']:
        print(f"\nüîç {model.upper()} Model Performance:")
        print("-" * 50)

        # Filter out unknown predictions
        valid_mask = sampled_df[f'{model}_prediction'] != 'unknown'
        valid_df = sampled_df[valid_mask]

        if len(valid_df) > 0:
            accuracy = accuracy_score(valid_df['Decision'], valid_df[f'{model}_prediction'])
            precision, recall, f1, _ = precision_recall_fscore_support(
                valid_df['Decision'],
                valid_df[f'{model}_prediction'],
                average='weighted'
            )

            print(f"Accuracy: {accuracy:.4f}")
            print(f"Precision: {precision:.4f}")
            print(f"Recall: {recall:.4f}")
            print(f"F1-Score: {f1:.4f}")
            print(f"Valid predictions: {len(valid_df)}/{len(sampled_df)}")

            print("\nDetailed Classification Report:")
            print(classification_report(valid_df['Decision'], valid_df[f'{model}_prediction']))
        else:
            print("No valid predictions available")


üìà Model Performance Metrics:

üîç BGE Model Performance:
--------------------------------------------------
Accuracy: 0.4500
Precision: 0.3723
Recall: 0.4500
F1-Score: 0.3513
Valid predictions: 100/100

Detailed Classification Report:
              precision    recall  f1-score   support

      reject       0.27      0.06      0.10        50
      select       0.47      0.84      0.60        50

    accuracy                           0.45       100
   macro avg       0.37      0.45      0.35       100
weighted avg       0.37      0.45      0.35       100


üîç LLAMA_REMOTE Model Performance:
--------------------------------------------------
Accuracy: 0.6000
Precision: 0.3600
Recall: 0.6000
F1-Score: 0.4500
Valid predictions: 15/100

Detailed Classification Report:
              precision    recall  f1-score   support

      reject       0.00      0.00      0.00         6
      select       0.60      1.00      0.75         9

    accuracy                           0.60        15


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
