# Test 2: Finetuned Models (DistilBERT & RoBERTa)

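This notebook tests whether fine-tuned transformer models are **biased by resume writing style**, i.e. whether the same candidate is assigned a different seniority level depending on how the resume is worded (neutral, overstated, or understated).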

**Metrics:** Accuracy, Inconsistency Rate, Rank Difference

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import warnings
warnings.filterwarnings('ignore')

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Read the test-set CSV, then report how many rows it has and how the
# `seniority` column is distributed.
df = pd.read_csv("../Test 2 Data/test2_resumes.csv")
print(f"Loaded {len(df)} resume sets")

seniority_counts = df['seniority'].value_counts()
print(f"Seniority distribution: {seniority_counts.to_dict()}")

In [None]:
# Load each fine-tuned checkpoint (tokenizer + classifier) from disk, move the
# classifier to `device`, and switch it to evaluation mode.
MODEL_PATHS = {
    'distilbert': '../Smaller Models/distilbert_resume_level',
    'roberta': '../Smaller Models/roberta_resume_level'
}

models = {}
tokenizers = {}

for model_key, checkpoint_dir in MODEL_PATHS.items():
    print(f"Loading {model_key}...", end=" ")
    tokenizers[model_key] = AutoTokenizer.from_pretrained(checkpoint_dir)
    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
    # `.to()` and `.eval()` both return the module itself, so they chain.
    models[model_key] = classifier.to(device).eval()
    print("Done")

# Class-index -> seniority label mapping used by `predict`.
# NOTE(review): this mapping is hard-coded and not checked against each
# checkpoint's own label mapping -- confirm it matches the training label order.
id2label = {0: 'junior', 1: 'mid', 2: 'senior'}
model_names = list(models)

In [None]:
def predict(model_name, text):
    """Classify a single text with one of the loaded models.

    Args:
        model_name: Key into the module-level `tokenizers` and `models` dicts.
        text: The text to classify. It is tokenized with truncation at a
            maximum length of 512.

    Returns:
        The label that `id2label` assigns to the index of the largest logit.
    """
    tok, clf = tokenizers[model_name], models[model_name]

    encoded = tok(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
    # Move every input tensor to the same device as the model.
    encoded = {key: tensor.to(device) for key, tensor in encoded.items()}

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        logits = clf(**encoded).logits
        best_class = torch.argmax(logits, dim=1).item()

    return id2label[best_class]

In [None]:
# Run predictions: one record per (resume set, style, model).
results = []
styles = ['neutral', 'overstated', 'understated']
n_sets = len(df)

# Progress is tracked with a positional counter rather than arithmetic on the
# index label, so the report stays correct even if `df` does not carry a
# 0-based integer index (e.g. after filtering or with a non-integer index).
for n_done, (idx, row) in enumerate(df.iterrows(), start=1):
    true_label = row['seniority']
    for style in styles:
        # The text is the same for every model, so convert it once per style.
        # NOTE(review): str() turns a missing (NaN) cell into the literal
        # text "nan", which is then classified like any other resume.
        text = str(row[style])
        for model_name in model_names:
            pred = predict(model_name, text)
            results.append({
                'idx': idx,
                'true_seniority': true_label,
                'style': style,
                'model': model_name,
                'prediction': pred,
                'correct': pred == true_label
            })
    if n_done % 20 == 0:
        print(f"Processed {n_done}/{n_sets}")

results_df = pd.DataFrame(results)
print(f"\nTotal predictions: {len(results_df)}")

In [None]:
# Write the predictions to a CSV file next to the notebook, without the
# DataFrame index column.
output_path = "finetuned_predictions.csv"
results_df.to_csv(output_path, index=False)
print(f"Saved to {output_path}")

In [None]:
# Add ordinal rank columns so predictions can be compared numerically.
seniority_rank = {'junior': 0, 'mid': 1, 'senior': 2}

# Map the true and predicted labels to ranks; a label that is not a key of
# `seniority_rank` becomes NaN.
for rank_col, label_col in (('true_rank', 'true_seniority'), ('pred_rank', 'prediction')):
    results_df[rank_col] = results_df[label_col].map(seniority_rank)

# Signed error: predicted rank minus true rank.
results_df['rank_diff'] = results_df['pred_rank'] - results_df['true_rank']

---
## Metric 1: Accuracy by Style

How often does the model predict the correct seniority for each resume style?

In [None]:
# Print, for every model, the fraction of correct predictions per style.
print("ACCURACY BY STYLE", "=" * 50, sep="\n")

for model_name in model_names:
    print(f"\n{model_name.upper()}")
    model_df = results_df.loc[results_df['model'] == model_name]
    for style in styles:
        style_mask = model_df['style'] == style
        # Mean of the boolean `correct` column is the accuracy.
        acc = model_df.loc[style_mask, 'correct'].mean()
        print(f"  {style:<15}: {acc:.1%}")

In [None]:
# Accuracy visualization: grouped bars, one group per style, one bar per model.
fig, ax = plt.subplots(figsize=(8, 5))
x = np.arange(len(styles))
n_models = len(model_names)
# Keep the original 0.35 bar width for up to two models, but shrink it when
# more models are compared so neighbouring groups never overlap.
width = min(0.35, 0.8 / max(n_models, 1))

for i, model_name in enumerate(model_names):
    model_df = results_df[results_df['model'] == model_name]
    accs = [model_df[model_df['style'] == s]['correct'].mean() for s in styles]
    ax.bar(x + i*width, accs, width, label=model_name.upper())

ax.set_ylabel('Accuracy')
ax.set_title('Accuracy by Resume Style')
# Centre each tick under its group of bars; for two models this is x + width/2.
ax.set_xticks(x + width * (n_models - 1) / 2)
# Derive the labels from `styles` so they cannot drift out of sync with the data.
ax.set_xticklabels([s.capitalize() for s in styles])
ax.legend()
ax.set_ylim(0, 1)
plt.tight_layout()
plt.show()

---
## Metric 2: Inconsistency Rate

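For the same person, do different resume styles get different predictions? The inconsistency rate is the share of resume sets whose neutral, overstated, and understated versions do not all receive the same prediction. A higher rate means the model is more sensitive to writing style (more biased).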

In [None]:
# Inconsistency rate: percentage of resume sets for which a model's
# predictions are not identical across all styles.
print("INCONSISTENCY RATE")
print("=" * 50)

n_sets = len(df)
inconsistency_rates = {}
for model_name in model_names:
    model_df = results_df[results_df['model'] == model_name]
    # Count distinct predictions per resume set in a single grouped pass
    # instead of re-filtering the frame once per set. dropna=False keeps the
    # same counting rule as Series.unique(), which treats NaN as a value.
    distinct_preds = model_df.groupby('idx')['prediction'].nunique(dropna=False)
    inconsistent = int((distinct_preds > 1).sum())
    # Guard against an empty test set, which would otherwise divide by zero.
    rate = inconsistent / n_sets * 100 if n_sets else 0.0
    inconsistency_rates[model_name] = rate
    print(f"{model_name.upper()}: {inconsistent}/{n_sets} ({rate:.1f}%)")

In [None]:
# Inconsistency visualization: one bar per model, annotated with its rate.
fig, ax = plt.subplots(figsize=(6, 4))
rates = [inconsistency_rates[m] for m in model_names]
ax.bar(model_names, rates, color=['steelblue', 'coral'])
ax.set(
    ylabel='Inconsistency Rate (%)',
    title='Prediction Inconsistency by Model',
    ylim=(0, 100),
)
# Write each percentage slightly above the top of its bar.
for bar_pos, rate in enumerate(rates):
    ax.text(bar_pos, rate + 2, f'{rate:.1f}%', ha='center')
plt.tight_layout()
plt.show()

---
## Metric 3: Rank Difference

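Average signed difference between predicted and true seniority rank (predicted − true), using the encoding junior = 0, mid = 1, senior = 2. Positive = the model overestimates seniority; negative = it underestimates.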

In [None]:
# Print, for every model, the mean signed rank error per style.
print("RANK DIFFERENCE BY STYLE", "=" * 50, sep="\n")

for model_name in model_names:
    print(f"\n{model_name.upper()}")
    model_df = results_df.loc[results_df['model'] == model_name]
    for style in styles:
        style_mask = model_df['style'] == style
        diff = model_df.loc[style_mask, 'rank_diff'].mean()
        # The explicit sign in the format makes over/underestimation visible.
        print(f"  {style:<15}: {diff:+.3f}")

In [None]:
# Rank difference visualization: grouped bars of the mean signed rank error,
# one group per style, one bar per model.
fig, ax = plt.subplots(figsize=(8, 5))
x = np.arange(len(styles))
n_models = len(model_names)
# Keep the original 0.35 bar width for up to two models, but shrink it when
# more models are compared so neighbouring groups never overlap.
width = min(0.35, 0.8 / max(n_models, 1))

for i, model_name in enumerate(model_names):
    model_df = results_df[results_df['model'] == model_name]
    diffs = [model_df[model_df['style'] == s]['rank_diff'].mean() for s in styles]
    ax.bar(x + i*width, diffs, width, label=model_name.upper())

ax.set_ylabel('Avg Rank Difference')
ax.set_title('Prediction Bias by Style (+ = overestimate)')
# Centre each tick under its group of bars; for two models this is x + width/2.
ax.set_xticks(x + width * (n_models - 1) / 2)
# Derive the labels from `styles` so they cannot drift out of sync with the data.
ax.set_xticklabels([s.capitalize() for s in styles])
ax.legend()
# Zero reference line separates overestimation (above) from underestimation (below).
ax.axhline(y=0, color='black', linestyle='--', linewidth=0.5)
plt.tight_layout()
plt.show()