In [None]:
# 📦 Step 1: Setup & Imports
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

import sys
import os

# 🔧 Make the parent of the current working directory importable.
# Presumably this is the repository root that holds the `model` and `data`
# packages imported right after this block — confirm against the repo layout.
parent_of_cwd = os.path.join(os.getcwd(), os.pardir)
project_root = os.path.abspath(parent_of_cwd)
root_already_listed = project_root in sys.path
if not root_already_listed:
    # Appended (not prepended), so entries already on sys.path keep priority.
    sys.path.append(project_root)

from model.model import RewardModel
from data.prepare_dataset import PairwisePreferenceDataset

# 📍 Config
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Passed to both RewardModel and PairwisePreferenceDataset.
model_name = "bert-base-uncased"
# NOTE: relative path — it is resolved against the current working directory,
# not against `project_root`.
checkpoint_path = "checkpoints/reward_model_epoch3.pt"
max_length = 256  # forwarded to the dataset as `max_length`
batch_size = 4  # batch size used by the evaluation DataLoader

# ✅ Load model
model = RewardModel(model_name)
# The checkpoint is fed straight into load_state_dict, so only tensors and
# plain containers are needed. weights_only=True restricts unpickling to
# those types instead of executing arbitrary pickled code from the file.
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)
model.eval()  # switch layers such as dropout to inference behaviour
print("✅ Model loaded.")

# ✅ Load evaluation data (the "test" split of the pairwise preference dataset)
dataset = PairwisePreferenceDataset(
    split="test",
    model_name=model_name,
    max_length=max_length,
)
# No shuffle/sampler is given, so DataLoader iterates the dataset in order.
loader = DataLoader(dataset, batch_size=batch_size)

# 🧪 Step 2: Score chosen and rejected
# Per-batch score arrays are accumulated in separate lists so that the final
# `chosen_scores` / `rejected_scores` names are only ever bound to NumPy arrays.
chosen_chunks = []
rejected_chunks = []

with torch.no_grad():  # inference only: no autograd graph is needed
    for batch in loader:
        chosen_ids = batch["input_ids_chosen"].to(device)
        chosen_mask = batch["attention_mask_chosen"].to(device)
        rejected_ids = batch["input_ids_rejected"].to(device)
        rejected_mask = batch["attention_mask_rejected"].to(device)

        # reshape(-1) flattens the model output to 1-D. This keeps the loop
        # working if the model returns a 0-d tensor (e.g. a squeezed batch of
        # one) and turns a (batch, 1) output into a plain vector.
        # Assumes one scalar reward per example — TODO confirm against RewardModel.
        r_chosen = model(chosen_ids, chosen_mask).cpu().numpy().reshape(-1)
        r_rejected = model(rejected_ids, rejected_mask).cpu().numpy().reshape(-1)

        chosen_chunks.append(r_chosen)
        rejected_chunks.append(r_rejected)

# 💡 Convert to NumPy arrays
# np.concatenate rejects an empty list, so an empty loader yields empty arrays.
chosen_scores = np.concatenate(chosen_chunks) if chosen_chunks else np.empty(0, dtype=np.float32)
rejected_scores = np.concatenate(rejected_chunks) if rejected_chunks else np.empty(0, dtype=np.float32)
score_diff = chosen_scores - rejected_scores
# A pair counts as correct only when chosen scores strictly higher; ties count
# as errors. With no pairs the accuracy is NaN (np.mean would warn on empty input).
accuracy = np.mean(score_diff > 0) if score_diff.size else float("nan")

print(f"✅ Model Accuracy: {accuracy * 100:.2f}%")


In [None]:
# 📊 Histogram: Reward Score Distribution
fig, ax = plt.subplots(figsize=(10, 6))
# Overlay both score distributions on the same axes: chosen first, then rejected.
for series, series_color, series_label in (
    (chosen_scores, "green", "Chosen"),
    (rejected_scores, "red", "Rejected"),
):
    sns.histplot(
        series,
        color=series_color,
        label=series_label,
        kde=True,
        stat="density",
        ax=ax,
    )
ax.axvline(0, linestyle='--', color='gray')  # reference line at reward = 0
ax.set_title("Reward Score Distribution")
ax.set_xlabel("Reward Score")
ax.set_ylabel("Density")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# 📉 Histogram: Difference (Chosen - Rejected)
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(score_diff, bins=50, kde=True, color="purple", ax=ax)
# Values to the right of this line are pairs where chosen out-scored rejected.
ax.axvline(0, linestyle='--', color='gray')
ax.set_title("Reward Score Differences (Chosen - Rejected)")
ax.set_xlabel("Difference")
ax.set_ylabel("Count")
ax.grid(True)
plt.show()
