# DPO Win Rate Summary

Quick summary of DPO model wins from judged JSONL files.

In [None]:
import json
from pathlib import Path

In [None]:
def summarize_judged(file_path):
    """Tally judge verdicts from a judged JSONL file.

    Each non-blank line is parsed as JSON and its
    ``"winner_model_vs_chosen"`` field is read: ``"model"`` counts as a win,
    ``"chosen"`` counts as a loss, and any other value (including a missing
    field) counts as a tie.

    Args:
        file_path: Path of the judged JSONL file to read.

    Returns:
        A ``(wins, losses, ties, total)`` tuple, where ``total`` is the
        number of non-blank lines processed (``wins + losses + ties``).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    wins = losses = ties = 0

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            # Skip blank / whitespace-only lines (e.g. a trailing newline).
            if not line.strip():
                continue
            ex = json.loads(line)
            result = ex.get("winner_model_vs_chosen", "")

            if result == "model":
                wins += 1
            elif result == "chosen":
                losses += 1
            else:
                # Anything that is neither "model" nor "chosen" is a tie.
                ties += 1

    total = wins + losses + ties
    return wins, losses, ties, total

In [None]:
# Analyze the beta=0.1 judged file (DPO policy vs. chosen response)
file_path = Path("../beta=0.1/judged_policy_vs_chosen.jsonl")
wins, losses, ties, total = summarize_judged(file_path)

print(f"File: {file_path.name}")
print(f"DPO Wins: {wins} / {total}")
# An empty judged file yields total == 0; report that instead of raising
# ZeroDivisionError while computing the rate.
if total:
    print(f"Win Rate: {wins/total:.1%}")
else:
    print("Win Rate: n/a (no judged examples)")
print(f"\nBreakdown: Wins={wins}, Losses={losses}, Ties={ties}")

In [None]:
# Also check base model vs chosen; this file is optional, so skip it quietly
# when it is not present.
base_file = Path("../beta=0.1/judged_base_vs_chosen.jsonl")
if base_file.exists():
    wins_b, losses_b, ties_b, total_b = summarize_judged(base_file)
    print(f"\nFile: {base_file.name}")
    print(f"Base Model Wins: {wins_b} / {total_b}")
    # Guard against an empty judged file (total_b == 0) so the rate
    # computation cannot raise ZeroDivisionError.
    if total_b:
        print(f"Win Rate: {wins_b/total_b:.1%}")
    else:
        print("Win Rate: n/a (no judged examples)")