# Werewolf Batch Metrics — Win Rate & Vote Hit Rate

Fill in the list of folders to analyze. Each folder should contain JSON game logs (one per game).  
This notebook will compute:
- **Win rate** per folder (overall) and per folder × werewolf archetype.
- **Vote hit rate** (fraction of *villager* votes that correctly target a werewolf) per archetype and overall.

> Assumptions: final-day votes appear as events of type `final_vote_round` and (optionally) `final_vote_runoff` with a `votes` map.


In [1]:
# --- Imports & display helpers
from __future__ import annotations
import os, json, glob
from typing import Dict, Any, List, Tuple
import pandas as pd

# For nice interactive tables in ChatGPT (harmless if not present in your env)
try:
    from caas_jupyter_tools import display_dataframe_to_user
    _HAS_CE = True
except Exception:
    _HAS_CE = False

def _show_df(df: pd.DataFrame, name: str):
    if _HAS_CE:
        display_dataframe_to_user(name=name, dataframe=df)
    else:
        display(df)
        
print("Ready. Edit the FOLDERS list in the next cell and run all cells.")


Ready. Edit the FOLDERS list in the next cell and run all cells.


In [7]:
# --- Parameters: fill in the folders you want to analyze ---
# Example:
# FOLDERS = [
#     "batch1.1_suspicionRuleRubric_5A_1W_2Discussion_interAgentRating",
#     "batch1.2_altPolicy_5A_1W_2Discussion"
# ]

FOLDERS = [
    "batch1.1_suspicionRuleRubric_5A_1W_2Discussion_interAgentRating"
]

# Optional: file glob to narrow selection within each folder (default: recursive all json)
#FILE_GLOB = "**/*.json"

assert isinstance(FOLDERS, list), "FOLDERS must be a list of folder paths (strings)."


In [8]:
# --- Utilities to parse logs ---

def find_json_files(folders: List[str], file_glob: str="**/*.json") -> List[str]:
    files = []
    for root in folders:
        if not os.path.isdir(root):
            print(f"[warn] Not a directory or missing: {root}")
            continue
        files.extend(glob.glob(os.path.join(root, file_glob), recursive=True))
    # De-dup & sort for determinism
    files = sorted(set(files))
    return files

def infer_batch_root(path: str, roots: List[str]) -> str:
    # Return the first folder from roots that is a prefix of the path; else top-level directory name
    path_abs = os.path.abspath(path)
    best = None
    for r in roots:
        try:
            if os.path.commonpath([os.path.abspath(r), path_abs]) == os.path.abspath(r):
                best = r
                break
        except Exception:
            pass
    return os.path.basename(best) if best else os.path.basename(os.path.dirname(path))

def extract_roles(meta: Dict[str, Any]) -> Tuple[Dict[str, str], List[str], List[str]]:
    roles = meta.get("roles", {})
    werewolves = [a for a, role in roles.items() if role.lower() == "werewolf"]
    villagers = [a for a, role in roles.items() if role.lower() != "werewolf"]
    return roles, villagers, werewolves

def collect_final_votes(events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # Collect 'final_vote_round' and 'final_vote_runoff' events with a votes map
    res = []
    for ev in events:
        if ev.get("event_type") in ("final_vote_round", "final_vote_runoff"):
            votes = ev.get("votes")
            if isinstance(votes, dict):
                res.append(ev)
    return res

def vote_hit_counts(events: List[Dict[str, Any]], roles: Dict[str, str]) -> Tuple[int, int]:
    """Return (villager_votes_total, villager_hits_on_wolf) considering only final vote rounds.
    A 'hit' means a villager voted for any agent with role == 'werewolf'.
    """
    finals = collect_final_votes(events)
    total, hits = 0, 0
    # Identify all werewolves (could be >1)
    wolves = {a for a, r in roles.items() if isinstance(r, str) and r.lower() == "werewolf"}
    for ev in finals:
        votes = ev.get("votes", {})
        for voter, target in votes.items():
            # Consider votes by villagers (anything not 'werewolf' counts as villager-aligned for now)
            role = roles.get(voter, "").lower()
            if role == "werewolf":
                continue
            total += 1
            if target in wolves:
                hits += 1
    return total, hits

def parse_game_file(fp: str, roots: List[str]) -> Dict[str, Any]:
    with open(fp, "r") as f:
        data = json.load(f)
    meta = data.get("metadata", {})
    events = data.get("events", [])
    winner = data.get("winner", None)
    werewolf_archetype = meta.get("werewolf_archetype", "unknown")
    roles, villagers, werewolves = extract_roles(meta)
    v_total, v_hits = vote_hit_counts(events, roles)
    return {
        "file": fp,
        "folder_root": infer_batch_root(fp, roots),
        "winner": winner,
        "werewolf_archetype": werewolf_archetype,
        "villager_votes_total": v_total,
        "villager_vote_hits": v_hits,
        "n_werewolves": len(werewolves),
    }


In [9]:
# --- Load all game logs into a DataFrame ---
all_files = find_json_files(FOLDERS, FILE_GLOB) if FOLDERS else []
print(f"Found {len(all_files)} json files")
rows = []
bad = []

for fp in all_files:
    try:
        rows.append(parse_game_file(fp, FOLDERS))
    except Exception as e:
        bad.append((fp, str(e)))

games_df = pd.DataFrame(rows)

if len(bad):
    print(f"[warn] Skipped {len(bad)} bad files (see bad_files_df)")
    bad_files_df = pd.DataFrame(bad, columns=["file", "error"])
else:
    bad_files_df = pd.DataFrame(columns=["file", "error"])

print("Preview:")
display(games_df.head(3))


Found 51 json files
Preview:


Unnamed: 0,file,folder_root,winner,werewolf_archetype,villager_votes_total,villager_vote_hits,n_werewolves
0,batch1.1_suspicionRuleRubric_5A_1W_2Discussion...,batch1.1_suspicionRuleRubric_5A_1W_2Discussion...,,unknown,0,0,1
1,batch1.1_suspicionRuleRubric_5A_1W_2Discussion...,batch1.1_suspicionRuleRubric_5A_1W_2Discussion...,villagers,Chaos Agent,3,3,1
2,batch1.1_suspicionRuleRubric_5A_1W_2Discussion...,batch1.1_suspicionRuleRubric_5A_1W_2Discussion...,villagers,Chaos Agent,3,3,1


In [12]:
# --- Win rate calculations ---
if games_df.empty:
    print("No games loaded. Please set FOLDERS and re-run.")
else:
    # Prepare a base frame with a clean villager_win indicator
    base = games_df.assign(
        villager_win = games_df["winner"].fillna("").str.lower().eq("villagers").astype(int)
    )

    # Overall win rate per folder (use DataFrameGroupBy for named-agg tuples)
    folder_win = (
        base
        .groupby("folder_root", as_index=False)
        .agg(
            games=('villager_win', 'size'),
            villager_wins=('villager_win', 'sum')
        )
    )
    folder_win["villager_win_rate"] = folder_win["villager_wins"] / folder_win["games"]
    folder_win["werewolf_win_rate"] = 1.0 - folder_win["villager_win_rate"]
    folder_win = folder_win.sort_values(["folder_root"]).reset_index(drop=True)
    print("Win rate per folder (overall):")
    _show_df(folder_win, "Win Rate — Per Folder (Overall)")

    # Win rate per folder × archetype
    fa = (
        base
        .groupby(["folder_root", "werewolf_archetype"], as_index=False)
        .agg(
            games=('villager_win', 'size'),
            villager_wins=('villager_win', 'sum')
        )
    )
    fa["villager_win_rate"] = fa["villager_wins"] / fa["games"]
    fa["werewolf_win_rate"] = 1.0 - fa["villager_win_rate"]
    fa = fa.sort_values(["folder_root", "werewolf_archetype"]).reset_index(drop=True)
    print("Win rate per folder × werewolf archetype:")
    _show_df(fa, "Win Rate — Per Folder and Archetype")


Win rate per folder (overall):


Unnamed: 0,folder_root,games,villager_wins,villager_win_rate,werewolf_win_rate
0,batch1.1_suspicionRuleRubric_5A_1W_2Discussion...,51,35,0.686275,0.313725


Win rate per folder × werewolf archetype:


Unnamed: 0,folder_root,werewolf_archetype,games,villager_wins,villager_win_rate,werewolf_win_rate
0,batch1.1_suspicionRuleRubric_5A_1W_2Discussion...,Chaos Agent,50,35,0.7,0.3
1,batch1.1_suspicionRuleRubric_5A_1W_2Discussion...,unknown,1,0,0.0,1.0


In [13]:
# --- Vote hit rate: per archetype + overall ---
if games_df.empty:
    print("No games loaded. Please set FOLDERS and re-run.")
else:
    # Aggregate villager vote hits across all final votes
    by_arch = (
        games_df
        .groupby("werewolf_archetype", as_index=False)[["villager_votes_total","villager_vote_hits"]]
        .sum()
    )
    by_arch["vote_hit_rate"] = by_arch.apply(
        lambda r: (r["villager_vote_hits"]/r["villager_votes_total"]) if r["villager_votes_total"]>0 else float('nan'),
        axis=1
    )
    by_arch = by_arch.sort_values("werewolf_archetype").reset_index(drop=True)
    print("Villager vote hit rate per werewolf archetype (final vote rounds only):")
    _show_df(by_arch, "Vote Hit Rate — By Archetype")

    # Overall (all archetypes combined)
    overall = games_df[["villager_votes_total","villager_vote_hits"]].sum()
    overall_rate = overall["villager_vote_hits"]/overall["villager_votes_total"] if overall["villager_votes_total"]>0 else float('nan')
    overall_df = pd.DataFrame([{
        "villager_votes_total": int(overall["villager_votes_total"]),
        "villager_vote_hits": int(overall["villager_vote_hits"]),
        "vote_hit_rate": overall_rate
    }])
    print("Villager vote hit rate — overall (all archetypes):")
    _show_df(overall_df, "Vote Hit Rate — Overall")


Villager vote hit rate per werewolf archetype (final vote rounds only):


Unnamed: 0,werewolf_archetype,villager_votes_total,villager_vote_hits,vote_hit_rate
0,Chaos Agent,312,176,0.564103
1,unknown,0,0,


Villager vote hit rate — overall (all archetypes):


Unnamed: 0,villager_votes_total,villager_vote_hits,vote_hit_rate
0,312,176,0.564103
