# Werewolf Arena - LLM WAU Dataset EDA

Exploratory data analysis, data loading and transforming

In [115]:
import ast
import os
import json
import random 

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from collections import Counter
from itertools import chain

random.seed(42)

In [116]:
basepath = os.path.normpath("../Data/Werewolf-Arena")
out_path = os.path.normpath("../Data/Output/EDA_WA/")

## Load and transform data

In [126]:
# Process all games into a general rounds DF, and a DF by each utterance

rounds = []
utterances = []
for folder in os.listdir(basepath):
    if not folder.startswith("session_"):
        continue
    
    game_df = pd.read_json(os.path.join(basepath, folder, "game_complete.json"), orient="index").T
    playerinfo = game_df["players"].to_dict()[0]
    players = {k: v["role"] for k, v in playerinfo.items()}
    
    rounds_df = pd.DataFrame(game_df["rounds"][0])
    rounds_df["session_id"]  = int(random.random() * 1000)
    # rounds_df["session_id"]  = game_df["session_id"][0] # Session ID is always 10 in the files....
    rounds_df["winner"]  = game_df["winner"][0]
    rounds_df["round"] = rounds_df.index
    rounds_df["roles"] = rounds_df["players"].map(lambda x: [players[i] for i in x])
    rounds_df["models"] = rounds_df["players"].map(lambda x: [playerinfo[i]["model"] for i in x])
    rounds_df = rounds_df.drop(columns="bids")
    
    utterances_df = rounds_df.explode(["debate", "votes"], ignore_index=True)
    utterances_df["debate"] = utterances_df["debate"].fillna("00").apply(list) # Deal with NaN text
    utterances_df[["speaker", "text"]] = utterances_df["debate"].tolist()
    utterances_df["votes"] = utterances_df["votes"].to_dict()
    utterances_df["timestamp"] = utterances_df.index
    utterances_df = utterances_df.drop(columns=["debate"])
    
    rounds_df = rounds_df.drop(columns=["debate", "votes"])
    rounds.append(rounds_df)
    utterances.append(utterances_df)

allrounds_df = pd.concat(rounds, ignore_index=True)
alltext_df = pd.concat(utterances, ignore_index=True)

BID OPTIONS:

- 0: I would like to observe and listen for now.
- 1: I have some general thoughts to share with the group.
- 2: I have something critical and specific to contribute to this discussion.
- 3: It is absolutely urgent for me to speak next.
- 4: Someone has addressed me directly and I must respond

### Save transformed data into CSVs

In [127]:
allrounds_df = allrounds_df.rename(columns={"session_id": "game_id"})
print(allrounds_df.info())

allrounds_df.to_csv(os.path.join(out_path, "allrounds.csv"))
allrounds_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   players     63 non-null     object
 1   eliminated  63 non-null     object
 2   unmasked    40 non-null     object
 3   protected   49 non-null     object
 4   exiled      37 non-null     object
 5   success     63 non-null     bool  
 6   game_id     63 non-null     int64 
 7   winner      63 non-null     object
 8   round       63 non-null     int64 
 9   roles       63 non-null     object
 10  models      63 non-null     object
dtypes: bool(1), int64(2), object(8)
memory usage: 5.1+ KB
None


Unnamed: 0,players,eliminated,unmasked,protected,exiled,success,game_id,winner,round,roles,models
0,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...","[gpt-4o-2024-05-13, gpt-4o-2024-05-13, gpt-4o-..."
1,"[Harold, Sam, Jackson, Hayley, Jacob, Mason]",Will,,Sam,,True,861,Werewolves,1,"[Villager, Villager, Villager, Werewolf, Werew...","[gpt-4o-2024-05-13, gpt-4o-2024-05-13, gpt-4o-..."
2,"[Sam, Hayley, Jacob, Mason]",Harold,,Sam,Jackson,True,861,Werewolves,2,"[Villager, Werewolf, Werewolf, Doctor]","[gpt-4o-2024-05-13, gpt-4o-2024-05-13, gpt-4o-..."
3,"[Jackson, Mason, Ginger, Scott, Sam, Jacob]",Paul,Paul,Jackson,Dan,True,577,Villagers,0,"[Villager, Villager, Werewolf, Werewolf, Docto...","[gpt-4.1-2025-04-14, gpt-4.1-2025-04-14, deeps..."
4,"[Jackson, Scott, Sam, Jacob]",Mason,Ginger,Sam,Ginger,True,577,Villagers,1,"[Villager, Werewolf, Doctor, Seer]","[gpt-4.1-2025-04-14, deepseek-chat, gpt-4.1-20..."
...,...,...,...,...,...,...,...,...,...,...,...
58,"[Mason, Jacob, Dan, Bert, Ginger, Harold]",Hayley,Mason,Ginger,David,True,609,Villagers,0,"[Villager, Villager, Villager, Werewolf, Docto...","[deepseek-reasoner, deepseek-reasoner, deepsee..."
59,"[Jacob, Dan, Ginger, Harold]",Mason,Bert,Harold,Bert,True,609,Villagers,1,"[Villager, Villager, Doctor, Seer]","[deepseek-reasoner, deepseek-reasoner, deepsee..."
60,"[Mason, Dan, Paul, Jackson, Sam, Hayley, Derek...",Mason,Paul,Mason,,True,171,Villagers,0,"[Villager, Villager, Villager, Villager, Werew...","[gpt-4o-mini-2024-07-18, gpt-4o-mini-2024-07-1..."
61,"[Mason, Dan, Paul, Hayley, Derek, Harold]",Jackson,Jackson,Harold,Sam,True,171,Villagers,1,"[Villager, Villager, Villager, Werewolf, Docto...","[gpt-4o-mini-2024-07-18, gpt-4o-mini-2024-07-1..."


In [128]:
strategies_df = pd.read_csv(os.path.join(out_path, "annotations.csv"), index_col=0)
strategies_df

Unnamed: 0,strategy
0,"['Interrogation', 'Accusation', 'Call for Acti..."
1,"['Interrogation', 'Accusation', 'Call for Acti..."
2,"['Interrogation', 'Evidence', 'Call for Action']"
3,"['Defense', 'Call for Action', 'Identity Decla..."
4,"['Interrogation', 'Accusation', 'Defense', 'Ev..."
...,...
436,"['Accusation', 'Evidence', 'Call for Action', ..."
437,"['Accusation', 'Evidence', 'Call for Action', ..."
438,"['Evidence', 'Call for Action', 'Identity Decl..."
439,"['Accusation', 'Evidence', 'Call for Action', ..."


In [129]:
alltext_df = alltext_df.rename(columns={"session_id": "game_id"})
alltext_df["strategy"] = strategies_df["strategy"]
print(alltext_df.info())

alltext_df.to_csv(os.path.join(out_path, "alltext.csv"))
alltext_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 441 entries, 0 to 440
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   players     441 non-null    object
 1   eliminated  441 non-null    object
 2   unmasked    306 non-null    object
 3   protected   363 non-null    object
 4   exiled      280 non-null    object
 5   votes       436 non-null    object
 6   success     441 non-null    bool  
 7   game_id     441 non-null    int64 
 8   winner      441 non-null    object
 9   round       441 non-null    int64 
 10  roles       441 non-null    object
 11  models      441 non-null    object
 12  speaker     441 non-null    object
 13  text        441 non-null    object
 14  timestamp   441 non-null    int64 
 15  strategy    441 non-null    object
dtypes: bool(1), int64(3), object(12)
memory usage: 52.2+ KB
None


Unnamed: 0,players,eliminated,unmasked,protected,exiled,votes,success,game_id,winner,round,roles,models,speaker,text,timestamp,strategy
0,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jackson', 'Will': 'Jackson', 'Sam'...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...","[gpt-4o-2024-05-13, gpt-4o-2024-05-13, gpt-4o-...",Will,"Alright everyone, it's unfortunate that we've ...",0,"['Interrogation', 'Accusation', 'Call for Acti..."
1,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jackson', 'Will': 'Mason', 'Sam': ...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...","[gpt-4o-2024-05-13, gpt-4o-2024-05-13, gpt-4o-...",Hayley,"Thanks, Will. It's indeed a tough start losing...",1,"['Interrogation', 'Accusation', 'Call for Acti..."
2,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jackson', 'Will': 'Jackson', 'Sam'...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...","[gpt-4o-2024-05-13, gpt-4o-2024-05-13, gpt-4o-...",Jacob,"Thanks, Hayley. I agree with you and Will that...",2,"['Interrogation', 'Evidence', 'Call for Action']"
3,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jacob', 'Will': 'Jacob', 'Sam': 'J...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...","[gpt-4o-2024-05-13, gpt-4o-2024-05-13, gpt-4o-...",Jackson,"Jacob, I appreciate you pointing out that I wa...",3,"['Defense', 'Call for Action', 'Identity Decla..."
4,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jacob', 'Will': 'Jacob', 'Sam': 'J...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...","[gpt-4o-2024-05-13, gpt-4o-2024-05-13, gpt-4o-...",Jacob,"Thanks for addressing that, Jackson. I complet...",4,"['Interrogation', 'Accusation', 'Defense', 'Ev..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
436,"[Mason, Dan, Paul, Derek, Harold]",Harold,Hayley,Harold,Hayley,"{'Mason': 'Paul', 'Dan': 'Hayley', 'Paul': 'Ha...",True,171,Villagers,2,"[Villager, Villager, Villager, Doctor, Seer]","[gpt-4o-mini-2024-07-18, gpt-4o-mini-2024-07-1...",Harold,"Hayley, your accusations are unfounded and ser...",19,"['Accusation', 'Evidence', 'Call for Action', ..."
437,"[Mason, Dan, Paul, Derek, Harold]",Harold,Hayley,Harold,Hayley,"{'Mason': 'Hayley', 'Dan': 'Hayley', 'Paul': '...",True,171,Villagers,2,"[Villager, Villager, Villager, Doctor, Seer]","[gpt-4o-mini-2024-07-18, gpt-4o-mini-2024-07-1...",Hayley,"Harold, your insistence on labeling me as the ...",20,"['Accusation', 'Evidence', 'Call for Action', ..."
438,"[Mason, Dan, Paul, Derek, Harold]",Harold,Hayley,Harold,Hayley,"{'Mason': 'Hayley', 'Dan': 'Hayley', 'Paul': '...",True,171,Villagers,2,"[Villager, Villager, Villager, Doctor, Seer]","[gpt-4o-mini-2024-07-18, gpt-4o-mini-2024-07-1...",Harold,"Hayley, your tactics of distraction are transp...",21,"['Evidence', 'Call for Action', 'Identity Decl..."
439,"[Mason, Dan, Paul, Derek, Harold]",Harold,Hayley,Harold,Hayley,"{'Mason': 'Hayley', 'Dan': 'Hayley', 'Paul': '...",True,171,Villagers,2,"[Villager, Villager, Villager, Doctor, Seer]","[gpt-4o-mini-2024-07-18, gpt-4o-mini-2024-07-1...",Paul,I want to echo what Harold has shared with us:...,22,"['Accusation', 'Evidence', 'Call for Action', ..."


## EDA