# Werewolf Arena - LLM WAU Dataset EDA

Exploratory data analysis (EDA), including data loading and transformation.

In [71]:
import ast
import os
import json
import random 

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from collections import Counter
from itertools import chain

# Seed Python's global `random` generator so that values drawn from it are
# repeatable when the notebook is run top to bottom on a fresh kernel.
random.seed(42)

In [2]:
# basepath: folder that holds the per-game `session_*` directories.
# out_path: folder where this notebook reads and writes its CSV files.
# Both are relative to the notebook's working directory and normalised for the host OS.
basepath = os.path.normpath(os.path.join("..", "Data", "Werewolf-Arena"))
out_path = os.path.normpath(os.path.join("..", "Data", "Output", "EDA_WA"))

## Load and transform data

In [99]:
# Process all games into a general rounds DF, and a DF by each utterance.
#
# Outputs:
#   allrounds_df - one row per round of every game
#   alltext_df   - one row per debate utterance, repeating its round's columns

id_space = 1000  # surrogate game ids are drawn from range(id_space)
used_session_ids = set()

rounds = []
utterances = []

# os.listdir() returns entries in arbitrary order. Sorting makes the row order
# (and therefore the ids drawn below) independent of the filesystem.
# NOTE(review): annotations.csv is attached to alltext_df by row index further
# down; confirm the annotations were produced against this sorted order.
for folder in sorted(os.listdir(basepath)):
    if not folder.startswith("session_"):
        continue

    # orient="index" + .T yields a single-row frame whose columns are the
    # top-level keys of the JSON file.
    game_df = pd.read_json(os.path.join(basepath, folder, "game_complete.json"), orient="index").T
    players = {k: v["role"] for k, v in game_df["players"].to_dict()[0].items()}

    # The session_id stored in the files is always 10, so it cannot identify a
    # game; a surrogate id is drawn from the global `random` generator instead.
    # Redraw on collision so two games never share an id.
    if len(used_session_ids) >= id_space:
        raise RuntimeError(
            f"More than {id_space} games found; cannot assign unique session ids."
        )
    session_id = int(random.random() * id_space)
    while session_id in used_session_ids:
        session_id = int(random.random() * id_space)
    used_session_ids.add(session_id)

    rounds_df = pd.DataFrame(game_df["rounds"][0])
    rounds_df["session_id"] = session_id
    rounds_df["winner"] = game_df["winner"][0]
    rounds_df["round"] = rounds_df.index
    # Role of each player listed in the round, in the same order as "players".
    rounds_df["roles"] = rounds_df["players"].map(lambda x: [players[i] for i in x])
    rounds_df = rounds_df.drop(columns="bids")

    # "debate" and "votes" are exploded together, so each output row pairs one
    # debate entry with the votes entry at the same position.
    utterances_df = rounds_df.explode(["debate", "votes"], ignore_index=True)
    # Deal with NaN text: a missing debate entry becomes the placeholder pair
    # ["0", "0"], so the two-column split below still works.
    utterances_df["debate"] = utterances_df["debate"].fillna("00").apply(list)
    utterances_df[["speaker", "text"]] = utterances_df["debate"].tolist()
    # NOTE(review): this round trip through a dict looks like a no-op on current
    # pandas (values are re-aligned on the index); kept as-is, confirm before removing.
    utterances_df["votes"] = utterances_df["votes"].to_dict()
    # Position of the utterance within its game (index restarts at 0 per game).
    utterances_df["timestamp"] = utterances_df.index
    utterances_df = utterances_df.drop(columns=["debate"])

    rounds_df = rounds_df.drop(columns=["debate", "votes"])
    rounds.append(rounds_df)
    utterances.append(utterances_df)

allrounds_df = pd.concat(rounds, ignore_index=True)
alltext_df = pd.concat(utterances, ignore_index=True)

BID OPTIONS:

- 0: I would like to observe and listen for now.
- 1: I have some general thoughts to share with the group.
- 2: I have something critical and specific to contribute to this discussion.
- 3: It is absolutely urgent for me to speak next.
- 4: Someone has addressed me directly and I must respond.

### Save transformed data into CSVs

In [113]:
# Expose the per-game identifier under the name "game_id".
allrounds_df = allrounds_df.rename(columns={"session_id": "game_id"})
# info() writes its report to stdout itself and returns None, so it is called
# directly rather than wrapped in print() (which would add a stray "None" line).
allrounds_df.info()

# Make sure the output folder exists before writing into it.
os.makedirs(out_path, exist_ok=True)
allrounds_df.to_csv(os.path.join(out_path, "allrounds.csv"))
allrounds_df

<class 'pandas.core.frame.DataFrame'>
Index: 36 entries, 0 to 35
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   players     36 non-null     object
 1   eliminated  36 non-null     object
 2   unmasked    21 non-null     object
 3   protected   30 non-null     object
 4   exiled      21 non-null     object
 5   success     36 non-null     bool  
 6   game_id     36 non-null     int64 
 7   winner      36 non-null     object
 8   round       36 non-null     int64 
 9   roles       36 non-null     object
dtypes: bool(1), int64(2), object(7)
memory usage: 2.8+ KB
None


Unnamed: 0,players,eliminated,unmasked,protected,exiled,success,game_id,winner,round,roles
0,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew..."
1,"[Harold, Sam, Jackson, Hayley, Jacob, Mason]",Will,,Sam,,True,861,Werewolves,1,"[Villager, Villager, Villager, Werewolf, Werew..."
2,"[Sam, Hayley, Jacob, Mason]",Harold,,Sam,Jackson,True,861,Werewolves,2,"[Villager, Werewolf, Werewolf, Doctor]"
3,"[Tyler, Ginger, Jackson, Leah, Hayley, Sam, Is...",Dan,Dan,Ginger,,True,577,Werewolves,0,"[Villager, Villager, Villager, Werewolf, Werew..."
4,"[Tyler, Ginger, Jackson, Leah, Hayley, Sam]",Isaac,Tyler,Tyler,,True,577,Werewolves,1,"[Villager, Villager, Villager, Werewolf, Werew..."
5,"[Tyler, Ginger, Leah, Hayley, Sam]",Jackson,,Tyler,,True,577,Werewolves,2,"[Villager, Villager, Werewolf, Werewolf, Doctor]"
6,"[Tyler, Ginger, Leah, Hayley]",Sam,,Leah,,True,577,Werewolves,3,"[Villager, Villager, Werewolf, Werewolf]"
7,"[David, Bert, Jacob, Harold, Hayley, Mason, Will]",Scott,Jacob,David,,True,704,Villagers,0,"[Villager, Villager, Villager, Werewolf, Werew..."
8,"[David, Bert, Jacob, Harold, Mason, Will]",Will,Hayley,Will,Hayley,True,704,Villagers,1,"[Villager, Villager, Villager, Werewolf, Docto..."
9,"[David, Bert, Jacob, Will]",Mason,David,David,Harold,True,704,Villagers,2,"[Villager, Villager, Villager, Seer]"


In [111]:
# Load the per-utterance strategy annotations; the first CSV column is used as the index.
annotations_csv = os.path.join(out_path, "annotations.csv")
strategies_df = pd.read_csv(annotations_csv, index_col=0)
strategies_df

Unnamed: 0,strategy
0,"['Interrogation', 'Accusation', 'Call for Acti..."
1,"['Interrogation', 'Accusation', 'Call for Acti..."
2,"['Interrogation', 'Evidence', 'Call for Action']"
3,"['Defense', 'Call for Action', 'Identity Decla..."
4,"['Interrogation', 'Accusation', 'Defense', 'Ev..."
...,...
250,"['Accusation', 'Call for Action']"
251,"['Accusation', 'Call for Action']"
252,['Call for Action']
253,['Call for Action']


In [114]:
# Expose the per-game identifier under the name "game_id".
alltext_df = alltext_df.rename(columns={"session_id": "game_id"})

# The strategies are matched to utterances by index label only. If the two
# indexes do not cover the same rows, the assignment would silently leave NaN
# strategies or drop annotations, so fail loudly instead.
missing_rows = alltext_df.index.difference(strategies_df.index)
extra_rows = strategies_df.index.difference(alltext_df.index)
if len(missing_rows) or len(extra_rows):
    raise ValueError(
        "annotations.csv does not line up with alltext_df: "
        f"{len(missing_rows)} utterance(s) without annotation, "
        f"{len(extra_rows)} annotation(s) without utterance."
    )
alltext_df["strategy"] = strategies_df["strategy"]
# info() writes its report to stdout itself and returns None, so it is called
# directly rather than wrapped in print() (which would add a stray "None" line).
alltext_df.info()

alltext_df.to_csv(os.path.join(out_path, "alltext.csv"))
alltext_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 255 entries, 0 to 254
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   players     255 non-null    object
 1   eliminated  255 non-null    object
 2   unmasked    164 non-null    object
 3   protected   221 non-null    object
 4   exiled      160 non-null    object
 5   votes       252 non-null    object
 6   success     255 non-null    bool  
 7   game_id     255 non-null    int64 
 8   winner      255 non-null    object
 9   round       255 non-null    int64 
 10  roles       255 non-null    object
 11  speaker     255 non-null    object
 12  text        255 non-null    object
 13  timestamp   255 non-null    int64 
 14  strategy    255 non-null    object
dtypes: bool(1), int64(3), object(11)
memory usage: 28.3+ KB
None


Unnamed: 0,players,eliminated,unmasked,protected,exiled,votes,success,game_id,winner,round,roles,speaker,text,timestamp,strategy
0,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jackson', 'Will': 'Jackson', 'Sam'...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...",Will,"Alright everyone, it's unfortunate that we've ...",0,"['Interrogation', 'Accusation', 'Call for Acti..."
1,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jackson', 'Will': 'Mason', 'Sam': ...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...",Hayley,"Thanks, Will. It's indeed a tough start losing...",1,"['Interrogation', 'Accusation', 'Call for Acti..."
2,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jackson', 'Will': 'Jackson', 'Sam'...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...",Jacob,"Thanks, Hayley. I agree with you and Will that...",2,"['Interrogation', 'Evidence', 'Call for Action']"
3,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jacob', 'Will': 'Jacob', 'Sam': 'J...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...",Jackson,"Jacob, I appreciate you pointing out that I wa...",3,"['Defense', 'Call for Action', 'Identity Decla..."
4,"[Harold, Will, Sam, Jackson, Hayley, Jacob, Ma...",Dan,Will,Hayley,,"{'Harold': 'Jacob', 'Will': 'Jacob', 'Sam': 'J...",True,861,Werewolves,0,"[Villager, Villager, Villager, Villager, Werew...",Jacob,"Thanks for addressing that, Jackson. I complet...",4,"['Interrogation', 'Accusation', 'Defense', 'Ev..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,"[Mason, Dan, Paul, Derek, Harold]",Harold,Hayley,Harold,Hayley,"{'Mason': 'Paul', 'Dan': 'Hayley', 'Paul': 'Ha...",True,635,Villagers,2,"[Villager, Villager, Villager, Doctor, Seer]",Harold,"Hayley, your accusations are unfounded and ser...",19,"['Accusation', 'Call for Action']"
251,"[Mason, Dan, Paul, Derek, Harold]",Harold,Hayley,Harold,Hayley,"{'Mason': 'Hayley', 'Dan': 'Hayley', 'Paul': '...",True,635,Villagers,2,"[Villager, Villager, Villager, Doctor, Seer]",Hayley,"Harold, your insistence on labeling me as the ...",20,"['Accusation', 'Call for Action']"
252,"[Mason, Dan, Paul, Derek, Harold]",Harold,Hayley,Harold,Hayley,"{'Mason': 'Hayley', 'Dan': 'Hayley', 'Paul': '...",True,635,Villagers,2,"[Villager, Villager, Villager, Doctor, Seer]",Harold,"Hayley, your tactics of distraction are transp...",21,['Call for Action']
253,"[Mason, Dan, Paul, Derek, Harold]",Harold,Hayley,Harold,Hayley,"{'Mason': 'Hayley', 'Dan': 'Hayley', 'Paul': '...",True,635,Villagers,2,"[Villager, Villager, Villager, Doctor, Seer]",Paul,I want to echo what Harold has shared with us:...,22,['Call for Action']


## EDA