In [2]:
import pandas as pd
from pandas import DataFrame

# Notes:

The file /interactions contains the complete game interactions of all players in each episode.

A game is stored as JSON object with on key that is **turn** which contains an array of all turns.
The last turn indicates whether a game ended successful or not.
The turn object contains an action object with the key type. This action type can have the following values:
- "correct guess" -> indicates that a game ended with a correct guess of the player
- "invalid format" -> indicates that a game ended because the answer format was incorrect. This can happen when a related word was used or the output is not in the CLUE: \< ... \> Format
- "max turns reached" -> indicates that a game ended because the maximum amount of turns is reached

In [8]:
base_path_llama3_70B: str = './v1.6/Meta-Llama-3-70B-Instruct-hf-t0.0--Meta-Llama-3-70B-Instruct-hf-t0.0/'
game: str = 'taboo/2_low_en/'
episode: str = 'episode_0/'

instance_data: DataFrame = pd.read_json(base_path_llama3_70B + game + episode + 'instance.json')
request_data: DataFrame = pd.read_json(base_path_llama3_70B + game + episode + 'requests.json')

In [9]:
instance_data.head()

Unnamed: 0,game_id,target_word,related_word,target_word_stem,related_word_stem
0,0,autograph,signature,autograph,signatur
1,0,autograph,hand,autograph,hand
2,0,autograph,sign,autograph,sign


In [10]:
request_data.head()

Unnamed: 0,timestamp,manipulated_prompt_obj,raw_response_obj
0,2024-05-28 09:08:55.997695,{'inputs': '<|begin_of_text|><|start_header_id...,{'response': '<|begin_of_text|><|start_header_...
1,2024-05-28 09:09:00.340713,{'inputs': '<|begin_of_text|><|start_header_id...,{'response': '<|begin_of_text|><|start_header_...


In [16]:
print(request_data.manipulated_prompt_obj.iloc[1]['inputs'])

<|begin_of_text|><|start_header_id|>user<|end_header_id|>

You are playing a collaborative word guessing game in which you have to guess a target word that another player describes to you.

You can make one guess at each trial. You win when you guess the target word. You lose when you cannot guess it in 3 tries.

After each trial you will get a new hint from the other player which starts with CLUE.

Make your guesses by just saying the word using the following form: GUESS: <a word>

Let us start.


CLUE: Written confirmation of identity by a famous person.<|eot_id|><|start_header_id|>assistant<|end_header_id|>


In [17]:
print(request_data.raw_response_obj.iloc[1]['response'])

<|begin_of_text|><|start_header_id|>user<|end_header_id|>

You are playing a collaborative word guessing game in which you have to guess a target word that another player describes to you.

You can make one guess at each trial. You win when you guess the target word. You lose when you cannot guess it in 3 tries.

After each trial you will get a new hint from the other player which starts with CLUE.

Make your guesses by just saying the word using the following form: GUESS: <a word>

Let us start.


CLUE: Written confirmation of identity by a famous person.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

GUESS: Autograph<|eot_id|>


In [2]:
import os
import plotly.graph_objects as go
import json

In [3]:
top_level_directories: list = ['v0.9', 'v1.0', 'v1.5', 'v1.5_quantized', 'v1.6', 'v1.6_backends', 'v1.6_quantized']
game: str = 'taboo'
game_variants: list = ['0_high_en', '1_medium_en', '2_low_en']

In [11]:
results: dict = {
    'available_episodes': 0,
    '0_high_en': {
        'total_episodes': 0,
        'correct guess': 0,
        'invalid format': 0,
        'max turns reached': 0,
        'invalid clue': 0,
    },
    '1_medium_en': {
        'total_episodes': 0,
        'correct guess': 0,
        'invalid format': 0,
        'max turns reached': 0,
        'invalid clue': 0,
    },
    '2_low_en': {
        'total_episodes': 0,
        'correct guess': 0,
        'invalid format': 0,
        'max turns reached': 0,
        'invalid clue': 0,
    },
}
available_files: int = 0
all_endings: list = []

for top_level_directory in top_level_directories:
    # loop over all directories with all variants of the benchmark
    all_available_models: list = os.listdir(f'./{top_level_directory}')
    # filter all files that are not a directory
    all_available_models = [model_name for model_name in all_available_models if os.path.isdir(f'./{top_level_directory}/{model_name}')]
    for model_name in all_available_models:
        # for each model pick all episodes from the taboo game folder
        for game_variant in game_variants:
            all_episodes: list =  os.listdir(f'./{top_level_directory}/{model_name}/{game}/{game_variant}')
            # filter all file names that do not start with episode to ensure to not read files without episodes
            all_episodes = [episode for episode in all_episodes if episode.startswith('episode')]
            
            # count the number of all episodes
            results['available_episodes'] += len(all_episodes)
            # count the number of episodes of a particular game variant
            results[game_variant]['total_episodes'] += len(all_episodes)
            
            # loop over all episodes to check whether an episode ended with success or any problems
            for episode in all_episodes:
                path_interactions: str = f'./{top_level_directory}/{model_name}/{game}/{game_variant}/{episode}/interactions.json'
                if os.path.isfile(path_interactions):
                    with open(path_interactions,  encoding="utf8") as json_file:
                        interactions: dict = json.load(json_file)
                        
                        # get the last turn of the interaction
                        last_turn: list = interactions['turns'][-1]
                        
                        # get game ending 
                        game_ending: str = last_turn[-1]['action']['type']
                        
                        # add game ending
                        results[game_variant][game_ending] += 1

[]


In [12]:
results

{'available_episodes': 9900,
 '0_high_en': {'total_episodes': 3300,
  'correct guess': 1271,
  'invalid format': 1191,
  'max turns reached': 224,
  'invalid clue': 608},
 '1_medium_en': {'total_episodes': 3300,
  'correct guess': 1035,
  'invalid format': 1244,
  'max turns reached': 205,
  'invalid clue': 765},
 '2_low_en': {'total_episodes': 3300,
  'correct guess': 1175,
  'invalid format': 1220,
  'max turns reached': 151,
  'invalid clue': 754}}

In [37]:
# define data
data = results

# Define nodes
nodes = {
    "label": ["Total Episodes of Taboo Played", "Category: High English", "Category: Medium English", "Category: Low English", "Game Ending: Correct Guess", "Game Ending: Invalid Format", "Game Ending: Max Turns Reached", "Game Ending: Invalid Clue"],
    "color": ["#a6dced", "#a6dced", "#a6dced", "#a6dced", "#89c289", "#f2a1a5", "#f2a1a5", "#f2a1a5"]
}

# Define links
links = {
    "source": [0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3],
    "target": [1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7],
    "value": [
        data['0_high_en']['total_episodes'],
        data['1_medium_en']['total_episodes'],
        data['2_low_en']['total_episodes'],
        data['0_high_en']['correct guess'],
        data['0_high_en']['invalid format'],
        data['0_high_en']['max turns reached'],
        data['0_high_en']['invalid clue'],
        data['1_medium_en']['correct guess'],
        data['1_medium_en']['invalid format'],
        data['1_medium_en']['max turns reached'],
        data['1_medium_en']['invalid clue'],
        data['2_low_en']['correct guess'],
        data['2_low_en']['invalid format'],
        data['2_low_en']['max turns reached'],
        data['2_low_en']['invalid clue']
    ],
    "color": [
        "#a6dced", "#a6dced", "#a6dced",
        "#89c289", "#f2a1a5", "#f2a1a5", "#f2a1a5",
        "#89c289", "#f2a1a5", "#f2a1a5", "#f2a1a5",
        "#89c289", "#f2a1a5", "#f2a1a5", "#f2a1a5"
    ]
}

# Create the Sankey diagram
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=nodes["label"],
        color=nodes["color"],
    ),
    link=dict(
        source=links["source"],  # indices correspond to labels in the nodes list
        target=links["target"],
        value=links["value"],
        color=links["color"],
    )
)])

fig.update_layout(title_text="Sankey Diagram of Game Episodes of Taboo from Start to Ending", font_size=15)
fig.write_html("sankey_diagram_episodes_taboo.html")
fig.show()