In [None]:
import os
import pandas as pd
from utils.embeding_utils import (get_embeddings_for_table, calculate_pca_for_embeddings)
from utils.data_loading import load_sql_data, load_csv
import pingouin as pg
import numpy

db_name = "clean_db.db"
csv_name = "data/games.csv"

# 1) Load the data
# The CSV is used as a cache of the games table: it is written from the
# database on the first run and read back directly on every later run.
if not os.path.exists(csv_name):
    # load_sql_data returns two tables; only the second one (games) is used below.
    players_df, games_df = load_sql_data(db_name)
    # DataFrame.to_csv does not create missing parent directories, so make
    # sure the cache directory exists before writing into it.
    os.makedirs(os.path.dirname(csv_name) or ".", exist_ok=True)
    games_df.to_csv(csv_name, index=False)
else:
    games_df = pd.read_csv(csv_name)

games_df.head()

# Calculate embeddings and CL scores

## Embeddings

In [None]:
embedding_model = "word2vec"

# Run the embedding helper first and feed its result straight into the PCA
# helper (15 components); the table returned by the PCA step replaces games_df.
games_df = calculate_pca_for_embeddings(
    get_embeddings_for_table(games_df, model_name=embedding_model),
    model_name=embedding_model,
    num_pca_components=15,
)

In [None]:
# Preview the first rows of the table returned by the embedding/PCA cell.
games_df.head()

## CL scores

In [None]:
from importlib import reload
from utils import game_statistics

# Re-import the module so that edits to utils/game_statistics are picked up
# without restarting the kernel.
reload(game_statistics)
from utils.game_statistics import calculate_game_metrics_per_player, calculate_game_metrics_per_configuration

# NOTE(review): despite its name, player_metrics holds the per-configuration
# result here; the next cell rebinds the same name to the per-player result.
player_metrics = calculate_game_metrics_per_configuration(games_df, plot_box=True, separate_per_config=True)
print("Success Rate and Average Rounds for Winning Games:")
print(player_metrics)

In [None]:

# Per-player metrics; this overwrites the per-configuration result that the
# previous cell stored under the same name.
player_metrics = calculate_game_metrics_per_player(games_df)
print("Average Number of Rounds and Success Rate per Player:")
print(player_metrics)


In [None]:
# `reload` is the importlib.reload imported in an earlier cell.
import strategy_analysis
reload(strategy_analysis)
# After this from-import the name `strategy_analysis` no longer refers to the
# package but to the object it exports under that name, which is called below.
from strategy_analysis import strategy_analysis

# Run the analysis on the raw (non-PCA) embeddings, with conceptual-linking
# scoring switched on.
results_df = strategy_analysis(games_df, embedding_model, use_pca=False, use_conceptual_linking_score=True)


In [None]:
# Columns that are persisted together with the strategy labels; every other
# column of results_df is left out of the CSV.
cols_to_save = [
    # identifiers
    "gameId", "playerId", "player1Id", "player2Id", "botId",
    # game outcome and raw words
    "status", "roundCount", "wordsPlayed1", "wordsPlayed2",
    # configuration
    "gameConfigPlayer1", "gameConfigPlayer2", "gameConfig",
    # per-player view of the words
    "word_my", "word_opponent",
    # questionnaires
    "surveyAnswers1", "surveyAnswers2",
    # strategy labels and scores
    "semantic_strategy_name", "quantitative_strategy_name",
    "conceptual_linking_score_my", "conceptual_linking_score_opponent",
    "collocation_score_my", "collocation_score_opponent",
]
strategy_results_file = "data/results.csv"

# Select the subset by label and write it without the index column.
results_df_partial = results_df.loc[:, cols_to_save]
results_df_partial.to_csv(strategy_results_file, index=False)

# Graphs

## Load data

In [None]:
import matplotlib.pyplot as plt
# Use a larger default font for every figure drawn from here on.
plt.rcParams.update({'font.size': 14})

strategy_results_file = "data/results.csv"
# Reload the saved results so the plotting section can start from the CSV.
# Values that were Python lists when saved come back as their string form.
results_df_partial = pd.read_csv(strategy_results_file)
# Work on a copy; results_df_partial keeps the data exactly as read from disk.
results_df = results_df_partial.copy()
results_df.head()

In [None]:
# Participants whose games are included in the analysis.
player_id_array = ["DvplG2Kz", "IfM2wWHp", "1zx9ju2S", "BVSUju5U", "lW70ICul", "OZ52imkd", "GorQHcOB", "8PSrn9JD", "u5LgXigL",
                  "1Ax1SPCe", "l8ND7njk", "sfiXibsa", "PvUHGrDy", "MO2pWpjE", "gsbTiZwO", "gDnhDODC", "Ska8rixW", "6DbWtL5r",
                  "TCFqdHBb", "wQhT1jrv"]
# Keep the selected participants' games that lasted more than two rounds.
# Both conditions are combined into one mask, and .copy() hands later cells an
# independent frame: several of them add columns to filtered_df, which would
# otherwise raise pandas' SettingWithCopyWarning on a slice of results_df.
filtered_df = results_df[
    results_df["playerId"].isin(player_id_array) & (results_df["roundCount"] > 2)
].copy()

## Conceptual linking

In [None]:
from strategy_analysis.conceptual_linking_analysis import conceptual_linking_score

def conceptual_linking_scores(player_games, words_col='wordsPlayedMy'):
    """
    Score every word a player used against the word they played one round earlier.

    Parameters
    ----------
    player_games : pd.DataFrame
        One row per game. Column `words_col` holds the player's words in round
        order, either as a list or as the string form of a list.
    words_col : str, default 'wordsPlayedMy'
        Name of the column that holds the player's words.

    Returns
    -------
    pd.DataFrame
        A copy of `player_games` whose 'conceptual_linking_score_my' column
        holds one list per game: 0 for the first round (there is no previous
        word) followed by conceptual_linking_score(current, previous) for every
        later round. The input frame is left untouched, so columns read from
        the CSV keep their original string form for the other cells.
    """
    import ast  # local import: ast is not imported before this cell

    scored_games = player_games.copy()
    scores_per_game = []

    for words_played_my in scored_games[words_col]:
        # Values read back from CSV are strings; literal_eval parses them
        # without executing arbitrary code.
        if not isinstance(words_played_my, list):
            words_played_my = ast.literal_eval(words_played_my)

        # First round has no previous word to compare with, so its score is 0.
        game_scores = [0] if words_played_my else []
        for prev_word, current_word in zip(words_played_my, words_played_my[1:]):
            game_scores.append(conceptual_linking_score(current_word, prev_word, verbose=True))
        scores_per_game.append(game_scores)

    # dtype=object keeps each per-game list as a single cell value.
    scored_games['conceptual_linking_score_my'] = pd.Series(
        scores_per_game, index=scored_games.index, dtype=object
    )
    return scored_games

# NOTE(review): the original call used `selected_df`, which is not defined
# anywhere in this notebook. filtered_df (built in the previous cell) and its
# 'word_my' column are assumed to be the intended input -- confirm.
cl_df = conceptual_linking_scores(filtered_df, words_col="word_my")

In [None]:
# Preview the frame returned by conceptual_linking_scores.
cl_df.head()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import ast

def plot_score_heatmap(df, score_columns=("score_my", "score_opponent"), max_num_round=16):
    """
    Plot an imshow heat-map of the mean score per human-vs-bot configuration.

    Only rows whose 'gameConfig' is one of the two human-vs-bot configurations
    are used. For every column in `score_columns` the per-game value is the
    nan-mean of its first `max_num_round` entries; these per-game values are
    then averaged per configuration. The grouped table is printed before the
    figure is shown.

    NOTE(review): a later cell defines another function with the same name,
    which replaces this one once that cell has run.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain 'gameConfig' and every column in `score_columns`. Score
        cells may be lists/arrays or their string form. The frame is not
        modified.
    score_columns : sequence of str
        Columns holding the per-round scores.
    max_num_round : int
        Number of leading rounds taken into account per game.
    """
    config_map = {
        "human_vs_bot_(bot_shown)":     "LLM-vs-Human (AI shown)",
        "human_vs_bot_(human_shown)":   "LLM-vs-Human (Human shown)",
    }
    # Filter on the raw configuration names first and relabel a private copy,
    # so the caller's frame is untouched and the function can be called again
    # on the same data.
    df = df[df['gameConfig'].isin(config_map.keys())].copy()
    df['gameConfig'] = df['gameConfig'].map(config_map)

    if df.empty:
        # np.nanmin / np.nanmax below raise on an empty matrix.
        print("plot_score_heatmap: no human-vs-bot games in the data, nothing to plot.")
        return

    def parse_score_value(val):
        # If the value is a string, try to convert it to a list or array
        if isinstance(val, str):
            try:
                val = ast.literal_eval(val)
            except Exception:
                return 0
        # Now, if it's a list or numpy array, compute the mean for the first max_num_round items
        if isinstance(val, (list, np.ndarray)) and len(val) > 0:
            return np.nanmean(val[:max_num_round])
        # Unparseable or empty values count as 0.
        return 0

    for col in score_columns:
        df[col + '_mean'] = df[col].apply(parse_score_value)

    agg_cols = [col + '_mean' for col in score_columns]
    grouped = df.groupby('gameConfig')[agg_cols].mean().reset_index()

    print(agg_cols)
    print(grouped[['gameConfig'] + agg_cols])

    matrix = grouped.set_index('gameConfig')

    fig, ax = plt.subplots(figsize=(8, 4))
    data = matrix.values
    vmin = np.nanmin(data)
    vmax = np.nanmax(data)

    cax = ax.imshow(data, cmap='viridis', aspect='auto', interpolation='nearest', vmin=vmin, vmax=vmax)
    fig.colorbar(cax, ax=ax)

    # Display names for the first two score columns; zip stops at the shorter
    # sequence, so any other number of columns falls back to the column name.
    x_label_map = dict(zip(score_columns, ("Bot\nPrevious Word", "Partner\nPrevious Word")))

    ax.set_xticks(np.arange(len(matrix.columns)))
    x_labels = [x_label_map.get(col.replace('_mean', ''), col.replace('_mean', '')) for col in matrix.columns]
    ax.set_xticklabels(x_labels, rotation=0, ha='center')

    ax.set_yticks(np.arange(len(matrix.index)))
    ax.set_yticklabels(matrix.index)

    # Annotate each cell; white text on the dark half of the colour scale.
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            val = data[i, j]
            if not np.isnan(val):
                text_color = 'white' if val < (vmin + vmax) / 2 else 'black'
                ax.text(j, i, f"{val:.2f}", ha='center', va='center', color=text_color)

    plt.tight_layout()
    plt.show()

# Pass a copy so that the relabelling of 'gameConfig' done inside
# plot_score_heatmap does not alter cl_df itself.
cl_df_copy = cl_df.copy()
plot_score_heatmap(cl_df_copy, score_columns=("conceptual_linking_score_my", "conceptual_linking_score_opponent"), max_num_round=16)

In [None]:
import numpy as np
import pandas as pd

def plot_score_heatmap(results_df,
                       score_columns=("score_my", "score_opponent"),
                       max_num_round=16):
    """
    Draw a bubble heat-map of the mean score per game configuration.

    The configuration is taken from the perspective of `playerId`
    ('gameConfigPlayer1' when the player is player 1, 'gameConfigPlayer2' when
    player 2, otherwise 'Unknown'). For every column in `score_columns` the
    per-game value is the nan-mean of its first `max_num_round` entries (0 when
    the cell holds no list); these values are averaged per configuration. Each
    cell is drawn as a circle whose area is proportional to the value;
    non-positive and NaN cells are left empty.

    Parameters
    ----------
    results_df : pd.DataFrame
        Must contain 'playerId', 'player1Id', 'player2Id', 'gameConfigPlayer1',
        'gameConfigPlayer2' and every column in `score_columns`. Score cells
        may be lists/arrays or their string form. The frame is not modified.
    score_columns : sequence of str
        Columns holding the per-round scores.
    max_num_round : int
        Number of leading rounds taken into account per game.
    """
    # Imported locally so the function also works on a fresh kernel: at cell
    # level these names are only imported further down the notebook.
    import matplotlib.pyplot as plt
    import matplotlib.ticker as mticker
    from matplotlib.cm import ScalarMappable
    from matplotlib.colors import Normalize
    from matplotlib.patches import Circle

    if len(results_df) == 0:
        # Row-wise apply and np.nanmin below cannot handle an empty frame.
        print("plot_score_heatmap: no rows to plot.")
        return

    # Private copy: callers pass a filtered slice, and writing new columns
    # into it would modify their data and raise SettingWithCopyWarning.
    results_df = results_df.copy()

    # 1 ▸ configuration from the player's perspective ---------------------------
    results_df['gameConfig'] = results_df.apply(
        lambda row: row['gameConfigPlayer1']
        if row['playerId'] == row['player1Id']
        else (row['gameConfigPlayer2']
              if row['playerId'] == row['player2Id']
              else 'Unknown'),
        axis=1
    )

    # 2 ▸ human-readable labels; unmapped configurations keep their raw name ----
    config_map = {
        "human_vs_bot_(bot_shown)":     "vs-LLM\n(AI shown)",
        "human_vs_bot_(human_shown)":   "vs-LLM\n(Human shown)",
        "human_vs_human_(human_shown)": "vs-Human\n(Human shown)",
        "human_vs_human_(bot_shown)":   "vs-Human\n(AI shown)",
    }
    results_df['gameConfig'] = results_df['gameConfig'].map(config_map)\
                                                     .fillna(results_df['gameConfig'])

    # 3 ▸ per-game mean of the first max_num_round scores -----------------------
    def _mean_score(raw):
        # NOTE(review): eval executes whatever text the CSV contains; it is
        # kept because the stored lists may hold non-literal reprs. Only
        # strings are evaluated, so cells that already hold a list also work.
        arr = eval(raw) if isinstance(raw, str) else raw
        if isinstance(arr, (list, np.ndarray)) and len(arr) > 0:
            return np.nanmean(arr[:max_num_round])
        return 0

    for col in score_columns:
        results_df[col + '_mean'] = results_df[col].apply(_mean_score)

    # 4 ▸ average per configuration ---------------------------------------------
    agg_cols = [col + '_mean' for col in score_columns]
    grouped  = results_df.groupby('gameConfig')[agg_cols].mean().reset_index()

    # 5 ▸ matrix for the plot ---------------------------------------------------
    matrix = grouped.set_index('gameConfig')
    data   = matrix.values
    if data.size == 0:
        print("plot_score_heatmap: no configuration groups to plot.")
        return

    # 6 ▸ bubble heat-map -------------------------------------------------------
    fig, ax = plt.subplots(figsize=(8, 6))

    # colour-mapping and colour-bar (without outline)
    norm = Normalize(vmin=np.nanmin(data), vmax=np.nanmax(data))
    sm   = ScalarMappable(norm=norm, cmap='viridis')
    sm.set_array([])
    cbar = fig.colorbar(sm, ax=ax, fraction=0.046, pad=0.04)
    cbar.ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.2f'))
    cbar.outline.set_visible(False)

    # parameters controlling look & feel
    h_spacing  = 1.3      # horizontal gap multiplier
    max_radius = 0.40     # absolute max radius (largest bubble)

    n_rows, n_cols = data.shape
    for i in range(n_rows):
        for j in range(n_cols):
            val = data[i, j]
            if np.isnan(val) or val <= 0:
                continue                          # skip empty / non-positive
            # radius ∝ √value  (so *area* ∝ value)
            radius = np.sqrt(val / norm.vmax) * max_radius
            x      = j * h_spacing
            y      = i

            ax.add_patch(
                Circle((x, y), radius=radius,
                       facecolor=sm.to_rgba(val),
                       edgecolor='grey', lw=0.5)
            )

            txt_col = 'white' if val < (norm.vmin + norm.vmax) / 2 else 'black'
            ax.text(x, y, f"{val:.2f}", ha='center', va='center',
                    fontweight='bold', color=txt_col)

    # ticks, labels, limits -----------------------------------------------------
    xtick_pos = [k * h_spacing for k in range(n_cols)]
    ax.set_xticks(xtick_pos)
    # The first two columns get display names; any further score column falls
    # back to its own name so the number of labels always matches the ticks.
    default_labels = ["Player\nPrevious Word", "Partner's\nPrevious Word"]
    extra_labels = [col.replace('_mean', '') for col in matrix.columns[len(default_labels):]]
    ax.set_xticklabels(default_labels[:n_cols] + extra_labels, ha='center')

    ax.set_yticks(np.arange(n_rows))
    ax.set_yticklabels(matrix.index)

    ax.set_xlim(-0.5 * h_spacing,
                (n_cols - 1) * h_spacing + 0.5 * h_spacing)
    # Inverted y-limits put the first configuration at the top.
    ax.set_ylim(n_rows - 0.5, -0.5)
    ax.set_aspect('equal')

    # hide frame but keep ticks
    for spine in ax.spines.values():
        spine.set_visible(False)

    plt.tight_layout()
    plt.show()

# Unfinished games are left out: keep only games that ended as "won" or "lost".
# The mask is built from filtered_df itself so that it lines up with the rows
# being selected (a mask taken from results_df has a different index), and
# .copy() gives the plotting function an independent frame.
filtered_df_cl = filtered_df[filtered_df["status"].isin(["won", "lost"])].copy()

plot_score_heatmap(filtered_df_cl, score_columns=("conceptual_linking_score_my", "conceptual_linking_score_opponent"), max_num_round=16)

## Quantitative strategies

In [None]:
from importlib import reload
import strategy_analysis.strategy_analysis_main
reload(strategy_analysis.strategy_analysis_main)
from strategy_analysis.strategy_analysis_main import plot_strategy_heatmap

In [None]:
# Heat-map of the 'quantitative_strategy_name' column, grouped by game configuration.
plot_strategy_heatmap(filtered_df, strategy_col="quantitative_strategy_name", groupby='gameConfig')

## Questionnaire data

In [None]:
import pandas as pd
import numpy as np

def extract_survey_fields(survey, keys):
    """
    Extract numeric survey answers for the requested keys.

    Parameters
    ----------
    survey : list, str, or missing value
        A list whose first element is the answer dictionary, or the string
        form of such a list (as read back from CSV). Missing values (None,
        NaN), empty lists and lists whose first element is not a dictionary
        are treated as "no survey".
    keys : iterable of str
        Answer keys to extract.

    Returns
    -------
    dict
        Maps every key to its numeric value, or NaN when the survey or the key
        is missing or the answer cannot be converted to a number.

    Raises
    ------
    ValueError, SyntaxError
        If `survey` is a string that is not a valid Python literal.
    """
    import ast  # local import: ast is not imported by this cell

    if isinstance(survey, str):
        # Survey text is free user input: literal_eval parses Python literals
        # only and, unlike eval, never executes code.
        survey = ast.literal_eval(survey)

    if isinstance(survey, list) and len(survey) > 0 and isinstance(survey[0], dict):
        d = survey[0]
        return {key: pd.to_numeric(d.get(key, np.nan), errors='coerce') for key in keys}

    # No usable survey for this row.
    return {key: np.nan for key in keys}

def compile_survey_scores_for_keys(df, keys):
    """
    Add one column per survey key holding the row's average answer.

    For each row the answers are extracted from both 'surveyAnswers1' and
    'surveyAnswers2' with extract_survey_fields; the value stored for a key is
    the mean of the answers that are not NaN, or NaN when both are missing.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain 'surveyAnswers1' and 'surveyAnswers2'.
    keys : list of str
        Survey keys to extract; each becomes a column of the same name.

    Returns
    -------
    pd.DataFrame
        A copy of `df` with the additional columns. The input is not modified,
        so a filtered slice can be passed without SettingWithCopyWarning.
    """
    df = df.copy()

    # One dictionary of extracted answers per row and per survey column.
    survey1 = [extract_survey_fields(answers, keys) for answers in df['surveyAnswers1']]
    survey2 = [extract_survey_fields(answers, keys) for answers in df['surveyAnswers2']]

    # For each key, combine the values from both surveys by averaging.
    for key in keys:
        averages = []
        for s1, s2 in zip(survey1, survey2):
            values = [v for v in (s1.get(key), s2.get(key)) if not pd.isna(v)]
            averages.append(np.mean(values) if values else np.nan)
        df[key] = averages
    return df

def compile_average_ratings_by_config(results_df, keys):
    """
    Average the requested survey answers per game configuration.

    The configuration is taken from the perspective of `playerId`:
    'gameConfigPlayer1' when the player is player 1, 'gameConfigPlayer2' when
    the player is player 2, and 'Unknown' otherwise.

    Parameters
    ----------
    results_df : pd.DataFrame
        DataFrame that must include:
          - 'surveyAnswers1' and 'surveyAnswers2',
          - 'playerId', 'player1Id', 'player2Id',
          - 'gameConfigPlayer1' and 'gameConfigPlayer2'.
        The frame is not modified.
    keys : list of str
        The keys from the survey dictionaries to extract and average.

    Returns
    -------
    pd.DataFrame
        One row per game configuration with the mean of each key.
    """
    # Private copy: callers pass a filtered slice, and the columns written
    # below would otherwise land in (and warn about) the caller's data.
    results_df = results_df.copy()

    # Configuration from the player's perspective.
    results_df['gameConfig'] = results_df.apply(
        lambda row: row['gameConfigPlayer1']
                    if row['playerId'] == row['player1Id']
                    else (row['gameConfigPlayer2']
                          if row['playerId'] == row['player2Id']
                          else 'Unknown'),
        axis=1
    )

    # Extract and average the survey values for the given keys.
    results_df = compile_survey_scores_for_keys(results_df, keys)

    # Mean of every key per configuration.
    grouped = results_df.groupby('gameConfig')[keys].mean().reset_index()
    return grouped

# Survey keys whose answers are averaged per game configuration.
keys_to_average = [
    'otherPlayerUnderstoodYourStrategies',
    'didYouUnderstandOtherPlayerStrategy',
    'otherPlayerRating',
    'connectionFeeling'
]

# One row per configuration, one column per key; displayed as the cell output.
grouped_ratings = compile_average_ratings_by_config(filtered_df, keys_to_average)
grouped_ratings

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib.patches import Circle
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Normalize

def extract_categorical_responses(survey, key, categories):
    """
    Compute the relative frequency of each category among the answers to `key`.

    Parameters
    ----------
    survey : list, str, or missing value
        A list whose first element is the answer dictionary, or the string
        form of such a list (as read back from CSV). Missing values (None,
        NaN), empty lists and lists whose first element is not a dictionary
        are treated as "no answer".
    key : str
        The dictionary key to extract (e.g., "quantitativeOtherPlayerStrategy").
        Its value may be a single answer or a list of answers.
    categories : iterable of str
        Allowed responses.

    Returns
    -------
    dict
        Maps each category to its share of ALL answers given for `key`
        (answers outside `categories` still count in the denominator), or to
        NaN when there is no answer at all.

    Raises
    ------
    ValueError, SyntaxError
        If `survey` is a string that is not a valid Python literal.
    """
    import ast  # local import: ast is not imported by this cell

    if isinstance(survey, str):
        # Survey text is free user input: literal_eval parses Python literals
        # only and, unlike eval, never executes code.
        survey = ast.literal_eval(survey)

    responses = []
    if isinstance(survey, list) and len(survey) > 0 and isinstance(survey[0], dict):
        val = survey[0].get(key, None)
        # If the value is a list, use its elements; otherwise, treat it as a single value.
        if isinstance(val, list):
            responses.extend(val)
        elif val is not None:
            responses.append(val)

    total = len(responses)
    if total == 0:
        return {cat: np.nan for cat in categories}
    return {cat: responses.count(cat) / total for cat in categories}

def compile_quantitative_other_strategy(df, key, categories):
    """
    Add one column per category with the row's average answer frequency.

    For each row the categorical answers to `key` are extracted from both
    'surveyAnswers1' and 'surveyAnswers2'; the value stored for a category is
    the mean of the frequencies that are available, or NaN when neither survey
    has an answer. New columns are named "{key}_{category}".

    Parameters
    ----------
    df : pd.DataFrame
        Must contain 'surveyAnswers1' and 'surveyAnswers2'.
    key : str
        e.g. "quantitativeOtherPlayerStrategy".
    categories : iterable of str
        Allowed responses (e.g. ("mirroring", "stayingClose", "averaging")).

    Returns
    -------
    pd.DataFrame
        A copy of `df` with the additional columns; the input is not modified.
    """
    df = df.copy()

    def _frequencies(answers):
        # Only strings and lists can hold a survey. Checking the type (rather
        # than pd.notna, which returns an array for list values) also skips
        # None / NaN cells of rows without a survey.
        if isinstance(answers, (str, list)):
            return extract_categorical_responses(answers, key, categories)
        return {}

    extracted = [_frequencies(answers) for answers in df['surveyAnswers1']]
    extracted2 = [_frequencies(answers) for answers in df['surveyAnswers2']]

    # For each row, average the values from the two surveys for each category.
    for cat in categories:
        averaged = []
        for s1, s2 in zip(extracted, extracted2):
            vals = [v for v in (s1.get(cat), s2.get(cat)) if v is not None and not pd.isna(v)]
            averaged.append(np.mean(vals) if vals else np.nan)
        df[key + '_' + cat] = averaged
    return df

def plot_quantitative_other_strategy_heatmap(results_df,
                                             key="quantitativeOtherPlayerStrategy",
                                             categories=("averaging", "mirroring", "stayingClose"),
                                             groupby='gameConfig'):
    """
    Plot a bubble heat-map of the average relative frequency of each reported
    other-player strategy per game configuration.

    The DataFrame must contain:
      - 'surveyAnswers1' and 'surveyAnswers2',
      - 'playerId', 'player1Id', and 'player2Id',
      - 'gameConfigPlayer1' and 'gameConfigPlayer2'.
    It is not modified.

    Parameters
    ----------
    results_df : pd.DataFrame
        The input DataFrame.
    key : str, default "quantitativeOtherPlayerStrategy"
        The survey key to extract.
    categories : tuple of str, default ("averaging", "mirroring", "stayingClose")
        The allowed responses; one heat-map column is drawn per category.
    groupby : str, default 'gameConfig'
        Currently unused: rows are always grouped by the game configuration
        seen from the player's perspective.
    """
    if len(results_df) == 0:
        # Row-wise apply and np.nanmin below cannot handle an empty frame.
        print("plot_quantitative_other_strategy_heatmap: no rows to plot.")
        return

    # Private copy: callers pass a filtered slice, and the columns written
    # below would otherwise land in (and warn about) the caller's data.
    results_df = results_df.copy()

    # 1) Create a new 'gameConfig' column based on player's perspective.
    results_df['gameConfig'] = results_df.apply(
        lambda row: row['gameConfigPlayer1']
                    if row['playerId'] == row['player1Id']
                    else (row['gameConfigPlayer2']
                          if row['playerId'] == row['player2Id']
                          else 'Unknown'),
        axis=1
    )

    # 2) Extract the categorical responses for the requested key.
    results_df = compile_quantitative_other_strategy(results_df, key, categories)

    # 3) Identify the new columns.
    new_cols = [f"{key}_{cat}" for cat in categories]

    # 4) Map the original config strings to the display labels,
    #    leaving other configs unchanged.
    config_map = {
        "human_vs_bot_(bot_shown)":     "vs-LLM\n(AI shown)",
        "human_vs_bot_(human_shown)":   "vs-LLM\n(Human shown)",
        "human_vs_human_(human_shown)": "vs-Human\n(Human shown)",
        "human_vs_human_(bot_shown)":   "vs-Human\n(AI shown)",
    }
    results_df['gameConfig'] = results_df['gameConfig'].map(config_map).fillna(results_df['gameConfig'])

    # 5) Group by game configuration and compute the average for each new column.
    grouped = results_df.groupby('gameConfig')[new_cols].mean().reset_index()

    # 6) Matrix for the heatmap: rows are game configurations, columns are the strategy categories.
    matrix = grouped.set_index('gameConfig')
    data = matrix.values
    if data.size == 0 or np.all(np.isnan(data)):
        # Without a single finite value there is no colour scale to build.
        print("plot_quantitative_other_strategy_heatmap: no survey answers to plot.")
        return

    # 7) Plot with circles instead of imshow squares.
    fig, ax = plt.subplots(figsize=(10, 6))
    n_rows, n_cols = data.shape

    # Prepare the colormap normalization and a ScalarMappable for the colorbar
    norm = Normalize(vmin=np.nanmin(data), vmax=np.nanmax(data))
    sm = ScalarMappable(norm=norm, cmap='coolwarm')
    sm.set_array([])  # for the colorbar

    # Draw one circle per cell
    h_spacing   = 1.3           # multiplier to separate columns
    max_radius  = 0.40
    # When every frequency is 0 the largest value is 0; dividing by 1 instead
    # keeps the radius at 0 rather than producing 0/0.
    radius_scale = norm.vmax if norm.vmax > 0 else 1.0

    for i in range(n_rows):
        for j in range(n_cols):
            val = data[i, j]
            if np.isnan(val):                                 # skip empty cells
                continue
            # scale radius ∝ √value so area is proportional
            radius = np.sqrt(val / radius_scale) * max_radius
            x      = j * h_spacing          # apply horizontal spacing
            y      = i

            ax.add_patch(
                Circle((x, y), radius=radius,
                       facecolor=sm.to_rgba(val),
                       edgecolor='grey', lw=0.5)
            )

            # bold percentage text
            text_col = 'white' if val < (norm.vmin + norm.vmax) / 3 else 'black'
            ax.text(x, y, f"{val*100:.0f}%",
                    ha='center', va='center',
                    color=text_col, fontweight='bold')

    # ── ticks, labels, limits ──────────────────────────────────────────────────
    xtick_pos = [i * h_spacing for i in range(n_cols)]
    ax.set_xticks(xtick_pos)
    ax.set_xticklabels(
        [col.replace(key + '_', '') for col in matrix.columns],
        rotation=0, ha='center'
    )
    ax.set_yticks(np.arange(n_rows))
    ax.set_yticklabels(matrix.index)

    ax.set_xlim(-0.5 * h_spacing, (n_cols - 1) * h_spacing + 0.5 * h_spacing)
    # Inverted y-limits put the first configuration at the top.
    ax.set_ylim(n_rows - 0.5, -0.5)
    ax.set_aspect('equal')

    # remove the four plot spines (frame) but keep the ticks
    for spine in ax.spines.values():
        spine.set_visible(False)

    # add the colorbar, formatted as percentages and without its frame
    cbar = fig.colorbar(sm, ax=ax, fraction=0.046, pad=0.04)
    cbar.ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1))
    cbar.outline.set_visible(False)

    plt.tight_layout()
    plt.show()


# Draw the heat-map for the selected games with the default key and categories.
plot_quantitative_other_strategy_heatmap(filtered_df)

## Average Response time

In [None]:

def analyze_response_time(games_df, config_col="gameConfig"):
    """
    Average game duration, in seconds, per configuration.

    Despite the function's name, the quantity computed is the duration of the
    whole game: 'updatedAt' minus 'createdAt'.

    Parameters
    ----------
    games_df : pd.DataFrame
        Must contain 'createdAt', 'updatedAt' and `config_col`. Timestamps may
        be strings or datetimes; values that cannot be parsed become NaT and
        are ignored by the mean. The frame is not modified.
    config_col : str, default "gameConfig"
        Column to group by.

    Returns
    -------
    pd.DataFrame
        Columns `config_col` and 'average_game_duration_seconds', one row per
        configuration (NaN when no game of that configuration has a valid
        duration).
    """
    # Parse into local Series instead of writing back into games_df, so the
    # caller's columns keep their original values and no column is added.
    created_at = pd.to_datetime(games_df['createdAt'], errors='coerce')
    updated_at = pd.to_datetime(games_df['updatedAt'], errors='coerce')

    duration_seconds = (updated_at - created_at).dt.total_seconds()

    # Group the durations by the configuration column and average them.
    grouped = (
        duration_seconds
        .groupby(games_df[config_col])
        .mean()
        .rename('average_game_duration_seconds')
        .reset_index()
    )

    return grouped
    
# results_df (reloaded from the results CSV) has no timestamp columns, so they
# are brought in from games_df. gameId is presumably unique there; the
# drop_duplicates is a guard so the left merge can never multiply result rows.
time_df = games_df[['gameId', 'createdAt', 'updatedAt']].drop_duplicates('gameId')
# Merge on 'gameId'
merged_df = pd.merge(results_df, time_df, on='gameId', how='left', suffixes=('_orig', ''))
player_id_array = ["DvplG2Kz", "IfM2wWHp", "1zx9ju2S", "BVSUju5U", "lW70ICul", "OZ52imkd", "GorQHcOB", "8PSrn9JD", "u5LgXigL",
                  "1Ax1SPCe", "l8ND7njk", "sfiXibsa", "PvUHGrDy", "MO2pWpjE", "gsbTiZwO", "gDnhDODC", "Ska8rixW", "6DbWtL5r",
                  "TCFqdHBb", "wQhT1jrv"]
# Select from merged_df (the frame that carries createdAt / updatedAt) and
# apply the player and round-count conditions together; filtering results_df
# twice dropped both the timestamps and the player selection.
filtered_df = merged_df[
    merged_df["playerId"].isin(player_id_array)
    & (merged_df["roundCount"] > 2)
    & (merged_df["roundCount"] <= 16)
].copy()

# Average game duration per configuration.
results = analyze_response_time(filtered_df, config_col="gameConfig")
print(results)

## Example dynamics

In [None]:
# List the columns available in filtered_df.
filtered_df.columns

In [None]:
embedding_model = "word2vec"

# Run the embedding helper on the filtered games and feed its result straight
# into the PCA helper (15 components); the returned table replaces filtered_df.
filtered_df = calculate_pca_for_embeddings(
    get_embeddings_for_table(filtered_df, model_name=embedding_model),
    model_name=embedding_model,
    num_pca_components=15,
)

In [None]:
# Inspect the botId column of the filtered games.
filtered_df["botId"] 

In [None]:
df = filtered_df.copy().reset_index()

# gpt-4o games that lasted exactly seven rounds. Both conditions go into one
# mask used with .loc: chaining df[mask1][mask2] applies a mask built on the
# full frame to an already filtered one, which pandas has to re-align (with a
# warning).
df.loc[
    (df["botId"] == "gpt-4o") & (df["roundCount"] == 7),
    ["gameId", "roundCount", "wordsPlayed1", "wordsPlayed2", "embedding1_word2vec", "embedding2_word2vec"],
]

In [None]:
import plotly.graph_objects as go
import numpy as np
from sklearn.decomposition import PCA
from visualization import create_fixed_color_lines

import ast

# Rows are addressed by position (iloc) below, so the index itself is irrelevant.
df = filtered_df.copy()

tryout = 24  # position of the game to visualise


def _as_list(value):
    """Return `value` as a list, parsing it when it is the string form read from CSV."""
    return ast.literal_eval(value) if isinstance(value, str) else value


words_player1 = _as_list(df['word_my'].iloc[tryout])
words_player2 = _as_list(df['word_opponent'].iloc[tryout])
# assumes each cell holds one embedding vector per played word -- TODO confirm
embeddings_1 = np.asarray(df['embedding1_word2vec'].iloc[tryout])
embeddings_2 = np.asarray(df['embedding2_word2vec'].iloc[tryout])

print(words_player1)
print(words_player2)
print(df['gameConfig'].iloc[tryout])

# Stack both players' vectors into one sample matrix. vstack concatenates rows
# whether the cells hold lists or arrays, whereas `+` on arrays would add the
# two players' vectors element-wise.
embeddings = np.vstack([embeddings_1, embeddings_2])

# Fit a 3-component PCA on all words of the game, then project each player.
pca = PCA(n_components=3)
pca.fit(embeddings)
reduced_embeddings_1 = pca.transform(embeddings_1)
reduced_embeddings_2 = pca.transform(embeddings_2)

# Create 3D scatter plot for player 1 with text labels showing index and word
trace1 = go.Scatter3d(
    x=reduced_embeddings_1[:, 0],
    y=reduced_embeddings_1[:, 1],
    z=reduced_embeddings_1[:, 2],
    mode='markers+text',
    marker=dict(size=4, color='blue'),
    text=[f"{i}: {word}" for i, word in enumerate(words_player1)],
    name='Player'
)

# Create 3D scatter plot for player 2 with text labels showing index and word
trace2 = go.Scatter3d(
    x=reduced_embeddings_2[:, 0],
    y=reduced_embeddings_2[:, 1],
    z=reduced_embeddings_2[:, 2],
    mode='markers+text',
    marker=dict(size=4, color='red'),
    text=[f"{i}: {word}" for i, word in enumerate(words_player2)],
    name='Bot'
)

print(len(words_player1))
# One line trace set per player, in that player's colour.
lines_player1 = create_fixed_color_lines(reduced_embeddings_1, len(words_player1), 'blue')
lines_player2 = create_fixed_color_lines(reduced_embeddings_2, len(words_player2), 'red')

# Combine all traces
data = [trace1, trace2] + lines_player1 + lines_player2

# The game converged when both players ended on the same word; the test is
# evaluated once and the highlight trace is built only in that case.
game_converged = words_player2[-1] == words_player1[-1]
if game_converged:
    print("won")
    last_point_player = go.Scatter3d(
        x=[reduced_embeddings_2[-1, 0]],
        y=[reduced_embeddings_2[-1, 1]],
        z=[reduced_embeddings_2[-1, 2]],
        mode='markers+text',
        marker=dict(size=8, color='green', symbol="diamond-open"),
        text=[f"{len(words_player2)-1}: {words_player2[-1]}"],
        name='Final word'
    )
    data = data + [last_point_player]

# Define layout with larger figure size
layout = go.Layout(
    title='3D Scatter Plot with Colormap Gradient Lines',
    scene=dict(
        xaxis=dict(title='PCA1'),
        yaxis=dict(title='PCA2'),
        zaxis=dict(title='PCA3'),
    ),
    legend_title_text='Legend',
    width=800,
    height=800,
)

# Create figure and show
fig = go.Figure(data=data, layout=layout)
fig.show()