In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Select the evaluation CSV to analyse; swap the active line to switch model runs.

path = "results/model_evaluations/V63a_GPT2_350k_4E_xLANplus_RIGHT_PAD_sequence_results_1000x125.csv" # V63a
# path = "results/model_evaluations/V70_Mamba_350k_4E_xLANplus_RIGHT_PAD_sequence_results_1000x125.csv" # V70

# The file name up to ".csv" is the model name; its first "_"-separated field is the version tag.
model_name = path.rsplit("/", 1)[-1].partition(".csv")[0]
version_number = path.rsplit("/", 1)[-1].partition("_")[0]

# Read the evaluation results into a dataframe.
# df headers: "Game String", "Moves Until Error", "Error Type", "First Illegal Move", "Predicted Tokens"
df = pd.read_csv(path)

print(f"version_number = {version_number}")
print(f"model_name = {model_name}")

In [None]:
# Convert every "Game String" entry to xLAN and store the result in a new "xLAN" column.
# NOTE(review): presumably the input strings are xLANplus sequences (going by the helper's name) — confirm.

from src.notation_converter import xlanplus_sequence_to_xlan

df["xLAN"] = df["Game String"].apply(xlanplus_sequence_to_xlan)

In [None]:
# Tokenize each "xLAN" string using the token file below and keep the result per row in "xLAN_tokens".

from src.tokenizer.tokenizer import tokenize_data

token_path = "src/tokenizer/xlan_tokens.json"

# Series.apply forwards `args` after the element, i.e. tokenize_data(xlan_string, token_path).
df["xLAN_tokens"] = df["xLAN"].apply(tokenize_data, args=(token_path,))
df

In [None]:
# Re-evaluate every row from its "xLAN" string and "xLAN_tokens", then unpack the result into separate columns.

from src.validation.validate_sequence import evaluate_sequence

# evaluate_sequence is called once per row with single-element lists, so the per-row result is read at index 0.
# NOTE(review): presumably it returns one result record per input sequence — confirm against its definition.
df["evaluation"] = df.apply(lambda x: evaluate_sequence([x["xLAN"]], [x["xLAN_tokens"]]), axis=1)
# Positions 1, 2 and 3 of that first result are stored as moves_until_error, error_type and first_illegal_move.
df["moves_until_error"] = df["evaluation"].apply(lambda x: x[0][1])
df["error_type"] = df["evaluation"].apply(lambda x: x[0][2])
df["first_illegal_move"] = df["evaluation"].apply(lambda x: x[0][3])
df

In [None]:
# Export the re-evaluated dataframe as a CSV file (index column omitted); the file name is derived from model_name.
df.to_csv(f"results/model_evaluations/{model_name}_reevaluated_xlan.csv", index=False)

In [None]:
def show_error_frequencies_absolute(df, version_number):
    """Plot and save a stacked bar chart of error counts per ten-move bucket.

    Rows of ``df`` are bucketed by ``moves_until_error`` into bins of width
    ten and counted per ``error_type``. The chart is displayed and written to
    ``results/plots/error_frequencies/{version_number}_error_frequencies_reevaluated_xlan.png``
    (the directory is created if it does not exist yet).

    Side effect: an "Error Bucket" categorical column is added to (or
    overwritten on) ``df``.

    Args:
        df: DataFrame with an integer "moves_until_error" column and an
            "error_type" column.
        version_number: Label used in the plot title and the output file name.

    Raises:
        ValueError: If ``df`` has no rows.
    """
    if df.empty:
        raise ValueError("df contains no rows to plot")

    # Bin edges every ten moves. The upper bound is clamped to at least 1 so
    # pd.cut always receives two edges, even when every game has 0 correct moves.
    max_moves = max(df["moves_until_error"].max(), 1)
    bins = list(range(0, max_moves + 10, 10))

    # The first bucket also contains 0 (include_lowest=True below), so it is
    # labelled "0-10"; all later buckets are right-closed: "11-20", "21-30", ...
    labels = [f"{lower + 1 if lower else 0}-{lower + 10}" for lower in bins[:-1]]

    # Without include_lowest=True a value of 0 falls outside (0, 10] and the
    # game would silently disappear from the chart as NaN.
    df["Error Bucket"] = pd.cut(
        df["moves_until_error"], bins=bins, labels=labels, include_lowest=True
    )

    # observed=False is stated explicitly so that empty buckets stay on the
    # x-axis regardless of the pandas version's default for categorical keys.
    error_summary = (
        df.groupby(["Error Bucket", "error_type"], observed=False)
        .size()
        .unstack(fill_value=0)
    )

    error_types = ["syntax", "piece logic", "path obstruction", "pseudolegal", "indicator error", "no error", "max length"]

    # Fixed column order for a stable legend; error types that never occurred
    # get an all-zero column, and unlisted types are left out as before.
    error_summary = error_summary.reindex(columns=error_types, fill_value=0)

    # Plot the results in a stacked bar chart
    ax = error_summary.plot(kind='bar', stacked=True, figsize=(12, 6))
    ax.set_xlabel("Moves Until Error")
    ax.set_ylabel("Number of Errors")
    ax.set_title(f"Number of Errors by Moves Until Error and Error Type ({version_number})")
    plt.xticks(rotation=45)
    plt.legend(title="Error Type", loc='upper left')
    plt.tight_layout()

    # Make sure the target directory exists so savefig cannot fail on a fresh checkout.
    output_path = Path(f"results/plots/error_frequencies/{version_number}_error_frequencies_reevaluated_xlan.png")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path)
    plt.show()

In [None]:
# Render and save the stacked error-frequency chart for the loaded results.
show_error_frequencies_absolute(df=df, version_number=version_number)

In [None]:
# calculate average number of correct plies

def calculate_average_correct_plies(df):
    """Return the average of "moves_until_error" over all rows of ``df``.

    The column sum is divided by the total number of rows, so every row
    counts as one game.

    Args:
        df: DataFrame with a numeric "moves_until_error" column.

    Returns:
        The average number of correct plies per game, or NaN when ``df`` has
        no rows.
    """
    total_games = len(df)
    if total_games == 0:
        # An empty frame has no meaningful average; return NaN instead of
        # dividing by zero.
        return float("nan")
    return df["moves_until_error"].sum() / total_games

In [None]:
# Displayed as the cell output: average number of correct plies per game.
calculate_average_correct_plies(df=df)