### Statistical Analysis of the Data

#### Time Complexity Analysis 
Here we look at the time taken and the number of nodes visited by the different search algorithms.

In [1]:
# Modules 
import pandas as pd
import numpy as np
import ast

def compute_row_wise_statistics(file_path):
    """Aggregate the rows of one results CSV into summary statistics.

    Every row of the CSV is treated as one observation. For each analysis column
    the minimum, maximum, mean, standard deviation (``np.std``) and median are
    computed across all rows:

    * Scalar columns ("Black score", "White score", "Pieces played", "Time")
      produce one value per statistic.
    * List columns (names containing "evaluated states" or "nodes generated by")
      hold one stringified Python list per row. They are parsed with
      ``ast.literal_eval`` and the statistics are computed position by position.
      A row whose list is shorter than the position being aggregated contributes
      ``0`` at that position.

    All list columns are aggregated over the same number of positions (the
    longest list found in any of them), so the resulting per-position series
    always have equal length and can be assembled into a single DataFrame even
    when, for example, one player's lists are longer than the other's.

    Args:
        file_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        pd.DataFrame with one row per list position. The columns that are not
        analysed come first and repeat the value found in the first CSV row;
        they are followed by "<column> min/max/mean/std/median" for every
        analysis column (scalar statistics are repeated on every row).

    Raises:
        ValueError: If the CSV contains no data rows.
        KeyError: If one of the analysis columns is missing from the CSV.
    """
    data = pd.read_csv(file_path)
    if data.empty:
        raise ValueError(f"No data rows found in {file_path!r}")

    # Columns to analyze
    analysis_columns = ["Black score", "White score", "Pieces played",
                        "Black evaluated states", "White evaluated states",
                        "Number of nodes generated by Black", "Number of nodes generated by White",
                        "Time"]

    non_analysis_columns = [column for column in data.columns if column not in analysis_columns]

    def is_list_column(column):
        # These columns store a stringified list per row instead of a scalar.
        return "evaluated states" in column or "nodes generated by" in column

    # Normalise every analysis column to "one list per CSV row" so that scalar
    # and list columns go through the same aggregation loop.
    observations = {}
    for column in analysis_columns:
        if is_list_column(column):
            observations[column] = [ast.literal_eval(cell) for cell in data[column]]
        else:
            observations[column] = [[cell] for cell in data[column]]

    # One shared length for all list columns: a DataFrame cannot be built from
    # per-position series of different lengths.
    shared_length = max(len(row)
                        for column in analysis_columns if is_list_column(column)
                        for row in observations[column])

    aggregated_stats = {}
    for column in analysis_columns:
        rows = observations[column]
        list_column = is_list_column(column)
        per_position = {"min": [], "max": [], "mean": [], "std": [], "median": []}
        for index in range(shared_length if list_column else 1):
            # Rows that have no entry at this position count as 0.
            values = [row[index] if index < len(row) else 0 for row in rows]
            per_position["min"].append(min(values))
            per_position["max"].append(max(values))
            per_position["mean"].append(np.mean(values))
            per_position["std"].append(np.std(values))
            per_position["median"].append(np.median(values))

        for stat_name, stat_values in per_position.items():
            # Scalar columns keep a bare value; pandas repeats it on every row.
            aggregated_stats[f"{column} {stat_name}"] = stat_values if list_column else stat_values[0]

    # Carry over the non-analysis columns, taking their value from the first CSV row.
    for column in non_analysis_columns:
        aggregated_stats[column] = data[column].values[0]

    aggregated_stats_df = pd.DataFrame(aggregated_stats)

    # Put the non-analysis columns first
    stat_columns = [column for column in aggregated_stats_df.columns if column not in non_analysis_columns]
    return aggregated_stats_df[non_analysis_columns + stat_columns]

def rows_to_list(df, max_length=40) -> pd.DataFrame:
    """Collapse a multi-row DataFrame into a single-row DataFrame.

    Each column becomes one cell:

    * If the column holds more than one distinct value, the cell is a list of
      the column's values, truncated to the first ``max_length`` entries.
    * Otherwise the cell is the column's first value.

    Note that the decision is made purely on the values: a column whose entries
    all happen to be equal is collapsed to a bare value, not a list.

    Args:
        df: DataFrame to collapse. Must have at least one row if it has columns.
        max_length: Maximum number of values kept in a list cell. Defaults to
            40; the original author notes there should be no more than roughly
            32 moves per player.

    Returns:
        pd.DataFrame with exactly one row and the same columns as ``df``.
    """
    collapsed = {}
    for column in df.columns:
        row_values = df[column].values
        if len(set(row_values)) > 1:
            # Wrap in an outer list so pandas stores the whole list in one cell.
            collapsed[column] = [list(row_values[:max_length])]
        else:
            collapsed[column] = [row_values[0]]
    return pd.DataFrame(collapsed)

In [2]:
# Load data
import glob
import pandas as pd
# glob returns matches in arbitrary order; sort so the row order of the combined
# table is the same on every run and every machine.
file_paths = sorted(glob.glob("data/complexity/*.csv"))

# One single-row frame per CSV file: aggregate the file, then collapse the
# per-position rows into list cells.
per_file_frames = [rows_to_list(compute_row_wise_statistics(file)) for file in file_paths]

# Concatenate once instead of growing a DataFrame inside the loop; fall back to an
# empty frame when no CSV file was found (pd.concat rejects an empty list).
aggregated_stats_dfs = (
    pd.concat(per_file_frames, ignore_index=True) if per_file_frames else pd.DataFrame()
)

In [3]:
# Preview the first 24 rows of the combined table (one row per input CSV file).
aggregated_stats_dfs.head(24)

Unnamed: 0,StrategyBlack,StrategyWhite,Depth,TableBlack,TableWhite,AlgorithmBlack,AlgorithmWhite,Black score min,Black score max,Black score mean,...,Number of nodes generated by White min,Number of nodes generated by White max,Number of nodes generated by White mean,Number of nodes generated by White std,Number of nodes generated by White median,Time min,Time max,Time mean,Time std,Time median
0,2,1,2,2,0,3,0,23,58,40.98,...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.028786,0.097934,0.044224,0.010384,0.042823
1,2,1,2,2,0,4,0,0,54,40.47,...,"[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.99,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.099...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.005505,0.161832,0.032771,0.020634,0.028469
2,2,1,4,2,0,3,0,24,56,40.32,...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",2.299836,13.465451,6.213564,2.05975,5.969241
3,2,1,4,2,0,4,0,25,60,40.48,...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.384127,1.815611,0.904395,0.29619,0.873614
4,2,1,6,2,0,3,0,25,51,41.0,...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",279.343929,582.921995,416.654451,108.489391,402.175941
5,2,1,6,2,0,4,0,42,46,43.666667,...,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",32.973344,39.178037,35.204304,2.816913,33.461533
6,3,1,2,2,0,3,0,12,64,39.88,...,"[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.98, 0.97...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13999999...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.001656,0.070559,0.01674,0.00918,0.016099
7,3,1,2,2,0,4,0,13,57,36.35,...,"[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.99, 0.98, 0.9...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0994987437106...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.001097,0.173347,0.014553,0.019117,0.012244
8,3,1,4,2,0,3,0,13,61,38.1,...,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 0.99, 0.98, 0.96, 0.95, 0...","[0.0, 0.0, 0.0, 0.0, 0.09949874371066199, 0.13...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.030092,7.11376,2.101145,1.152596,2.025939
9,3,1,4,2,0,4,0,13,60,40.03,...,"[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...","[1.0, 1.0, 1.0, 1.0, 0.99, 0.99, 0.99, 0.98, 0...","[0.0, 0.0, 0.0, 0.0, 0.09949874371066199, 0.09...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.009345,0.943421,0.506164,0.194962,0.483702


In [4]:
import matplotlib.pyplot as plt

# Display names for the integer strategy codes; plot_nb_nodes and compare_stats
# look up the values of their `strategy_column` here.
# NOTE(review): the table above shows the value 1 in StrategyWhite, which has no
# entry here, so using that column would raise KeyError - confirm this is intended.
strategy_to_name = {
    2: 'Positional',
    3: 'Absolute',
    4: 'Mobility',
    5: 'Mixed (thresholds=[30, 55])',
}

# Display names for the integer algorithm codes; plot_nb_nodes looks up the values
# of its `algorithm_column` here for titles and file names.
# NOTE(review): the table above shows the value 0 in AlgorithmWhite, which has no
# entry here - confirm this is intended.
algorithm_to_name = {
    3: 'Negamax',
    4: 'Negamax with Alpha-Beta Pruning',
}

def plot_nb_nodes(data, strategy_column='StrategyBlack', depth_column='Depth', algorithm_column='AlgorithmBlack',
                  stats_string="Number of nodes generated by Black", save_folder=None):
    """Plot a per-move statistic for every depth, algorithm and strategy.

    For each depth, one combined figure is built with one subplot per algorithm
    (shared y axis), plus one separate figure per algorithm. On every axis, each
    strategy contributes a min-max band, a mean line and mean markers with
    standard-deviation error bars, all taken from the first matching row of
    ``data``.

    Every figure is closed after it has been (optionally) saved, so nothing
    stays open and the only lasting result is the PNG files.

    Args:
        data: DataFrame holding list-valued "<stats_string> min/max/mean/std"
            columns (``len()`` is taken of the mean cell, so scalar statistics
            are not supported here).
        strategy_column: Column whose values are looked up in ``strategy_to_name``.
        depth_column: Column holding the search depth.
        algorithm_column: Column whose values are looked up in ``algorithm_to_name``.
        stats_string: Prefix of the statistic columns; also used as the y label,
            in titles and in file names.
        save_folder: Existing folder in which the PNG files are written. Nothing
            is saved when this is falsy.
    """
    # Get unique depths, strategies, and algorithms for plotting
    unique_depths = data[depth_column].unique()
    unique_strategies = data[strategy_column].unique()
    unique_algorithms = data[algorithm_column].unique()

    for depth in sorted(unique_depths):
        # Combined figure: one subplot per algorithm for the current depth
        fig_all, axes_all = plt.subplots(nrows=1, ncols=len(unique_algorithms), figsize=(20, 5), sharey=True)
        if len(unique_algorithms) == 1:
            axes_all = [axes_all]  # plt.subplots returns a bare Axes for a single subplot

        for ax_all, algorithm in zip(axes_all, sorted(unique_algorithms)):
            # Separate figure for this algorithm
            fig, ax = plt.subplots(figsize=(10, 5))
            for strategy in unique_strategies:
                strategy_data = data[(data[strategy_column] == strategy)
                                     & (data[depth_column] == depth)
                                     & (data[algorithm_column] == algorithm)]
                if strategy_data.empty:
                    continue

                # Only the first matching row is plotted.
                min_values = strategy_data[stats_string+' min'].iloc[0]
                max_values = strategy_data[stats_string+' max'].iloc[0]
                mean_values = strategy_data[stats_string+' mean'].iloc[0]
                std_values = strategy_data[stats_string+' std'].iloc[0]
                indices = range(len(mean_values))

                # Draw the same artists on the individual and on the combined axis
                for ax_in_use in [ax, ax_all]:
                    ax_in_use.fill_between(indices, min_values, max_values,
                                           alpha=0.5, label=f'{strategy_to_name[strategy]} Min-Max')
                    ax_in_use.plot(indices, mean_values,
                                   label=f'{strategy_to_name[strategy]} Mean')
                    ax_in_use.errorbar(indices, mean_values, yerr=std_values,
                                       fmt='o', label=f'{strategy_to_name[strategy]} STD')

            # Titles, labels and layout for the individual algorithm plot
            ax.set_title(f"{stats_string} at depth {depth}, {algorithm_to_name[algorithm]}")
            ax.set_xlabel("Move number")
            ax.set_ylabel(stats_string)
            ax.legend()
            # Address the figure explicitly: two figures are open at this point, so
            # pyplot's implicit "current figure" is not something to rely on.
            fig.tight_layout()
            if save_folder:
                fig.savefig(f"{save_folder}/{stats_string}_depth_{depth}_{algorithm_to_name[algorithm]}.png")
            plt.close(fig)

            # Titles and labels for this algorithm's subplot in the combined figure
            ax_all.set_title(f"{algorithm_to_name[algorithm]}")
            ax_all.set_xlabel("Move number")
            ax_all.set_ylabel(stats_string)
            ax_all.legend()

        fig_all.tight_layout()
        # Save the combined plot for all algorithms at this depth
        if save_folder:
            fig_all.savefig(f"{save_folder}/{stats_string}_depth_{depth}_combined.png")
        plt.close(fig_all)
        
# plot_nb_nodes(aggregated_stats_dfs, save_folder="data/complexity")

In [12]:
def compare_stats(data, strategy_column='StrategyBlack', depth_column='Depth', algorithm_column='AlgorithmBlack',
                  stats_string="Number of nodes generated by Black"):
    """Print, per depth and strategy, how algorithm 4 compares to algorithm 3.

    For every (depth, strategy, algorithm) combination present in ``data`` the
    "<stats_string> min/max/mean/std" cells are reduced to four numbers (min of
    mins, max of maxs, mean of means, mean of stds). When both algorithm code 3
    and algorithm code 4 are available, the ratio of the code-4 value to the
    code-3 value is printed as a percentage, followed by the two raw values.

    A statistic cell may be a per-move list or a bare scalar; both are accepted,
    also mixed within one row (a constant per-move series may have been stored
    as a scalar). If the mean cell of the first matching row is a list, all
    matching rows are used; otherwise only the first matching row is used.

    Args:
        data: DataFrame holding the "<stats_string> min/max/mean/std" columns.
        strategy_column: Column whose values are looked up in ``strategy_to_name``.
        depth_column: Column holding the search depth.
        algorithm_column: Column holding the algorithm code (3 and 4 are compared).
        stats_string: Prefix of the statistic columns to compare.

    Returns:
        None. The comparison is written to stdout.
    """
    def reduce_cells(cells, reducer):
        # np.atleast_1d lets a bare scalar go through the same reducer as a list.
        return [reducer(np.atleast_1d(cell)) for cell in cells]

    def print_ratio(label, pruned, reference):
        # A zero reference makes the ratio meaningless; report it as such instead
        # of printing inf/nan.
        if reference == 0:
            print(f"{label}: n/a ({pruned:.2f} vs {reference:.2f})")
        else:
            print(f"{label}: {pruned / reference * 100:.2f}% ({pruned:.2f} vs {reference:.2f})")

    unique_depths = data[depth_column].unique()
    unique_strategies = data[strategy_column].unique()
    unique_algorithms = data[algorithm_column].unique()

    # Loop through each depth and strategy
    for depth in sorted(unique_depths):
        print(f"\n--- Depth {depth} ---")
        for strategy in unique_strategies:
            print(f"\nStrategy: {strategy_to_name[strategy]}")
            stats = {}
            # Collect statistics for each algorithm
            for algorithm in unique_algorithms:
                strategy_data = data[(data[strategy_column] == strategy) &
                                     (data[depth_column] == depth) &
                                     (data[algorithm_column] == algorithm)]
                if strategy_data.empty:
                    continue

                # List-valued statistics are reduced over every matching row,
                # scalar statistics over the first matching row only.
                first_mean = strategy_data[stats_string + ' mean'].iloc[0]
                rows = strategy_data if isinstance(first_mean, list) else strategy_data.iloc[:1]

                mins = reduce_cells(rows[stats_string + ' min'], np.min)
                maxs = reduce_cells(rows[stats_string + ' max'], np.max)
                means = reduce_cells(rows[stats_string + ' mean'], np.mean)
                stds = reduce_cells(rows[stats_string + ' std'], np.mean)

                stats[algorithm] = {
                    'Min of mins': min(mins),
                    'Max of maxs': max(maxs),
                    'Mean of means': np.mean(means),
                    'Mean of stds': np.mean(stds)
                }

            # Compare NegamaxAlphaBeta/Negamax
            if 3 in stats and 4 in stats:
                print("\nNegamaxAlphaBeta/Negamax Comparison:")
                for label in ('Max of maxs', 'Mean of means', 'Mean of stds'):
                    print_ratio(label, stats[4][label], stats[3][label])

In [13]:
# Compare algorithm 4 (alpha-beta) against algorithm 3 (plain Negamax) on the
# number of nodes generated by Black.
compare_stats(aggregated_stats_dfs, stats_string="Number of nodes generated by Black")


--- Depth 2 ---

Strategy: Positional

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 58.80% (137.00 vs 233.00)
Mean of means: 57.52% (31.93 vs 55.51)
Mean of stds: 65.20% (10.91 vs 16.73)

Strategy: Absolute

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 64.90% (159.00 vs 245.00)
Mean of means: 55.79% (25.79 vs 46.22)
Mean of stds: 63.92% (14.11 vs 22.08)

Strategy: Mobility

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 54.04% (147.00 vs 272.00)
Mean of means: 53.95% (32.56 vs 60.36)
Mean of stds: 60.26% (11.02 vs 18.30)

Strategy: Mixed (thresholds=[30, 55])

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 61.09% (146.00 vs 239.00)
Mean of means: 58.35% (31.78 vs 54.45)
Mean of stds: 62.72% (10.87 vs 17.34)

--- Depth 4 ---

Strategy: Positional

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 13.19% (7103.00 vs 53861.00)
Mean of means: 14.77% (787.08 vs 5329.77)
Mean of stds: 16.35% (467.24 vs 2856.85)

Strategy: Absolute

NegamaxAlphaBeta/Negamax Comparison:
Max of

In [14]:
# Same comparison on the "Time" statistics (stored as scalars, not per-move lists).
compare_stats(aggregated_stats_dfs, stats_string="Time")


--- Depth 2 ---

Strategy: Positional

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 165.25% (0.16 vs 0.10)
Mean of means: 74.10% (0.03 vs 0.04)
Mean of stds: 198.71% (0.02 vs 0.01)

Strategy: Absolute

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 245.68% (0.17 vs 0.07)
Mean of means: 86.94% (0.01 vs 0.02)
Mean of stds: 208.26% (0.02 vs 0.01)

Strategy: Mobility

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 114.02% (0.24 vs 0.21)
Mean of means: 54.55% (0.06 vs 0.12)
Mean of stds: 83.37% (0.02 vs 0.03)

Strategy: Mixed (thresholds=[30, 55])

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 116.59% (0.22 vs 0.19)
Mean of means: 67.22% (0.06 vs 0.08)
Mean of stds: 140.54% (0.04 vs 0.03)

--- Depth 4 ---

Strategy: Positional

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 13.48% (1.82 vs 13.47)
Mean of means: 14.56% (0.90 vs 6.21)
Mean of stds: 14.38% (0.30 vs 2.06)

Strategy: Absolute

NegamaxAlphaBeta/Negamax Comparison:
Max of maxs: 13.26% (0.94 vs 7.11)
Mean of mean