In [1]:
import pandas as pd
from scipy.stats import mannwhitneyu

In [2]:
# Both log files are read with the same explicit column names; because
# `names=` is given without `header=`, the first row of each file is data.
log_columns = [
    "algo", "experiment_id", "dataset", "seed", "generation",
    "training_fitness", "timing", "pop_node_count",
    "test_fitness", "elite_size", "log_level", "algo_type",
]

# Runs logged "with DGSM".
data_wdgsm = pd.read_csv('../main/log_crossover' + "/filtered_11nov_wdgsm.csv", names=log_columns)

# Runs logged "without DGSM".
data_wodgsm = pd.read_csv('../main/log_crossover' + "/filtered_11nov_wodgsm.csv", names=log_columns)

In [6]:
# Stack both logs into one frame with a fresh 0..n-1 index.
data = pd.concat([data_wdgsm, data_wodgsm], ignore_index=True)

In [7]:
# Keep only the six algorithms that are compared below.
compared_algos = ['XOSw w DGSM', 'SLIM-GSGP', 'XODn w DGSM', 'GSGP', 'XOSw wo DGSM', 'XODn wo DGSM']
data = data[data["algo"].isin(compared_algos)]

In [8]:
# Independent copy of the GSGP rows so they can be relabelled without touching `data`.
new_data = data[data["algo"] == 'GSGP'].copy()

In [9]:
# Relabel every copied GSGP row with the 'SlimGSGP_1_mul_True' algo_type.
new_data = new_data.assign(algo_type='SlimGSGP_1_mul_True')

In [10]:
# Append the relabelled GSGP rows and renumber the index from 0.
data = pd.concat([data, new_data], ignore_index=True)

In [17]:
# algo_type values to report on ('SlimGSGP_1_mul_False' is currently disabled).
algo_types = [
    'SlimGSGP_1_mul_True',
    'SlimGSGP_2_sum_True',
]

# Dataset names to report on; these are compared against the `dataset` column
# as-is, so the spelling (e.g. 'yatch') has to match the logged values.
datasets = [
    'yatch',
    'istanbul',
    'airfoil',
    'concrete_strength',
    'concrete_slump',
]

In [18]:
# Sanity check: which algorithm labels are present after filtering.
data["algo"].unique()

array(['SLIM-GSGP', 'XOSw w DGSM', 'XODn w DGSM', 'XOSw wo DGSM',
       'XODn wo DGSM', 'GSGP'], dtype=object)

In [19]:
# Pairwise two-sided Mann-Whitney U tests on `test_fitness`.
# For every (algo_type, dataset) combination one LaTeX table of p-values is
# printed; only the upper triangle (row algorithm listed before column
# algorithm) is filled, the rest is rendered blank via na_rep="".

# Fixed row/column order of the tables (hard-coded, not derived from the data).
algorithms = ['SLIM-GSGP', 'XOSw w DGSM', 'XODn w DGSM', 'XOSw wo DGSM',
              'XODn wo DGSM', 'GSGP']

for algo_type in algo_types:
    for dataset in datasets:
        # Rows for this algo_type/dataset logged at generation 1000
        # (presumably the last generation of each run -- TODO confirm).
        subset = data[(data['algo_type'] == algo_type) & (data['dataset'] == dataset) & (data['generation'] == 1000)]

        # Slice each algorithm's sample once instead of once per pair.
        samples = {
            algo: subset.loc[subset['algo'] == algo, 'test_fitness'].values
            for algo in algorithms
        }

        # Empty (all-NaN) matrix that will hold the formatted p-values.
        matrix = pd.DataFrame(index=algorithms, columns=algorithms)

        for i, algo_1 in enumerate(algorithms):
            for algo_2 in algorithms[i + 1:]:
                fitness_algo_1 = samples[algo_1]
                fitness_algo_2 = samples[algo_2]

                # An empty sample cannot be tested: depending on the SciPy
                # version mannwhitneyu raises or returns NaN. Mark the cell
                # explicitly so missing data is visible in the table.
                if len(fitness_algo_1) == 0 or len(fitness_algo_2) == 0:
                    matrix.loc[algo_1, algo_2] = "n/a"
                    continue

                # 'two-sided' is the current SciPy default; stated explicitly
                # so the reported p-values do not depend on that default.
                p_value = mannwhitneyu(fitness_algo_1, fitness_algo_2,
                                       alternative='two-sided').pvalue

                # Store as scientific notation with two decimals.
                matrix.loc[algo_1, algo_2] = f"{p_value:.2e}"

        # NOTE(review): the caption is emitted verbatim, so underscores in
        # algo_type/dataset reach LaTeX unescaped -- confirm the target
        # document copes with that.
        print(matrix.to_latex(na_rep="", escape=False, position = '!ht', caption = f"\n\\textbf{{Metric: Test Fitness, Algorithm Type: {algo_type}, Dataset: {dataset}}}\n"))
        

\begin{table}[!ht]
\caption{
\textbf{Metric: Test Fitness, Algorithm Type: SlimGSGP_1_mul_True, Dataset: yatch}
}
\begin{tabular}{lllllll}
\toprule
 & SLIM-GSGP & XOSw w DGSM & XODn w DGSM & XOSw wo DGSM & XODn wo DGSM & GSGP \\
\midrule
SLIM-GSGP &  & 5.52e-01 & 9.02e-09 & 2.53e-04 & 6.65e-09 & 8.15e-10 \\
XOSw w DGSM &  &  & 3.81e-07 & 8.12e-04 & 2.78e-07 & 1.20e-08 \\
XODn w DGSM &  &  &  & 8.77e-02 & 8.65e-01 & 6.12e-10 \\
XOSw wo DGSM &  &  &  &  & 3.64e-02 & 3.08e-08 \\
XODn wo DGSM &  &  &  &  &  & 6.52e-09 \\
GSGP &  &  &  &  &  &  \\
\bottomrule
\end{tabular}
\end{table}

\begin{table}[!ht]
\caption{
\textbf{Metric: Test Fitness, Algorithm Type: SlimGSGP_1_mul_True, Dataset: istanbul}
}
\begin{tabular}{lllllll}
\toprule
 & SLIM-GSGP & XOSw w DGSM & XODn w DGSM & XOSw wo DGSM & XODn wo DGSM & GSGP \\
\midrule
SLIM-GSGP &  & 8.30e-01 & 6.10e-01 & 2.52e-01 & 1.15e-01 & 1.25e-05 \\
XOSw w DGSM &  &  & 6.52e-01 & 3.26e-01 & 1.33e-01 & 1.87e-05 \\
XODn w DGSM &  &  &  & 5.30e-01 & 2

In [20]:
# Pairwise two-sided Mann-Whitney U tests on `elite_size`.
# For every (algo_type, dataset) combination one LaTeX table of p-values is
# printed; only the upper triangle (row algorithm listed before column
# algorithm) is filled, the rest is rendered blank via na_rep="".

# Fixed row/column order of the tables (hard-coded, not derived from the data).
algorithms = ['SLIM-GSGP', 'XOSw w DGSM', 'XODn w DGSM', 'XOSw wo DGSM',
              'XODn wo DGSM', 'GSGP']

for algo_type in algo_types:
    for dataset in datasets:
        # Rows for this algo_type/dataset logged at generation 1000
        # (presumably the last generation of each run -- TODO confirm).
        subset = data[(data['algo_type'] == algo_type) & (data['dataset'] == dataset)& (data['generation'] == 1000)]

        # Slice each algorithm's elite_size sample once instead of once per pair.
        elite_sizes = {
            algo: subset.loc[subset['algo'] == algo, 'elite_size'].values
            for algo in algorithms
        }

        # Empty (all-NaN) matrix that will hold the formatted p-values.
        matrix = pd.DataFrame(index=algorithms, columns=algorithms)

        for i, algo_1 in enumerate(algorithms):
            for algo_2 in algorithms[i + 1:]:
                sizes_algo_1 = elite_sizes[algo_1]
                sizes_algo_2 = elite_sizes[algo_2]

                # An empty sample cannot be tested: depending on the SciPy
                # version mannwhitneyu raises or returns NaN. Mark the cell
                # explicitly so missing data is visible in the table.
                if len(sizes_algo_1) == 0 or len(sizes_algo_2) == 0:
                    matrix.loc[algo_1, algo_2] = "n/a"
                    continue

                # 'two-sided' is the current SciPy default; stated explicitly
                # so the reported p-values do not depend on that default.
                p_value = mannwhitneyu(sizes_algo_1, sizes_algo_2,
                                       alternative='two-sided').pvalue

                # Store as scientific notation with two decimals.
                matrix.loc[algo_1, algo_2] = f"{p_value:.2e}"

        # NOTE(review): the caption is emitted verbatim, so underscores in
        # algo_type/dataset reach LaTeX unescaped -- confirm the target
        # document copes with that.
        print(matrix.to_latex(na_rep="", escape=False, position = '!ht', caption = f"\n\\textbf{{Metric: Elite Size, Algorithm Type: {algo_type}, Dataset: {dataset}}}\n"))
        

\begin{table}[!ht]
\caption{
\textbf{Metric: Elite Size, Algorithm Type: SlimGSGP_1_mul_True, Dataset: yatch}
}
\begin{tabular}{lllllll}
\toprule
 & SLIM-GSGP & XOSw w DGSM & XODn w DGSM & XOSw wo DGSM & XODn wo DGSM & GSGP \\
\midrule
SLIM-GSGP &  & 8.53e-08 & 7.54e-14 & 1.35e-14 & 1.35e-14 & 1.35e-14 \\
XOSw w DGSM &  &  & 3.01e-11 & 3.01e-11 & 3.01e-11 & 3.01e-11 \\
XODn w DGSM &  &  &  & 1.75e-05 & 1.56e-01 & 3.02e-11 \\
XOSw wo DGSM &  &  &  &  & 7.04e-10 & 3.02e-11 \\
XODn wo DGSM &  &  &  &  &  & 3.02e-11 \\
GSGP &  &  &  &  &  &  \\
\bottomrule
\end{tabular}
\end{table}

\begin{table}[!ht]
\caption{
\textbf{Metric: Elite Size, Algorithm Type: SlimGSGP_1_mul_True, Dataset: istanbul}
}
\begin{tabular}{lllllll}
\toprule
 & SLIM-GSGP & XOSw w DGSM & XODn w DGSM & XOSw wo DGSM & XODn wo DGSM & GSGP \\
\midrule
SLIM-GSGP &  & 8.53e-11 & 3.01e-11 & 3.01e-11 & 3.02e-11 & 3.02e-11 \\
XOSw w DGSM &  &  & 9.73e-10 & 3.00e-11 & 3.01e-11 & 1.43e-08 \\
XODn w DGSM &  &  &  & 5.94e-02 & 7.42e

In [26]:
# Median elite_size of GSGP at generation 1000, printed per dataset.
# The algorithm/generation filter does not depend on the dataset, so it is
# applied once up front.
gsgp_at_1000 = data[(data["algo"] == 'GSGP') & (data["generation"] == 1000)]

for dataset in datasets:
    print(dataset)
    gsgp = gsgp_at_1000[gsgp_at_1000["dataset"] == dataset]
    print(gsgp['elite_size'].median())

yatch
3.4146366467495643e+68
istanbul
2427.5
airfoil
2.7368042155397647e+60
concrete_strength
2.1323947178180478e+70
concrete_slump
8.048215556212395e+86
