In [64]:
import numpy as np
import pandas as pd
import csv

from os import listdir

import matplotlib.pyplot as plt
from scipy.stats import shapiro
from scipy.stats import ttest_ind
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests

In [65]:
# Root results directory (relative path); it is listed to find one sub-folder per algorithm.
dir_run = '../results'

# Number of seed sub-folders (named 0 .. NSEEDS-1) read for each algorithm/speed pair.
NSEEDS = 10


In [66]:
def get_best_result_df(best_result_file_path):
    """Parse one ``best_results`` CSV file into a flat record.

    The file is read with ``csv.reader`` and only three rows are used
    (0-based): row 1 holds the parameters, row 3 the fitness history and
    row 5 the run configuration. Every other row is ignored.

    Parameters
    ----------
    best_result_file_path : str
        Path of the CSV file to read.

    Returns
    -------
    dict
        Keys ``'D'``, ``'AEdAO'``, ``'PdD'``, ``'Z'``, ``'Brake Power'``,
        ``'Seed'``, ``'Algorithm'`` and ``'VS'``.

    Raises
    ------
    ValueError
        If the parameter or configuration row is missing from the file, or
        if a field cannot be converted to a number.
    IndexError
        If the parameter or configuration row has fewer columns than the
        indexes read below.
    """
    params_row, history_row, configs_row = 1, 3, 5

    wanted = {}
    with open(best_result_file_path, 'r', newline='') as file:
        for index, row in enumerate(csv.reader(file)):
            if index in (params_row, history_row, configs_row):
                wanted[index] = row

    # Fail with a clear message instead of an UnboundLocalError further down.
    if params_row not in wanted or configs_row not in wanted:
        raise ValueError(
            f"'{best_result_file_path}' is malformed: expected a parameter row "
            f"(row {params_row}) and a configuration row (row {configs_row})"
        )
    params = wanted[params_row]
    configs = wanted[configs_row]

    # D, AEdAO, PdD, Z, fitness
    D       = float(params[0])
    AEdAO   = float(params[1])
    PdD     = float(params[2])
    Z       = int(float(params[3]))  # accepts both '5' and '5.0'
    fitness = float(params[6])

    # The history is not part of the returned record; converting it only
    # validates that every entry is numeric.
    for value in wanted.get(history_row, []):
        float(value)

    # configs
    solver_name = configs[0]
    vs          = float(configs[1])
    seed        = int(configs[4])

    # The stored fitness is sign-flipped for these two solvers
    # (presumably they store the negated objective -- TODO confirm).
    if solver_name in ('openaies', 'cmaes'):
        fitness = -fitness

    return {'D': D,
            'AEdAO': AEdAO,
            'PdD': PdD,
            'Z': Z,
            'Brake Power': fitness,
            'Seed': seed,
            'Algorithm': solver_name,
            'VS': vs}

## Get best by seed

In [67]:
# Column layout of the results table (kept as a dict of empty lists for compatibility).
data = {'D': [], 'AEdAO': [], 'PdD': [], 'Z': [], 'Brake Power': [], 'Seed': [], 'Algorithm': [], 'VS': []}

# Collect one record per run and build the frame once. Concatenating onto an
# empty frame inside the loop is quadratic and relies on pandas' deprecated
# handling of empty frames when inferring the result dtypes.
records = []

for algorithm in listdir(dir_run):
    algorithm_folder = f'{dir_run}/{algorithm}'

    for speed in listdir(algorithm_folder):  # loop over the speeds
        speed_folder = f'{algorithm_folder}/{speed}'

        for seed in range(NSEEDS):
            seed_folder = f'{speed_folder}/{seed}'

            best_result_files = [filename for filename in listdir(seed_folder) if 'best_results' in filename]

            # Seeds without a best_results file are skipped.
            if best_result_files:
                records.append(get_best_result_df(f'{seed_folder}/{best_result_files[0]}'))

# 'Seed' is cast together with 'Z' and then removed from the table.
df_results = (
    pd.DataFrame(records, columns=list(data))
    .astype({"Z": int, "Seed": int})
    .drop('Seed', axis=1)
)

In [None]:
# Display the combined results table (one row per best_results file found).
df_results

In [68]:
# Split the combined table into one frame per algorithm. The 'Algorithm'
# column is constant inside each subset, so it is dropped.
df_cmaes    = df_results[df_results['Algorithm'] == 'cmaes'].drop(columns='Algorithm')
df_openaies = df_results[df_results['Algorithm'] == 'openaies'].drop(columns='Algorithm')
df_de       = df_results[df_results['Algorithm'] == 'DE'].drop(columns='Algorithm')
df_de_mod   = df_results[df_results['Algorithm'] == 'DE_mod'].drop(columns='Algorithm')

In [None]:
# Display the DE_mod subset.
df_de_mod

In [69]:
# Build the replicated DE datasets.
# Replicated DE was only run for V_S = 7 and 7.5, so there are only two subsets.

df_de_7   = df_de[df_de['VS'] == 7.0].drop(columns='VS')
df_de_7_5 = df_de[df_de['VS'] == 7.5].drop(columns='VS')

In [70]:
# Build the modified DE datasets, one per V_S value.

df_de_mod_7   = df_de_mod[df_de_mod['VS'] == 7.0].drop(columns='VS')
df_de_mod_7_5 = df_de_mod[df_de_mod['VS'] == 7.5].drop(columns='VS')
df_de_mod_8   = df_de_mod[df_de_mod['VS'] == 8.0].drop(columns='VS')
df_de_mod_8_5 = df_de_mod[df_de_mod['VS'] == 8.5].drop(columns='VS')

In [71]:
# Build the CMA-ES datasets, one per V_S value.

df_cmaes_7   = df_cmaes[df_cmaes['VS'] == 7.0].drop(columns='VS')
df_cmaes_7_5 = df_cmaes[df_cmaes['VS'] == 7.5].drop(columns='VS')
df_cmaes_8   = df_cmaes[df_cmaes['VS'] == 8.0].drop(columns='VS')
df_cmaes_8_5 = df_cmaes[df_cmaes['VS'] == 8.5].drop(columns='VS')

In [72]:
# Build the OpenAI-ES datasets, one per V_S value.

df_openaies_7   = df_openaies[df_openaies['VS'] == 7.0].drop(columns='VS')
df_openaies_7_5 = df_openaies[df_openaies['VS'] == 7.5].drop(columns='VS')
df_openaies_8   = df_openaies[df_openaies['VS'] == 8.0].drop(columns='VS')
df_openaies_8_5 = df_openaies[df_openaies['VS'] == 8.5].drop(columns='VS')

## Statistics

In [10]:
# Summary statistics of the replicated DE runs at V_S = 7.
df_de_7.describe()

Unnamed: 0,D,AEdAO,PdD,Z,Brake Power
count,10.0,10.0,10.0,10.0,10.0
mean,0.8,0.661627,0.669448,5.0,81.551321
std,0.0,0.028011,0.018635,0.0,0.125404
min,0.8,0.625847,0.640228,5.0,81.432608
25%,0.8,0.64703,0.658366,5.0,81.477026
50%,0.8,0.657047,0.664718,5.0,81.528028
75%,0.8,0.668007,0.678906,5.0,81.5751
max,0.8,0.730914,0.701546,5.0,81.865217


## Shapiro-Wilk Test

In [73]:
def shapiro_wilk(df):
    """Run the Shapiro-Wilk normality test on the ``'Brake Power'`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'Brake Power'`` column.

    Returns
    -------
    dict
        On success: ``'Estatística de Shapiro-Wilk'`` (test statistic) and
        ``'Valor p'`` (p-value). If the test raises, the error is printed and
        the dict holds ``'Erro'`` (the message) plus both result keys set to
        NaN, so a caller reading ``'Valor p'`` does not fail with a
        misleading ``KeyError``.
    """
    try:
        stat, p_value = shapiro(df['Brake Power'])
    except Exception as e:  # best-effort: report the problem and return NaN results
        print('Erro ' + str(e))
        nan = float('nan')
        return {'Erro': str(e), 'Estatística de Shapiro-Wilk': nan, 'Valor p': nan}
    return {'Estatística de Shapiro-Wilk': stat, 'Valor p': p_value}

## Statistical Tests

Hypothesis H0 (Null Hypothesis): The Brake Power obtained by Algorithm 1 is not statistically different from that obtained by Algorithm 2.

We will try to prove this with 95% confidence.

The hypothesis test (the t-test when both samples pass the Shapiro-Wilk normality test, the Mann-Whitney U test otherwise) returns a p-value. If the p-value is below the significance level (α = 0.05), H0 is rejected and the two results are considered statistically different; otherwise, there is not enough evidence to claim a difference.

In [74]:
def run_statistical_test(df_a, df_b, alpha=0.05):
    """Compare the ``'Brake Power'`` columns of two frames.

    Each sample is first checked with ``shapiro_wilk``. When both normality
    p-values exceed ``alpha`` an independent two-sample t-test is applied;
    otherwise the two-sided Mann-Whitney U test is used.

    Parameters
    ----------
    df_a, df_b : pandas.DataFrame
        Frames with a ``'Brake Power'`` column.
    alpha : float, default 0.05
        Threshold applied to the Shapiro-Wilk p-values.

    Returns
    -------
    dict
        ``'Algorithm'`` (``'t-test'`` or ``'mann-whitney-u'``),
        ``'Statistic'`` and ``'p-value'``.
    """
    normality_a = shapiro_wilk(df_a)
    normality_b = shapiro_wilk(df_b)

    both_normal = normality_a['Valor p'] > alpha and normality_b['Valor p'] > alpha
    sample_a, sample_b = df_a['Brake Power'], df_b['Brake Power']

    if not both_normal:
        # At least one sample failed the normality check: non-parametric test.
        statistic, p_value = mannwhitneyu(sample_a, sample_b, alternative='two-sided')
        return {'Algorithm':'mann-whitney-u', 'Statistic':statistic, 'p-value':p_value}

    # Both samples look normal: parametric test.
    t_statistic, p_value = ttest_ind(sample_a, sample_b)
    return {'Algorithm':'t-test', 'Statistic':t_statistic, 'p-value':p_value}
        


#### Combinations of tests

For V_S [7 and 7.5], we need to compare the four algorithms:

DE      - DE_MOD \
DE      - CMA-ES \
DE      - OpenAI-ES \
DE_MOD  - CMA-ES \
DE_MOD  - OpenAI-ES \
CMA-ES  - OpenAI-ES 

For V_S [8 and 8.5], the replicated DE was not run, so fewer tests are needed:

DE_MOD  - CMA-ES \
DE_MOD  - OpenAI-ES \
CMA-ES  - OpenAI-ES 

In [75]:
# V_S = 7: all six pairwise comparisons between the four algorithms.
(
    de_de_mod_7,
    de_cma_7,
    de_openai_7,
    de_mod_cma_7,
    de_mod_openai_7,
    cma_openai_7,
) = [
    run_statistical_test(first, second)
    for first, second in (
        (df_de_7, df_de_mod_7),
        (df_de_7, df_cmaes_7),
        (df_de_7, df_openaies_7),
        (df_de_mod_7, df_cmaes_7),
        (df_de_mod_7, df_openaies_7),
        (df_cmaes_7, df_openaies_7),
    )
]

In [77]:
# Print, for each pair at V_S = 7, the test that was selected, its statistic and its (uncorrected) p-value.
print('V_S = 7.0 \t\t | ALGORITHM \t\t | STATISTIC \t | P-VALUE')
print('---------------------------------------------------------------------------------------')
print(f"DE - DE MOD \t\t | {de_de_mod_7['Algorithm']} \t | {de_de_mod_7['Statistic']} \t | {de_de_mod_7['p-value']} ")
print(f"DE - CMA-ES \t\t | {de_cma_7['Algorithm']} \t | {de_cma_7['Statistic']} \t | {de_cma_7['p-value']} ")
print(f"DE - OPENAI-ES \t\t | {de_openai_7['Algorithm']} \t | {de_openai_7['Statistic']} \t | {de_openai_7['p-value']} ")
print(f"DE MOD - CMA-ES \t | {de_mod_cma_7['Algorithm']} \t | {de_mod_cma_7['Statistic']} \t | {de_mod_cma_7['p-value']} ")
print(f"DE MOD - OPENAI-ES \t | {de_mod_openai_7['Algorithm']} \t | {de_mod_openai_7['Statistic']} \t | {de_mod_openai_7['p-value']} ")
print(f"CMA-ES - OPENAI-ES \t | {cma_openai_7['Algorithm']} \t | {cma_openai_7['Statistic']} \t | {cma_openai_7['p-value']} ")


V_S = 7.0 		 | ALGORITHM 		 | STATISTIC 	 | P-VALUE
---------------------------------------------------------------------------------------
DE - DE MOD 		 | mann-whitney-u 	 | 78.0 	 | 0.03763531378731424 
DE - CMA-ES 		 | mann-whitney-u 	 | 82.0 	 | 0.017257456083119765 
DE - OPENAI-ES 		 | mann-whitney-u 	 | 30.0 	 | 0.14046504815835495 
DE MOD - CMA-ES 	 | mann-whitney-u 	 | 57.0 	 | 0.6231762238821174 
DE MOD - OPENAI-ES 	 | mann-whitney-u 	 | 15.0 	 | 0.009108496398030963 
CMA-ES - OPENAI-ES 	 | mann-whitney-u 	 | 12.0 	 | 0.004586392080253494 


In [78]:
# V_S = 7.5: all six pairwise comparisons between the four algorithms.
(
    de_de_mod_7_5,
    de_cma_7_5,
    de_openai_7_5,
    de_mod_cma_7_5,
    de_mod_openai_7_5,
    cma_openai_7_5,
) = [
    run_statistical_test(first, second)
    for first, second in (
        (df_de_7_5, df_de_mod_7_5),
        (df_de_7_5, df_cmaes_7_5),
        (df_de_7_5, df_openaies_7_5),
        (df_de_mod_7_5, df_cmaes_7_5),
        (df_de_mod_7_5, df_openaies_7_5),
        (df_cmaes_7_5, df_openaies_7_5),
    )
]

In [80]:
# Print, for each pair at V_S = 7.5, the test that was selected, its statistic and its (uncorrected) p-value.
print('V_S = 7.5 \t\t | ALGORITHM \t\t | STATISTIC \t | P-VALUE')
print('---------------------------------------------------------------------------------------')
print(f"DE - DE MOD \t\t | {de_de_mod_7_5['Algorithm']} \t | {de_de_mod_7_5['Statistic']} \t | {de_de_mod_7_5['p-value']} ")
print(f"DE - CMA-ES \t\t | {de_cma_7_5['Algorithm']} \t | {de_cma_7_5['Statistic']} \t | {de_cma_7_5['p-value']} ")
print(f"DE - OPENAI-ES \t\t | {de_openai_7_5['Algorithm']} \t | {de_openai_7_5['Statistic']} \t | {de_openai_7_5['p-value']} ")
print(f"DE MOD - CMA-ES \t | {de_mod_cma_7_5['Algorithm']} \t | {de_mod_cma_7_5['Statistic']} \t | {de_mod_cma_7_5['p-value']} ")
print(f"DE MOD - OPENAI-ES \t | {de_mod_openai_7_5['Algorithm']} \t | {de_mod_openai_7_5['Statistic']} \t\t | {de_mod_openai_7_5['p-value']} ")
print(f"CMA-ES - OPENAI-ES \t | {cma_openai_7_5['Algorithm']} \t | {cma_openai_7_5['Statistic']} \t\t | {cma_openai_7_5['p-value']} ")

V_S = 7.5 		 | ALGORITHM 		 | STATISTIC 	 | P-VALUE
---------------------------------------------------------------------------------------
DE - DE MOD 		 | mann-whitney-u 	 | 93.0 	 | 0.0013149446697132139 
DE - CMA-ES 		 | mann-whitney-u 	 | 95.0 	 | 0.0007685389131627665 
DE - OPENAI-ES 		 | mann-whitney-u 	 | 28.0 	 | 0.10410988966022681 
DE MOD - CMA-ES 	 | mann-whitney-u 	 | 48.0 	 | 0.9097218891455553 
DE MOD - OPENAI-ES 	 | mann-whitney-u 	 | 1.0 		 | 0.00024612812790522973 
CMA-ES - OPENAI-ES 	 | mann-whitney-u 	 | 0.0 		 | 0.00018267179110955002 


In [81]:
# V_S = 8: replicated DE is absent, so only three pairwise comparisons.
de_mod_cma_8, de_mod_openai_8, cma_openai_8 = [
    run_statistical_test(first, second)
    for first, second in (
        (df_de_mod_8, df_cmaes_8),
        (df_de_mod_8, df_openaies_8),
        (df_cmaes_8, df_openaies_8),
    )
]

In [83]:
# Print, for each pair at V_S = 8, the test that was selected, its statistic and its (uncorrected) p-value.
print('V_S = 8.0 \t\t | ALGORITHM \t\t | STATISTIC \t\t | P-VALUE')
print('------------------------------------------------------------------------------------------------')
print(f"DE MOD - CMA-ES \t | {de_mod_cma_8['Algorithm']} \t\t | {de_mod_cma_8['Statistic']} \t | {de_mod_cma_8['p-value']} ")
print(f"DE MOD - OPENAI-ES \t | {de_mod_openai_8['Algorithm']} \t | {de_mod_openai_8['Statistic']} \t\t | {de_mod_openai_8['p-value']} ")
print(f"CMA-ES - OPENAI-ES \t | {cma_openai_8['Algorithm']} \t | {cma_openai_8['Statistic']} \t\t\t | {cma_openai_8['p-value']} ")


V_S = 8.0 		 | ALGORITHM 		 | STATISTIC 		 | P-VALUE
------------------------------------------------------------------------------------------------
DE MOD - CMA-ES 	 | t-test 		 | 3.1860557635531324 	 | 0.0051169133656497655 
DE MOD - OPENAI-ES 	 | mann-whitney-u 	 | 35.0 		 | 0.27303633975118835 
CMA-ES - OPENAI-ES 	 | mann-whitney-u 	 | 2.0 			 | 0.00032983852077799353 


In [88]:
# V_S = 8.5: replicated DE is absent, so only three pairwise comparisons.
de_mod_cma_8_5, de_mod_openai_8_5, cma_openai_8_5 = [
    run_statistical_test(first, second)
    for first, second in (
        (df_de_mod_8_5, df_cmaes_8_5),
        (df_de_mod_8_5, df_openaies_8_5),
        (df_cmaes_8_5, df_openaies_8_5),
    )
]

In [89]:
# Print, for each pair at V_S = 8.5, the test that was selected, its statistic and its (uncorrected) p-value.
print('V_S = 8.5 \t\t | ALGORITHM \t\t | STATISTIC \t | P-VALUE')
print('---------------------------------------------------------------------------------------')
print(f"DE MOD - CMA-ES \t | {de_mod_cma_8_5['Algorithm']} \t | {de_mod_cma_8_5['Statistic']} \t | {de_mod_cma_8_5['p-value']} ")
print(f"DE MOD - OPENAI-ES \t | {de_mod_openai_8_5['Algorithm']} \t | {de_mod_openai_8_5['Statistic']} \t | {de_mod_openai_8_5['p-value']} ")
print(f"CMA-ES - OPENAI-ES \t | {cma_openai_8_5['Algorithm']} \t | {cma_openai_8_5['Statistic']} \t | {cma_openai_8_5['p-value']} ")

V_S = 8.5 		 | ALGORITHM 		 | STATISTIC 	 | P-VALUE
---------------------------------------------------------------------------------------
DE MOD - CMA-ES 	 | mann-whitney-u 	 | 87.0 	 | 0.00579535854433471 
DE MOD - OPENAI-ES 	 | mann-whitney-u 	 | 35.0 	 | 0.27303633975118835 
CMA-ES - OPENAI-ES 	 | mann-whitney-u 	 | 14.0 	 | 0.0072845570094796615 


### Bonferroni correction

In [90]:
def bonferroni_alfa_correction(n_tests, alpha=0.05):
    """Return the Bonferroni-corrected per-test significance level.

    Parameters
    ----------
    n_tests : int
        Number of hypothesis tests in the family; must be at least 1.
    alpha : float, default 0.05
        Family-wise significance level.

    Returns
    -------
    float
        ``alpha / n_tests``.

    Raises
    ------
    ValueError
        If ``n_tests`` is smaller than 1 (zero would divide by zero and a
        negative count would yield a meaningless negative threshold).
    """
    if n_tests < 1:
        raise ValueError(f'n_tests must be at least 1, got {n_tests}')
    return alpha / n_tests

In [91]:
# Corrected significance levels: three pairwise tests for V_S = 8 / 8.5, six for V_S = 7 / 7.5.
alfa_3_tests = bonferroni_alfa_correction(3)
alfa_6_tests = bonferroni_alfa_correction(6)

In [92]:
# V_S = 7
# Each flag is True when the pair's p-value is below the corrected alpha for six tests.

de_de_mod_7_corrected     = (de_de_mod_7['p-value'] < alfa_6_tests)
de_cma_7_corrected        = (de_cma_7['p-value'] < alfa_6_tests)
de_openai_7_corrected     = (de_openai_7['p-value'] < alfa_6_tests)
de_mod_cma_7_corrected    = (de_mod_cma_7['p-value'] < alfa_6_tests)
de_mod_openai_7_corrected = (de_mod_openai_7['p-value'] < alfa_6_tests)
cma_openai_7_corrected    = (cma_openai_7['p-value'] < alfa_6_tests)


In [93]:
# Report the Bonferroni decision for each pair at V_S = 7.
# The flags are True when the p-value is below the corrected alpha, i.e. when
# H0 (no difference) is rejected, so the header must not label this as "H0".
print("True = H0 rejected: Brake Power obtained by Algorithm 1 has a statistically significant difference compared to that obtained by Algorithm 2")
print()
print(f'V_S = 7.0 \t\t | BONFERRONI alpha = {alfa_6_tests:.3f}')
print('----------------------------------------------')
print(f"DE - DE MOD \t\t | {de_de_mod_7_corrected}")
print(f"DE - CMA-ES \t\t | {de_cma_7_corrected}")
print(f"DE - OPENAI-ES \t\t | {de_openai_7_corrected}")
print(f"DE MOD - CMA-ES \t | {de_mod_cma_7_corrected}")
print(f"DE MOD - OPENAI-ES \t | {de_mod_openai_7_corrected}")
print(f"CMA-ES - OPENAI-ES \t | {cma_openai_7_corrected}")

H0: Brake Power obtained by Algorithm 1 have statistical difference compared to that obtained by Algorithm 2

V_S = 7.0 		 | BONFERRONI alpha = 0.008
----------------------------------------------
DE - DE MOD 		 | False
DE - CMA-ES 		 | False
DE - OPENAI-ES 		 | False
DE MOD - CMA-ES 	 | False
DE MOD - OPENAI-ES 	 | False
CMA-ES - OPENAI-ES 	 | True


In [94]:
# V_S = 7.5
# Each flag is True when the pair's p-value is below the corrected alpha for six tests.

de_de_mod_7_5_corrected     = (de_de_mod_7_5['p-value'] < alfa_6_tests)
de_cma_7_5_corrected        = (de_cma_7_5['p-value'] < alfa_6_tests)
de_openai_7_5_corrected     = (de_openai_7_5['p-value'] < alfa_6_tests)
de_mod_cma_7_5_corrected    = (de_mod_cma_7_5['p-value'] < alfa_6_tests)
de_mod_openai_7_5_corrected = (de_mod_openai_7_5['p-value'] < alfa_6_tests)
cma_openai_7_5_corrected    = (cma_openai_7_5['p-value'] < alfa_6_tests)

In [95]:
# Report the Bonferroni decision for each pair at V_S = 7.5.
# The flags are True when the p-value is below the corrected alpha, i.e. when
# H0 (no difference) is rejected, so the header must not label this as "H0".
print("True = H0 rejected: Brake Power obtained by Algorithm 1 has a statistically significant difference compared to that obtained by Algorithm 2")
print()
print(f'V_S = 7.5 \t\t | BONFERRONI alpha = {alfa_6_tests:.3f}')
print('----------------------------------------------')
print(f"DE - DE MOD \t\t | {de_de_mod_7_5_corrected}")
print(f"DE - CMA-ES \t\t | {de_cma_7_5_corrected}")
print(f"DE - OPENAI-ES \t\t | {de_openai_7_5_corrected}")
print(f"DE MOD - CMA-ES \t | {de_mod_cma_7_5_corrected}")
print(f"DE MOD - OPENAI-ES \t | {de_mod_openai_7_5_corrected}")
print(f"CMA-ES - OPENAI-ES \t | {cma_openai_7_5_corrected}")

H0: Brake Power obtained by Algorithm 1 have statistical difference compared to that obtained by Algorithm 2

V_S = 7.5 		 | BONFERRONI alpha = 0.008
----------------------------------------------
DE - DE MOD 		 | True
DE - CMA-ES 		 | True
DE - OPENAI-ES 		 | False
DE MOD - CMA-ES 	 | False
DE MOD - OPENAI-ES 	 | True
CMA-ES - OPENAI-ES 	 | True


In [96]:
# V_S = 8
# Each flag is True when the pair's p-value is below the corrected alpha for three tests.

de_mod_cma_8_corrected    = (de_mod_cma_8['p-value'] < alfa_3_tests)
de_mod_openai_8_corrected = (de_mod_openai_8['p-value'] < alfa_3_tests)
cma_openai_8_corrected    = (cma_openai_8['p-value'] < alfa_3_tests)

In [97]:
# Report the Bonferroni decision for each pair at V_S = 8.
# The flags are True when the p-value is below the corrected alpha, i.e. when
# H0 (no difference) is rejected, so the header must not label this as "H0".
print("True = H0 rejected: Brake Power obtained by Algorithm 1 has a statistically significant difference compared to that obtained by Algorithm 2")
print()
print(f'V_S = 8.0 \t\t | BONFERRONI alpha = {alfa_3_tests:.3f}')
print('---------------------------------------------------')
print(f"DE MOD - CMA-ES \t | {de_mod_cma_8_corrected}")
print(f"DE MOD - OPENAI-ES \t | {de_mod_openai_8_corrected}")
print(f"CMA-ES - OPENAI-ES \t | {cma_openai_8_corrected}")

H0: Brake Power obtained by Algorithm 1 have statistical difference compared to that obtained by Algorithm 2

V_S = 8.0 		 | BONFERRONI alpha = 0.017
---------------------------------------------------
DE MOD - CMA-ES 	 | True
DE MOD - OPENAI-ES 	 | False
CMA-ES - OPENAI-ES 	 | True


In [98]:
# V_S = 8.5
# Each flag is True when the pair's p-value is below the corrected alpha for three tests.

de_mod_cma_8_5_corrected    = (de_mod_cma_8_5['p-value'] < alfa_3_tests)
de_mod_openai_8_5_corrected = (de_mod_openai_8_5['p-value'] < alfa_3_tests)
cma_openai_8_5_corrected    = (cma_openai_8_5['p-value'] < alfa_3_tests)

In [99]:
# Report the Bonferroni decision for each pair at V_S = 8.5.
# The flags are True when the p-value is below the corrected alpha, i.e. when
# H0 (no difference) is rejected, so the header must not label this as "H0".
print("True = H0 rejected: Brake Power obtained by Algorithm 1 has a statistically significant difference compared to that obtained by Algorithm 2")
print()
print(f'V_S = 8.5 \t\t | BONFERRONI alpha = {alfa_3_tests:.3f}')
print('---------------------------------------------------')
print(f"DE MOD - CMA-ES \t | {de_mod_cma_8_5_corrected}")
print(f"DE MOD - OPENAI-ES \t | {de_mod_openai_8_5_corrected}")
print(f"CMA-ES - OPENAI-ES \t | {cma_openai_8_5_corrected}")

H0: Brake Power obtained by Algorithm 1 have statistical difference compared to that obtained by Algorithm 2

V_S = 8.5 		 | BONFERRONI alpha = 0.017
---------------------------------------------------
DE MOD - CMA-ES 	 | True
DE MOD - OPENAI-ES 	 | False
CMA-ES - OPENAI-ES 	 | True
