In [1]:
import os
# Move the working directory one level up (presumably to the project root, so
# that the `src` package and the relative `./log/...` paths used later resolve).
# The target is resolved to an absolute path only once per kernel session:
# a plain `os.chdir('..')` climbs one more level every time the cell is re-run.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath('..')
os.chdir(PROJECT_ROOT)

In [2]:
import pickle as pkl
import numpy as np

In [3]:
from src.common.fitness import (schwefel, ackley, griewank, rastrigin)

In [4]:
# Benchmark objective functions compared in this notebook. The tuple order
# fixes the order of the per-benchmark column groups built further below.
benchmarks = (
    schwefel,
    ackley,
    griewank,
    rastrigin,
)

In [5]:
# Directory holding the pickled experiment logs of each compared method.
# Insertion order matters: later cells enumerate this dict to assign one
# comparison-matrix row per method.
root_dirs = {
    'efga': './log/efga_param_tuning/',
    'gendered_FGA': './log/gendered_param_tuning/',
    'standard_GA': './log/standard_GA',
    'particle_swarm': './log/particle_swarm/'
}
# Fail early, naming the offending entry, if a log directory is missing.
# `isdir` (rather than `exists`) also rejects a regular file at that path.
for d, root_dir in root_dirs.items():
    assert os.path.isdir(root_dir), f'Log directory for {d!r} not found: {root_dir}'

In [6]:
# Number of experiments encoded in every artifact file name.
N_EXPERIMENTS: int = 25


def _priority_artifact_name(x):
    """Return the artifact file name carrying the `priority_3` infix."""
    return f'{x}_priority_3_experiments_{N_EXPERIMENTS}.pkl'


def _plain_artifact_name(x):
    """Return the artifact file name without the `priority_3` infix."""
    return f'{x}_experiments_{N_EXPERIMENTS}.pkl'


# Maps a method key to a callable that turns a benchmark name into the
# file name of that method's pickled results.
filename_generators = {
    'efga': _priority_artifact_name,
    'gendered_FGA': _priority_artifact_name,
    'standard_GA': _plain_artifact_name,
    'particle_swarm': _plain_artifact_name,
}

# Check that the result artifact of every (method, benchmark) pair exists
# before anything is loaded; the message names the missing file.
# `isfile` (rather than `exists`) also rejects a directory at that path.
for d, root_dir in root_dirs.items():
    for fn in benchmarks:
        pth = os.path.join(root_dir, filename_generators[d](fn.__name__))
        assert os.path.isfile(pth), (
            f'Missing artifact for method {d!r} on benchmark {fn.__name__!r}: {pth}'
        )

In [12]:
# One row per method (in `root_dirs` order). Each row concatenates, in
# `benchmarks` order, the N_EXPERIMENTS per-seed best-fitness values.
# The row count comes from `root_dirs` because that is the dict enumerated below.
comparison_matrix = np.zeros(shape=(len(root_dirs), N_EXPERIMENTS * len(benchmarks)))

for i, d in enumerate(root_dirs):
    for j, fn in enumerate(benchmarks):
        pth = os.path.join(root_dirs[d], filename_generators[d](fn.__name__))
        # Loading the (log, params) pair stored in the artifact.
        # NOTE: unpickling can execute arbitrary code; only load artifacts
        # produced by trusted runs of this project.
        with open(pth, 'rb') as f:
            log, params = pkl.load(f)

        # Vector of best results: minimum `best_fitness` per seed, rounded to
        # 2 decimals. Selecting the column before aggregating avoids computing
        # the minimum of every other column of the log.
        # NOTE(review): assumes `log` is a pandas DataFrame - confirm.
        best_res = np.round(log.groupby('seed')['best_fitness'].min().to_numpy(), 2)
        # A length-1 vector would silently broadcast over the whole slice,
        # so the expected number of seeds is checked explicitly.
        assert best_res.shape == (N_EXPERIMENTS,), (
            f'{pth}: expected {N_EXPERIMENTS} per-seed results, got shape {best_res.shape}'
        )
        comparison_matrix[i, j*N_EXPERIMENTS: (j+1)*N_EXPERIMENTS] = best_res

In [16]:
from scikit_posthocs import posthoc_nemenyi_friedman
from scipy.stats import wilcoxon

In [15]:
# The matrix is transposed so that rows are blocks (benchmark x seed) and
# columns are the compared methods, i.e. the block-design layout.
# The result is the table of pairwise p-values between methods.
nemenyi_pvalues = posthoc_nemenyi_friedman(comparison_matrix.T)
nemenyi_pvalues

Unnamed: 0,0,1,2,3
0,1.0,0.001,0.808619,0.9
1,0.001,1.0,0.001,0.001
2,0.808619,0.001,1.0,0.716389
3,0.9,0.001,0.716389,1.0


In [17]:
# Rows 0 and 1 follow the insertion order of `root_dirs`:
# 'efga' first, 'gendered_FGA' second.
efga_best = comparison_matrix[0]
gendered_best = comparison_matrix[1]
# One-sided paired test: are the first sample's values lower than the second's?
wilcoxon(efga_best, gendered_best, alternative='less')

WilcoxonResult(statistic=1.0, pvalue=2.007172505030583e-18)

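The Nemenyi post-hoc test shows that gendered FGA differs significantly from each of the other three methods (p = 0.001 in every pairwise comparison), and the one-sided Wilcoxon test confirms that EFGA reaches significantly lower best-fitness values than gendered FGA. We can therefore conclude that gendered FGA is inferior in performance to the other methods. For the remaining three methods (EFGA, standard GA and particle swarm), no pairwise difference is statistically significant (all p > 0.7), so their results can be considered comparable.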

In [22]:
import pandas as pd
# One column per method (named after the `root_dirs` keys), one row per
# column of `comparison_matrix`.
method_names = list(root_dirs)
records = pd.DataFrame(comparison_matrix.T, columns=method_names)
# Placeholder column, initialised to None for every row.
records['function'] = None

for i, fn in benchmarks:
    