In [61]:
import json
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

# Load per-run cache-discrepancy results into one tidy DataFrame.
#
# Layout walked below: <data_folder>/<sub-folder>/<file>.json
# Each JSON file is read for the keys 'function_id', 'algorithm' -> 'name',
# and 'scenarios' (each scenario has 'dimension' and a list of 'runs').
data_folder = "data"

# Only sub-directories are traversed: stray files (zip archives, OS metadata,
# notes) are skipped instead of crashing os.listdir() further down.
# Sorting makes the row order reproducible; os.listdir() order is arbitrary.
names = sorted(
    entry
    for entry in os.listdir(data_folder)
    if not entry.endswith("zip")
    and os.path.isdir(os.path.join(data_folder, entry))
)

discrepancy_data = []
for name in names:
    path = os.path.join(data_folder, name)
    functions = sorted(x for x in os.listdir(path) if x.endswith("json"))
    for filename in functions:
        filepath = os.path.join(path, filename)

        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(filepath, encoding="utf-8") as f:
            payload = json.load(f)

        # The algorithm name is a per-file property, so parse it once here
        # rather than once per scenario. The token after the last "-" is taken
        # as the cache size when it is a decimal number; otherwise 0 is stored.
        # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
        alg_name = payload['algorithm']['name']
        cache_token = alg_name.rpartition("-")[2]
        cache_size = int(cache_token) if cache_token.isdecimal() else 0

        for scen in payload['scenarios']:
            # Runs are numbered from 1 within each scenario.
            for run_id, run in enumerate(scen['runs'], 1):
                discrepancy_data.append((
                    payload['function_id'],
                    alg_name,
                    scen['dimension'],
                    run_id,
                    run['instance'],
                    run['cache_discrepancy'],
                    run['best']['evals'],
                    run['best']['y'],
                    cache_size,
                ))

# One row per run; later cells query this frame by column name.
data = pd.DataFrame(
    discrepancy_data,
    columns=["fid", "alg", "dimension",
             "run", "instance", "discrepancy", "evals", "y", "cache_size"],
)

In [62]:
# Mean of the strictly positive discrepancies per (function id, dimension),
# one column per algorithm variant that ran with a cache size of 16.
# The filter uses the numeric `cache_size` column instead of matching the
# algorithm-name suffix '16', which would also select sizes such as 116 or 216.
(
    data
    .query("discrepancy > 0 and cache_size == 16")
    .groupby(["fid", "dimension", "alg"])["discrepancy"]
    .mean()
    .unstack()
)

Unnamed: 0_level_0,alg,CMA-ES-GAUSSIAN-cache-16,CMA-ES-GAUSSIAN-mirror-cache-16,CMA-ES-GAUSSIAN-mirror-orthogonal-cache-16,CMA-ES-GAUSSIAN-orthogonal-cache-16,CMA-ES-HALTON-cache-16,CMA-ES-HALTON-mirror-cache-16,CMA-ES-HALTON-mirror-orthogonal-cache-16,CMA-ES-HALTON-orthogonal-cache-16,CMA-ES-SOBOL-cache-16,CMA-ES-SOBOL-mirror-cache-16,CMA-ES-SOBOL-mirror-orthogonal-cache-16,CMA-ES-SOBOL-orthogonal-cache-16
fid,dimension,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,2,0.082297,0.072810,0.071169,0.128551,0.040733,0.048507,0.068948,0.108047,0.040026,0.057238,0.075914,0.116462
1,5,0.038380,0.037492,0.033132,0.048542,0.031217,0.034598,0.031740,0.043563,0.027786,0.033624,0.031799,0.046037
1,10,0.007219,0.007372,0.006985,0.008257,0.006921,0.007339,0.007137,0.007813,0.004463,0.004476,0.006745,0.008249
1,20,0.000204,0.000220,0.000169,0.000227,0.000169,0.000168,0.000149,0.000181,0.000069,0.000064,0.000208,0.000204
2,2,0.085647,0.076509,0.073881,0.132154,0.040077,0.048260,0.070015,0.107302,0.040026,0.054943,0.070911,0.116874
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,20,0.000159,0.000163,0.000188,0.000248,0.000208,0.000191,0.000183,0.000231,0.000069,0.000064,0.000213,0.000231
24,2,0.095480,0.068196,0.073873,0.135084,0.041112,0.046892,0.068374,0.106937,0.040026,0.054226,0.070150,0.117341
24,5,0.040811,0.037730,0.033016,0.049952,0.031405,0.034302,0.031857,0.042401,0.027786,0.033363,0.031740,0.043674
24,10,0.007248,0.007033,0.006904,0.008642,0.006593,0.006623,0.007240,0.007881,0.004463,0.004489,0.006977,0.007797


In [None]:
# Mean of the strictly positive discrepancies for the algorithm variants whose
# name contains neither 'mirror' nor 'orthogonal'.
# After unstack().T the rows are algorithm names (sorted lexicographically)
# and the columns are dimensions.
view = data.query(
    "discrepancy > 0 and not alg.str.contains('mirror') and not alg.str.contains('orthogonal')"
).groupby(["dimension", "alg"])['discrepancy'].mean().unstack().T.sort_index()

# Draw on an explicit Axes so the figure can be titled and reused.
# LogNorm puts the colour scale on a log axis; it is safe here because the
# query above keeps only positive discrepancies.
fig, ax = plt.subplots()
sns.heatmap(
    view,
    annot=True,
    norm=LogNorm(),
    cbar_kws={"label": "mean discrepancy (log colour scale)"},
    ax=ax,
)
ax.set_title("Mean cache discrepancy by algorithm and dimension")
plt.show()