 DAEDALUS – Distributed and Automated Evolutionary Deep Architecture Learning with Unprecedented Scalability

This research code was developed as part of the research programme Open Technology Programme with project number 18373, which was financed by the Dutch Research Council (NWO), Elekta, and Ortec Logiqcare.

Project leaders: Peter A.N. Bosman, Tanja Alderliesten
Researchers: Alex Chebykin, Arthur Guijt, Vangelis Kostoulas
Main code developer: Arthur Guijt

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

from scipy.stats import mannwhitneyu, shapiro

# from bokeh.plotting import figure, show
# from bokeh.models import ColumnDataSource, CustomJS, Button
# from bokeh.io import output_notebook
# from bokeh.transform import factor_cmap, linear_cmap
# from bokeh.palettes import Category20
# from bokeh.layouts import column
# output_notebook()
from itertools import repeat, cycle

%load_ext rpy2.ipython

In [2]:
# https://cloud.r-project.org can be used to install Cran packages.

In [3]:
%%R
library(ggplot2)
library(dplyr)

R[write to console]: 
Attaching package: ‘dplyr’


R[write to console]: The following objects are masked from ‘package:stats’:

    filter, lag


R[write to console]: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [4]:
date_of_processing = "2023-02-10"

# Loading & Configuration

In [5]:
# experiment_path = Path("./results/aa5b6af/")
# results = pd.read_csv(experiment_path / "archive.csv.gz")
results = pd.concat([
    pd.read_csv("./results/2022-12-13-scalability-gomea-trap-re-149d6ac-archive.csv.gz"),
    pd.read_csv("./results/2022-12-13-scalability-ecga-trap-re-149d6ac-2aa20e4-archive.csv.gz"),
    pd.read_csv("./results/2022-12-13-scalability-ga-trap-re-149d6ac-archive.csv.gz"),
    # gomea - imm
    pd.read_csv("./results/2023-01-11-imm-gomea-trap-f418675-archive.csv.gz"),
])

remapping_algorithm_type = {
    "sync": "ecga-sync",
    "async-throttled": "ecga-async-th",
}
results["algorithm_type"] = results["algorithm_type"].replace(remapping_algorithm_type)

problem_columns = ["problem", "l", "runtime_type"]
approach_columns = ["replacement_strategy", "algorithm_type", "tournament_size"]
run_columns = ["seed"]

In [6]:
results

Unnamed: 0.2,Unnamed: 0,#evaluations,simulation time (s),objectives,archive ordinal,archive ordinals removed,genotype (categorical),problem,seed,l,vtr,instance,replacement_strategy,tournament_size,runtime_type,algorithm_type,population_size,Unnamed: 0.1
0,0,44.0,38.62,-21.0,1.0,,0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
1,1,44.0,48.52,-22.0,2.0,1.0,0 0 1 0 0 1 0 0 1 1 0 1 0 1 0 0 1 0 0 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
2,2,44.0,52.48,-26.0,3.0,2.0,0 0 0 0 0 0 1 1 0 0 1 0 1 0 1 0 0 1 1 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
3,3,421.0,441.62,-27.0,4.0,3.0,0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 1 1 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
4,4,475.0,490.14,-28.0,5.0,4.0,0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 1 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37635,20,10778.0,1522.00,-46.0,21.0,20.0,1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,
37636,21,10853.0,1532.10,-47.0,22.0,21.0,1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 1 0 0 0 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,
37637,22,10980.0,1545.00,-48.0,23.0,22.0,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,
37638,23,10988.0,1548.16,-49.0,24.0,23.0,0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,


In [33]:
def relabel(df):
    # Relabeling
    # - Approach
    df["approach"] = "GA"
    df.loc[df["algorithm_type"].str.contains("ecga"), "approach"] = "ECGA"
    df.loc[df["algorithm_type"].str.contains("gomea"), "approach"] = "GOMEA"
    df.loc[(df["algorithm_type"].str.contains("gomea") & df["algorithm_type"].str.contains("immidiate")), "approach"] = "GOMEA-IMM"
    df["approach"] = pd.Categorical(df["approach"], categories=["GOMEA", "GOMEA-IMM", "ECGA", "GA"], ordered=True)
    # - selection
    df["selection"] = "-"
    df.loc[df["replacement_strategy"] == 5, "selection"] = "steady-state"
    df.loc[df["replacement_strategy"] == 6, "selection"] = "generational"
    df["selection"] = pd.Categorical(df["selection"], categories=["-", "steady-state", "generational"], ordered=True)
    # - CX
    df["cx"] = "-"
    df.loc[df["algorithm_type"].str.contains("subfunction"), "cx"] = "SFX"
    df.loc[df["algorithm_type"].str.contains("uniform"), "cx"] = "UX"
    df.loc[df["algorithm_type"].str.contains("twopoint"), "cx"] = "TPX"
    df["cx"] = pd.Categorical(df["cx"], categories=["-", "UX", "TPX", "SFX"], ordered=True)
    # - (a)sync 
    df["(a)sync"] = "s"
    df.loc[df["algorithm_type"].str.contains("async"), "(a)sync"] = "a"
    df["(a)sync"] = pd.Categorical(df["(a)sync"], categories=["a", "s"], ordered=True)
    # - timing distribution
    df["timing"] = pd.Categorical(df["runtime_type"].replace({
        "cheap-ones-100": "100:1",
        "cheap-ones-10": "10:1",
        "cheap-ones": "2:1",
        "constant": "1:1",
        "expensive-ones": "1:2",
        "expensive-ones-10": "1:10",
        "expensive-ones-100": "1:100",
    }), categories = ["100:1", "10:1", "2:1", "1:1", "1:2", "1:10", "1:100"], ordered=True)

    # 

In [19]:
# relabel(results)

In [20]:
results[~np.isnan(results["population_size"])]

Unnamed: 0.2,Unnamed: 0,#evaluations,simulation time (s),objectives,archive ordinal,archive ordinals removed,genotype (categorical),problem,seed,l,vtr,instance,replacement_strategy,tournament_size,runtime_type,algorithm_type,population_size,Unnamed: 0.1
0,0,44.0,38.62,-21.0,1.0,,0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
1,1,44.0,48.52,-22.0,2.0,1.0,0 0 1 0 0 1 0 0 1 1 0 1 0 1 0 0 1 0 0 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
2,2,44.0,52.48,-26.0,3.0,2.0,0 0 0 0 0 0 1 1 0 0 1 0 1 0 1 0 0 1 1 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
3,3,421.0,441.62,-27.0,4.0,3.0,0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 1 1 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
4,4,475.0,490.14,-28.0,5.0,4.0,0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 1 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37635,20,10778.0,1522.00,-46.0,21.0,20.0,1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,
37636,21,10853.0,1532.10,-47.0,22.0,21.0,1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 1 0 0 0 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,
37637,22,10980.0,1545.00,-48.0,23.0,22.0,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,
37638,23,10988.0,1548.16,-49.0,24.0,23.0,0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,


In [21]:
results[np.isnan(results["population_size"])]

Unnamed: 0.2,Unnamed: 0,#evaluations,simulation time (s),objectives,archive ordinal,archive ordinals removed,genotype (categorical),problem,seed,l,vtr,instance,replacement_strategy,tournament_size,runtime_type,algorithm_type,population_size,Unnamed: 0.1
15,0,,,,,,,deceptive trap,111,50,50,instances/trap__l_50__k_5.txt,5,4,expensive-ones-10,ga-uniform-async,,
16,0,,,,,,,deceptive trap,92,50,50,instances/trap__l_50__k_5.txt,5,4,cheap-ones-10,ga-uniform-sync,,
56,0,,,,,,,deceptive trap,44,50,50,instances/trap__l_50__k_5.txt,5,4,cheap-ones,ga-uniform-sync,,
78,0,,,,,,,deceptive trap,49,50,50,instances/trap__l_50__k_5.txt,5,4,cheap-ones,ga-uniform-sync,,
152,0,,,,,,,deceptive trap,91,50,50,instances/trap__l_50__k_5.txt,6,4,expensive-ones,ga-uniform-async,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109618,0,,,,,,,deceptive trap,89,50,50,instances/trap__l_50__k_5.txt,5,4,cheap-ones-100,ga-uniform-async,,
109720,0,,,,,,,deceptive trap,113,50,50,instances/trap__l_50__k_5.txt,5,4,expensive-ones,ga-uniform-async,,
109721,0,,,,,,,deceptive trap,140,50,50,instances/trap__l_50__k_5.txt,6,4,expensive-ones,ga-uniform-async,,
109741,0,,,,,,,deceptive trap,75,50,50,instances/trap__l_50__k_5.txt,6,4,cheap-ones-10,ga-uniform-sync,,


In [22]:
results

Unnamed: 0.2,Unnamed: 0,#evaluations,simulation time (s),objectives,archive ordinal,archive ordinals removed,genotype (categorical),problem,seed,l,vtr,instance,replacement_strategy,tournament_size,runtime_type,algorithm_type,population_size,Unnamed: 0.1
0,0,44.0,38.62,-21.0,1.0,,0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
1,1,44.0,48.52,-22.0,2.0,1.0,0 0 1 0 0 1 0 0 1 1 0 1 0 1 0 0 1 0 0 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
2,2,44.0,52.48,-26.0,3.0,2.0,0 0 0 0 0 0 1 1 0 0 1 0 1 0 1 0 0 1 1 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
3,3,421.0,441.62,-27.0,4.0,3.0,0 0 0 0 0 0 0 1 0 0 1 0 1 0 1 0 0 1 1 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
4,4,475.0,490.14,-28.0,5.0,4.0,0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 1 0 0 1 0 ...,deceptive trap,54,50,50,instances/trap__l_50__k_5.txt,0,4,expensive-ones-100,gomea-sync,44.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37635,20,10778.0,1522.00,-46.0,21.0,20.0,1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,
37636,21,10853.0,1532.10,-47.0,22.0,21.0,1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 1 1 1 1 0 0 0 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,
37637,22,10980.0,1545.00,-48.0,23.0,22.0,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,
37638,23,10988.0,1548.16,-49.0,24.0,23.0,0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,deceptive trap,141,50,50,instances/trap__l_50__k_5.txt,0,4,cheap-ones-10,gomea-immidiate-sync,64.0,


In [23]:
# Compute the hitting evaluation
# def select_hit(df):
#     return df.iloc[df["archive ordinal"].argmax()]
# results_hit = results.groupby(problem_columns + approach_columns + run_columns).apply(select_hit).reset_index(drop=True).fillna(np.inf)
results_hit = results.sort_values("archive ordinal").groupby(problem_columns + approach_columns + run_columns).last().reset_index()#.fillna(np.inf)

In [24]:
results_hit

Unnamed: 0.2,problem,l,runtime_type,replacement_strategy,algorithm_type,tournament_size,seed,Unnamed: 0,#evaluations,simulation time (s),objectives,archive ordinal,archive ordinals removed,genotype (categorical),vtr,instance,population_size,Unnamed: 0.1
0,deceptive trap,50,cheap-ones,0,gomea-async,4,42,20,5940.0,451.68,-50.0,21.0,20.0,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,50,instances/trap__l_50__k_5.txt,24.0,
1,deceptive trap,50,cheap-ones,0,gomea-async,4,43,33,16026.0,495.96,-50.0,34.0,33.0,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,50,instances/trap__l_50__k_5.txt,60.0,
2,deceptive trap,50,cheap-ones,0,gomea-async,4,44,30,8183.0,471.86,-50.0,31.0,30.0,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,50,instances/trap__l_50__k_5.txt,32.0,
3,deceptive trap,50,cheap-ones,0,gomea-async,4,45,28,20539.0,501.84,-50.0,29.0,28.0,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,50,instances/trap__l_50__k_5.txt,76.0,
4,deceptive trap,50,cheap-ones,0,gomea-async,4,46,31,7594.0,500.56,-50.0,32.0,31.0,1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...,50,instances/trap__l_50__k_5.txt,28.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13995,deceptive trap,50,expensive-ones-100,6,ga-uniform-sync,4,137,0,,,,,,,50,instances/trap__l_50__k_5.txt,,
13996,deceptive trap,50,expensive-ones-100,6,ga-uniform-sync,4,138,0,,,,,,,50,instances/trap__l_50__k_5.txt,,
13997,deceptive trap,50,expensive-ones-100,6,ga-uniform-sync,4,139,0,,,,,,,50,instances/trap__l_50__k_5.txt,,
13998,deceptive trap,50,expensive-ones-100,6,ga-uniform-sync,4,140,0,,,,,,,50,instances/trap__l_50__k_5.txt,,


# Required population size

In [26]:
results_hit_f = results_hit.copy()# [(results_hit["replacement_strategy"] == 5) | (results_hit["replacement_strategy"] == 6)]
results_hit_f["population_size"] = np.where(np.isfinite(results_hit_f["population_size"]), results_hit_f["population_size"], np.inf)
approach_columns_r = approach_columns#["algorithm_type", "replacement_strategy"]
population_size_required = results_hit_f.groupby(problem_columns + approach_columns_r)["population_size"].median()
pt = population_size_required.rename("minimally required population size").reset_index().copy()
relabel(pt)
# Drop GOMEA-IMM sync - it is identical in behavior.
pt = pt[(pt["approach"] != "GOMEA-IMM") | (pt["(a)sync"] != "s")]
str_c = pt["(a)sync"].astype(str)
str_c[(pt["approach"] == "GOMEA-IMM") & (pt["(a)sync"] == "a")] = "a/i"
str_c[(pt["approach"] == "GOMEA") & (pt["(a)sync"] == "a")] = "a/e"
pt["(a)sync"] = pd.Categorical(str_c, ["a", "a/e", "a/i", "s"], ordered=True)
pt["approach"] = pt["approach"].replace({"GOMEA-IMM": "GOMEA"})

pt = pt\
    .replace({np.nan: np.inf})\
    .pivot_table(index=["timing"], columns=["selection", "approach", "cx", "(a)sync"], values="minimally required population size", sort=True)\
    .sort_index()\
    .replace({np.inf: np.nan})

tab = pt.style.format(precision=0, na_rep="-")
# tab.to_excel(f"{date_of_processing}-trap-popsize.xlsx")
tab


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["(a)sync"] = pd.Categorical(str_c, ["a", "a/e", "a/i", "s"], ordered=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["approach"] = df["approach"].replace({"GOMEA-IMM": "GOMEA"})


selection,-,-,-,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,generational,generational,generational,generational,generational,generational,generational,generational
approach,GOMEA,GOMEA,GOMEA,ECGA,ECGA,GA,GA,GA,GA,GA,GA,ECGA,ECGA,GA,GA,GA,GA,GA,GA
cx,-,-,-,-,-,UX,UX,TPX,TPX,SFX,SFX,-,-,UX,UX,TPX,TPX,SFX,SFX
(a)sync,a/e,a/i,s,a,s,a,s,a,s,a,s,a,s,a,s,a,s,a,s
timing,Unnamed: 1_level_4,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4
100:1,44,44,44,1944,4216,-,-,192,262,102,132,1906,4046,-,-,434,546,176,190
10:1,44,44,44,2038,4216,-,-,196,262,102,132,2022,4046,-,-,450,546,174,190
2:1,44,44,44,2166,4216,-,-,208,262,104,132,2042,4046,-,-,512,546,164,190
1:1,44,44,44,4110,4110,-,-,264,264,130,130,4052,4052,-,-,496,496,182,182
1:2,44,44,44,2110,4118,-,-,256,244,120,122,2060,4050,-,-,482,508,168,188
1:10,44,44,44,2394,4118,-,-,286,244,132,122,2316,4050,-,-,584,508,198,188
1:100,52,44,44,2562,4118,-,-,288,244,128,122,2534,4050,-,-,634,508,234,188


In [27]:
results_hit_f = results_hit.copy()# [(results_hit["replacement_strategy"] == 5) | (results_hit["replacement_strategy"] == 6)]
approach_columns_r = approach_columns#["algorithm_type", "replacement_strategy"]
results_hit_f["population_size"] = np.where(np.isfinite(results_hit_f["population_size"]), results_hit_f["population_size"], np.inf)
population_size_required = results_hit_f.groupby(problem_columns + approach_columns_r)["population_size"].quantile([0.25, 0.50, 0.75])
population_size_required
# table = population_size_required.reset_index().sort_values(approach_columns_r).pivot(index=problem_columns, columns=approach_columns_r).style.format(precision=0)
pt = population_size_required.rename("minimally required population size").reset_index().rename(columns={"level_6": "quantile"}).copy()
pt["quantile"] = [f"{q:.2f}" for q in pt["quantile"]]
relabel(pt)
# Drop GOMEA-IMM sync - it is identical in behavior.
pt = pt[(pt["approach"] != "GOMEA-IMM") | (pt["(a)sync"] != "s")]
str_c = pt["(a)sync"].astype(str)
str_c[(pt["approach"] == "GOMEA-IMM") & (pt["(a)sync"] == "a")] = "a/i"
str_c[(pt["approach"] == "GOMEA") & (pt["(a)sync"] == "a")] = "a/e"
pt["(a)sync"] = pd.Categorical(str_c, ["a", "a/e", "a/i", "s"], ordered=True)
pt["approach"] = pt["approach"].replace({"GOMEA-IMM": "GOMEA"})

pt = pt\
    .pivot_table(index=["timing"], columns=["selection", "approach", "cx", "(a)sync", "quantile"], values="minimally required population size", sort=False)\
    .sort_index()\
    .replace({np.inf: np.nan})

tab = pt.style.format(precision=0, na_rep="-")
tab.to_excel(f"{date_of_processing}-deceptivetrap-popsize-extended.xlsx")
tab

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["(a)sync"] = pd.Categorical(str_c, ["a", "a/e", "a/i", "s"], ordered=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["approach"] = df["approach"].replace({"GOMEA-IMM": "GOMEA"})


selection,-,-,-,-,-,-,-,-,-,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,steady-state,generational,generational,generational,generational,generational,generational,generational,generational,generational,generational,generational,generational,generational,generational,generational,generational,generational,generational
approach,GOMEA,GOMEA,GOMEA,GOMEA,GOMEA,GOMEA,GOMEA,GOMEA,GOMEA,ECGA,ECGA,ECGA,ECGA,ECGA,ECGA,GA,GA,GA,GA,GA,GA,GA,GA,GA,GA,GA,GA,ECGA,ECGA,ECGA,ECGA,ECGA,ECGA,GA,GA,GA,GA,GA,GA,GA,GA,GA,GA,GA,GA
cx,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,TPX,TPX,TPX,TPX,TPX,TPX,SFX,SFX,SFX,SFX,SFX,SFX,-,-,-,-,-,-,TPX,TPX,TPX,TPX,TPX,TPX,SFX,SFX,SFX,SFX,SFX,SFX
(a)sync,a/e,a/e,a/e,a/i,a/i,a/i,s,s,s,a,a,a,s,s,s,a,a,a,s,s,s,a,a,a,s,s,s,a,a,a,s,s,s,a,a,a,s,s,s,a,a,a,s,s,s
quantile,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75,0.25,0.50,0.75
timing,Unnamed: 1_level_5,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5,Unnamed: 16_level_5,Unnamed: 17_level_5,Unnamed: 18_level_5,Unnamed: 19_level_5,Unnamed: 20_level_5,Unnamed: 21_level_5,Unnamed: 22_level_5,Unnamed: 23_level_5,Unnamed: 24_level_5,Unnamed: 25_level_5,Unnamed: 26_level_5,Unnamed: 27_level_5,Unnamed: 28_level_5,Unnamed: 29_level_5,Unnamed: 30_level_5,Unnamed: 31_level_5,Unnamed: 32_level_5,Unnamed: 33_level_5,Unnamed: 34_level_5,Unnamed: 35_level_5,Unnamed: 36_level_5,Unnamed: 37_level_5,Unnamed: 38_level_5,Unnamed: 39_level_5,Unnamed: 40_level_5,Unnamed: 41_level_5,Unnamed: 42_level_5,Unnamed: 43_level_5,Unnamed: 44_level_5,Unnamed: 45_level_5
100:1,36,44,53,36,44,53,36,44,53,1787,1944,2100,4024,4216,4655,152,192,241,216,262,304,80,102,128,108,132,158,1753,1906,2085,3759,4046,4380,316,434,537,403,546,684,128,176,204,152,190,224
10:1,36,44,53,36,44,53,36,44,53,1909,2038,2136,4024,4216,4655,160,196,238,216,262,304,87,102,128,108,132,158,1824,2022,2173,3759,4046,4380,319,450,558,403,546,684,147,174,212,152,190,224
2:1,36,44,53,36,44,53,36,44,53,2014,2166,2365,4024,4216,4655,160,208,246,216,262,304,76,104,116,108,132,158,1915,2042,2180,3759,4046,4380,422,512,661,403,546,684,128,164,228,152,190,224
1:1,36,44,53,36,44,53,36,44,53,3894,4110,4612,3894,4110,4612,205,264,329,205,264,329,96,130,161,96,130,161,3712,4052,4269,3712,4052,4269,435,496,646,435,496,646,143,182,248,143,182,248
1:2,36,44,56,36,44,53,36,44,53,1975,2110,2313,3955,4118,4632,206,256,328,192,244,304,96,120,148,96,122,148,1920,2060,2222,3823,4050,4492,320,482,598,384,508,637,128,168,218,152,188,220
1:10,36,44,56,36,44,53,36,44,53,2226,2394,2593,3955,4118,4632,243,286,341,192,244,304,104,132,152,96,122,148,2149,2316,2528,3823,4050,4492,451,584,772,384,508,637,155,198,240,152,188,220
1:100,40,52,64,36,44,53,36,44,53,2396,2562,2756,3955,4118,4632,239,288,356,192,244,304,100,128,156,96,122,148,2304,2534,2693,3823,4050,4492,507,634,863,384,508,637,180,234,268,152,188,220


In [41]:
# Compute normalization transformation for 1:1, and apply it to everything.
# Sidenote: the transformed value did not 
timing_ref = "1:1"
ps = results_hit_f.copy()
relabel(ps)
# Drop GOMEA-IMM sync - it is identical in behavior.
ps = ps[(ps["approach"] != "GOMEA-IMM") | (ps["(a)sync"] != "s")]
str_c = ps["(a)sync"].astype(str)
str_c[(ps["approach"] == "GOMEA-IMM") & (ps["(a)sync"] == "a")] = "a/i"
str_c[(ps["approach"] == "GOMEA") & (ps["(a)sync"] == "a")] = "a/e"
ps["(a)sync"] = pd.Categorical(str_c, ["a", "a/e", "a/i", "s"], ordered=True)
ps["approach"] = ps["approach"].replace({"GOMEA-IMM": "GOMEA"})
#
c = ["selection", "approach", "cx", "(a)sync"]
ranges = ps[ps["timing"] == "1:1"].groupby(c)["population_size"].agg([np.median, np.mean, np.std, np.nanmin, np.nanmax]).reset_index()
ps = pd.merge(ps, ranges, on=c)
# 1: Normalize to unit normal - loses scale
# ps["normalized population size"] = (ps["population_size"] - ps["mean"]) / ps["std"]
ps["normalized population size"] = (ps["population_size"] - ps["median"]) / ps["median"]
# "normalized population size"
columns_of_interest = ["problem", "timing", "population_size", "approach", "cx", "(a)sync", "selection"]
ps = ps[columns_of_interest].sort_values("timing")
ps.to_csv(f"{date_of_processing}-summary-trap.csv.gz")

In [40]:
%%R -i ps -w 400 -h 300
ps %>%
    mutate(
  approach = gsub("--", "", paste(approach, cx, sep = "-"))
) %>%
filter(`(a)sync` == "a") %>%
    ggplot(aes(x=`timing`, y=`normalized population size`, color=`approach`)) +
    geom_boxplot() +
    theme_bw()

Error in `geom_boxplot()`:
! Problem while computing aesthetics.
ℹ Error occurred in the 1st layer.
Caused by error in `FUN()`:
! object 'normalized population size' not found
Run `rlang::last_error()` to see where the error occurred.


RRuntimeError: Error in geom_boxplot() : 
ℹ Error occurred in the 1st layer.
Caused by error in `FUN()`:
! object 'normalized population size' not found


In [52]:
# Statistical Test against differently timed variants
# use_continuity = True
# problem_columns_not_runtime = [a for a in problem_columns if a != "runtime_type"]
# samples_per_config = results_hit_f.groupby(problem_columns + approach_columns)["population_size"].agg(list).reset_index()
# runtime_pairings = pd.merge(samples_per_config, samples_per_config, on=problem_columns_not_runtime + approach_columns)
# # runtime_pairings["statistical_test"] = runtime_pairings.apply(lambda r: mannwhitneyu(r["population_size_x"], r["population_size_y"]), axis=1)
# runtime_pairings["statistical_test"] = runtime_pairings.apply(lambda r: mannwhitneyu(r["population_size_x"], r["population_size_y"], use_continuity=use_continuity).pvalue, axis=1)
# columns_to_keep = problem_columns_not_runtime + approach_columns + ["runtime_type_x", "runtime_type_y", "statistical_test"]
# paired_test_result = runtime_pairings[columns_to_keep].pivot(index=problem_columns_not_runtime + approach_columns + ["runtime_type_x"], columns=["runtime_type_y"]).style
# # paired_test_result.to_excel(f"{date_of_processing}-nklandscape-stattest.xlsx")
# paired_test_result

In [52]:
# Statistical Test against differently timed variants
p = 0.05
use_continuity = True
problem_columns_not_runtime = [a for a in problem_columns if a != "runtime_type"]
samples_per_config = results_hit_f.groupby(problem_columns + approach_columns)["population_size"].agg(list).reset_index()
relabel(samples_per_config)
samples_per_config = samples_per_config[(samples_per_config["approach"] != "GOMEA-IMM") | (samples_per_config["(a)sync"] != "s")]
str_c = samples_per_config["(a)sync"].astype(str)
str_c[(samples_per_config["approach"] == "GOMEA-IMM") & (samples_per_config["(a)sync"] == "a")] = "a/i"
str_c[(samples_per_config["approach"] == "GOMEA") & (samples_per_config["(a)sync"] == "a")] = "a/e"
samples_per_config["(a)sync"] = pd.Categorical(str_c, ["a", "a/e", "a/i", "s"], ordered=True)
samples_per_config["approach"] = samples_per_config["approach"].replace({"GOMEA-IMM": "GOMEA"})

samples_per_config["timing"] = pd.Categorical(samples_per_config["timing"], categories=["100:1", "10:1", "2:1", "1:1", "1:2", "1:10", "1:100"])
relabeled_approach_columns = ["selection", "approach", "cx", "(a)sync"]
runtime_pairings = pd.merge(samples_per_config, samples_per_config, on=problem_columns_not_runtime + relabeled_approach_columns)
# runtime_pairings
# runtime_pairings["statistical_test"] = runtime_pairings.apply(lambda r: mannwhitneyu(r["population_size_x"], r["population_size_y"]), axis=1)
runtime_pairings[["U", "p_value"]]  = runtime_pairings.apply(lambda r: pd.Series(mannwhitneyu(r["population_size_x"], r["population_size_y"], use_continuity=use_continuity)), axis=1)
columns_to_keep = problem_columns_not_runtime + relabeled_approach_columns + ["timing_x", "timing_y", "U", "p_value"]
paired_test_result = runtime_pairings[columns_to_keep].pivot_table(index=relabeled_approach_columns + ["timing_x"], columns=["timing_y"], values=["p_value"])
# paired_test_result.to_excel(f"{date_of_processing}-nklandscape-stattest.xlsx")
paired_test_result_f = paired_test_result.style.format("{:.3f}")

def mark_significance_row(x):
    # x is a row
    o = np.argsort(x)
    o_inv = np.argsort(o)
    # test against 1/i - though in this case one of the samples is
    # ourselves - exclude that from the count.
    rcs = np.concatenate([np.flip(np.arange(len(x) - 1) + 1), [1]])
    ps = p / rcs
    s = x[o] < ps
    s = np.cumsum(s) == np.arange(len(x)) + 1
    return np.where(s[o_inv], "color: red", "")

# paired_test_result_f = paired_test_result_f.applymap(lambda x: np.where(x < 0.05, "color: red", ""))
paired_test_result_f.apply(mark_significance_row, axis=1)
paired_test_result_f.to_excel(f"{date_of_processing}-deceptivetrap-stattest-p.xlsx")

paired_test_result_U = runtime_pairings[columns_to_keep].pivot_table(index=relabeled_approach_columns + ["timing_x"], columns=["timing_y"], values=["U"])
paired_test_result_U_f = paired_test_result_U.style.format("{:i}")
paired_test_result_U_f.to_excel(f"{date_of_processing}-deceptivetrap-stattest-U.xlsx")

paired_test_result_f

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,p_value,p_value,p_value,p_value,p_value,p_value,p_value
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,timing_y,100:1,10:1,2:1,1:1,1:2,1:10,1:100
selection,approach,cx,(a)sync,timing_x,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
-,GOMEA,-,a/e,100:1,1.0,1.0,1.0,1.0,0.692,0.609,0.001
-,GOMEA,-,a/e,10:1,1.0,1.0,1.0,1.0,0.692,0.609,0.001
-,GOMEA,-,a/e,2:1,1.0,1.0,1.0,1.0,0.692,0.609,0.001
-,GOMEA,-,a/e,1:1,1.0,1.0,1.0,1.0,0.692,0.609,0.001
-,GOMEA,-,a/e,1:2,0.692,0.692,0.692,0.692,1.0,0.921,0.004
-,GOMEA,-,a/e,1:10,0.609,0.609,0.609,0.609,0.921,1.0,0.003
-,GOMEA,-,a/e,1:100,0.001,0.001,0.001,0.001,0.004,0.003,1.0
-,GOMEA,-,a/i,100:1,1.0,1.0,1.0,1.0,1.0,1.0,0.979
-,GOMEA,-,a/i,10:1,1.0,1.0,1.0,1.0,1.0,1.0,0.979
-,GOMEA,-,a/i,2:1,1.0,1.0,1.0,1.0,1.0,1.0,0.979


In [96]:
success_rate = (results_hit["population_size"] != np.inf).groupby([results_hit[x] for x in (problem_columns + approach_columns_r)]).mean()
table = success_rate.reset_index().sort_values(approach_columns_r).pivot(index=problem_columns, columns=approach_columns_r).style.format(precision=3)
table.to_excel(f"{date_of_processing}-nklandscape-success-rate.xlsx")
table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size
Unnamed: 0_level_1,Unnamed: 1_level_1,replacement_strategy,0,0,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6
Unnamed: 0_level_2,Unnamed: 1_level_2,algorithm_type,gomea-async,gomea-sync,ecga-async-th,ecga-sync,ga-subfunction-async,ga-subfunction-sync,ga-twopoint-async,ga-twopoint-sync,ga-uniform-async,ga-uniform-sync,ecga-async-th,ecga-sync,ga-subfunction-async,ga-subfunction-sync,ga-twopoint-async,ga-twopoint-sync,ga-uniform-async,ga-uniform-sync
Unnamed: 0_level_3,Unnamed: 1_level_3,tournament_size,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4
problem,l,runtime_type,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4
nk-landscape,40,cheap-optimum,1.0,1.0,1.0,1.0,1.0,0.98,1.0,1.0,0.83,0.84,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
nk-landscape,40,cheap-optimum-10,1.0,1.0,1.0,1.0,1.0,0.96,1.0,1.0,0.98,0.84,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
nk-landscape,40,cheap-optimum-100,1.0,1.0,1.0,1.0,1.0,0.96,1.0,1.0,1.0,0.88,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
nk-landscape,40,constant,1.0,1.0,1.0,1.0,0.85,0.99,1.0,1.0,0.25,0.87,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
nk-landscape,40,expensive-optimum,1.0,1.0,1.0,1.0,0.69,0.96,1.0,1.0,0.08,0.92,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
nk-landscape,40,expensive-optimum-10,1.0,1.0,1.0,1.0,0.59,0.97,1.0,1.0,0.04,0.89,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
nk-landscape,40,expensive-optimum-100,1.0,1.0,1.0,1.0,0.44,0.95,1.0,1.0,0.08,0.92,1.0,1.0,1.0,1.0,1.0,1.0,0.98,1.0


In [16]:
sample_count = results_hit["population_size"].groupby([results_hit[x] for x in (problem_columns + approach_columns_r)]).count()
table = sample_count.reset_index().sort_values(approach_columns_r).pivot(problem_columns, approach_columns_r).style.format(precision=3)
table.to_excel(f"{date_of_processing}-nklandscape-sample-count.xlsx")
table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size,population_size
Unnamed: 0_level_1,Unnamed: 1_level_1,replacement_strategy,0,0,0,3,3,3,3,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6
Unnamed: 0_level_2,Unnamed: 1_level_2,algorithm_type,gomea-async,gomea-sync,kernel-gomea-async,ecga-async-th,ecga-async-th,ecga-sync,ecga-sync,ecga-async-th,ecga-async-th,ecga-sync,ecga-sync,ecga-async-th,ecga-async-th,ecga-sync,ecga-sync,ga-subfunction-async,ga-subfunction-sync,ga-twopoint-async,ga-twopoint-sync,ga-uniform-async,ga-uniform-sync,ga-subfunction-async,ga-subfunction-sync,ga-twopoint-async,ga-twopoint-sync,ga-uniform-async,ga-uniform-sync
Unnamed: 0_level_3,Unnamed: 1_level_3,tournament_size,4,4,4,4,8,4,8,4,8,4,8,4,8,4,8,4,4,4,4,4,4,4,4,4,4,4,4
problem,l,runtime_type,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4,Unnamed: 24_level_4,Unnamed: 25_level_4,Unnamed: 26_level_4,Unnamed: 27_level_4,Unnamed: 28_level_4,Unnamed: 29_level_4
nk-landscape,20,cheap-optimum,100,100,100,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100
nk-landscape,20,cheap-optimum-10,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100
nk-landscape,20,constant,100,100,100,100,100,100,100,100,100,100,100,100,98,100,100,100,100,100,100,100,100,100,100,100,100,100,100
nk-landscape,20,expensive-optimum,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100
nk-landscape,20,expensive-optimum-10,100,100,100,100,100,100,100,100,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100,100,100,100
nk-landscape,40,cheap-optimum,100,100,100,100,99,100,100,100,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100,99,100,100
nk-landscape,40,cheap-optimum-10,100,100,100,100,100,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100,99,100,100,100,100,100
nk-landscape,40,constant,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,99,100,100,100,100,100,100,100,99,100,100
nk-landscape,40,expensive-optimum,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,99,100,100,100
nk-landscape,40,expensive-optimum-10,100,100,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100


# Evaluations required

In [10]:
evaluations_required = results_hit.groupby(problem_columns + approach_columns)["#evaluations"].median()
table = evaluations_required.reset_index().sort_values(approach_columns).pivot(problem_columns, approach_columns).style
table.to_excel(f"{date_of_processing}-nklandscape-evaluations-required-median.xlsx")
table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations,#evaluations
Unnamed: 0_level_1,Unnamed: 1_level_1,replacement_strategy,0,0,0,3,3,3,3,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6
Unnamed: 0_level_2,Unnamed: 1_level_2,algorithm_type,gomea-async,gomea-sync,kernel-gomea-async,ecga-async-th,ecga-async-th,ecga-sync,ecga-sync,ecga-async-th,ecga-async-th,ecga-sync,ecga-sync,ecga-async-th,ecga-async-th,ecga-sync,ecga-sync,ga-subfunction-async,ga-subfunction-sync,ga-twopoint-async,ga-twopoint-sync,ga-uniform-async,ga-uniform-sync,ga-subfunction-async,ga-subfunction-sync,ga-twopoint-async,ga-twopoint-sync,ga-uniform-async,ga-uniform-sync
Unnamed: 0_level_3,Unnamed: 1_level_3,tournament_size,4,4,4,4,8,4,8,4,8,4,8,4,8,4,8,4,4,4,4,4,4,4,4,4,4,4,4
problem,l,runtime_type,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4,Unnamed: 24_level_4,Unnamed: 25_level_4,Unnamed: 26_level_4,Unnamed: 27_level_4,Unnamed: 28_level_4,Unnamed: 29_level_4
nk-landscape,20,cheap-optimum,2274.0,1932.0,2270.0,5863.5,2999.5,7892.0,5592.0,4283.5,2748.0,8152.0,4790.0,4402.5,2584.0,6488.0,5080.0,4080.0,6584.0,1903.0,1750.0,4806.0,7694.0,4639.0,3500.0,2173.0,1806.0,4572.0,4328.0
nk-landscape,20,cheap-optimum-10,1770.0,1912.0,2051.5,2368.5,1506.5,7892.0,5592.0,2631.0,1527.0,8152.0,4790.0,2359.0,1749.5,6488.0,5080.0,3137.0,6584.0,1169.0,1750.0,3964.5,7694.0,2419.5,3500.0,1162.0,1806.0,2894.0,4328.0
nk-landscape,20,constant,2502.0,2428.0,2732.5,8152.0,4808.0,8152.0,4790.0,8152.0,4808.0,8152.0,4790.0,6120.0,4198.0,6196.0,4198.0,7296.0,6840.0,1740.0,1740.0,6098.0,6576.0,3650.0,4192.0,1760.0,1760.0,3578.0,3578.0
nk-landscape,20,expensive-optimum,3029.5,2150.0,3015.5,7645.5,4866.0,7550.0,4460.0,7833.0,4727.0,8152.0,4790.0,6705.0,7088.0,6848.0,4644.0,10432.0,7380.0,2655.0,1758.0,10071.5,7172.0,5023.5,4368.0,2222.0,1696.0,4767.0,4220.0
nk-landscape,20,expensive-optimum-10,3437.5,2201.0,3569.5,15764.5,10973.5,7550.0,4460.0,12149.5,10403.5,8152.0,4790.0,14672.0,10219.5,6784.0,4644.0,19620.5,7380.0,3059.0,1758.0,19086.5,7172.0,5211.0,4368.0,2908.5,1696.0,7966.0,4220.0
nk-landscape,40,cheap-optimum,20415.0,16442.5,30116.0,22607.5,15615.0,123912.0,27872.0,26637.0,12871.5,29994.0,18272.0,28821.5,15148.5,34848.0,18288.0,431580.0,972624.0,6184.0,7630.0,1086061.5,2104974.0,107021.5,81808.0,10717.5,8000.0,191966.0,107964.0
nk-landscape,40,cheap-optimum-10,19984.0,16304.5,31658.5,9257.0,7808.5,123912.0,27872.0,13798.5,7681.0,29994.0,18400.0,10065.5,7357.5,34896.0,18288.0,187501.5,972624.0,4911.0,7630.0,305036.5,2104974.0,43683.0,81808.0,7028.5,8012.0,59072.5,107964.0
nk-landscape,40,constant,17893.5,16692.0,39818.5,29994.0,18272.0,29994.0,18272.0,29994.0,18272.0,29994.0,18272.0,40556.0,17374.0,40556.0,17374.0,1576252.0,944248.0,6360.0,6360.0,inf,2303232.0,92364.0,90680.0,6396.0,6400.0,141520.0,118572.0
nk-landscape,40,expensive-optimum,22575.5,18600.0,40929.0,33669.0,18381.0,128364.0,32216.0,29035.5,21351.0,29994.0,18272.0,28765.5,18553.0,41066.0,17784.0,4033941.0,1023882.0,10051.5,7408.0,inf,2332344.0,109343.0,72400.0,11384.0,7776.0,156239.5,106856.0
nk-landscape,40,expensive-optimum-10,22563.0,17202.5,35536.5,67886.0,33537.5,128364.0,32216.0,46143.0,29804.0,29994.0,18272.0,48336.5,29312.5,41066.0,17784.0,7772563.5,1023882.0,12309.0,7408.0,inf,2287280.0,163516.0,72400.0,14696.0,7776.0,251704.0,106856.0


In [None]:
evaluations_required = results_hit.groupby(problem_columns + approach_columns)["#evaluations"].quantile(0.95)
evaluations_required.reset_index().sort_values(approach_columns).pivot(problem_columns, approach_columns).style

In [None]:
evaluations_required = results_hit.groupby(problem_columns + approach_columns)["#evaluations"].quantile([0.05, 0.5, 0.95])
evaluations_required = evaluations_required.unstack().reset_index().rename(columns={
    0.05: "0.05",
    0.50: "0.50",
    0.95: "0.95",
})
evaluations_required["approach"] = evaluations_required["algorithm_type"] + "_" + evaluations_required["replacement_strategy"].map(str) + "_" + evaluations_required["tournament_size"].map(str)
evaluations_required["problem and t"] = evaluations_required["problem"] + "_" + evaluations_required["runtime_type"]

In [None]:
s = evaluations_required[(evaluations_required["problem"] == "onemax") & (evaluations_required["runtime_type"] == "constant")]

c = Category20[20]

TOOLTIPS = [
    ("approach", "@approach"),
    ("l", "@l"),
    ("#evaluations", "@{0.50}")
]
f = figure(
    tools=["hover"],
    plot_width=800, plot_height=800
)
f.hover.tooltips = TOOLTIPS
# approach_colormapped = factor_cmap(field_name="approach", palette=Category20[20], factors=s["approach"].unique())
for (name, df), color in zip(s.groupby("approach"), cycle(c)):
    cds = ColumnDataSource(df)
    r = f.line(
        source=cds,
        x="l",
        y="0.50",
        color=color,
        line_width=2,
        legend_label = name)
    f.varea(
        source=cds,
        x="l",
        y1="0.95",
        y2="0.05",
        color=color,
        alpha=0.5,
        muted_alpha=0.0,
        legend_label = name
    )

# f.legend.click_policy = "mute"
f.legend.click_policy = "hide"
f.legend.orientation = "vertical"
f.legend.location = "top_left"

b = Button(label='Clear selection')
b.js_on_click(CustomJS(args=dict(cds=cds), code="cds.selected.indices = [];"))
show(f)

# (Simulated) Time required

In [11]:
sim_time_required = results_hit.groupby(problem_columns + approach_columns)["simulation time (s)"].median()
table = sim_time_required.reset_index().sort_values(approach_columns).pivot(problem_columns, approach_columns).style
table.to_excel(f"{date_of_processing}-nklandscape-sim-time-required-median.xlsx")
table

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s),simulation time (s)
Unnamed: 0_level_1,Unnamed: 1_level_1,replacement_strategy,0,0,0,3,3,3,3,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6
Unnamed: 0_level_2,Unnamed: 1_level_2,algorithm_type,gomea-async,gomea-sync,kernel-gomea-async,ecga-async-th,ecga-async-th,ecga-sync,ecga-sync,ecga-async-th,ecga-async-th,ecga-sync,ecga-sync,ecga-async-th,ecga-async-th,ecga-sync,ecga-sync,ga-subfunction-async,ga-subfunction-sync,ga-twopoint-async,ga-twopoint-sync,ga-uniform-async,ga-uniform-sync,ga-subfunction-async,ga-subfunction-sync,ga-twopoint-async,ga-twopoint-sync,ga-uniform-async,ga-uniform-sync
Unnamed: 0_level_3,Unnamed: 1_level_3,tournament_size,4,4,4,4,8,4,8,4,8,4,8,4,8,4,8,4,4,4,4,4,4,4,4,4,4,4,4
problem,l,runtime_type,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4,Unnamed: 18_level_4,Unnamed: 19_level_4,Unnamed: 20_level_4,Unnamed: 21_level_4,Unnamed: 22_level_4,Unnamed: 23_level_4,Unnamed: 24_level_4,Unnamed: 25_level_4,Unnamed: 26_level_4,Unnamed: 27_level_4,Unnamed: 28_level_4,Unnamed: 29_level_4
nk-landscape,20,cheap-optimum,155.9,131.375,117.025,14.95,11.825,10.275,8.3,15.775,12.25,10.35,8.3,15.475,12.525,11.625,9.15,54.975,68.35,40.825,38.0,55.0,68.925,21.775,21.725,19.3,18.65,21.45,20.875
nk-landscape,20,cheap-optimum-10,401.25,522.875,324.875,50.2,37.075,44.475,34.7,52.5,36.95,45.075,34.7,53.8,36.55,48.625,38.35,168.125,277.225,109.3,158.025,165.55,281.35,65.75,88.425,58.475,77.15,68.05,89.4
nk-landscape,20,constant,87.0,89.5,109.0,6.0,5.0,6.0,5.0,6.0,5.0,6.0,5.0,7.0,5.0,7.0,5.0,45.5,46.0,25.0,25.0,35.0,39.0,12.0,12.0,11.0,11.0,13.0,12.5
nk-landscape,20,expensive-optimum,182.45,153.975,175.5,17.95,14.7,11.5,9.6,18.825,14.025,11.55,9.5,18.0,15.15,13.225,11.35,67.475,79.275,45.75,45.45,61.4,72.275,24.025,24.3,21.75,18.9,23.975,22.725
nk-landscape,20,expensive-optimum-10,771.4,675.775,671.425,57.425,51.925,55.5,46.4,65.675,47.825,55.95,45.5,68.55,53.725,62.8,54.15,265.2,361.475,199.35,214.8,286.3,338.475,93.65,114.7,89.65,90.1,96.95,108.525
nk-landscape,40,cheap-optimum,676.65,578.0625,639.65,25.875,21.625,15.2125,13.1125,25.4375,20.75,17.9375,13.2,27.075,21.3625,18.325,14.375,76.4,164.1125,59.525,63.8,90.0875,279.025,40.575,40.45,34.0375,29.625,42.475,42.15
nk-landscape,40,cheap-optimum-10,2186.15,2120.9,2042.55,82.45,60.075,64.9125,54.0125,78.475,60.525,73.375,54.35,80.9375,62.6875,76.925,57.375,236.6125,580.5875,170.6625,246.925,249.375,988.15,109.95,166.425,99.9625,117.675,112.4,170.35
nk-landscape,40,constant,370.0,371.0,501.0,11.0,8.0,11.0,8.0,11.0,8.0,11.0,8.0,11.0,9.0,11.0,9.0,75.0,104.0,42.0,42.0,inf,180.0,23.0,25.0,18.5,19.0,26.0,27.0
nk-landscape,40,expensive-optimum,817.9375,637.0,850.15,30.8125,24.625,17.175,15.0875,30.85,24.0375,20.7625,15.2375,32.275,24.825,22.25,16.8,190.3625,244.125,84.125,75.425,inf,377.5875,48.925,46.075,41.725,33.6875,48.975,50.1
nk-landscape,40,expensive-optimum-10,3420.6625,2893.75,3258.7875,119.8125,98.8625,82.575,71.7875,117.225,96.3375,98.8625,73.1375,121.6,98.25,104.25,79.2,915.0625,1129.75,343.025,354.825,inf,1838.35,205.425,218.675,168.45,159.1875,225.875,238.9


In [None]:
sim_time_required = results_hit.groupby(problem_columns + approach_columns)["simulation time (s)"].quantile([0.05, 0.5, 0.95])
sim_time_required = sim_time_required.unstack().reset_index().rename(columns={
    0.05: "0.05",
    0.50: "0.50",
    0.95: "0.95",
})
sim_time_required["approach"] = sim_time_required["algorithm_type"] + "_" + sim_time_required["replacement_strategy"].map(str) + "_" + sim_time_required["tournament_size"].map(str)
sim_time_required["problem and t"] = sim_time_required["problem"] + "_" + sim_time_required["runtime_type"]

In [None]:
s = sim_time_required[(sim_time_required["problem"] == "onemax") & (sim_time_required["runtime_type"] == "constant")]

c = Category20[20]

TOOLTIPS = [
    ("approach", "@approach"),
    ("l", "@l"),
    ("simulation time (s)", "@{0.50}")
]
f = figure(
    tools=["hover"],
    plot_width=800, plot_height=800
)
f.hover.tooltips = TOOLTIPS
# approach_colormapped = factor_cmap(field_name="approach", palette=Category20[20], factors=s["approach"].unique())
for (name, df), color in zip(s.groupby("approach"), cycle(c)):
    cds = ColumnDataSource(df)
    r = f.line(
        source=cds,
        x="l",
        y="0.50",
        color=color,
        line_width=2,
        legend_label = name)
    f.varea(
        source=cds,
        x="l",
        y1="0.95",
        y2="0.05",
        color=color,
        alpha=0.05,
        muted_alpha=0.0,
        legend_label = name
    )

# f.legend.click_policy = "mute"
f.legend.click_policy = "hide"
f.legend.orientation = "vertical"
f.legend.location = "top_left"

show(f)

# (Simulated) CPU Time required
As the previous metric can be minimized by evaluating the entire search space in parallel.
This metric multiplies by the number of parallel processors (in this case: the population size).
Note that if an approach does not scale perfectly, this score will be worse than single threaded.

In [None]:
results_hit["simulated cpu time (s)"] = results_hit["simulation time (s)"] * results_hit["population_size"]
sim_cpu_time_required = results_hit.groupby(problem_columns + approach_columns)["simulated cpu time (s)"].mean()
sim_cpu_time_required