In [1]:
import json
import numpy as np
import pandas as pd
import json

In [22]:
### load data into a table (with some preprocessing on names)

# load data into a table
with open("main_data.json","r") as f:
    rs = json.load(f)
t = pd.DataFrame(rs) 

# remove data that we don't need and shorten names
t = t[~t.experiment.str.contains("dyn__with_static")]
t = t[~t.experiment.str.contains("binning_comparison")]
t.loc[:,"experiment"] = [s.replace("dynamic","dyn") for s in t.experiment]
t = t[~t.experiment.str.contains("dyn__with_static")]
t = t[t.n < 5000]
print("Experiment names: ")
print(t.experiment.unique())

# add one to the number of bins
for exp in ["tuned_dyn_bin", "tuned_dyn_cas_bin"]:
    t.loc[t.experiment.str.contains(exp), "experiment"] = [f"{exp}{int(s.replace(exp,''))+1}" if "sc" not in s else f"{exp}_sc{int(s.replace(exp+'_sc',''))+1}" for s in t[t.experiment.str.contains(exp)].experiment.values]

print("\nExperiment results:")
display(t)

Experiment names: 
['theory_dyn' 'tuned_dyn' 'tuned_static' 'dyn_with_static'
 'tuned_dyn_cas_bin0' 'tuned_dyn_cas_bin1' 'tuned_dyn_cas_bin2'
 'tuned_dyn_cas_bin3' 'tuned_dyn_cas_bin4' 'tuned_dyn_cas_bin5'
 'tuned_dyn_cas_bin6' 'tuned_dyn_cas_bin7' 'tuned_dyn_cas_bin8'
 'tuned_dyn_cas_bin9' 'tuned_dyn_cas_bin10' 'tuned_dyn_cas_bin11'
 'binned_theory_dyn_start' 'binned_theory_dyn_end'
 'binned_theory_dyn_middle' 'tuned_dyn_bin0' 'tuned_dyn_bin1'
 'tuned_dyn_bin2' 'tuned_dyn_bin3' 'tuned_dyn_bin4' 'tuned_dyn_bin5'
 'tuned_dyn_bin6' 'tuned_dyn_bin7' 'tuned_dyn_bin8' 'tuned_dyn_bin9'
 'tuned_dyn_bin10' 'tuned_dyn_bin11' 'binned_optimal_dyn_start'
 'binned_optimal_dyn_end' 'binned_optimal_dyn_middle' 'tuned_dyn_bin_sc5'
 'tuned_dyn_bin_sc6' 'tuned_dyn_bin_sc8' 'tuned_dyn_bin_sc9'
 'tuned_dyn_cas_bin_sc5' 'tuned_dyn_cas_bin_sc6' 'optimal_dyn'
 'optimal_dyn_bin']

Experiment results:


Unnamed: 0,n,experiment,max_evals,tuning_budget,tuning_time,evaluation_results,best_configuration,sum_of_time
0,10,theory_dyn,0,0,0.0,"[41, 19, 18, 25, 42, 62, 48, 28, 86, 28, 22, 3...","{'fx': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'lbd': ...",
1,50,theory_dyn,0,0,0.0,"[240, 245, 302, 420, 181, 257, 275, 284, 265, ...","{'fx': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",
2,100,theory_dyn,0,0,0.0,"[590, 471, 420, 536, 409, 617, 501, 858, 472, ...","{'fx': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",
3,200,theory_dyn,0,0,0.0,"[938, 1136, 1539, 1306, 1582, 1438, 1062, 1073...","{'fx': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",
4,500,theory_dyn,0,0,0.0,"[3075, 2382, 3092, 3038, 3343, 3641, 3060, 305...","{'fx': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",
...,...,...,...,...,...,...,...,...
388,1000,optimal_dyn,0,0,0.0,"[5999, 6275, 6308, 5359, 5995, 6212, 5278, 615...","{'fx': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",
389,2000,optimal_dyn,0,0,0.0,"[11619, 11870, 10865, 11948, 12094, 11454, 122...","{'fx': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...",
390,500,optimal_dyn_bin,0,0,0.0,"[3225, 2892, 2594, 3244, 2655, 3282, 2941, 232...","{'fx': [0, 250, 375, 438, 469, 485, 493, 497, ...",
391,1000,optimal_dyn_bin,0,0,0.0,"[6283, 5864, 6598, 5713, 5128, 5645, 6761, 635...","{'fx': [0, 500, 750, 875, 938, 969, 985, 993, ...",


In [None]:
#### Notes on how to read the data

# Tuning experiments:
    # tuned_dyn_bin<k>: irace with binning with #bins=k (tunings with #bins=k-1 and #bins=k are independent of each other)
    # tuned_dyn_cas_bin<k>: irace with binning and cascading with #bins=k (results of #bin=k-1 is seeded to the tuning with #bins=k)

# Baselines:
    # optimal_dyn: (near-)optimal policy calculated by CMA-ES
    # optimal_dyn_bin: (near-)optimal binned policy calculated by CMA-ES (#bins=ceil(log2(n)) where n is the problem size)
    # theory_dyn: a baseline policy taken from theory
    # binned_theory_dyn_<start/middle/end>: binned version of theory_dyn where lbd is chosen at the start/middle/end of each bin

# Columns in t:
    # max_evals: maximum number of evaluations given to each run of the OLL algorithm (in principle we want to set this as infinite, but in practice it will result in wasting a lot of compute time on bad configurations during the tuning)
    # tuning_budget: #runs of the OLL algorithm during each tuning.
    # best_configuration: the best configuration found by the corresponding experiment, read based on two arrays (fx and lbd) as follows:
        # lbd[i] is the best lbd value for all objective values o where fx[i] <= o < fx[i+1] (note that for the last value of i, fx[i+1]=n)
    # evaluation_results: performance of best_configuration over 500 runs



In [48]:
# Example: comparing results of irace+binning+cascading with theory_dyn, optimal_dyn and optimal_dyn_bin

# for all tuned_dyn_cas_bin, get the one with the largest number of bins as those give us the final configuration found by irace.
t1 = t[((t.n==500) & (t.experiment=="tuned_dyn_cas_bin8")) | 
               ((t.n==1000) & (t.experiment=="tuned_dyn_cas_bin9")) | 
               ((t.n==2000) & (t.experiment=="tuned_dyn_cas_bin10")) | 
               ((t.n==3000) & (t.experiment=="tuned_dyn_cas_bin11"))]
t1.loc[:, "experiment"] = "tuned_dyn_cas_bin"

# show exact runtime of best tuned_dyn_cas_bin vs theory_dyn vs optimal_dyn for n<3000
t1 = pd.concat([t1, t[t.experiment.isin(["theory_dyn","optimal_dyn","optimal_dyn_bin"])]], axis=0)
t1["empirical_runtime_mean"] = [np.mean(e) for e in t1.evaluation_results]
display(t1[t1.n<=3000].pivot(index="n", columns="experiment", values="empirical_runtime_mean").round(2))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  t1.loc[:, "experiment"] = "tuned_dyn_cas_bin"


experiment,optimal_dyn,optimal_dyn_bin,theory_dyn,tuned_dyn_cas_bin
n,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10,,,39.21,
50,,,271.61,
100,,,596.48,
200,,,1241.65,
500,2935.62,2920.33,3227.9,3254.79
1000,5984.26,5959.82,6605.07,6487.81
2000,12178.19,12185.74,13315.53,12748.33
3000,,,20128.97,19417.28
