In [5]:
import pandas as pd

HW = ["pc", "vm", "g100"]
TARGET = ["time", "quality", "memory", "price"]
ALG_PARAMS = {
        "convolution" : 4, 
        "saxpy" : 3,
        #"blackscholes" : 15, 
        "correlation" : 7, 
        "fwt" : 2 
        }

params = [(5,1),(10,10), (10,15)]
df_list = []

for algorithm in ALG_PARAMS.keys():
    for depth, estimator in params:
        df = pd.read_csv(f"{algorithm}_depth{depth}_est{estimator}_results.csv")
        df["depth"] = depth
        df["estimators"] = estimator
        df["algorithm"] = algorithm
        df_list.append(df)
        
tot_df = pd.concat(df_list, axis=0)
tot_df.dropna(axis = 0, how = 'all', inplace = True) # drop empty rows
tot_df = tot_df[~(tot_df["b[pc]"].isna() | tot_df["b[vm]"].isna() | tot_df["b[g100]"].isna())] #drop rows with no solution
tot_df.reset_index(inplace=True)

In [6]:
robust_df = tot_df[~tot_df["robust_factor"].isna()]

robust_df.reset_index(inplace=True)
robust_df

Unnamed: 0,level_0,index,objective,time,quality,memory,price,b[pc],b[vm],b[g100],...,build_time,solve_time,robust_factor,depth,estimators,algorithm,var[4],var[5],var[6],eobjective
0,6,3,"('min', 'time')",-,"('geq', 10.0)",-,-,-0.0,1.0,-0.0,...,5.36,26.33,0.01,10,10,convolution,,,,
1,10,3,"('min', 'time')",-,"('geq', 20.0)",-,-,-0.0,1.0,0.0,...,7.44,26.43,0.01,10,15,convolution,,,,
2,19,4,"('min', 'time')",-,-,"('leq', 30.0)",-,-0.0,1.0,0.0,...,8.43,32.61,0.01,10,10,saxpy,,,,
3,23,3,"('max', 'quality')","('leq', 80.0)",-,-,-,0.0,-0.0,1.0,...,7.95,12.79,0.01,10,15,saxpy,,,,
4,39,2,"('min', 'memory')",-,-,-,"('leq', 30.0)",1.0,0.0,0.0,...,3.82,7.6,0.01,10,15,fwt,,,,


Experiments that yielded an optimal solution with robustness factor
* min time, Convolution with 10 depth, 10 estimators, robust factor 0.01
* min time, Convolution with 10 depth, 15 estimators, robust factor 0.1. While the same experiment but with 10 estimators didn't find a feasible solution. And with 15 estimators, the constraint on quality was even tighter.
* min time, saxpy with 10 depth, 10 estimators, robust factor 0.01
* max quality, saxpy with 10 depth and 15 estimators, robust 0.01
* min memory, fwt with 10 depth and 15 estimators, robust 0.01

Let's compare with results of original hada

In [17]:
print("GBTs min time ",robust_df.loc[0, 'y[vm_time]'], " quality ",  robust_df.loc[0, 'y[quality]'])
print("DT min time ", 65.42, " quality ", 44.63)
print()
print("GBTs min time ", robust_df.loc[1, 'y[vm_time]'], " quality ", robust_df.loc[1, 'y[quality]'])
print("DT min time ", 65.42, " quality ", 44.63)
print()
print("GBTs mn time ", robust_df.loc[2, 'y[vm_time]'], " memory ", robust_df.loc[2, 'y[vm_memory]'])
print("DT min time ", 22.06, " memory ", 7.33)
print()
print("GBTs max quality ", robust_df.loc[3, 'y[quality]'], " time ", robust_df.loc[3, 'y[g100_time]'])
print("DT max quality ", 67.06, " time ", 27.18)

GBTs min time  67.58987528195097  quality  12.679257672906898
DT min time  65.42  quality  44.63

GBTs min time  68.04170852015632  quality  34.22765883448247
DT min time  65.42  quality  44.63

GBTs mn time  24.78799773864445  memory  4.147599675627131
DT min time  22.06  memory  7.33

GBTs max quality  53.86367211865602  time  30.149900672332056
DT max quality  67.06  time  27.18


Solutions seem to be better in HADA without omlt, although with consistently higher runtimes and build times. Let's try with bigger GBTs, 50 trees estimators.

In [18]:
df1 = pd.read_csv("saxpy_depth10_est50_results.csv")
print("GBTs max quality ", df1['y[quality]'].values, " time ", df1['y[g100_time]'].values)
print("DT max quality ", 67.06, " time ", 27.18)
df1.head()

GBTs max quality  [67.60380762]  time  [31.21209732]
DT max quality  67.06  time  27.18


Unnamed: 0,objective,time,quality,memory,price,b[pc],b[vm],b[g100],y[pc_time],y[pc_memory],...,y[g100_price],y[quality],var[0],var[1],var[2],#variables,#constraints,build_time,solve_time,robust_factor
0,"('max', 'quality')","('leq', 80.0)",-,-,-,0.0,0.0,1.0,253.314666,,...,,67.603808,52.0,52.0,52.0,97161,195746,31.8,57.91,


In [14]:
# let's compare results for runs without robustness factor
non_robust_df = tot_df[(tot_df["robust_factor"].isna()) & (tot_df["depth"] == 10) & (tot_df["estimators"] == 15)]
non_robust_df.reset_index(inplace=True)
non_robust_df

Unnamed: 0,level_0,index,objective,time,quality,memory,price,b[pc],b[vm],b[g100],...,build_time,solve_time,robust_factor,depth,estimators,algorithm,var[4],var[5],var[6],eobjective
0,7,0,"('min', 'memory')",-,"('geq', 10.0)",-,-,0.0,1.0,0.0,...,6.18,76.23,,10,15,convolution,,,,
1,8,1,"('min', 'price')",-,-,"('leq', 40.0)",-,1.0,0.0,0.0,...,4.43,9.56,,10,15,convolution,,,,
2,9,2,"('min', 'time')",-,"('geq', 20.0)",-,-,-0.0,1.0,0.0,...,7.54,29.04,,10,15,convolution,,,,
3,11,4,"('max', 'quality')",-,-,"('leq', 35.0)",-,-0.0,1.0,0.0,...,6.2,78.19,,10,15,convolution,,,,
4,20,0,"('min', 'price')","('leq', 100.0)",-,-,-,0.0,0.0,1.0,...,6.18,8.33,,10,15,saxpy,,,,
5,21,1,"('min', 'time')",-,-,"('leq', 30.0)",-,-0.0,1.0,0.0,...,12.49,54.65,,10,15,saxpy,,,,
6,22,2,"('min', 'memory')",-,"('geq', 25.0)",-,-,0.0,1.0,0.0,...,8.2,69.35,,10,15,saxpy,,,,
7,30,0,"('max', 'quality')",-,-,"('leq', 35.0)",-,0.0,-0.0,1.0,...,8.35,49.4,,10,15,correlation,8.0,5.0,48.0,
8,31,1,"('min', 'time')",-,-,-,"('leq', 20.0)",1.0,0.0,0.0,...,7.07,29.35,,10,15,correlation,23.0,32.0,6.999999,
9,32,2,"('min', 'price')",-,"('geq', 15.0)",-,-,1.0,0.0,0.0,...,1.26,1.18,,10,15,correlation,51.0,50.0,50.0,


In [19]:
print("GBTs min memory ",non_robust_df.loc[0, 'y[vm_memory]'], " quality ", non_robust_df.loc[0, 'y[quality]'])
print("DT min memory ", 21.78, " quality ", 44.63)
print()
print("GBTs min price ", non_robust_df.loc[1, 'y[pc_price]'], " memory ", non_robust_df.loc[1, 'y[pc_memory]'])
print("DT min price ", 7, " memory ", 25.8)
print()
print("GBTs mn time ", non_robust_df.loc[2, 'y[vm_time]'], " quality ", non_robust_df.loc[2, 'y[quality]'])
print("DT max quality ", 44.63, " quality ", 65.42)
print()
print("GBTs max quality ", non_robust_df.loc[3, 'y[quality]'], " memory ", non_robust_df.loc[3, 'y[vm_memory]'])
print("DT max quality ", 47.43, " memory ", 44.63)

GBTs min memory  24.054352086727064  quality  10.193005412715348
DT min memory  21.78  quality  44.63

GBTs min price  7.0  memory  24.819243247092928
DT min price  7  memory  25.8

GBTs mn time  68.04170852015632  quality  34.22765883448247
DT max quality  44.63  quality  65.42

GBTs max quality  16.231721116851947  memory  34.29059430738093
DT max quality  47.43  memory  44.63


Generally, GBT seem to perform worse than DT, mostly because in order to approximate the performance of DT encoding, GBT need to have many estimators, which create too many empirical constraints. Even with just 10 max depth and 15 estimators, with a minimization of time and a memory constraints (so 6 models), number of variables is 41684 and constraints are 86628 versus the 2890 variables and 27128 of the DT.