In [None]:
# When comparing two data sets using statistical tools, it is not
# sufficient to simply compare their average or median values. This is because
# such results can be skewed by randomness. For example, suppose we are
# calculating the median values of ten seeds. If one algorithm gets "lucky"
# and happens to use more above-average seeds, the estimated median will
# be skewed. Therefore, it is necessary to check the statistical significance of
# results. This is exactly what the indifferent entries are displaying. To determine
# statistical significance, the MOEA Framework uses the Kruskal-Wallis
# and Mann-Whitney U tests with 95% confidence intervals.

In [2]:
from scipy import stats
import sys
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [12, 8]
sys.path.append('..')
import rbf_functions

In [3]:
rbfs = [rbf_functions.original_rbf,
        rbf_functions.squared_exponential_rbf,
        rbf_functions.inverse_quadratic_rbf,
        rbf_functions.inverse_multiquadric_rbf,
        rbf_functions.exponential_rbf,
        rbf_functions.matern32_rbf,
        rbf_functions.matern52_rbf,
       ]

# Kruskal-Wallis HV local reference set

In [23]:
gddata = {}
eidata = {}
hvdata = {}
varlist = ['10', '20', '30', '40', '50', '60', '70', '80', '90', '100']
for entry in rbfs:
    rbf = entry.__name__
#     df_gd_data = pd.read_csv(f"gd_all/{rbf}_gd_all.csv")
#     df_ei_data = pd.read_csv(f"ei_all/{rbf}_ei_all.csv")
    df_hv_data = pd.read_csv(f"hv/{rbf}_hv.csv")
    gddata[rbf] = {}
    eidata[rbf] = {}
    hvdata[rbf] = {}
    for i in varlist: # add max hv value at 100.000 nfe to dict for each seed
#         gddata[rbf][i] = df_gd_data[i].dropna().iloc[-1] 
#         eidata[rbf][i] = df_ei_data[i].dropna().iloc[-1]
        hvdata[rbf][i] = df_hv_data[i].dropna().iloc[-1]
df_gd = pd.DataFrame.from_dict(gddata, orient='index')
df_ei = pd.DataFrame.from_dict(eidata, orient='index')
df_hv = pd.DataFrame.from_dict(hvdata, orient='index')

In [78]:
df_kruskal = pd.DataFrame(columns=df_hv.index)
df_pval = pd.DataFrame(columns=df_hv.index)
for i, row in df_hv.iterrows():
    for j, row2 in df_hv.iterrows():
        krusval, pval = stats.kruskal(row, row2)
        df_kruskal.loc[i, j] = krusval
        df_pval.loc[i, j] = pval

In [79]:
df_kruskal

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,0.0,1.851429,13.165714,0.022857,14.285714,4.48,10.565714
squared_exponential_rbf,1.851429,0.0,3.862857,3.862857,11.571429,0.965714,4.165714
inverse_quadratic_rbf,13.165714,3.862857,0.0,14.285714,14.285714,2.062857,0.691429
inverse_multiquadric_rbf,0.022857,3.862857,14.285714,0.0,14.285714,8.691429,14.285714
exponential_rbf,14.285714,11.571429,14.285714,14.285714,0.0,7.0,4.48
matern32_rbf,4.48,0.965714,2.062857,8.691429,7.0,0.0,1.651429
matern52_rbf,10.565714,4.165714,0.691429,14.285714,4.48,1.651429,0.0


In [72]:
df_pval

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,1.0,0.173617,0.000285,0.879829,0.000157,0.034294,0.001152
squared_exponential_rbf,0.173617,1.0,0.049366,0.049366,0.00067,0.325751,0.04125
inverse_quadratic_rbf,0.000285,0.049366,1.0,0.000157,0.000157,0.150927,0.405679
inverse_multiquadric_rbf,0.879829,0.049366,0.000157,1.0,0.000157,0.003197,0.000157
exponential_rbf,0.000157,0.00067,0.000157,0.000157,1.0,0.008151,0.034294
matern32_rbf,0.034294,0.325751,0.150927,0.003197,0.008151,1.0,0.198765
matern52_rbf,0.001152,0.04125,0.405679,0.000157,0.034294,0.198765,1.0


# Kruskal-Wallis on objectives

In [46]:
rbfd = {}
rbfn = []
for rbf in rbfs:
    name = rbf.__name__
    rbfn.append(name)
    rbfd[name] = pd.read_csv(f"refsets/{name}_refset.csv")

In [47]:
rbfn

['original_rbf',
 'squared_exponential_rbf',
 'inverse_quadratic_rbf',
 'inverse_multiquadric_rbf',
 'exponential_rbf',
 'matern32_rbf',
 'matern52_rbf']

# hydropower

In [48]:
df_kruskal_hyd = pd.DataFrame(columns=rbfn)
df_pval_hyd = pd.DataFrame(columns=rbfn)
for i in rbfd:
    for j in rbfd:
        row1 = rbfd[i]['hydropower']
        row2 = rbfd[j]['hydropower']
        krusval, pval = stats.kruskal(row1, row2)
        df_kruskal_hyd.loc[i, j] = krusval
        df_pval_hyd.loc[i, j] = pval

In [49]:
df_kruskal_hyd

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,0.0,0.804483,402.927268,192.352645,790.676655,376.611228,137.218176
squared_exponential_rbf,0.804483,0.0,439.349213,211.455369,915.62835,553.741897,245.669577
inverse_quadratic_rbf,402.927268,439.349213,0.0,5.949069,12.773484,240.183682,245.007491
inverse_multiquadric_rbf,192.352645,211.455369,5.949069,0.0,0.826885,86.130827,93.120613
exponential_rbf,790.676655,915.62835,12.773484,0.826885,0.0,564.965314,635.850557
matern32_rbf,376.611228,553.741897,240.183682,86.130827,564.965314,0.0,22.449043
matern52_rbf,137.218176,245.669577,245.007491,93.120613,635.850557,22.449043,0.0


In [35]:
df_pval_hyd

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,1.0,0.369756,0.0,0.0,0.0,0.0,0.0
squared_exponential_rbf,0.369756,1.0,0.0,0.0,0.0,0.0,0.0
inverse_quadratic_rbf,0.0,0.0,1.0,0.014725,0.000352,0.0,0.0
inverse_multiquadric_rbf,0.0,0.0,0.014725,1.0,0.363175,0.0,0.0
exponential_rbf,0.0,0.0,0.000352,0.363175,1.0,0.0,0.0
matern32_rbf,0.0,0.0,0.0,0.0,0.0,1.0,2e-06
matern52_rbf,0.0,0.0,0.0,0.0,0.0,2e-06,1.0


In [30]:
rbfd[i]

Unnamed: 0,hydropower,atomicpowerplant,baltimore,chester,environment,recreation
0,44.096830,0.300382,0.300684,0.201203,0.033639,1.0
1,69.394721,0.906201,0.525242,0.635952,0.081752,1.0
2,56.748746,0.467605,0.326137,0.335062,0.064412,1.0
3,67.585114,0.756048,0.564585,0.594769,0.077092,1.0
4,68.241955,0.710361,0.301054,0.482444,0.075095,1.0
...,...,...,...,...,...,...
852,67.737723,0.774806,0.604614,0.630421,0.080488,1.0
853,61.773701,0.853552,0.123667,0.461211,0.072511,1.0
854,66.466355,0.956174,0.557605,0.651079,0.082680,1.0
855,67.655623,0.523319,0.118167,0.295638,0.073952,1.0


# atomic power plant

In [50]:
df_kruskal_atom = pd.DataFrame(columns=rbfn)
df_pval_atom = pd.DataFrame(columns=rbfn)
for i in rbfd:
    for j in rbfd:
        row1 = rbfd[i]['atomicpowerplant']
        row2 = rbfd[j]['atomicpowerplant']
        krusval, pval = stats.kruskal(row1, row2)
        df_kruskal_atom.loc[i, j] = krusval
        df_pval_atom.loc[i, j] = pval

In [51]:
df_kruskal_atom

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,0.0,75.362807,39.307564,21.287862,646.658373,97.580394,60.568507
squared_exponential_rbf,75.362807,0.0,8.283677,9.192126,198.447155,0.011901,2.876386
inverse_quadratic_rbf,39.307564,8.283677,0.0,0.259945,340.602191,9.436641,1.122949
inverse_multiquadric_rbf,21.287862,9.192126,0.259945,0.0,260.488511,13.449891,3.825772
exponential_rbf,646.658373,198.447155,340.602191,260.488511,0.0,296.245706,378.463528
matern32_rbf,97.580394,0.011901,9.436641,13.449891,296.245706,0.0,2.131751
matern52_rbf,60.568507,2.876386,1.122949,3.825772,378.463528,2.131751,0.0


In [52]:
df_pval_atom

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,1.0,0.0,0.0,4e-06,0.0,0.0,0.0
squared_exponential_rbf,0.0,1.0,0.004,0.002431,0.0,0.913131,0.089888
inverse_quadratic_rbf,0.0,0.004,1.0,0.610158,0.0,0.002127,0.289284
inverse_multiquadric_rbf,4e-06,0.002431,0.610158,1.0,0.0,0.000245,0.05047
exponential_rbf,0.0,0.0,0.0,0.0,1.0,0.0,0.0
matern32_rbf,0.0,0.913131,0.002127,0.000245,0.0,1.0,0.144276
matern52_rbf,0.0,0.089888,0.289284,0.05047,0.0,0.144276,1.0


# Baltimore

In [53]:
df_kruskal_balt = pd.DataFrame(columns=rbfn)
df_pval_balt = pd.DataFrame(columns=rbfn)
for i in rbfd:
    for j in rbfd:
        row1 = rbfd[i]['baltimore']
        row2 = rbfd[j]['baltimore']
        krusval, pval = stats.kruskal(row1, row2)
        df_kruskal_balt.loc[i, j] = krusval
        df_pval_balt.loc[i, j] = pval

In [54]:
df_kruskal_balt

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,0.0,1.378922,75.070583,89.870649,189.175073,29.456654,24.341424
squared_exponential_rbf,1.378922,0.0,65.583246,83.105616,219.447905,17.86092,15.851958
inverse_quadratic_rbf,75.070583,65.583246,0.0,3.750897,28.791946,20.170859,20.354669
inverse_multiquadric_rbf,89.870649,83.105616,3.750897,0.0,2.860346,37.515899,39.362687
exponential_rbf,189.175073,219.447905,28.791946,2.860346,0.0,168.967283,189.767866
matern32_rbf,29.456654,17.86092,20.170859,37.515899,168.967283,0.0,0.302248
matern52_rbf,24.341424,15.851958,20.354669,39.362687,189.767866,0.302248,0.0


In [55]:
df_pval_balt

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,1.0,0.240285,0.0,0.0,0.0,0.0,1e-06
squared_exponential_rbf,0.240285,1.0,0.0,0.0,0.0,2.4e-05,6.8e-05
inverse_quadratic_rbf,0.0,0.0,1.0,0.052779,0.0,7e-06,6e-06
inverse_multiquadric_rbf,0.0,0.0,0.052779,1.0,0.090788,0.0,0.0
exponential_rbf,0.0,0.0,0.0,0.090788,1.0,0.0,0.0
matern32_rbf,0.0,2.4e-05,7e-06,0.0,0.0,1.0,0.582477
matern52_rbf,1e-06,6.8e-05,6e-06,0.0,0.0,0.582477,1.0


# Chester

In [56]:
df_kruskal_ches = pd.DataFrame(columns=rbfn)
df_pval_ches = pd.DataFrame(columns=rbfn)
for i in rbfd:
    for j in rbfd:
        row1 = rbfd[i]['baltimore']
        row2 = rbfd[j]['baltimore']
        krusval, pval = stats.kruskal(row1, row2)
        df_kruskal_ches.loc[i, j] = krusval
        df_pval_ches.loc[i, j] = pval

In [57]:
df_kruskal_ches

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,0.0,1.378922,75.070583,89.870649,189.175073,29.456654,24.341424
squared_exponential_rbf,1.378922,0.0,65.583246,83.105616,219.447905,17.86092,15.851958
inverse_quadratic_rbf,75.070583,65.583246,0.0,3.750897,28.791946,20.170859,20.354669
inverse_multiquadric_rbf,89.870649,83.105616,3.750897,0.0,2.860346,37.515899,39.362687
exponential_rbf,189.175073,219.447905,28.791946,2.860346,0.0,168.967283,189.767866
matern32_rbf,29.456654,17.86092,20.170859,37.515899,168.967283,0.0,0.302248
matern52_rbf,24.341424,15.851958,20.354669,39.362687,189.767866,0.302248,0.0


In [58]:
df_pval_ches

Unnamed: 0,original_rbf,squared_exponential_rbf,inverse_quadratic_rbf,inverse_multiquadric_rbf,exponential_rbf,matern32_rbf,matern52_rbf
original_rbf,1.0,0.240285,0.0,0.0,0.0,0.0,1e-06
squared_exponential_rbf,0.240285,1.0,0.0,0.0,0.0,2.4e-05,6.8e-05
inverse_quadratic_rbf,0.0,0.0,1.0,0.052779,0.0,7e-06,6e-06
inverse_multiquadric_rbf,0.0,0.0,0.052779,1.0,0.090788,0.0,0.0
exponential_rbf,0.0,0.0,0.0,0.090788,1.0,0.0,0.0
matern32_rbf,0.0,2.4e-05,7e-06,0.0,0.0,1.0,0.582477
matern52_rbf,1e-06,6.8e-05,6e-06,0.0,0.0,0.582477,1.0


# Environment

# Recreation