In [1]:
import sys
import pandas as pd

sys.path.append("..")
import os
import rbf_functions

In [2]:
rbfs = [
    rbf_functions.original_rbf,
    rbf_functions.squared_exponential_rbf,
    rbf_functions.inverse_quadratic_rbf,
    rbf_functions.inverse_multiquadric_rbf,
    rbf_functions.exponential_rbf,
    rbf_functions.matern32_rbf,
    rbf_functions.matern52_rbf,
]

# Load data

In [3]:
hv_global = pd.read_csv("./calculated_metrics/hv_refset_global.csv").iloc[0, -1]
averages = {}
for entry in rbfs:
    rbf = entry.__name__

    # normalize hv against reference set so reference set is 1
    df_hv = pd.read_csv(f"calculated_metrics/hv_{rbf}_global.csv")
    df_hv.hypervolume /= hv_global

    df_ei = pd.read_csv(f"calculated_metrics/ei_{rbf}_global.csv")
    df_gd = pd.read_csv(f"calculated_metrics/gd_{rbf}_global.csv")

    avg_hv = df_hv.groupby("seed").tail(1).loc[:, "hypervolume"].mean()
    avg_ei = df_ei.groupby("ei").tail(1).loc[:, "ei"].mean()
    avg_gd = df_gd.groupby("gd").tail(1).loc[:, "gd"].mean()
    averages[rbf] = dict(
        hypervolume=avg_hv, epsilon_indicator=avg_ei, generational_distance=avg_gd
    )

In [4]:
scores = pd.DataFrame.from_dict(averages).T
scores

Unnamed: 0,hypervolume,epsilon_indicator,generational_distance
original_rbf,0.71688,0.34952,0.022556
squared_exponential_rbf,0.641916,0.411422,0.0208
inverse_quadratic_rbf,0.333763,0.472414,0.030182
inverse_multiquadric_rbf,0.35016,0.582708,0.02365
exponential_rbf,0.151511,0.363626,0.042522
matern32_rbf,0.374623,0.356777,0.032786
matern52_rbf,0.334237,0.369322,0.031683


In [5]:
import os

paretosets = {}
size = {}
for entry in rbfs:
    name = entry.__name__
    # load variables
    varlist = []
    variables = []

    output_dir = os.path.abspath("./refsets/")
    for filename in os.listdir(output_dir):
        if filename == f"{name}_refset.csv":
            varlist.append(filename[:-4])
            df_vars = pd.read_csv(os.path.join(output_dir, filename))
            paretosets[name] = df_vars
            size[name] = len(df_vars)
    variables = df_vars.values
#     print(f"Loaded: {', '.join(varlist)}")

size

{'original_rbf': 2675,
 'squared_exponential_rbf': 2142,
 'inverse_quadratic_rbf': 1684,
 'inverse_multiquadric_rbf': 1636,
 'exponential_rbf': 1585,
 'matern32_rbf': 2268,
 'matern52_rbf': 1774}

In [6]:
scores["nr. of solutions"] = pd.Series(size)

In [7]:
scores

Unnamed: 0,hypervolume,epsilon_indicator,generational_distance,nr. of solutions
original_rbf,0.71688,0.34952,0.022556,2675
squared_exponential_rbf,0.641916,0.411422,0.0208,2142
inverse_quadratic_rbf,0.333763,0.472414,0.030182,1684
inverse_multiquadric_rbf,0.35016,0.582708,0.02365,1636
exponential_rbf,0.151511,0.363626,0.042522,1585
matern32_rbf,0.374623,0.356777,0.032786,2268
matern52_rbf,0.334237,0.369322,0.031683,1774


# set contributions

In [8]:
df_global = pd.read_csv(f"./refsets/global_refset.csv")

In [9]:
l_global = df_global.values.tolist()
contribution = {}
templ = []
for rbf in paretosets:
    x = 0
    for solution in paretosets[rbf].values.tolist():
        if solution in l_global:
            templ.append(solution)
            x += 1
    contribution[rbf] = round(x / len(df_global), 3)
    rbfd[rbf] = pd.DataFrame(templ, columns=data.columns) #error in this line. Not sure the goal of this bit. 
    templ = []
contribution

NameError: name 'data' is not defined

In [None]:
scores["set contribution"] = pd.Series(contribution)

In [None]:
scores

In [None]:
scores.to_latex()

In [None]:
scores.cols

In [None]:
scores[
    [
        "nr. of solutions",
        "generational_distance",
        "epsilon_indicator",
        "hypervolume",
        "set contribution",
    ]
].to_latex()