# Intro
The goal of this notebook is to quantify how our custom whiteNoise Kernel behaves under varying noise and technical repeat conditions.

## Noise vs technical repeats for sequential Single task GP
* SingleTaskGP
* qLogExpectedImprovement
* 100 sequential runs
* range [0, 3.4]
* Noise [0,2]
* Technical repeats: [1,8]

In [1]:
import src.stats_eval_helper as seh
import pickle
from src import ax_helper
import pandas as pd

In [2]:
# Load the pickled simulation results: a dict whose keys encode each run's
# hyperparameters (see the key listing displayed below).
# SECURITY: pickle.load can execute arbitrary code -- only load files that
# were produced by this project / a trusted source.
# NOTE(review): the file name mentions "GammaSGP" while this section describes
# the plain SingleTaskGP experiment -- confirm this is the intended file.
# Forward slashes keep the relative path valid on Windows, Linux and macOS;
# the context manager closes the file handle as soon as loading finishes.
with open("data/bayes_sim/trial_GammaSGP_init_trial_25_10_08.pkl", "rb") as pkl_file:
    explo_dict: dict = pickle.load(pkl_file)
explo_dict.keys()

dict_keys(['technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=0', 'technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=1', 'technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=2', 'technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=3', 'technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=4', 'technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=5', 'technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=6', 'technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=7', 'technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=8', 'technical_repeats=1|noise=0.0|cycles=20|batches=1|rerun=9', 'technical_repeats=1|noise=0.34|cycles=20|batches=1|rerun=0', 'technical_repeats=1|noise=0.34|cycles=20|batches=1|rerun=1', 'technical_repeats=1|noise=0.34|cycles=20|batches=1|rerun=2', 'technical_repeats=1|noise=0.34|cycles=20|batches=1|rerun=3', 'technical_repeats=1|noise=0.34|cycles=20|batches=1|rerun=4', 'technical_repeats=1|noise=0.34|cycles=20|batches=1|rerun=5', 'techni

We first concatenate all of our runs into a single DataFrame and decode each dict key (`name=value` pairs separated by `|`) into one column per hyperparameter.

In [3]:
def agglomerate_dict(explo_dict):
    """Concatenate every run in ``explo_dict`` into a single DataFrame.

    Each key encodes the run's hyperparameters as ``name=value`` pairs joined
    by ``|`` (e.g. ``"technical_repeats=1|noise=0.0|rerun=0"``). Every pair
    becomes a column holding that value on all rows of the run. Values are
    kept as strings, exactly as they appear in the key.

    Parameters
    ----------
    explo_dict : dict[str, pd.DataFrame]
        Mapping from encoded run identifier to that run's rows.

    Returns
    -------
    pd.DataFrame
        All runs stacked row-wise with a fresh ``0..n-1`` index. An empty
        DataFrame is returned when ``explo_dict`` is empty.

    Raises
    ------
    ValueError
        If a ``|``-separated segment of a key contains no ``=``.
    """
    frames = []
    for run_id, run_df in explo_dict.items():
        # Split on the first "=" only, so a value may itself contain "=".
        hyper_params = dict(item.split("=", 1) for item in run_id.split("|"))
        # assign() returns a new frame: the frames stored in explo_dict are
        # left untouched, so re-running the cell never sees modified inputs.
        frames.append(run_df.assign(**hyper_params))

    if not frames:
        # pd.concat raises on an empty list; keep the empty-input result.
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(frames, ignore_index=True)


# Stack every run from the loaded dict into one frame; the hyperparameters
# encoded in each dict key become columns.
sgp_df = agglomerate_dict(explo_dict)


# Sanity check: overall (rows, columns) followed by the first few rows.
print(sgp_df.shape)
display(sgp_df.head())
from src.toy_functions import Hartmann6D

(2040, 14)


Unnamed: 0,trial_name,x1,x2,x3,x4,x5,x6,response,trial_index,technical_repeats,noise,cycles,batches,rerun
0,0_0,0.5,0.5,0.5,0.5,0.5,0.5,0.516406,0,1,0.0,20,1,0
1,1_0,0.427573,0.068038,0.526929,0.723911,0.331557,0.349021,0.205703,1,1,0.0,20,1,0
2,2_0,0.543296,0.736693,0.20742,0.449204,0.648057,0.645636,0.079927,2,1,0.0,20,1,0
3,3_0,0.976084,0.293457,0.821419,0.772208,0.901266,0.825685,0.003943,3,1,0.0,20,1,0
4,4_0,0.123057,0.901811,0.381681,0.054723,0.092754,0.185319,0.068779,4,1,0.0,20,1,0


Next, we compute the Hartmann6D value (`y_true`) at every sampled point and flag hits, so we can evaluate whether the model is correctly finding the important points.

In [4]:
def eval_y_true(df):
    """Add the Hartmann6D value of every row to ``df`` and flag hits.

    The six input columns ``x1`` .. ``x6`` of each row are passed to
    ``Hartmann6D().eval_at`` and the result is stored, as float, in a new
    ``y_true`` column. ``ax_helper.get_above_percentile`` is then called on
    the same frame with the arguments ``3.4`` and ``0.9``.

    NOTE(review): the return value of ``get_above_percentile`` is discarded,
    so it presumably adds its columns to ``df`` in place (the notebook output
    shows ``assumed_hit`` / ``true_hit`` after this call) -- confirm against
    ``ax_helper``. The meaning of ``3.4`` and ``0.9`` is likewise defined by
    that helper.

    ``df`` is modified in place and also returned.
    """
    input_columns = [f"x{dim}" for dim in range(1, 7)]
    objective = Hartmann6D()

    def _objective_at(row):
        # Unpack the six coordinates of one row as positional arguments.
        return objective.eval_at(*row)

    y_true = df[input_columns].apply(_objective_at, axis=1)
    df["y_true"] = y_true.astype(float)

    ax_helper.get_above_percentile(df, 3.4, 0.9)
    return df
# eval_y_true modifies the frame in place and returns it, so sgp_df keeps
# pointing at the same (now extended) DataFrame.
sgp_df = eval_y_true(sgp_df)
sgp_df.head()

Unnamed: 0,trial_name,x1,x2,x3,x4,x5,x6,response,trial_index,technical_repeats,noise,cycles,batches,rerun,y_true,assumed_hit,true_hit
0,0_0,0.5,0.5,0.5,0.5,0.5,0.5,0.516406,0,1,0.0,20,1,0,0.516406,False,False
1,1_0,0.427573,0.068038,0.526929,0.723911,0.331557,0.349021,0.205703,1,1,0.0,20,1,0,0.205703,False,False
2,2_0,0.543296,0.736693,0.20742,0.449204,0.648057,0.645636,0.079927,2,1,0.0,20,1,0,0.079927,False,False
3,3_0,0.976084,0.293457,0.821419,0.772208,0.901266,0.825685,0.003943,3,1,0.0,20,1,0,0.003943,False,False
4,4_0,0.123057,0.901811,0.381681,0.054723,0.092754,0.185319,0.068779,4,1,0.0,20,1,0,0.068779,False,False


In [5]:
import importlib

# Re-import the helper module so edits made to it since the first import are
# picked up without restarting the kernel.
# NOTE(review): development convenience -- consider `%autoreload 2` in a
# configuration cell instead.
importlib.reload(seh)

# Columns passed to hit_stats as the hyperparameter columns; in the displayed
# result each row corresponds to one combination of their values.
hyper_param_cols = [
    "technical_repeats",
    "noise",
    "cycles",
    "batches",
    "rerun",
]

df = seh.hit_stats(sgp_df, hyper_param_cols=hyper_param_cols)
df

Unnamed: 0,technical_repeats,noise,cycles,batches,rerun,TP_index,FP_index,TN_index,FN_index,TP,FP,TN,FN,Precision,Recall,Specificity,Accuracy,F1
0,1,0.0,20,1,0,[],[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[],0,0,21,0,0.0,0.0,1.0,1.0,0.0
1,1,0.0,20,1,1,[],[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[],0,0,21,0,0.0,0.0,1.0,1.0,0.0
2,1,0.0,20,1,2,[17],[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[],1,0,20,0,1.0,1.0,1.0,1.0,1.0
3,1,0.0,20,1,3,[],[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[],0,0,21,0,0.0,0.0,1.0,1.0,0.0
4,1,0.0,20,1,4,[],[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[],0,0,21,0,0.0,0.0,1.0,1.0,0.0
5,1,0.0,20,1,5,[],[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[],0,0,21,0,0.0,0.0,1.0,1.0,0.0
6,1,0.0,20,1,6,[18],[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[],1,0,20,0,1.0,1.0,1.0,1.0,1.0
7,1,0.0,20,1,7,[],[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[],0,0,21,0,0.0,0.0,1.0,1.0,0.0
8,1,0.0,20,1,8,"[15, 18]",[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[20],2,0,18,1,1.0,0.666667,1.0,0.952381,0.8
9,1,0.0,20,1,9,[],[],"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[],0,0,21,0,0.0,0.0,1.0,1.0,0.0


## Noise vs technical repeats for sequential Single task GP with Gamma noise
* Gamma noise
* qLogExpectedImprovement
* 100 sequential runs
* range [0, 3.4]
* Noise [0,2]
* Technical repeats: [1,8]