In [2]:
### Libraries

In [3]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt 
import plotly.express as px

# $R^2$ Metric

## Diabetes without symbolic transformation

### load data

In [208]:
# Build the path to the line-delimited JSON log under ./Logs (resolved against
# the current working directory), load it, and discard the "datetime" column.
jsn_path = Path().resolve().joinpath("Logs", "Diabetes_r2_leo.json")
data = pd.read_json(jsn_path, lines=True).drop(columns=["datetime"])

### convert data

In [209]:
# Flatten the "params" column into its own frame, one column per parameter
# (assumes each entry is a mapping — TODO confirm against the log format).
params = pd.json_normalize(data["params"])

In [210]:
# Put target next to the flattened parameters, keep only rows whose target is
# strictly positive, flip the sign of target, and cast max_depth and
# n_estimators to integers.
df = (
    pd.concat([data[["target"]], params], axis=1)
    .query("target > 0")
    .assign(target=lambda frame: frame["target"] * -1)
    .astype({"max_depth": "int", "n_estimators": "int"})
)

### plot

In [211]:
# Two subsets of df, each selected by an upper cut-off on target.
# NOTE(review): the plots in this section only use df1; df2 is not referenced
# again before it is reassigned in a later section.
df1 = df.loc[df["target"].lt(-0.37)]
df2 = df.loc[df["target"].lt(-0.7)]

In [212]:
# 3-D scatter of the df1 subset over the three parameters, coloured by target.
px.scatter_3d(
    data_frame=df1,
    x="eta",
    y="max_depth",
    z="n_estimators",
    color="target",
)

In [213]:
# 2-D scatter of target against n_estimators for the df1 subset.
px.scatter(
    data_frame=df1,
    x="n_estimators",
    y="target",
    color="target",
)

In [214]:
# 3-D scatter of the df1 subset with target on the vertical axis.
px.scatter_3d(
    data_frame=df1,
    x="max_depth",
    y="n_estimators",
    z="target",
    color="target",
)

In [215]:
# The five rows with the lowest target values.
df.sort_values(by="target").head(5)

Unnamed: 0,target,eta,max_depth,n_estimators
467,-0.47431,0.358872,1,39
1626,-0.471844,0.350964,1,44
1713,-0.471588,0.353713,1,44
335,-0.469578,0.358978,1,43
2588,-0.469548,0.344518,1,44


In [84]:
# Per-column maximum and minimum over the 40 rows with the lowest target.
(
    df.sort_values(by="target")
    .head(40)
    .agg(["max", "min"])
)

Unnamed: 0,target,eta,max_depth,n_estimators
max,-0.429555,0.467322,3,44
min,-0.47431,0.083371,1,13


## Diabetes with symbolic transformation

### load data

In [216]:
# Build the path to the line-delimited JSON log under ./Logs (resolved against
# the current working directory), load it, and discard the "datetime" column.
jsn_path = Path().resolve().joinpath("Logs", "Diabetes_r2_let.json")
data = pd.read_json(jsn_path, lines=True).drop(columns=["datetime"])

### convert data

In [217]:
# Flatten the "params" column into its own frame, one column per parameter
# (assumes each entry is a mapping — TODO confirm against the log format).
params = pd.json_normalize(data["params"])

In [218]:
# Put target next to the flattened parameters, keep only rows whose target is
# strictly positive, flip the sign of target, and cast max_depth and
# n_estimators to integers.
df = (
    pd.concat([data[["target"]], params], axis=1)
    .query("target > 0")
    .assign(target=lambda frame: frame["target"] * -1)
    .astype({"max_depth": "int", "n_estimators": "int"})
)

### plot

In [222]:
# Two subsets of df, each selected by an upper cut-off on target; df2 uses the
# stricter (lower) cut-off and is therefore contained in df1.
df1 = df.loc[df["target"].lt(-0.35)]
df2 = df.loc[df["target"].lt(-0.41)]

In [220]:
# 3-D scatter of the df2 subset over the three parameters, coloured by target.
px.scatter_3d(
    data_frame=df2,
    x="eta",
    y="max_depth",
    z="n_estimators",
    color="target",
)

In [221]:
# 2-D scatter of target against n_estimators for the df1 subset.
px.scatter(
    data_frame=df1,
    x="n_estimators",
    y="target",
    color="target",
)

In [225]:
# 3-D scatter of the df2 subset: eta and max_depth against target.
px.scatter_3d(
    data_frame=df2,
    x="eta",
    y="max_depth",
    z="target",
    color="target",
)

In [224]:
# The five rows with the lowest target values (head() defaults to n=5).
df.sort_values(by="target").head(n=5)

Unnamed: 0,target,eta,max_depth,n_estimators
1417,-0.471537,0.342547,1,34
2505,-0.469232,0.363423,1,45
2111,-0.468492,0.356572,1,50
2389,-0.464336,0.110361,2,64
1434,-0.464152,0.367863,1,17


# MAE

## Diabetes without symbolic transformation

### load data

In [377]:
# Build the path to the line-delimited JSON log under ./Logs (resolved against
# the current working directory), load it, and discard the "datetime" column.
jsn_path = Path().resolve().joinpath("Logs", "Diabetes_mae_leo.json")
data = pd.read_json(jsn_path, lines=True).drop(columns=["datetime"])

### convert data

In [378]:
# Flatten the "params" column into its own frame, one column per parameter
# (assumes each entry is a mapping — TODO confirm against the log format).
params = pd.json_normalize(data["params"])

In [379]:
# Sign-flip target, place it next to the flattened parameters, and cast
# max_depth and n_estimators to integers. No rows are filtered here.
df = pd.concat(
    [data[["target"]].mul(-1), params],
    axis=1,
).astype({"max_depth": "int", "n_estimators": "int"})

### plot

In [388]:
# Two subsets of df, each selected by an upper cut-off on target; df2 uses the
# stricter (lower) cut-off and is therefore contained in df1.
df1 = df.loc[df["target"].lt(68)]
df2 = df.loc[df["target"].lt(50)]

In [390]:
# 3-D scatter of the df1 subset over the three parameters, coloured by target.
px.scatter_3d(
    data_frame=df1,
    x="eta",
    y="max_depth",
    z="n_estimators",
    color="target",
)

In [391]:
# 2-D scatter of target against max_depth for the df1 subset.
px.scatter(
    data_frame=df1,
    x="max_depth",
    y="target",
    color="target",
)

In [392]:
# 3-D scatter of the df2 subset with target on the vertical axis.
px.scatter_3d(
    data_frame=df2,
    x="max_depth",
    y="n_estimators",
    z="target",
    color="target",
)

In [386]:
# The five rows with the lowest target values.
df.sort_values(by="target").head(5)

Unnamed: 0,target,eta,max_depth,n_estimators
849,43.02095,0.362371,1,42
914,43.082775,0.213103,3,17
501,43.198405,0.37148,1,32
385,43.231788,0.336038,1,32
946,43.235379,0.374863,1,36


In [387]:
# Per-column maximum and minimum over the 50 rows with the lowest target.
(
    df.sort_values(by="target")
    .head(50)
    .agg(["max", "min"])
)

Unnamed: 0,target,eta,max_depth,n_estimators
max,43.631016,0.410691,4,207
min,43.02095,0.052983,1,11


## Diabetes with symbolic transformation

### load data

In [407]:
# Build the path to the line-delimited JSON log under ./Logs (resolved against
# the current working directory), load it, and discard the "datetime" column.
jsn_path = Path().resolve().joinpath("Logs", "Diabetes_mae_let.json")
data = pd.read_json(jsn_path, lines=True).drop(columns=["datetime"])

### convert data

In [408]:
# Flatten the "params" column into its own frame, one column per parameter
# (assumes each entry is a mapping — TODO confirm against the log format).
params = pd.json_normalize(data["params"])

In [409]:
# Sign-flip target, place it next to the flattened parameters, and cast
# max_depth and n_estimators to integers. No rows are filtered here.
df = pd.concat(
    [data[["target"]].mul(-1), params],
    axis=1,
).astype({"max_depth": "int", "n_estimators": "int"})

### plot

In [410]:
# Boolean row masks over df, one per parameter, each testing a closed range:
# eta in [0.15, 0.2], max_depth in [10, 16], n_estimators in [24, 31].
etb = df["eta"].between(0.15, 0.2)
mdb = df["max_depth"].between(10, 16, inclusive="both")
neb = df["n_estimators"].between(24, 31, inclusive="both")

In [411]:
# df1 / df2: rows below two target cut-offs (df2 is the stricter one).
# df3: rows that satisfy all three parameter-range masks at once.
# NOTE(review): df3 is not used by any of the cells visible after this one.
df1 = df.loc[df["target"].lt(50)]
df2 = df.loc[df["target"].lt(45)]
df3 = df.loc[etb & mdb & neb]

In [412]:
# 3-D scatter of the df1 subset over the three parameters, coloured by target.
px.scatter_3d(
    data_frame=df1,
    x="eta",
    y="max_depth",
    z="n_estimators",
    color="target",
)

In [413]:
# 2-D scatter of eta against max_depth for the df1 subset, coloured by target.
px.scatter(
    data_frame=df1,
    x="max_depth",
    y="eta",
    color="target",
)

In [414]:
# 3-D scatter of the df2 subset with target on the vertical axis.
px.scatter_3d(
    data_frame=df2,
    x="max_depth",
    y="n_estimators",
    z="target",
    color="target",
)

In [415]:
# The ten rows with the lowest target values.
df.sort_values(by="target").head(10)

Unnamed: 0,target,eta,max_depth,n_estimators
925,42.498948,0.345656,1,45
153,42.512874,0.238381,1,68
2080,42.526818,0.352414,10,43
941,42.559414,0.344737,1,45
939,42.567129,0.244223,1,71
964,42.609247,0.354911,1,39
1408,42.630839,0.083765,1,236
1182,42.634607,0.112024,1,168
1356,42.647093,0.259658,1,114
1555,42.671676,0.141801,1,125


In [372]:
# Per-column maximum and minimum over the 50 rows with the lowest target.
(
    df.sort_values(by="target")
    .head(50)
    .agg(["max", "min"])
)

Unnamed: 0,target,eta,max_depth,n_estimators
max,42.775178,0.368944,1,390
min,42.498948,0.05,1,39
