In [2]:
### Libraries

In [3]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt 
import plotly.express as px

# $R^2$ Metric

## Diabetes without symbolic transformation

### load data

In [208]:
# Log of the R^2 run for the section "without symbolic transformation"
# (presumably what the "leo" suffix stands for -- TODO confirm). The path is
# resolved against the current working directory, so the notebook has to be
# started from the folder that contains "Logs".
jsn_path = Path().resolve().joinpath("Logs", "Diabetes_r2_leo.json")
# The file is read as JSON Lines (one record per line); the "datetime" field is
# discarded immediately.
data = pd.read_json(jsn_path, lines=True).drop(columns=["datetime"])

### convert data

In [209]:
# Expand the per-record "params" entries into a flat table, one column per key.
params = pd.json_normalize(data["params"])

In [210]:
# Put the logged target next to its parameters, keep only records whose logged
# target is positive, negate the target (the cells below sort it ascending),
# and store the two count-like parameters as integers.
df = (
    pd.concat([data[["target"]], params], axis="columns")
    .query("target > 0")
    .assign(target=lambda frame: frame["target"] * -1)
    .astype({"max_depth": "int", "n_estimators": "int"})
)

### plot

In [211]:
# Subsets of `df` below two cut-offs on the negated target.
df1 = df[df["target"] < -0.37]
# NOTE(review): the ranking table shown further down reports a minimum target of
# about -0.474, so this -0.7 cut-off looks like it selects no rows -- confirm the
# intended threshold. None of the plots visible in this section use df2.
df2 = df[df["target"] < -0.7]

In [212]:
# 3-D scatter of the three parameters for the rows in df1, coloured by target.
px.scatter_3d(
    df1,
    x="eta",
    y="max_depth",
    z="n_estimators",
    color="target",
)

In [213]:
# Target against n_estimators for the rows in df1 (colour repeats the target).
px.scatter(
    df1,
    x="n_estimators",
    y="target",
    color="target",
)

In [214]:
# Target plotted over the (max_depth, n_estimators) plane for the rows in df1.
px.scatter_3d(
    df1,
    x="max_depth",
    y="n_estimators",
    z="target",
    color="target",
)

In [215]:
# The five rows with the smallest (most negative) target.
df.sort_values(by="target", ascending=True).iloc[:5]

Unnamed: 0,target,eta,max_depth,n_estimators
467,-0.47431,0.358872,1,39
1626,-0.471844,0.350964,1,44
1713,-0.471588,0.353713,1,44
335,-0.469578,0.358978,1,43
2588,-0.469548,0.344518,1,44


In [84]:
# Column-wise max and min over the 40 rows with the smallest target.
(
    df.sort_values(by="target", ascending=True)
    .iloc[:40]
    .agg(("max", "min"))
)

Unnamed: 0,target,eta,max_depth,n_estimators
max,-0.429555,0.467322,3,44
min,-0.47431,0.083371,1,13


## Diabetes with symbolic transformation

### load data

In [216]:
# Log of the R^2 run for the section "with symbolic transformation"
# (presumably what the "let" suffix stands for -- TODO confirm). The path is
# resolved against the current working directory.
jsn_path = Path().resolve().joinpath("Logs", "Diabetes_r2_let.json")
# Read as JSON Lines (one record per line) and discard the "datetime" field.
data = pd.read_json(jsn_path, lines=True).drop(columns=["datetime"])

### convert data

In [217]:
# Expand the per-record "params" entries into a flat table, one column per key.
params = pd.json_normalize(data["params"])

In [218]:
# Same preparation as a single chain: join target and parameters, keep records
# with a positive logged target, negate the target, and cast the two count-like
# parameters to integers.
df = (
    pd.concat([data[["target"]], params], axis="columns")
    .query("target > 0")
    .assign(target=lambda frame: frame["target"] * -1)
    .astype({"max_depth": "int", "n_estimators": "int"})
)

### plot

In [222]:
# Subsets of `df` below a looser (df1) and a stricter (df2) cut-off on the
# negated target.
df1 = df[df["target"] < -0.35]
df2 = df[df["target"] < -0.41]

In [220]:
# 3-D scatter of the three parameters for the rows in df2, coloured by target.
px.scatter_3d(
    df2,
    x="eta",
    y="max_depth",
    z="n_estimators",
    color="target",
)

In [221]:
# Target against n_estimators for the rows in df1 (colour repeats the target).
px.scatter(
    df1,
    x="n_estimators",
    y="target",
    color="target",
)

In [225]:
# Target plotted over the (eta, max_depth) plane for the rows in df2.
px.scatter_3d(
    df2,
    x="eta",
    y="max_depth",
    z="target",
    color="target",
)

In [224]:
# The five rows with the smallest (most negative) target.
df.sort_values(by="target", ascending=True).iloc[:5]

Unnamed: 0,target,eta,max_depth,n_estimators
1417,-0.471537,0.342547,1,34
2505,-0.469232,0.363423,1,45
2111,-0.468492,0.356572,1,50
2389,-0.464336,0.110361,2,64
1434,-0.464152,0.367863,1,17


# MAE

## Boston without symbolic transformation

### load data

In [122]:
# Log of the MAE run for the section "without symbolic transformation"
# (presumably what the "leo" suffix stands for -- TODO confirm). The path is
# resolved against the current working directory.
jsn_path = Path().resolve().joinpath("Logs", "Boston_mae_leo.json")
# Read as JSON Lines (one record per line) and discard the "datetime" field.
data = pd.read_json(jsn_path, lines=True).drop(columns=["datetime"])

### convert data

In [123]:
# Expand the per-record "params" entries into a flat table, one column per key.
params = pd.json_normalize(data["params"])

In [124]:
# Negate the logged target, place it next to the parameters, and cast the two
# count-like parameters to integers. No rows are filtered out here.
df = (
    pd.concat([data[["target"]].mul(-1), params], axis="columns")
    .astype({"max_depth": "int", "n_estimators": "int"})
)

### plot

In [131]:
# Subsets of `df` below a looser (df1) and a stricter (df2) cut-off on the
# sign-flipped target.
df1 = df[df["target"] < 3]
df2 = df[df["target"] < 2.4]

In [132]:
# (largest, smallest) max_depth among the rows in df2.
(df2["max_depth"].max(), df2["max_depth"].min())

(8, 7)

In [133]:
# (largest, smallest) eta among the rows in df2.
(df2["eta"].max(), df2["eta"].min())

(0.44301691460254106, 0.29689686192874104)

In [134]:
# (largest, smallest) n_estimators among the rows in df2.
(df2["n_estimators"].max(), df2["n_estimators"].min())

(17, 16)

In [135]:
# 3-D scatter of the three parameters for the rows in df2, coloured by target.
px.scatter_3d(
    df2,
    x="eta",
    y="max_depth",
    z="n_estimators",
    color="target",
)

In [53]:
# Target against max_depth for every row of df (no cut-off applied).
px.scatter(
    df,
    x="max_depth",
    y="target",
    color="target",
)

In [130]:
# The five rows with the smallest target.
df.sort_values(by="target", ascending=True).iloc[:5]

Unnamed: 0,target,eta,max_depth,n_estimators
2596,2.354376,0.443017,7,16
2571,2.388027,0.297024,7,16
2544,2.388072,0.296929,7,16
2969,2.388076,0.29692,7,16
2827,2.388087,0.296897,7,16


## Boston with symbolic transformation

### load data

In [191]:
# Log of the MAE run for the section "with symbolic transformation"
# (presumably what the "let" suffix stands for -- TODO confirm). The path is
# resolved against the current working directory.
jsn_path = Path().resolve().joinpath("Logs", "Boston_mae_let.json")
# Read as JSON Lines (one record per line) and discard the "datetime" field.
data = pd.read_json(jsn_path, lines=True).drop(columns=["datetime"])

### convert data

In [192]:
# Expand the per-record "params" entries into a flat table, one column per key.
params = pd.json_normalize(data["params"])

In [193]:
# Negate the logged target, place it next to the parameters, and cast the two
# count-like parameters to integers. No rows are filtered out here.
df = (
    pd.concat([data[["target"]].mul(-1), params], axis="columns")
    .astype({"max_depth": "int", "n_estimators": "int"})
)

### plot

In [205]:
# Boolean row masks, one per parameter, each selecting a closed interval
# (both end points included).
etb = df["eta"].between(0.15, 0.2, inclusive="both")
mdb = df["max_depth"].between(10, 16, inclusive="both")
neb = df["n_estimators"].between(24, 31, inclusive="both")

In [209]:
# Two cut-offs on the sign-flipped target ...
df1 = df[df["target"] < 3]
df2 = df[df["target"] < 2.2]
# ... and the rows that fall inside all three parameter windows at once.
df3 = df.loc[etb & mdb & neb]

In [216]:
# Rows inside the parameter window (df3): eta and n_estimators on the x and z
# axes, target on the y axis and as the colour.
px.scatter_3d(
    df3,
    x="eta",
    y="target",
    z="n_estimators",
    color="target",
)

In [214]:
# eta against max_depth for the rows inside the parameter window (df3),
# coloured by target.
px.scatter(
    df3,
    x="max_depth",
    y="eta",
    color="target",
)

In [197]:
# The ten rows with the smallest target.
df.sort_values(by="target", ascending=True).iloc[:10]

Unnamed: 0,target,eta,max_depth,n_estimators
3303,2.138854,0.197925,12,26
3722,2.164101,0.157559,12,27
3351,2.165997,0.196006,10,25
3793,2.1697,0.158383,15,28
2863,2.175872,0.157499,13,26
3523,2.180057,0.15198,10,31
3442,2.183367,0.197942,11,27
3208,2.184918,0.197828,11,28
3601,2.185337,0.174928,16,30
3711,2.185575,0.163402,12,27
