In [1]:
### Libraries

In [4]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt 
import plotly.express as px

# $R^2$ Metric

## Boston without symbolic transformation

### load data

In [162]:
jsn_path = Path().resolve() / "Logs" / "Boston_r2_leo.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [163]:
params = pd.json_normalize(data['params'])

In [164]:
df = pd.concat([data[["target"]], params], axis = 1).query("target > 0")
df.target = df.target*-1
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [165]:
df1 = df.query("target < -0.83")
df2 = df.query("target < -0.8")
df3 = df.query("target < -0.7")

In [167]:
px.scatter_3d(df2, x = "max_depth", y = "n_estimators", z = "target", color = "target")

In [168]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [169]:
df.sort_values("target", ascending = True).head()

Unnamed: 0,target,max_depth,n_estimators
284,-0.843814,18,19
312,-0.843814,18,19
941,-0.843814,18,19
486,-0.843814,18,19
493,-0.843814,18,19


In [170]:
df.drop_duplicates().sort_values("target", ascending = True).head(50).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,-0.841666,50,25
min,-0.843814,14,19


## Boston with Symbolic Transformation

### load data

In [179]:
jsn_path = Path().resolve() / "Logs" / "Boston_r2_let.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [180]:
params = pd.json_normalize(data['params'])

In [181]:
df = pd.concat([data[["target"]], params], axis = 1).query("target > 0")
df.target = df.target*-1
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")
df = df.drop_duplicates()

### plot

In [182]:
df1 = df.query("target < -0.8")
df2 = df.query("target < -0.7")

In [183]:
px.scatter_3d(df1, x = "max_depth", y = "n_estimators", z = "target", color = "target")

In [184]:
px.scatter(df1.query("n_estimators == 67"), x="max_depth", y="target", color="target", hover_data = ["n_estimators"])

In [177]:
df.sort_values("target", ascending = True).head()

Unnamed: 0,target,max_depth,n_estimators
324,-0.847269,17,67
168,-0.847173,20,67
367,-0.847096,14,67
83,-0.847096,16,67
884,-0.846893,34,67


In [178]:
df1.drop_duplicates().sort_values("target", ascending = True).head(5).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,-0.846893,39,67
min,-0.847269,14,67


# MAE

## Boston without symbolic transformation

### load data

In [57]:
jsn_path = Path().resolve() / "Logs" / "Boston_mae_leo.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [58]:
params = pd.json_normalize(data['params'])

In [59]:
df = pd.concat([data[["target"]]*-1, params], axis = 1)
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [60]:
df1 = df.query("target < 2.9")
df2 = df.query("target < 2.45")

In [68]:
px.scatter_3d(df2, x = "n_estimators", y = "max_depth", z = "target", color = "target")

In [62]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [69]:
df.sort_values("target", ascending = True).head()

Unnamed: 0,target,max_depth,n_estimators
1640,2.425312,14,66
1648,2.425312,14,66
1646,2.425312,14,66
1644,2.425312,14,66
1643,2.425312,14,66


In [64]:
df.sort_values("target", ascending = True).head(100).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,2.425959,14,70
min,2.425312,14,66


## Boston with symbolic transformation

### load data

In [47]:
jsn_path = Path().resolve() / "Logs" / "Boston_mae_let.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [48]:
params = pd.json_normalize(data['params'])

In [49]:
df = pd.concat([data[["target"]]*-1, params], axis = 1)
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [50]:
df1 = df.query("target < 2.9")
df2 = df.query("target < 2.45")

In [56]:
px.scatter_3d(df2, x = "n_estimators", y = "max_depth", z = "target", color = "target")

In [52]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [53]:
df.sort_values("target", ascending = True).head(100)

Unnamed: 0,target,max_depth,n_estimators
481,2.362334,16,67
561,2.362334,16,67
560,2.362334,16,67
554,2.362334,16,67
551,2.362334,16,67
...,...,...,...
448,2.362334,16,67
446,2.362334,16,67
433,2.362334,16,67
430,2.362334,16,67


In [54]:
df.drop_duplicates().sort_values("target", ascending = True).head(100).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,2.362334,16,67
min,2.362334,16,67
