In [1]:
### Libraries

In [3]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt 
import plotly.express as px

# $R^2$ Metric

## Boston without symbolic transformation

### load data

In [162]:
jsn_path = Path().resolve() / "Logs" / "Boston_r2_leo.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [163]:
params = pd.json_normalize(data['params'])

In [164]:
df = pd.concat([data[["target"]], params], axis = 1).query("target > 0")
df.target = df.target*-1
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [165]:
df1 = df.query("target < -0.83")
df2 = df.query("target < -0.8")
df3 = df.query("target < -0.7")

In [167]:
px.scatter_3d(df2, x = "max_depth", y = "n_estimators", z = "target", color = "target")

In [168]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [169]:
df.sort_values("target", ascending = True).head()

Unnamed: 0,target,max_depth,n_estimators
284,-0.843814,18,19
312,-0.843814,18,19
941,-0.843814,18,19
486,-0.843814,18,19
493,-0.843814,18,19


In [170]:
df.drop_duplicates().sort_values("target", ascending = True).head(50).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,-0.841666,50,25
min,-0.843814,14,19


## Boston with Symbolic Transformation

### load data

In [179]:
jsn_path = Path().resolve() / "Logs" / "Boston_r2_let.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [180]:
params = pd.json_normalize(data['params'])

In [181]:
df = pd.concat([data[["target"]], params], axis = 1).query("target > 0")
df.target = df.target*-1
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")
df = df.drop_duplicates()

### plot

In [182]:
df1 = df.query("target < -0.8")
df2 = df.query("target < -0.7")

In [183]:
px.scatter_3d(df1, x = "max_depth", y = "n_estimators", z = "target", color = "target")

In [184]:
px.scatter(df1.query("n_estimators == 67"), x="max_depth", y="target", color="target", hover_data = ["n_estimators"])

In [177]:
df.sort_values("target", ascending = True).head()

Unnamed: 0,target,max_depth,n_estimators
324,-0.847269,17,67
168,-0.847173,20,67
367,-0.847096,14,67
83,-0.847096,16,67
884,-0.846893,34,67


In [178]:
df1.drop_duplicates().sort_values("target", ascending = True).head(5).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,-0.846893,39,67
min,-0.847269,14,67


# MAE

## Boston without symbolic transformation

### load data

In [None]:
jsn_path = Path().resolve() / "Logs" / "Boston_mae_leo.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [None]:
params = pd.json_normalize(data['params'])

In [None]:
df = pd.concat([data[["target"]]*-1, params], axis = 1)
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [60]:
df1 = df.query("target < 2.9")
df2 = df.query("target < 2.45")

In [68]:
px.scatter_3d(df2, x = "n_estimators", y = "max_depth", z = "target", color = "target")

In [62]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [69]:
df.sort_values("target", ascending = True).head()

Unnamed: 0,target,max_depth,n_estimators
1640,2.425312,14,66
1648,2.425312,14,66
1646,2.425312,14,66
1644,2.425312,14,66
1643,2.425312,14,66


In [64]:
df.sort_values("target", ascending = True).head(100).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,2.425959,14,70
min,2.425312,14,66


## Boston with symbolic transformation

### load data

In [47]:
jsn_path = Path().resolve() / "Logs" / "Boston_mae_let.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [48]:
params = pd.json_normalize(data['params'])

In [49]:
df = pd.concat([data[["target"]]*-1, params], axis = 1)
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [50]:
df1 = df.query("target < 2.9")
df2 = df.query("target < 2.45")

In [56]:
px.scatter_3d(df2, x = "n_estimators", y = "max_depth", z = "target", color = "target")

In [52]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [53]:
df.sort_values("target", ascending = True).head(100)

Unnamed: 0,target,max_depth,n_estimators
481,2.362334,16,67
561,2.362334,16,67
560,2.362334,16,67
554,2.362334,16,67
551,2.362334,16,67
...,...,...,...
448,2.362334,16,67
446,2.362334,16,67
433,2.362334,16,67
430,2.362334,16,67


In [54]:
df.drop_duplicates().sort_values("target", ascending = True).head(100).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,2.362334,16,67
min,2.362334,16,67


# MAPE

## Boston without symbolic transformation

### load data

In [90]:
jsn_path = Path().resolve() / "Logs" / "Boston_mape_leo.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [91]:
params = pd.json_normalize(data['params'])

In [92]:
df = pd.concat([data[["target"]]*-1, params], axis = 1)
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [93]:
df1 = df.query("target < 12")
df2 = df.query("target < 8")

In [94]:
px.scatter_3d(df1, x = "n_estimators", y = "max_depth", z = "target", color = "target")

In [73]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [74]:
df.sort_values("target", ascending = True).head()

Unnamed: 0,target,max_depth,n_estimators
385,11.637422,14,66
239,11.637422,14,66
503,11.642299,14,70
457,11.642299,14,70
243,11.642299,14,70


In [75]:
df.sort_values("target", ascending = True).head(100).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,11.758568,30,125
min,11.637422,14,46


## Boston with symbolic transformation

### load data

In [149]:
jsn_path = Path().resolve() / "Logs" / "Boston_mape_let.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [150]:
params = pd.json_normalize(data['params'])

In [151]:
df = pd.concat([data[["target"]]*-1, params], axis = 1)
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [152]:
df1 = df.query("target < 13")
df2 = df.query("target < 10")

In [153]:
px.scatter_3d(df1, x = "n_estimators", y = "max_depth", z = "target", color = "target")

In [154]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [147]:
df.sort_values("target", ascending = True).head(100)

Unnamed: 0,target,max_depth,n_estimators
265,10.956972,16,66
279,10.956972,16,66
445,10.956972,16,66
446,10.956972,16,66
267,10.956972,16,66
...,...,...,...
1454,10.969416,16,109
1423,10.969416,16,109
1403,10.969416,16,109
1455,10.969416,16,109


In [148]:
df.drop_duplicates().sort_values("target", ascending = True).head(100).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,11.029254,35,118
min,10.956972,15,65


# RMSPE

## Boston without symbolic transformation

### load data

In [122]:
jsn_path = Path().resolve() / "Logs" / "Boston_rmspe_leo.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [123]:
params = pd.json_normalize(data['params'])

In [124]:
df = pd.concat([data[["target"]]*-1, params], axis = 1)
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [125]:
df1 = df.query("target < 24")
df2 = df.query("target < 8")

In [126]:
px.scatter_3d(df1, x = "n_estimators", y = "max_depth", z = "target", color = "target")

In [119]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [120]:
df.sort_values("target", ascending = True).head()

Unnamed: 0,target,max_depth,n_estimators
681,18.377696,18,20
193,18.377696,18,20
739,18.377696,18,20
271,18.377696,18,20
167,18.377696,18,20


In [121]:
df.sort_values("target", ascending = True).head(100).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,18.76259,40,26
min,18.377696,18,19


## Boston with symbolic transformation

### load data

In [127]:
jsn_path = Path().resolve() / "Logs" / "Boston_rmspe_let.json"
data = pd.read_json(jsn_path , lines = True).drop(["datetime"], axis = 1)

### convert data

In [128]:
params = pd.json_normalize(data['params'])

In [129]:
df = pd.concat([data[["target"]]*-1, params], axis = 1)
df[["max_depth", "n_estimators"]] = df[["max_depth", "n_estimators"]].astype("int")

### plot

In [130]:
df1 = df.query("target < 19")
df2 = df.query("target < 15")

In [131]:
px.scatter_3d(df1, x = "n_estimators", y = "max_depth", z = "target", color = "target")

In [132]:
px.scatter(df1, x="max_depth", y="n_estimators", color="target")

In [133]:
df.sort_values("target", ascending = True).head(100)

Unnamed: 0,target,max_depth,n_estimators
329,17.375808,15,66
389,17.375808,15,66
361,17.375808,15,66
390,17.375808,15,66
391,17.375808,15,66
...,...,...,...
353,17.375808,15,66
468,17.375808,15,66
406,17.375808,15,66
322,17.375808,15,66


In [134]:
df.drop_duplicates().sort_values("target", ascending = True).head(100).agg(("max", "min"))

Unnamed: 0,target,max_depth,n_estimators
max,17.56161,34,137
min,17.375808,14,22
