# Training

- To compare the results of different hyperparameter configurations in detail, a system was developed on top of GridSearchCV[*](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html) and Pipeline[*](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html) that runs a single configuration at a time and saves its result to a CSV file. A separate CSV file is generated for each model.
    - Another advantage of this system is that the program execution can be stopped at any time without losing the already trained configurations
    - The only downside is that the configurations are not executed in parallel, but the dataset is relatively small, so each configuration does not take much time to run
- 

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn import preprocessing
from sklearn.decomposition import PCA

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.ensemble import GradientBoostingRegressor

from utils.Combinations import Step, Pipe, extract_combinations
from utils.Search import grid_search, print_results, best_hyperparameters

from utils.Custom_Steps import HighCorrFeaturesRemover, OnlyFormula, OnlyProperties

In [2]:
# --- Notebook-wide configuration ---

# Seed passed to the train/test split, PCA and the tree-based estimators below.
RANDOM_STATE = 42

# Folder prefixes. Later cells build file paths by plain string concatenation,
# so the trailing slash is required.
DATA_FOLDER, OUTPUT_FOLDER = "data/", "outputs/"

# Matplotlib style sheet applied to the figures of this notebook.
plt.style.use("seaborn-v0_8")

---
---
## Data Load

In [3]:
# Read the two training tables and place them side by side (column-wise).
# "critical_temp" is removed from the formula table before concatenating, so the
# merged frame only carries the copy coming from train.csv.
# NOTE(review): axis=1 concatenation aligns rows on the index, so both files are
# assumed to list the same samples in the same order - confirm.
formula_df = pd.read_csv(DATA_FOLDER + "formula_train.csv").drop(columns=["critical_temp"])
properties_df = pd.read_csv(DATA_FOLDER + "train.csv")

df = pd.concat([formula_df, properties_df], axis=1)
print("Shapes of Properties+Formula df: ", df.shape)

Shapes of Properties+Formula df:  (17010, 169)


In [4]:
# Remove the "material" column from the merged frame.
# errors="ignore" keeps this cell idempotent: because it reassigns `df`, re-running
# it on the same kernel would otherwise raise KeyError (the column is already gone).
df = df.drop(columns="material", errors="ignore")

---
---
## Split

In [5]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split reproducible.
target_col = "critical_temp"
train, test = train_test_split(df, test_size=0.2, random_state=RANDOM_STATE)

# Features are every column except the target; the target is kept as a
# single-column DataFrame (double brackets), not a Series.
X_train, y_train = train.drop(columns=[target_col]), train[[target_col]]
X_test, y_test = test.drop(columns=[target_col]), test[[target_col]]

# Displayed as the cell output for a quick sanity check of the shapes.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((13608, 167), (3402, 167), (13608, 1), (3402, 1))

---
---
## Preprocessing

In [6]:
# Reusable preprocessing building blocks for the model pipelines defined below.
# Each Step pairs a tag with a transformer and, optionally, a dict of candidate
# values per parameter. In the result tables the tag shows up in the "tag" column
# and as the "<tag>__<param>" column prefix.

# -- Custom steps from utils.Custom_Steps --
# NOTE(review): presumably column filters (drop highly correlated features / keep
# only the formula or only the properties columns) - confirm in that module.
high_corr_features_remover_step = Step(
    "high_corr_features_remover",
    HighCorrFeaturesRemover(),
    {"corr_threshold": [0.85, 0.95, 0.99]},
)
only_properties_step = Step("only_properties", OnlyProperties())
only_formula_step = Step("only_formula", OnlyFormula())

# -- Per-feature (column-wise) scalers --
std_step = Step("std", preprocessing.StandardScaler())
minmax_step = Step("minmax", preprocessing.MinMaxScaler())

# -- Per-sample (row-wise) normalisers: one Step for each Normalizer norm --
l1_step, l2_step, lmax_step = (
    Step(tag, preprocessing.Normalizer(norm=norm))
    for tag, norm in (("l1", "l1"), ("l2", "l2"), ("lmax", "max"))
)

# -- Dimensionality reduction --
# A float n_components in (0, 1) asks PCA to keep enough components to explain
# that fraction of the variance.
pca_step = Step(
    "pca",
    PCA(random_state=RANDOM_STATE),
    {"n_components": [0.85, 0.95, 0.99]},
)

---
---
# Models

---
## Linear Regression

In [7]:
estimator_tag = "linear_regression"
linear_regression_step = Step(estimator_tag, LinearRegression())

# Preprocessing chains to place in front of the regressor, in evaluation order.
# An empty tuple means "estimator only".
preprocessing_chains = [
    (),
    (l2_step,),
    # min-max scaling
    (minmax_step,),
    (high_corr_features_remover_step, minmax_step),
    # standardisation, optionally followed by PCA
    (std_step,),
    (std_step, pca_step),
    (l2_step, std_step, pca_step),
    # row normalisation followed by a scaler
    (l1_step, minmax_step),
    (l2_step, minmax_step),
    (lmax_step, minmax_step),
    (lmax_step, std_step),
    # correlated-feature removal first
    (high_corr_features_remover_step, std_step, pca_step),
    (high_corr_features_remover_step, lmax_step, minmax_step),
    # restricted feature subsets
    (only_properties_step,),
    (only_formula_step,),
]
combinations = extract_combinations(
    *(Pipe(*chain, linear_regression_step) for chain in preprocessing_chains)
)

# Per-model results file shared by the search and the two reporting helpers.
results_csv = OUTPUT_FOLDER + estimator_tag + "_output.csv"

# Evaluates every combination; the captured log shows that combinations already
# present in the results file are skipped.
# NOTE(review): the test split is passed to the search - if configurations are
# ranked on it, the reported best scores are optimistically biased. Confirm.
grid_search(OUTPUT_FOLDER, X_train, y_train, X_test, y_test, combinations, estimator_tag=estimator_tag)

# NOTE(review): the numeric arguments presumably limit how many top rows are
# considered / printed - confirm in utils.Search.
display(best_hyperparameters(results_csv, 20))

print_results(results_csv, 15)


Combination 1/31  |  linear_regression
  ==> Already done. Skipped.

Combination 2/31  |  l2 + linear_regression
  ==> Already done. Skipped.

Combination 3/31  |  minmax + linear_regression
  ==> Already done. Skipped.

Combination 4/31  |  high_corr_features_remover + minmax + linear_regression
  ==> Already done. Skipped.

Combination 5/31  |  high_corr_features_remover + minmax + linear_regression
  ==> Already done. Skipped.

Combination 6/31  |  high_corr_features_remover + minmax + linear_regression
  ==> Already done. Skipped.

Combination 7/31  |  std + linear_regression
  ==> Already done. Skipped.

Combination 8/31  |  std + pca + linear_regression
  ==> Already done. Skipped.

Combination 9/31  |  std + pca + linear_regression
  ==> Already done. Skipped.

Combination 10/31  |  std + pca + linear_regression
  ==> Already done. Skipped.

Combination 11/31  |  l2 + std + pca + linear_regression
  ==> Already done. Skipped.

Combination 12/31  |  l2 + std + pca + linear_regre

{'tag': ['minmax + linear_regression',
  'linear_regression',
  'std + linear_regression',
  'high_corr_features_remover + minmax + linear_regression',
  'l1 + minmax + linear_regression',
  'l2 + linear_regression'],
 'high_corr_features_remover__corr_threshold': [nan, 0.99],
 'pca__n_components': [nan]}

Unnamed: 0,tag,R2,MSE,high_corr_features_remover__corr_threshold,pca__n_components
2,minmax + linear_regression,0.7538,280.1071,,
0,linear_regression,0.7538,280.1071,,
4,std + linear_regression,0.7538,280.1704,,
16,high_corr_features_remover + minmax + linear_regression,0.7536,280.398,0.99,
7,l1 + minmax + linear_regression,0.7491,285.4663,,
1,l2 + linear_regression,0.7396,296.326,,
8,l2 + minmax + linear_regression,0.7396,296.326,,
3,high_corr_features_remover + minmax + linear_regression,0.7384,297.6406,0.95,
13,only_properties + linear_regression,0.7264,311.3427,,
28,high_corr_features_remover + std + pca + linear_regression,0.706,334.5391,0.99,0.99


---
## Random Forest


In [8]:
estimator_tag = "random_forest"

# Candidate values per hyperparameter; single-element lists pin a value.
random_forest_grid = {
    "max_features": [0.2, 0.4, 0.6, 0.8],
    "max_samples": [0.66],
    "n_estimators": [200],
    "max_depth": [25],
    "ccp_alpha": [0.0, 0.01],
    "criterion": ["squared_error"],
}
random_forest_step = Step(
    estimator_tag,
    RandomForestRegressor(n_jobs=-1, random_state=RANDOM_STATE),
    random_forest_grid,
)

# Preprocessing chains to place in front of the forest, in evaluation order.
# An empty tuple means "estimator only".
preprocessing_chains = [
    (),
    (l2_step,),
    # min-max scaling
    (minmax_step,),
    (high_corr_features_remover_step, minmax_step),
    # standardisation, optionally followed by PCA
    (std_step,),
    (std_step, pca_step),
    (l2_step, std_step, pca_step),
    # row normalisation followed by a scaler
    (l1_step, minmax_step),
    (l2_step, minmax_step),
    (lmax_step, minmax_step),
    (lmax_step, std_step),
    # correlated-feature removal first
    (high_corr_features_remover_step, std_step, pca_step),
    (high_corr_features_remover_step, lmax_step, minmax_step),
    # restricted feature subsets, raw
    (only_properties_step,),
    (only_formula_step,),
    # restricted feature subsets, min-max scaled
    (only_properties_step, minmax_step),
    (only_formula_step, minmax_step),
    # restricted feature subsets, row-normalised then min-max scaled
    (only_properties_step, lmax_step, minmax_step),
    (only_formula_step, lmax_step, minmax_step),
]
combinations = extract_combinations(
    *(Pipe(*chain, random_forest_step) for chain in preprocessing_chains)
)

# Per-model results file shared by the search and the two reporting helpers.
results_csv = OUTPUT_FOLDER + estimator_tag + "_output.csv"

# Evaluates every combination; the captured log shows that combinations already
# present in the results file are skipped.
grid_search(OUTPUT_FOLDER, X_train, y_train, X_test, y_test, combinations, estimator_tag=estimator_tag)

# The summary below can list values that are not in the grid above (e.g. other
# max_depth values): the results file appears to accumulate rows from earlier
# runs that used different grids.
display(best_hyperparameters(results_csv, 20))

print_results(results_csv, 15)


Combination 1/280  |  random_forest
  ==> Already done. Skipped.

Combination 2/280  |  random_forest
  ==> Already done. Skipped.

Combination 3/280  |  random_forest
  ==> Already done. Skipped.

Combination 4/280  |  random_forest
  ==> Already done. Skipped.

Combination 5/280  |  random_forest
  ==> Already done. Skipped.

Combination 6/280  |  random_forest
  ==> Already done. Skipped.

Combination 7/280  |  random_forest
  ==> Already done. Skipped.

Combination 8/280  |  random_forest
  ==> Already done. Skipped.

Combination 9/280  |  l2 + random_forest
  ==> Already done. Skipped.

Combination 10/280  |  l2 + random_forest
  ==> Already done. Skipped.

Combination 11/280  |  l2 + random_forest
  ==> Already done. Skipped.

Combination 12/280  |  l2 + random_forest
  ==> Already done. Skipped.

Combination 13/280  |  l2 + random_forest
  ==> Already done. Skipped.

Combination 14/280  |  l2 + random_forest
  ==> Already done. Skipped.

Combination 15/280  |  l2 + random_fores

{'tag': ['high_corr_features_remover + lmax + minmax + random_forest',
  'high_corr_features_remover + minmax + random_forest',
  'lmax + minmax + random_forest',
  'lmax + std + random_forest',
  'high_corr_features_remover + lmax + std + random_forest',
  'lmax + random_forest',
  'std + random_forest',
  'random_forest',
  'minmax + random_forest',
  'minmax + lmax + random_forest'],
 'random_forest__max_samples': [0.66],
 'random_forest__criterion': ['squared_error'],
 'random_forest__n_estimators': [200],
 'random_forest__max_depth': [25, 35, 50],
 'random_forest__max_features': [0.6, 0.2, 0.7, 0.4, 0.3, 0.5, 0.8, 0.9, 0.1],
 'pca__n_components': [nan],
 'high_corr_features_remover__corr_threshold': [0.99, 0.95, nan, 0.85],
 'random_forest__max_leaf_nodes': [nan, "'None'"],
 'features_remover__corr_threshold': [nan],
 'random_forest__ccp_alpha': [0.0, nan, 0.01, 0.001]}

Unnamed: 0,tag,R2,MSE,random_forest__max_samples,random_forest__criterion,random_forest__n_estimators,random_forest__max_depth,random_forest__max_features,pca__n_components,high_corr_features_remover__corr_threshold,random_forest__max_leaf_nodes,features_remover__corr_threshold,random_forest__ccp_alpha
862,high_corr_features_remover + lmax + minmax + random_forest,0.9278,82.1303,0.66,squared_error,200,25,0.6,,0.99,,,0.0
418,high_corr_features_remover + minmax + random_forest,0.9278,82.18,0.66,squared_error,200,25,0.2,,0.95,,,0.0
108,lmax + minmax + random_forest,0.9275,82.5286,0.66,squared_error,200,25,0.7,,,,,
860,high_corr_features_remover + lmax + minmax + random_forest,0.9275,82.5364,0.66,squared_error,200,25,0.4,,0.99,,,0.0
161,lmax + std + random_forest,0.9274,82.5686,0.66,squared_error,200,25,0.6,,,,,
598,lmax + std + random_forest,0.9274,82.5686,0.66,squared_error,200,25,0.6,,,,,0.0
167,high_corr_features_remover + lmax + std + random_forest,0.9274,82.578,0.66,squared_error,200,25,0.3,,0.95,,,
187,high_corr_features_remover + lmax + minmax + random_forest,0.9274,82.6101,0.66,squared_error,200,25,0.5,,0.95,,,
424,high_corr_features_remover + minmax + random_forest,0.9273,82.7082,0.66,squared_error,200,25,0.4,,0.95,,,0.0
160,lmax + std + random_forest,0.9272,82.8147,0.66,squared_error,200,25,0.5,,,,,


---
## XGBoost

[Documentation](https://xgboost.readthedocs.io/en/stable/parameter.html)

In [9]:
estimator_tag = "xg_boost"

# XGBoost regressor plus the candidate values to explore for each hyperparameter.
# Single-element lists pin a value; only `gamma` and `tree_method` vary here.
xg_boost_step = Step(
    estimator_tag,
    XGBRegressor(n_jobs=-1, random_state=RANDOM_STATE),
    {
        "n_estimators": [450],
        "learning_rate": [0.15],  # library default is 0.3
        "max_depth": [6],  # library default is 6
        "min_child_weight": [3],  # library default is 1
        "gamma": [0.0, 0.5],
        "subsample": [1.0],
        "colsample_bytree": [1],
        "reg_lambda": [1.0],  # 0.01, 0.1,
        "reg_alpha": [0.1],  # 0, 0.01,
        "tree_method": ["exact", "approx", "auto"],
    },
)

# Preprocessing variants evaluated in front of the regressor.
combinations = extract_combinations(
    Pipe(xg_boost_step),
    #
    Pipe(minmax_step, xg_boost_step),
    Pipe(high_corr_features_remover_step, minmax_step, xg_boost_step),
    #
    Pipe(std_step, xg_boost_step),
    Pipe(std_step, pca_step, xg_boost_step),
    #
    Pipe(high_corr_features_remover_step, std_step, pca_step, xg_boost_step),
    #
    Pipe(only_properties_step, xg_boost_step),
    Pipe(only_formula_step, xg_boost_step),
)
# Evaluates every combination; the captured log shows that combinations already
# present in the results file are skipped.
grid_search(OUTPUT_FOLDER, X_train, y_train, X_test, y_test, combinations, estimator_tag=estimator_tag)

# Wrapped in display(): a bare expression in the middle of a cell is evaluated and
# discarded, so the summary was never shown (the other model cells already do this).
display(best_hyperparameters(OUTPUT_FOLDER + estimator_tag + "_output.csv", 10))

print_results(OUTPUT_FOLDER + estimator_tag + "_output.csv", 15)


Combination 1/120  |  xg_boost
  ==> Already done. Skipped.

Combination 2/120  |  xg_boost
  ==> Already done. Skipped.

Combination 3/120  |  xg_boost
  ==> Already done. Skipped.

Combination 4/120  |  xg_boost
  ==> Already done. Skipped.

Combination 5/120  |  xg_boost
  ==> Already done. Skipped.

Combination 6/120  |  xg_boost
  ==> Already done. Skipped.

Combination 7/120  |  minmax + xg_boost
  ==> Already done. Skipped.

Combination 8/120  |  minmax + xg_boost
  ==> Already done. Skipped.

Combination 9/120  |  minmax + xg_boost
  ==> Already done. Skipped.

Combination 10/120  |  minmax + xg_boost
  ==> Already done. Skipped.

Combination 11/120  |  minmax + xg_boost
  ==> Already done. Skipped.

Combination 12/120  |  minmax + xg_boost
  ==> Already done. Skipped.

Combination 13/120  |  high_corr_features_remover + minmax + xg_boost
  ==> Already done. Skipped.

Combination 14/120  |  high_corr_features_remover + minmax + xg_boost
  ==> Already done. Skipped.

Combinatio

Unnamed: 0,tag,R2,MSE,xg_boost__n_estimators,xg_boost__learning_rate,xg_boost__max_depth,xg_boost__min_child_weight,xg_boost__gamma,xg_boost__subsample,xg_boost__colsample_bytree,xg_boost__reg_lambda,xg_boost__reg_alpha,high_corr_features_remover__corr_threshold,pca__n_components,xg_boost__tree_method
1739,xg_boost,0.9314,78.0905,450,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,auto
1738,xg_boost,0.9314,78.0905,450,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,approx
1697,xg_boost,0.9314,78.0905,450,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,
1744,minmax + xg_boost,0.9314,78.0905,450,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,approx
1745,minmax + xg_boost,0.9314,78.0905,450,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,auto
1701,minmax + xg_boost,0.9314,78.0905,450,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,
1703,minmax + xg_boost,0.9313,78.1145,500,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,
1699,xg_boost,0.9313,78.1145,500,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,
1691,xg_boost,0.9311,78.4515,400,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,
1695,minmax + xg_boost,0.9311,78.4515,400,0.15,6,3,0.5,1.0,1.0,1.0,0.1,,,


---
## KNN

In [10]:
estimator_tag = "knr"

# Candidate values per hyperparameter; single-element lists pin a value.
knr_grid = {
    "n_neighbors": [5, 15],
    "weights": ["distance"],  # "uniform" is currently disabled
    "metric": ["cosine", "euclidean", "cityblock", "nan_euclidean"],
}
knr_step = Step(estimator_tag, KNeighborsRegressor(n_jobs=-1), knr_grid)

# Preprocessing chains to place in front of the regressor, in evaluation order.
# An empty tuple means "estimator only".
preprocessing_chains = [
    (),
    (l2_step,),
    # min-max scaling
    (minmax_step,),
    (high_corr_features_remover_step, minmax_step),
    # standardisation, optionally followed by PCA
    (std_step,),
    (std_step, pca_step),
    (l2_step, std_step, pca_step),
    # row normalisation followed by a scaler
    (l1_step, minmax_step),
    (l2_step, minmax_step),
    (lmax_step, minmax_step),
    (lmax_step, std_step),
    # correlated-feature removal first
    (high_corr_features_remover_step, std_step, pca_step),
    (high_corr_features_remover_step, lmax_step, std_step, pca_step),
    (high_corr_features_remover_step, lmax_step, minmax_step),
    (high_corr_features_remover_step, l2_step, minmax_step),
    # restricted feature subsets
    (only_properties_step,),
    (only_formula_step,),
    # restricted feature subsets with correlated-feature removal
    (only_properties_step, high_corr_features_remover_step),
    (only_formula_step, high_corr_features_remover_step),
]
combinations = extract_combinations(
    *(Pipe(*chain, knr_step) for chain in preprocessing_chains)
)

# Per-model results file shared by the search and the two reporting helpers.
results_csv = OUTPUT_FOLDER + estimator_tag + "_output.csv"

# Evaluates every combination; the captured log shows that combinations already
# present in the results file are skipped.
grid_search(OUTPUT_FOLDER, X_train, y_train, X_test, y_test, combinations, estimator_tag=estimator_tag)

# The summary can list values that are not in the grid above (e.g. other
# n_neighbors values): the results file appears to accumulate rows from earlier
# runs that used different grids.
display(best_hyperparameters(results_csv, 20))

print_results(results_csv, 15)


Combination 1/392  |  knr
  ==> Already done. Skipped.

Combination 2/392  |  knr
  ==> Already done. Skipped.

Combination 3/392  |  knr
  ==> Already done. Skipped.

Combination 4/392  |  knr
  ==> Already done. Skipped.

Combination 5/392  |  knr
  ==> Already done. Skipped.

Combination 6/392  |  knr
  ==> Already done. Skipped.

Combination 7/392  |  knr
  ==> Already done. Skipped.

Combination 8/392  |  knr
  ==> Already done. Skipped.

Combination 9/392  |  l2 + knr
  ==> Already done. Skipped.

Combination 10/392  |  l2 + knr
  ==> Already done. Skipped.

Combination 11/392  |  l2 + knr
  ==> Already done. Skipped.

Combination 12/392  |  l2 + knr
  ==> Already done. Skipped.

Combination 13/392  |  l2 + knr
  ==> Already done. Skipped.

Combination 14/392  |  l2 + knr
  ==> Already done. Skipped.

Combination 15/392  |  l2 + knr
  ==> Already done. Skipped.

Combination 16/392  |  l2 + knr
  ==> Already done. Skipped.

Combination 17/392  |  minmax + knr
  ==> Already done. 

{'tag': ['high_corr_features_remover + lmax + minmax + knr',
  'l2 + minmax + knr',
  'high_corr_features_remover + l2 + minmax + knr',
  'lmax + std + knr',
  'std + knr',
  'lmax + minmax + knr',
  'high_corr_features_remover + minmax + knr',
  'only_formula + high_corr_features_remover + knr',
  'only_formula + knr',
  'high_corr_features_remover + l1 + minmax + knr',
  'minmax + knr',
  'std + pca + knr',
  'l1 + minmax + knr',
  'high_corr_features_remover + std + pca + knr',
  'high_corr_features_remover + lmax + std + pca + knr',
  'l2 + std + pca + knr'],
 'knr__n_neighbors': [5, 6, 4, 15, 25],
 'knr__weights': ['distance'],
 'knr__metric': ['cityblock',
  'cosine',
  'nan_euclidean',
  'minkowski',
  'euclidean'],
 'high_corr_features_remover__corr_threshold': [0.95, 0.99, nan, 0.85],
 'pca__n_components': [nan, 0.85, 0.95, 0.99]}

Unnamed: 0,tag,R2,MSE,knr__n_neighbors,knr__weights,knr__metric,high_corr_features_remover__corr_threshold,pca__n_components
386,high_corr_features_remover + lmax + minmax + knr,0.9199,91.1676,5,distance,cityblock,0.95,
893,high_corr_features_remover + lmax + minmax + knr,0.9193,91.812,5,distance,cityblock,0.99,
582,high_corr_features_remover + lmax + minmax + knr,0.9192,91.9401,6,distance,cityblock,0.95,
578,high_corr_features_remover + lmax + minmax + knr,0.9188,92.4412,4,distance,cityblock,0.95,
258,l2 + minmax + knr,0.9185,92.7162,5,distance,cityblock,,
630,high_corr_features_remover + l2 + minmax + knr,0.9184,92.8081,5,distance,cityblock,0.95,
322,lmax + std + knr,0.9184,92.8403,5,distance,cityblock,,
909,high_corr_features_remover + l2 + minmax + knr,0.9184,92.844,5,distance,cityblock,0.99,
130,std + knr,0.9184,92.8489,5,distance,cityblock,,
290,lmax + minmax + knr,0.9183,93.0164,5,distance,cityblock,,


---
## SVR

In [11]:
estimator_tag = "svr"

# Candidate values per hyperparameter; single-element lists pin a value.
# Trailing comments keep the alternative values that are currently disabled.
svr_grid = {
    "kernel": ["rbf"],  # "sigmoid", "linear", "poly"
    "epsilon": [0.1, 1],  # 0, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0
    "tol": [0.0001, 0.001, 0.01, 0.1],  # 1e-5, 1e-4, 1e-3, 1e-2
    "C": [0.1, 1],  # 0.1, 1, 100, 500, 5, 20, 8, 9, 15
    "max_iter": [100000],
}
svr_step = Step(estimator_tag, SVR(), svr_grid)

# Preprocessing chains to place in front of the regressor, in evaluation order.
# Currently disabled chains: (), (l2_step,), (minmax_step,) and
# (high_corr_features_remover_step, lmax_step, minmax_step).
preprocessing_chains = [
    # standardisation, optionally after row normalisation, followed by PCA
    (std_step,),
    (std_step, pca_step),
    (l1_step, std_step, pca_step),
    (l2_step, std_step, pca_step),
    (lmax_step, std_step, pca_step),
    # row normalisation followed by standardisation
    (l1_step, std_step),
    (l2_step, std_step),
    (lmax_step, std_step),
    # correlated-feature removal first
    (high_corr_features_remover_step, std_step),
    (high_corr_features_remover_step, std_step, pca_step),
    # restricted feature subsets, raw and standardised
    (only_properties_step,),
    (only_formula_step,),
    (only_properties_step, std_step),
    (only_formula_step, std_step),
]
combinations = extract_combinations(
    *(Pipe(*chain, svr_step) for chain in preprocessing_chains)
)

# Per-model results file shared by the search and the two reporting helpers.
results_csv = OUTPUT_FOLDER + estimator_tag + "_output.csv"

# Evaluates every combination; the captured log shows that combinations already
# present in the results file are skipped.
grid_search(OUTPUT_FOLDER, X_train, y_train, X_test, y_test, combinations, estimator_tag=estimator_tag)

# The summary can list values that are not in the grid above (e.g. the "poly"
# kernel): the results file appears to accumulate rows from earlier runs that
# used different grids.
display(best_hyperparameters(results_csv, 20))

print_results(results_csv, 15)


Combination 1/512  |  std + svr
  ==> Already done. Skipped.

Combination 2/512  |  std + svr
  ==> Already done. Skipped.

Combination 3/512  |  std + svr
  ==> Already done. Skipped.

Combination 4/512  |  std + svr
  ==> Already done. Skipped.

Combination 5/512  |  std + svr
  ==> Already done. Skipped.

Combination 6/512  |  std + svr
  ==> Already done. Skipped.

Combination 7/512  |  std + svr
  ==> Already done. Skipped.

Combination 8/512  |  std + svr
  ==> Already done. Skipped.

Combination 9/512  |  std + svr
  ==> Already done. Skipped.

Combination 10/512  |  std + svr
  ==> Already done. Skipped.

Combination 11/512  |  std + svr
  ==> Already done. Skipped.

Combination 12/512  |  std + svr
  ==> Already done. Skipped.

Combination 13/512  |  std + svr
  ==> Already done. Skipped.

Combination 14/512  |  std + svr
  ==> Already done. Skipped.

Combination 15/512  |  std + svr
  ==> Already done. Skipped.

Combination 16/512  |  std + svr
  ==> Already done. Skipped.



{'tag': ['high_corr_features_remover + std + svr',
  'std + svr',
  'high_corr_features_remover + std + pca + svr',
  'std + pca + svr',
  'l1 + std + svr',
  'l1 + std + pca + svr',
  'l2 + std + svr',
  'only_properties + std + svr',
  'l2 + std + pca + svr',
  'minmax + svr'],
 'svr__kernel': ['rbf', 'poly'],
 'svr__epsilon': [1.0, 0.1],
 'svr__tol': [0.1, 0.01, 0.001, 0.0001],
 'svr__C': [1.0],
 'svr__max_iter': [100000],
 'high_corr_features_remover__corr_threshold': [0.99, nan, 0.95, 0.85],
 'pca__n_components': [nan, 0.99, 0.95, 0.85]}

Unnamed: 0,tag,R2,MSE,svr__kernel,svr__epsilon,svr__tol,svr__C,svr__max_iter,high_corr_features_remover__corr_threshold,pca__n_components
868,high_corr_features_remover + std + svr,0.7851,244.4647,rbf,1.0,0.1,1.0,100000,0.99,
866,high_corr_features_remover + std + svr,0.7851,244.5151,rbf,1.0,0.01,1.0,100000,0.99,
864,high_corr_features_remover + std + svr,0.7851,244.5169,rbf,1.0,0.001,1.0,100000,0.99,
862,high_corr_features_remover + std + svr,0.7851,244.5172,rbf,1.0,0.0001,1.0,100000,0.99,
860,high_corr_features_remover + std + svr,0.7848,244.8116,rbf,0.1,0.1,1.0,100000,0.99,
858,high_corr_features_remover + std + svr,0.7848,244.8183,rbf,0.1,0.01,1.0,100000,0.99,
856,high_corr_features_remover + std + svr,0.7848,244.8218,rbf,0.1,0.001,1.0,100000,0.99,
854,high_corr_features_remover + std + svr,0.7848,244.822,rbf,0.1,0.0001,1.0,100000,0.99,
518,std + svr,0.7848,244.8251,rbf,1.0,0.01,1.0,100000,,
163,std + svr,0.7848,244.828,rbf,1.0,0.0001,1.0,100000,,


In [None]:
import hiplot as hip

# Load the results CSV of the most recently assigned `estimator_tag` into a HiPlot
# experiment and render it. After a top-to-bottom run that tag is "svr"; assign
# `estimator_tag` before running this cell to inspect another model.
results_experiment = hip.Experiment.from_csv(OUTPUT_FOLDER + estimator_tag + "_output.csv")
results_experiment.display()