# Format DataFrame

In [1]:
import pandas as pd
from sklearn.datasets import load_diabetes

# Load the diabetes dataset that ships with scikit-learn.
data = load_diabetes()

# Build the feature frame and attach the regression target as the
# "progression" column in a single expression.
train_df = pd.DataFrame(data.data, columns=data.feature_names).assign(
    progression=data.target
)

# Print the dimensions, then let the notebook render the first rows.
print(train_df.shape)
train_df.head()

(442, 11)


Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,progression
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


# Set Up Environment

In [2]:
from hyperparameter_hunter import Environment, CVExperiment

# Activate the Environment used by the experiment and the optimizer below:
# the training frame, target column, metric, and cross-validation scheme.
env = Environment(
    # Data and what to predict
    train_dataset=train_df,
    target_column="progression",
    # How results are scored
    metrics=["mean_absolute_error"],
    # 12-fold shuffled KFold with a fixed seed, with 2 runs per fold
    cv_type="KFold",
    cv_params={"n_splits": 12, "shuffle": True, "random_state": 32},
    runs=2,
    # Where result files are saved
    results_path="HyperparameterHunterAssets",
)

Cross-Experiment Key:   '_5zDEjF2PgF4Tj2bxagz_-JU-Nig_vU3WpP_thE_rsk='


Now that HyperparameterHunter has an active `Environment`, we can do two things:

# 1. Perform Experiments

In [3]:
from xgboost import XGBRegressor

# A single cross-validated XGBoost experiment with fixed hyperparameters.
# No separate run call is needed: the log output below appears as soon as
# the CVExperiment is constructed.
experiment = CVExperiment(
    model_initializer=XGBRegressor,
    model_init_params={"max_depth": 4, "n_estimators": 400, "subsample": 0.5},
    # Extra keyword arguments routed to the model's `fit` call
    model_extra_params={"fit": {"eval_metric": "mae"}},
)

<18:55:50> Validated Environment:  '_5zDEjF2PgF4Tj2bxagz_-JU-Nig_vU3WpP_thE_rsk='
<18:55:50> Initialized Experiment: 'c3555fcd-ff02-4a5a-aa7f-f2cbbe63a322'
<18:55:50> Hyperparameter Key:     'dJFRA_seUsBfnxhHdFj0t3yMOo_Z_k_mf4HqTTAkhJc='
<18:55:50> 
<18:55:50> F0/R0  |  OOF(mean_absolute_error=50.23672)  |  Time Elapsed: 0.11365 s
<18:55:50> F0/R1  |  OOF(mean_absolute_error=50.59791)  |  Time Elapsed: 0.11199 s
<18:55:50> F0.0 AVG:   OOF(mean_absolute_error=49.50914)  |  Time Elapsed: 0.22989 s
<18:55:50> F1/R0  |  OOF(mean_absolute_error=54.15111)  |  Time Elapsed: 0.11137 s
<18:55:50> F1/R1  |  OOF(mean_absolute_error=49.27629)  |  Time Elapsed: 0.11155 s
<18:55:50> F0.1 AVG:   OOF(mean_absolute_error=51.17912)  |  Time Elapsed: 0.22752 s
<18:55:50> F2/R0  |  OOF(mean_absolute_error=46.85189)  |  Time Elapsed: 0.11181 s
<18:55:50> F2/R1  |  OOF(mean_absolute_error=44.18693)  |  Time Elapsed: 0.11129 s
<18:55:50> F0.2 AVG:   OOF(mean_absolute_error=44.42631)  |  Time Elapsed: 0.22724

# 2. Hyperparameter Optimization

In [4]:
from hyperparameter_hunter import GBRT, Real, Integer, Categorical

# GBRT optimizer limited to 20 iterations, seeded for repeatability.
optimizer = GBRT(iterations=20, random_state=32)

# Model constructor arguments: Integer/Real/Categorical entries are the
# dimensions to search over; `subsample` is given as a plain fixed value.
xgb_search_space = {
    "max_depth": Integer(2, 20),
    "n_estimators": Integer(100, 900),
    "learning_rate": Real(0.0001, 0.5),
    "subsample": 0.5,
    "booster": Categorical(["gbtree", "gblinear"]),
}

# Arguments for the model's `fit` call can be searched over as well.
fit_search_space = {"eval_metric": Categorical(["rmse", "mae"])}

optimizer.forge_experiment(
    model_initializer=XGBRegressor,
    model_init_params=xgb_search_space,
    model_extra_params={"fit": fit_search_space},
)

# Start the optimization loop.
optimizer.go()

Validated Environment with key: "_5zDEjF2PgF4Tj2bxagz_-JU-Nig_vU3WpP_thE_rsk="
[31mSaved Result Files[0m
[31m____________________________________________________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   (fit, eval_metric) |   booster |   learning_rate |   max_depth |   n_estimators | 
Experiments matching cross-experiment key/algorithm: 1
Experiments fitting in the given space: 1
Experiments matching current guidelines: 1
    0 | c3555fcd | 00m00s | [35m  49.26527[0m | [32m                 mae[0m | [32m   gbtree[0m | [32m         0.1000[0m | [32m          4[0m | [32m           400[0m | 
[31mHyperparameter Optimization[0m
[31m____________________________________________________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   (fit, eval_metric) |   booster |   learning_rate |   max_depth |   n_estimators | 
    1 | 75272ef8

Notice that `optimizer` recognizes that the hyperparameters of our earlier `experiment` fit inside the search space/guidelines set for `optimizer`.

Then, when optimization is started, `optimizer` automatically learns from `experiment`'s results — without any extra work for us!