# Format DataFrame

In [1]:
import pandas as pd
from sklearn.datasets import load_diabetes

data = load_diabetes()
train_df = pd.DataFrame(data=data.data, columns=data.feature_names)
train_df["progression"] = data.target

print(train_df.shape)
train_df.head()

(442, 11)


Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,progression
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


# Set Up Environment

In [2]:
from hyperparameter_hunter import Environment, CVExperiment
from sklearn.metrics import mean_squared_log_error

env = Environment(
    train_dataset=train_df,
    results_path="HyperparameterHunterAssets",
    target_column="progression",
    metrics=dict(msle=(mean_squared_log_error, "min")),
    cv_type="KFold",
    cv_params=dict(n_splits=10, random_state=42),
    runs=3,
)

Cross-Experiment Key:   'SM0fBtJM_PadIDgPuyTT0v3Vweau04D4XNhWpqJ1BEY='


Now that HyperparameterHunter has an active `Environment`, we can do two things:

# 1. Perform Experiments

In [3]:
from rgf import RGFRegressor

experiment = CVExperiment(
    model_initializer=RGFRegressor,
    model_init_params=dict(max_leaf=2000, algorithm='RGF', min_samples_leaf=10),
)

<20:54:34> Validated Environment:  'SM0fBtJM_PadIDgPuyTT0v3Vweau04D4XNhWpqJ1BEY='
<20:54:34> Initialized Experiment: '7ac42393-1e8e-4b62-a6b2-4abcc2569fc2'
<20:54:34> Hyperparameter Key:     'M898kSqnvKDNsKDrekegUv9zMnuV1E57RHWNXNe7_00='
<20:54:34> 
<20:54:34> F0/R0  |  OOF(msle=0.22658)  |  Time Elapsed: 0.19622 s
<20:54:34> F0/R1  |  OOF(msle=0.22658)  |  Time Elapsed: 0.19427 s
<20:54:35> F0/R2  |  OOF(msle=0.22658)  |  Time Elapsed: 0.19377 s
<20:54:35> F0.0 AVG:   OOF(msle=0.22658)  |  Time Elapsed: 0.59082 s
<20:54:35> F1/R0  |  OOF(msle=0.33111)  |  Time Elapsed: 0.1958 s
<20:54:35> F1/R1  |  OOF(msle=0.33111)  |  Time Elapsed: 0.19596 s
<20:54:35> F1/R2  |  OOF(msle=0.33111)  |  Time Elapsed: 0.19581 s
<20:54:35> F0.1 AVG:   OOF(msle=0.33111)  |  Time Elapsed: 0.5942 s
<20:54:35> F2/R0  |  OOF(msle=0.19371)  |  Time Elapsed: 0.19539 s
<20:54:36> F2/R1  |  OOF(msle=0.19371)  |  Time Elapsed: 0.19476 s
<20:54:36> F2/R2  |  OOF(msle=0.19371)  |  Time Elapsed: 0.19474 s
<20:54:36> 

# 2. Hyperparameter Optimization

In [4]:
from hyperparameter_hunter import ExtraTreesOptPro, Real, Integer, Categorical

optimizer = ExtraTreesOptPro(iterations=30, random_state=42)

optimizer.set_experiment_guidelines(
    model_initializer=RGFRegressor,
    model_init_params=dict(
        max_leaf=2000,
        algorithm=Categorical(['RGF', 'RGF_Opt', 'RGF_Sib']),
        l2=Real(0.01, 0.3),
        normalize=Categorical([True, False]),
        learning_rate=Real(0.3, 0.7),
        loss=Categorical(['LS', 'Expo', 'Log']),
    ),
)

optimizer.go()

Validated Environment with key: "SM0fBtJM_PadIDgPuyTT0v3Vweau04D4XNhWpqJ1BEY="
[31mSaved Result Files[0m
[31m______________________________________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   algorithm |        l2 |   learning_rate |      loss |   normalize | 
Experiments matching cross-experiment key/algorithm: 1
Experiments fitting in the given space: 1
Experiments matching current guidelines: 1
    0 | 7ac42393 | 00m00s | [35m   0.23179[0m | [32m        RGF[0m | [32m   0.1000[0m | [32m         0.5000[0m | [32m       LS[0m | [32m          1[0m | 
[31mHyperparameter Optimization[0m
[31m______________________________________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   algorithm |        l2 |   learning_rate |      loss |   normalize | 
    1 | 5ac47409 | 00m17s |   21.74256 |     RGF_Sib |    0.1710 |          0.5947 |  

Notice, `optimizer` recognizes our earlier `experiment`'s hyperparameters fit inside the search space/guidelines set for `optimizer`.

Then, when optimization is started, it automatically learns from `experiment`'s results - without any extra work for us!