# Format DataFrame

In [1]:
import pandas as pd
from sklearn.datasets import load_boston

# Load the Boston housing dataset and assemble a single DataFrame holding the
# feature columns plus the regression target under "median_value".
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so
# this cell needs an older scikit-learn release to run.
data = load_boston()
train_df = pd.DataFrame(data.data, columns=data.feature_names).assign(
    median_value=data.target
)

# Report the frame's dimensions, then preview its first rows as the cell output.
print(train_df.shape)
train_df.head()

(506, 14)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,median_value
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


# Set Up Environment

In [2]:
from hyperparameter_hunter import Environment, CVExperiment
from sklearn.metrics import r2_score
from sklearn.model_selection import RepeatedKFold

# Activate the Environment used by the experiment and optimizer cells below.
env = Environment(
    # Data: the training frame and the name of its target column.
    train_dataset=train_df,
    target_column="median_value",
    # Location passed as the results path.
    results_path="HyperparameterHunterAssets",
    # Scoring: `r2_score`, registered under the name "r2".
    metrics={"r2": r2_score},
    # Cross-validation: 5 splits, repeated twice, with a fixed seed.
    cv_type=RepeatedKFold,
    cv_params={"n_repeats": 2, "n_splits": 5, "random_state": 42},
)

Cross-Experiment Key:   '2YztSDxi5paDQaugUYdWoxknmDUq4I-UBXQHCzXUIyg='


Now that HyperparameterHunter has an active `Environment`, we can do two things:

# 1. Perform Experiments

In [3]:
from lightgbm import LGBMRegressor

# Run one cross-validated LightGBM experiment with fixed hyperparameters.
# Constructing `CVExperiment` is what triggers the run, as the log output
# beneath this cell shows.
# NOTE(review): per the LightGBM docs, `subsample` only has an effect when
# `subsample_freq` is non-zero; it is not set here - confirm this is intended.
experiment = CVExperiment(
    model_initializer=LGBMRegressor,
    model_init_params={
        "boosting_type": "gbdt",
        "num_leaves": 31,
        "min_child_samples": 5,
        "subsample": 0.5,
    },
)

<15:23:45> Validated Environment:  '2YztSDxi5paDQaugUYdWoxknmDUq4I-UBXQHCzXUIyg='
<15:23:45> Initialized Experiment: '3b699656-e442-4f21-afa8-80099e81427f'
<15:23:45> Hyperparameter Key:     'Bl2Yb6WpmWUnmspQM0IGGkgIix_b2EsMI0V71x8RkUQ='
<15:23:45> 
<15:23:45> 
<15:23:45> F0.0 AVG:   OOF(r2=0.88616)  |  Time Elapsed: 0.05149 s
<15:23:45> F0.1 AVG:   OOF(r2=0.87997)  |  Time Elapsed: 0.04793 s
<15:23:45> F0.2 AVG:   OOF(r2=0.83099)  |  Time Elapsed: 0.04655 s
<15:23:45> F0.3 AVG:   OOF(r2=0.91790)  |  Time Elapsed: 0.04865 s
<15:23:45> F0.4 AVG:   OOF(r2=0.87084)  |  Time Elapsed: 0.04752 s
<15:23:45> Repetition 0 AVG:   OOF(r2=0.87919)  |  Time Elapsed: 0.24743 s
<15:23:45> 
<15:23:45> F1.0 AVG:   OOF(r2=0.89532)  |  Time Elapsed: 0.04824 s
<15:23:45> F1.1 AVG:   OOF(r2=0.84094)  |  Time Elapsed: 0.04853 s
<15:23:45> F1.2 AVG:   OOF(r2=0.81587)  |  Time Elapsed: 0.04729 s
<15:23:45> F1.3 AVG:   OOF(r2=0.86505)  |  Time Elapsed: 0.04734 s
<15:23:45> F1.4 AVG:   OOF(r2=0.89984)  |  Time 

# 2. Hyperparameter Optimization

In [4]:
from hyperparameter_hunter import ExtraTreesOptPro, Real, Integer, Categorical

# Build the optimizer: 12 iterations, seeded.
optimizer = ExtraTreesOptPro(iterations=12, random_state=1337)

# Describe the experiments to run. Values wrapped in Categorical/Integer/Real
# define the search space; plain values stay fixed for every experiment.
# NOTE(review): per the LightGBM docs, `subsample` only has an effect when
# `subsample_freq` is non-zero; it is not set here - confirm this is intended.
optimizer.set_experiment_guidelines(
    model_initializer=LGBMRegressor,
    model_init_params={
        "boosting_type": Categorical(["gbdt", "dart"]),  # searched: one of two choices
        "num_leaves": Integer(10, 40),  # searched: integer range
        "max_depth": -1,  # fixed
        "min_child_samples": 5,  # fixed
        "subsample": Real(0.3, 0.7),  # searched: real-valued range
    },
)

# Start the optimization run.
optimizer.go()

Validated Environment with key: "2YztSDxi5paDQaugUYdWoxknmDUq4I-UBXQHCzXUIyg="
[31mSaved Result Files[0m
[31m_______________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   boosting_type |   num_leaves |   subsample | 
Experiments matching cross-experiment key/algorithm: 1
Experiments fitting in the given space: 1
Experiments matching current guidelines: 1
    0 | 3b699656 | 00m00s | [35m   0.88135[0m | [32m           gbdt[0m | [32m          31[0m | [32m     0.5000[0m | 
[31mHyperparameter Optimization[0m
[31m_______________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   boosting_type |   num_leaves |   subsample | 
    1 | 0a38ba6b | 00m00s |    0.82988 |            dart |           15 |      0.4684 | 
    2 | ebf7bde0 | 00m00s | [35m   0.88600[0m | [32m           gbdt[0m | [32m          29[0m | [32m     0.5947[0m | 
    3

Notice that `optimizer` recognizes that the hyperparameters of our earlier `experiment` fit inside the search space/guidelines set for `optimizer`.

Then, when optimization starts, it automatically learns from the results of `experiment` — without any extra work from us!