# Format DataFrame

In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load the scikit-learn breast-cancer dataset and build one training frame:
# feature columns named after `data.feature_names`, plus the label column.
data = load_breast_cancer()
train_df = pd.DataFrame(
    data=data.data,
    columns=data.feature_names,
).assign(diagnosis=data.target)

# Print the (rows, columns) shape, then preview the first rows as the cell's
# displayed value.
print(train_df.shape)
train_df.head()

(569, 31)


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,diagnosis
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


# Set Up Environment

In [2]:
from hyperparameter_hunter import Environment, CVExperiment
from sklearn.model_selection import StratifiedKFold

# Create the HyperparameterHunter `Environment` that the experiment and
# optimizer below rely on (they read `env.train_input`, `env.train_target`, etc.).
env = Environment(
    # Data: the full training frame and the name of its label column.
    train_dataset=train_df,
    target_column="diagnosis",
    # Evaluation: ROC-AUC under 10-split stratified CV, shuffled with a fixed seed.
    metrics=["roc_auc_score"],
    cv_type=StratifiedKFold,
    cv_params={"n_splits": 10, "shuffle": True, "random_state": 32},
    # Location passed to HyperparameterHunter for its result files.
    results_path="HyperparameterHunterAssets",
)

Cross-Experiment Key:   'LOKr-L2lRGe8e8l0E-8SY5FEcl2u7dwBRtrl36HDjvQ='


Now that HyperparameterHunter has an active `Environment`, we can do two things:

# 1. Perform Experiments

In [3]:
from xgboost import XGBClassifier

# Run a single cross-validated experiment with fixed XGBoost hyperparameters.
# NOTE(review): "reg:linear" is a regression objective used here with a
# classifier, and newer XGBoost releases appear to treat it as a deprecated
# alias of "reg:squarederror" - confirm against the pinned XGBoost version.
# It is left as-is because the optimizer cell must use the same value to
# recognize this experiment as matching.
experiment = CVExperiment(
    model_initializer=XGBClassifier,
    model_init_params={
        "objective": "reg:linear",
        "max_depth": 3,
        "n_estimators": 100,
        "subsample": 0.5,
    },
    model_extra_params={
        # Extra keyword arguments for the model's `fit` call.
        "fit": {
            "eval_set": [
                (env.train_input, env.train_target),
                (env.validation_input, env.validation_target),
            ],
            "early_stopping_rounds": 5,
            "eval_metric": "mae",
        },
    },
)

<18:54:43> Validated Environment:  'LOKr-L2lRGe8e8l0E-8SY5FEcl2u7dwBRtrl36HDjvQ='
<18:54:43> Initialized Experiment: 'c3ddb52c-ac47-46bd-9b11-f6407f7ca611'
<18:54:43> Hyperparameter Key:     'ntCovu5ufjSeQtJZkSxwzO0oDL0s5aq4BstpfidOTZE='
<18:54:43> 
<18:54:43> F0.0 AVG:   OOF(roc_auc_score=0.95455)  |  Time Elapsed: 0.04106 s
<18:54:43> F0.1 AVG:   OOF(roc_auc_score=0.96338)  |  Time Elapsed: 0.04396 s
<18:54:44> F0.2 AVG:   OOF(roc_auc_score=0.90079)  |  Time Elapsed: 0.04339 s
<18:54:44> F0.3 AVG:   OOF(roc_auc_score=0.97222)  |  Time Elapsed: 0.03908 s
<18:54:44> F0.4 AVG:   OOF(roc_auc_score=0.97619)  |  Time Elapsed: 0.03897 s
<18:54:44> F0.5 AVG:   OOF(roc_auc_score=0.90079)  |  Time Elapsed: 0.03904 s
<18:54:44> F0.6 AVG:   OOF(roc_auc_score=0.98611)  |  Time Elapsed: 0.03699 s
<18:54:44> F0.7 AVG:   OOF(roc_auc_score=0.98571)  |  Time Elapsed: 0.03822 s
<18:54:44> F0.8 AVG:   OOF(roc_auc_score=0.97619)  |  Time Elapsed: 0.03699 s
<18:54:44> F0.9 AVG:   OOF(roc_auc_score=1.00000

# 2. Hyperparameter Optimization

In [4]:
from hyperparameter_hunter import BayesianOptPro, Real, Integer, Categorical

# Bayesian optimization over XGBoost hyperparameters: 30 iterations, fixed seed.
optimizer = BayesianOptPro(iterations=30, random_state=1337)

# Guidelines mix fixed values with search dimensions (Integer / Real /
# Categorical). The fixed values mirror the earlier `experiment` so that its
# saved result falls inside this search space.
optimizer.set_experiment_guidelines(
    model_initializer=XGBClassifier,
    model_init_params={
        # Fixed values
        "objective": "reg:linear",
        "subsample": 0.5,
        # Searched dimensions
        "max_depth": Integer(2, 20),
        "learning_rate": Real(0.0001, 0.5),
        "booster": Categorical(["gbtree", "dart"]),
    },
    model_extra_params={
        # Extra keyword arguments for the model's `fit` call; `eval_metric`
        # is itself a searched dimension.
        "fit": {
            "eval_set": [
                (env.train_input, env.train_target),
                (env.validation_input, env.validation_target),
            ],
            "early_stopping_rounds": 5,
            "eval_metric": Categorical(["auc", "mae"]),
        },
    },
)

# Start the optimization loop.
optimizer.go()

Validated Environment with key: "LOKr-L2lRGe8e8l0E-8SY5FEcl2u7dwBRtrl36HDjvQ="
Saved Result Files
___________________________________________________________________________________________________________
 Step |       ID |   Time |      Value |   (fit, eval_metric) |   booster |   learning_rate |   max_depth | 
Experiments matching cross-experiment key/algorithm: 1
Experiments fitting in the given space: 1
Experiments matching current guidelines: 1
    0 | c3ddb52c | 00m00s |    0.96145 |                  mae |    gbtree |          0.1000 |           3 | 
Hyperparameter Optimization
___________________________________________________________________________________________________________
 Step |       ID |   Time |      Value |   (fit, eval_metric) |   booster |   learning_rate |   max_depth | 
    1 | 7239620d | 00m00s |    0.95298 |                  mae |      dart |          0.1864 |          17 | 
 

Notice that `optimizer` recognizes that our earlier `experiment`'s hyperparameters fit inside the search space/guidelines set for `optimizer`.

Then, when optimization is started, it automatically learns from `experiment`'s results - without any extra work for us!