# Format DataFrame

In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
train_df = pd.DataFrame(data.data, columns=data.feature_names)
train_df["diagnosis"] = data.target

print(train_df.shape)
train_df.head()

(569, 31)


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,diagnosis
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


# Set Up Environment

In [2]:
from hyperparameter_hunter import Environment, CVExperiment

env = Environment(
    train_dataset=train_df,
    results_path="HyperparameterHunterAssets",
    target_column="diagnosis",
    metrics=["roc_auc_score"],
    cv_type="StratifiedKFold",
    cv_params=dict(n_splits=5, random_state=32),
    verbose=1,
)

Cross-Experiment Key:   'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='


We're initializing our `Environment` with `verbose=1` to tell our experiments to only log the essentials because we're about to run lots of experiments.

Now that HyperparameterHunter has an active `Environment`, we can do two things:

# 1. Perform Experiments

In [3]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier

experiment_0 = CVExperiment(KNeighborsClassifier, {})

<21:06:43> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:43> Initialized Experiment: 'aacc31f5-006b-4b2a-9d3c-657ab49c9842'
<21:06:43> Hyperparameter Key:     '-lUbOmxw4PBQbJTgKXe2SQFMLkdgvTCnmTl9Wnz3CUc='
<21:06:43> 
<21:06:43> 
<21:06:43> FINAL:    OOF(roc_auc_score=0.91671)  |  Time Elapsed: 0.04529 s
<21:06:43> 
<21:06:43> Saving results for Experiment: 'aacc31f5-006b-4b2a-9d3c-657ab49c9842'


In [4]:
experiment_1 = CVExperiment(SVC, {})

<21:06:43> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:43> Initialized Experiment: '5ca7860e-a6fb-4b72-a07d-ecaaf5e03e03'
<21:06:43> Hyperparameter Key:     'zSm0a8Zxw7EfGD4Ai8Xz63NasLSEOwkx_bV1rNK5_W8='
<21:06:43> 
<21:06:43> 
<21:06:43> FINAL:    OOF(roc_auc_score=0.50000)  |  Time Elapsed: 0.11869 s
<21:06:43> 
<21:06:43> Saving results for Experiment: '5ca7860e-a6fb-4b72-a07d-ecaaf5e03e03'


In [5]:
experiment_2 = CVExperiment(LinearSVC, {})

<21:06:43> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:43> Initialized Experiment: 'b07ba658-bb31-45ff-8f23-729bd550c39b'
<21:06:43> Hyperparameter Key:     'umSzvriJIkgXYrenXLwiFwAGcPH-z8u5r8rnMRmK12Y='
<21:06:43> 
<21:06:43> 
<21:06:43> FINAL:    OOF(roc_auc_score=0.89917)  |  Time Elapsed: 0.15589 s
<21:06:43> 
<21:06:43> Saving results for Experiment: 'b07ba658-bb31-45ff-8f23-729bd550c39b'


In [6]:
experiment_3 = CVExperiment(NuSVC, {})

<21:06:43> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:43> Initialized Experiment: '69883d40-af9b-450d-a6a4-08872690e018'
<21:06:43> Hyperparameter Key:     'Db1ESJoQdnzn1dzxKdDO_yR4PniOYG7W6vMrybE6mxI='
<21:06:43> 
<21:06:44> 
<21:06:44> FINAL:    OOF(roc_auc_score=0.50000)  |  Time Elapsed: 0.10987 s
<21:06:44> 
<21:06:44> Saving results for Experiment: '69883d40-af9b-450d-a6a4-08872690e018'


In [7]:
experiment_4 = CVExperiment(DecisionTreeClassifier, {})

<21:06:44> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:44> Initialized Experiment: '98097e63-0094-4726-b9ab-90a90cb8f9a8'
<21:06:44> Hyperparameter Key:     'Em2R2LoQaueLGw5QJS3sUe6Gpv9voIAGrND_DQmerFM='
<21:06:44> 
<21:06:44> 
<21:06:44> FINAL:    OOF(roc_auc_score=0.92209)  |  Time Elapsed: 0.06197 s
<21:06:44> 
<21:06:44> Saving results for Experiment: '98097e63-0094-4726-b9ab-90a90cb8f9a8'


In [8]:
experiment_5 = CVExperiment(RandomForestClassifier, {})

<21:06:44> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:44> Initialized Experiment: 'f6a00324-a7eb-438f-ad35-a1f7882641e2'
<21:06:44> Hyperparameter Key:     'OdIHXd6SeFQi6gFbjUKsKR_KplScHEjsAppJ4Yhxpys='
<21:06:44> 
<21:06:44> 
<21:06:44> FINAL:    OOF(roc_auc_score=0.95777)  |  Time Elapsed: 0.09257 s
<21:06:44> 
<21:06:44> Saving results for Experiment: 'f6a00324-a7eb-438f-ad35-a1f7882641e2'


In [9]:
experiment_6 = CVExperiment(AdaBoostClassifier, {})

<21:06:44> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:44> Initialized Experiment: '35a23c9d-311d-4e69-8e81-142218c25cfa'
<21:06:44> Hyperparameter Key:     '65vlVQBglPtLRvLoMPQ4vo6V11XNuBPSVdGa3gfiga8='
<21:06:44> 
<21:06:44> 
<21:06:44> FINAL:    OOF(roc_auc_score=0.96425)  |  Time Elapsed: 0.50177 s
<21:06:44> 
<21:06:44> Saving results for Experiment: '35a23c9d-311d-4e69-8e81-142218c25cfa'


In [10]:
experiment_7 = CVExperiment(GradientBoostingClassifier, {})

<21:06:44> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:44> Initialized Experiment: 'bd95521e-2062-479e-ac8c-07e6fe993247'
<21:06:44> Hyperparameter Key:     'QsW5PTxiq440XcDtxUQDwRZ2k6utNZByjmD0aDq4viM='
<21:06:44> 
<21:06:45> 
<21:06:45> FINAL:    OOF(roc_auc_score=0.95393)  |  Time Elapsed: 0.64309 s
<21:06:45> 
<21:06:45> Saving results for Experiment: 'bd95521e-2062-479e-ac8c-07e6fe993247'


In [11]:
experiment_8 = CVExperiment(GaussianNB, {})

<21:06:45> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:45> Initialized Experiment: 'c4052269-f4d4-439e-927a-d8c2f6707969'
<21:06:45> Hyperparameter Key:     'rKKFZX71cT7Hyreg02mNi_12YKCWQWNX9X9Dm1y1mH8='
<21:06:45> 
<21:06:45> 
<21:06:45> FINAL:    OOF(roc_auc_score=0.93035)  |  Time Elapsed: 0.03822 s
<21:06:45> 
<21:06:45> Saving results for Experiment: 'c4052269-f4d4-439e-927a-d8c2f6707969'


In [12]:
experiment_9 = CVExperiment(LinearDiscriminantAnalysis, {})

<21:06:45> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:45> Initialized Experiment: 'e26e93bd-3e81-4de8-a5ca-57b4471d4376'
<21:06:45> Hyperparameter Key:     '0oh263u3lfeNrwqq0jzCrNw1M5eMWldb8CIw6ML7qmI='
<21:06:45> 
<21:06:45> 
<21:06:45> FINAL:    OOF(roc_auc_score=0.95003)  |  Time Elapsed: 0.04905 s
<21:06:45> 
<21:06:45> Saving results for Experiment: 'e26e93bd-3e81-4de8-a5ca-57b4471d4376'


In [13]:
experiment_10 = CVExperiment(QuadraticDiscriminantAnalysis, {})

<21:06:45> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:45> Initialized Experiment: 'f013e207-95eb-4f72-9c20-4e587c25f52b'
<21:06:45> Hyperparameter Key:     'qdHf2hC76V9AcGks6LKQjZoi1kM-_Z5bQBskGQ_FM5M='
<21:06:45> 
<21:06:45> 
<21:06:45> FINAL:    OOF(roc_auc_score=0.95821)  |  Time Elapsed: 0.04468 s
<21:06:45> 
<21:06:45> Saving results for Experiment: 'f013e207-95eb-4f72-9c20-4e587c25f52b'


In [14]:
experiment_11 = CVExperiment(MLPClassifier, {})

<21:06:45> Validated Environment:  'aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60='
<21:06:45> Initialized Experiment: 'b41a3b1c-ef03-47b5-82f1-00cf74b43403'
<21:06:45> Hyperparameter Key:     'JYz6ECsGC_S2dHnO3Gxcn4-jC5odreQXXF13WwoeW_0='
<21:06:45> 
<21:06:46> 
<21:06:46> FINAL:    OOF(roc_auc_score=0.92003)  |  Time Elapsed: 0.82798 s
<21:06:46> 
<21:06:46> Saving results for Experiment: 'b41a3b1c-ef03-47b5-82f1-00cf74b43403'


Of course, SKLearn has many more algorithms than those shown here, but I think you get the idea.

Notice that in all the above experiments, we gave `CVExperiment` `model_init_params={}`. Passing an empty dict tells it to use the default hyperparameters for the `model_initializer`, which it'll figure out on its own.

# 2. Hyperparameter Optimization

We're just going to do optimization on one of the algorithms used above (`AdaBoostClassifier`); although, HyperparameterHunter can certainly do consecutive optimization rounds.

Notice below that `optimizer` correctly identifies `experiment_6` as being the only saved experiment it can learn from because it's optimizing `AdaBoostClassifier`.

In [15]:
from hyperparameter_hunter import RandomForestOptPro, Real, Integer, Categorical

optimizer = RandomForestOptPro(iterations=12, random_state=42)

optimizer.set_experiment_guidelines(
    model_initializer=AdaBoostClassifier,
    model_init_params=dict(
        n_estimators=Integer(25, 100),
        learning_rate=Real(0.5, 1.0),
        algorithm=Categorical(["SAMME", "SAMME.R"]),
    ),
)

optimizer.go()

Validated Environment with key: "aZlwCukh8RVC3A3BIv1DK27sZYyUCsegMFpr2R4JB60="
[31mSaved Result Files[0m
[31m_________________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   algorithm |   learning_rate |   n_estimators | 
Experiments matching cross-experiment key/algorithm: 1
Experiments fitting in the given space: 1
Experiments matching current guidelines: 1
    0 | 35a23c9d | 00m00s | [35m   0.96425[0m | [32m    SAMME.R[0m | [32m         1.0000[0m | [32m            50[0m | 
[31mHyperparameter Optimization[0m
[31m_________________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   algorithm |   learning_rate |   n_estimators | 
    1 | 7838c551 | 00m00s |    0.96425 |     SAMME.R |          0.6864 |             68 | 
    2 | e3f8b127 | 00m00s |    0.95438 |     SAMME.R |          0.8683 |             44 | 
    3 | ec5b82e8 | 00m00s |