# Format DataFrame

In [1]:
import pandas as pd
from sklearn.datasets import make_classification

# Generate a seeded synthetic two-class dataset of 700 samples.
x, y = make_classification(
    n_samples=700,
    n_classes=2,
    shuffle=True,
    random_state=32,
)

# Feature columns are labelled by their integer position; the target is
# appended as the final column, named "y".
feature_labels = range(x.shape[1])
train_df = pd.DataFrame(x, columns=feature_labels).assign(y=y)

print(train_df.shape)
train_df.head()

(700, 21)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,y
0,1.542875,1.012219,0.529826,0.096133,1.040699,-0.807561,-0.34268,0.858858,0.147749,-0.107334,...,-0.616889,-1.45416,-0.20129,0.863571,0.437816,0.127156,1.589943,0.774433,0.099306,0
1,-0.704136,0.725639,-0.811169,1.068405,0.056545,0.118662,-0.425186,0.668945,0.081395,-0.225007,...,-0.382115,1.202129,-0.940534,0.758537,-0.313684,-1.114822,1.61412,-0.782022,-2.004021,0
2,1.649923,-1.174697,-1.001881,-1.332309,-1.215187,0.270599,-2.110571,-0.462214,-0.20716,0.634851,...,0.704766,0.268407,0.28099,-0.139143,0.410571,0.060985,0.031953,-0.403103,-0.293248,1
3,-1.661594,-0.053371,-1.867264,1.87213,0.207626,0.000256,0.161002,0.092727,-0.376592,-0.312441,...,1.079768,0.963923,0.864046,1.094562,-0.861178,0.254324,0.45935,-0.577521,0.658839,1
4,1.242715,0.9813,0.399682,-1.361206,1.865577,0.355011,-0.043375,1.075501,0.225397,-1.153797,...,-0.89042,-1.206708,-0.64525,0.9784,-1.146337,1.830191,-0.218601,-1.67015,-0.024537,0


# Set Up Environment

In [2]:
from hyperparameter_hunter import Environment, CVExperiment
from sklearn.model_selection import RepeatedStratifiedKFold

# Keyword arguments handed to the cross-validation splitter class below.
cv_settings = dict(n_repeats=2, n_splits=10, random_state=1337)

# Create the Environment that the experiment and optimizer cells below rely on.
# It is given the training frame, the name of the target column, the metric
# to report, and the cross-validation scheme.
env = Environment(
    train_dataset=train_df,
    results_path="HyperparameterHunterAssets",
    target_column="y",
    metrics=["hamming_loss"],
    cv_type=RepeatedStratifiedKFold,
    cv_params=cv_settings,
)

Cross-Experiment Key:   'QVqPKuVZbfho-zk60rJMGsmbpahcFa1Po6hf9aVPWFc='


Now that HyperparameterHunter has an active `Environment`, we can do two things:

# 1. Perform Experiments

In [3]:
from rgf import RGFClassifier

# Fixed hyperparameters for this single RGF experiment.
rgf_init_params = dict(max_leaf=1000, algorithm='RGF', min_samples_leaf=10)

# Constructing the CVExperiment is what produces the per-fold log shown below.
experiment = CVExperiment(
    model_initializer=RGFClassifier,
    model_init_params=rgf_init_params,
)

<20:51:47> Validated Environment:  'QVqPKuVZbfho-zk60rJMGsmbpahcFa1Po6hf9aVPWFc='
<20:51:47> Initialized Experiment: '15b9efd6-0fbc-4796-8627-5d25b817c443'
<20:51:47> Hyperparameter Key:     '-IXQQcLRa5eTe7vPDpD9l7gsq-zO73d7r2FPZ9Bt6cI='
<20:51:47> 
<20:51:47> 
<20:51:47> F0.0 AVG:   OOF(hamming_loss=0.09859)  |  Time Elapsed: 0.53661 s
<20:51:48> F0.1 AVG:   OOF(hamming_loss=0.12676)  |  Time Elapsed: 0.53318 s
<20:51:49> F0.2 AVG:   OOF(hamming_loss=0.04225)  |  Time Elapsed: 0.5238 s
<20:51:49> F0.3 AVG:   OOF(hamming_loss=0.05714)  |  Time Elapsed: 0.57586 s
<20:51:50> F0.4 AVG:   OOF(hamming_loss=0.08571)  |  Time Elapsed: 0.52476 s
<20:51:50> F0.5 AVG:   OOF(hamming_loss=0.05714)  |  Time Elapsed: 0.52243 s
<20:51:51> F0.6 AVG:   OOF(hamming_loss=0.07143)  |  Time Elapsed: 0.55067 s
<20:51:51> F0.7 AVG:   OOF(hamming_loss=0.07246)  |  Time Elapsed: 0.52484 s
<20:51:52> F0.8 AVG:   OOF(hamming_loss=0.10145)  |  Time Elapsed: 0.53572 s
<20:51:52> F0.9 AVG:   OOF(hamming_loss=0.0579

# 2. Hyperparameter Optimization

In [4]:
from hyperparameter_hunter import BayesianOptPro, Real, Integer, Categorical

# Bayesian optimizer limited to 10 iterations, seeded so the run is repeatable.
optimizer = BayesianOptPro(iterations=10, random_state=42)

# Search space: `max_leaf` is held constant, while every other entry is a
# dimension (Real range or Categorical choice) for the optimizer to explore.
rgf_search_space = dict(
    max_leaf=1000,
    algorithm=Categorical(['RGF', 'RGF_Opt', 'RGF_Sib']),
    l2=Real(0.01, 0.3),
    normalize=Categorical([True, False]),
    learning_rate=Real(0.3, 0.7),
    loss=Categorical(['LS', 'Expo', 'Log', 'Abs']),
)

optimizer.set_experiment_guidelines(
    model_initializer=RGFClassifier,
    model_init_params=rgf_search_space,
)

# Start the optimization run.
optimizer.go()

Validated Environment with key: "QVqPKuVZbfho-zk60rJMGsmbpahcFa1Po6hf9aVPWFc="
[31mSaved Result Files[0m
[31m______________________________________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   algorithm |        l2 |   learning_rate |      loss |   normalize | 
Experiments matching cross-experiment key/algorithm: 1
Experiments fitting in the given space: 1
Experiments matching current guidelines: 1
    0 | 15b9efd6 | 00m00s | [35m   0.07857[0m | [32m        RGF[0m | [32m   0.1000[0m | [32m         0.5000[0m | [32m      Log[0m | [32m          0[0m | 
[31mHyperparameter Optimization[0m
[31m______________________________________________________________________________________________________________[0m
 Step |       ID |   Time |      Value |   algorithm |        l2 |   learning_rate |      loss |   normalize | 
    1 | f9f825df | 00m12s | [35m   0.07714[0m | [32m    RGF_Sib[0m | [32m   0.171

Notice that `optimizer` recognizes that the hyperparameters of our earlier `experiment` fit inside the search space/guidelines set for `optimizer`.

Then, when optimization starts, it automatically learns from the results of `experiment` — without any extra work from us!