In [1]:
from sklearn.datasets import load_wine
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

In [2]:
from mlblocks.mlpipeline import MLPipeline

In [3]:
from btb import HyperParameter, ParamTypes
from btb.tuning import GP

In [27]:
wine = load_wine()
X_train, X_test, y_train, y_test = train_test_split(wine.data, wine.target)

In [5]:
pipeline = MLPipeline(['random_forest_classifier'])

In [6]:
hyperparams = pipeline.get_tunable_hyperparams()
for hyperparam in hyperparams:
    print(hyperparam)

Hyperparameter: Name: criterion, Block Name: rf_classifier, Type: string, Range: ['entropy', 'gini'], Value: gini
Hyperparameter: Name: max_features, Block Name: rf_classifier, Type: float, Range: [0.1, 1.0], Value: 0.46446437664959683
Hyperparameter: Name: max_depth, Block Name: rf_classifier, Type: int, Range: [2, 10], Value: 7
Hyperparameter: Name: min_samples_split, Block Name: rf_classifier, Type: int, Range: [2, 4], Value: 2
Hyperparameter: Name: min_samples_leaf, Block Name: rf_classifier, Type: int, Range: [1, 3], Value: 2
Hyperparameter: Name: n_estimators, Block Name: rf_classifier, Type: int_cat, Range: [100], Value: 100
Hyperparameter: Name: n_jobs, Block Name: rf_classifier, Type: int_cat, Range: [-1], Value: -1


In [19]:
pipeline.to_dict()

{'rf_classifier__criterion': 'gini',
 'rf_classifier__max_features': 0.46446437664959683,
 'rf_classifier__max_depth': 7,
 'rf_classifier__min_samples_split': 2,
 'rf_classifier__min_samples_leaf': 2,
 'rf_classifier__n_estimators': 100,
 'rf_classifier__n_jobs': -1}

In [24]:
tunables = [
    ((p.block_name, p.param_name), HyperParameter(p.param_type, p.param_range))
    for p in pipeline.get_tunable_hyperparams()
]
tunables

[(('rf_classifier', 'criterion'),
  <btb.hyper_parameter.StringCatHyperParameter at 0x7fe2960b6940>),
 (('rf_classifier', 'max_features'),
  <btb.hyper_parameter.FloatHyperParameter at 0x7fe2960b69e8>),
 (('rf_classifier', 'max_depth'),
  <btb.hyper_parameter.IntHyperParameter at 0x7fe2960b6c18>),
 (('rf_classifier', 'min_samples_split'),
  <btb.hyper_parameter.IntHyperParameter at 0x7fe2960b6a90>),
 (('rf_classifier', 'min_samples_leaf'),
  <btb.hyper_parameter.IntHyperParameter at 0x7fe2960b6dd8>),
 (('rf_classifier', 'n_estimators'),
  <btb.hyper_parameter.IntCatHyperParameter at 0x7fe2960b6c50>),
 (('rf_classifier', 'n_jobs'),
  <btb.hyper_parameter.IntCatHyperParameter at 0x7fe2960b6978>)]

In [25]:
tuner = GP(tunables)

In [26]:
tuner.propose()

{('rf_classifier', 'criterion'): 'entropy',
 ('rf_classifier', 'max_features'): 0.48232461525591264,
 ('rf_classifier', 'max_depth'): 2,
 ('rf_classifier', 'min_samples_split'): 2,
 ('rf_classifier', 'min_samples_leaf'): 3,
 ('rf_classifier', 'n_estimators'): 100,
 ('rf_classifier', 'n_jobs'): -1}

In [28]:
for i in range(10):
    params = tuner.propose()
    pipeline.set_from_hyperparam_dict(params)
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    score = f1_score(y_pred, y_test, average='micro')
    tuner.add(params, score)

score



0.9777777777777777

In [29]:
params

{('rf_classifier', 'criterion'): 'gini',
 ('rf_classifier', 'max_features'): 0.1890833133445103,
 ('rf_classifier', 'max_depth'): 9,
 ('rf_classifier', 'min_samples_split'): 2,
 ('rf_classifier', 'min_samples_leaf'): 2,
 ('rf_classifier', 'n_estimators'): 100,
 ('rf_classifier', 'n_jobs'): -1}