In [1]:
import numpy as np
from btb import HyperParameter, ParamTypes
from btb.tuning import GP
from mlblocks.mlpipeline import MLPipeline
from sklearn.datasets import load_wine
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

In [2]:
def get_tuner(pipeline):
    """Build a GP tuner over every tunable hyperparameter of ``pipeline``.

    Each item returned by ``pipeline.get_tunable_hyperparams()`` is turned into
    a ``((block_name, param_name), HyperParameter)`` pair, in the order the
    pipeline reports them, and the resulting list is handed to ``GP``.
    """
    search_space = []
    for spec in pipeline.get_tunable_hyperparams():
        key = (spec.block_name, spec.param_name)
        search_space.append((key, HyperParameter(spec.param_type, spec.param_range)))
    return GP(search_space)

In [3]:
def scorer(obs, exp):
    """Micro-averaged F1 score of observed predictions against expected labels.

    Args:
        obs: Observed (predicted) labels; ``cv_score`` passes its predictions
            in this position.
        exp: Expected (ground-truth) labels.

    Returns:
        Whatever ``f1_score`` returns for ``average='micro'``.
    """
    # f1_score's positional order is (y_true, y_pred). Passing by keyword keeps
    # the ground truth and the predictions from being swapped, which matters as
    # soon as an asymmetric average (e.g. 'weighted') is used.
    return f1_score(y_true=exp, y_pred=obs, average='micro')

In [4]:
def cv_score(X, y, blocks, params, score_func, splits=5):
    """Cross-validate a pipeline built from ``blocks`` with ``params``.

    The data is split with a shuffled ``StratifiedKFold`` (``random_state=0``).
    For every fold a fresh ``MLPipeline`` is created, configured with
    ``params``, fitted on the training part and scored on the held-out part
    via ``score_func(predictions, true_labels)``.

    Args:
        X: Feature data, indexed with the integer index arrays of each fold.
        y: Target labels, indexed the same way as ``X``.
        blocks: Block specification passed to ``MLPipeline``.
        params: Hyperparameter dict passed to ``set_from_hyperparam_dict``.
        score_func: Callable taking ``(predictions, true_labels)``.
        splits: Number of folds.

    Returns:
        Tuple ``(mean, std)`` of the per-fold scores.
    """
    folds = StratifiedKFold(n_splits=splits, shuffle=True, random_state=0)

    def evaluate_fold(fit_idx, eval_idx):
        # Slice the data first, then build a brand-new pipeline for this fold.
        X_fit, X_eval = X[fit_idx], X[eval_idx]
        y_fit, y_eval = y[fit_idx], y[eval_idx]

        model = MLPipeline(blocks)
        model.set_from_hyperparam_dict(params)
        model.fit(X_fit, y_fit)
        return score_func(model.predict(X_eval), y_eval)

    fold_scores = [
        evaluate_fold(fit_idx, eval_idx)
        for fit_idx, eval_idx in folds.split(X, y)
    ]
    return np.mean(fold_scores), np.std(fold_scores)

In [9]:
def make_pipeline(X, y, blocks, score_func, budget=20, splits=5):
    """Generate tuning iterations for a pipeline built from ``blocks``.

    This is a generator: nothing runs until it is iterated. On each of the
    ``budget`` iterations the tuner proposes hyperparameters, they are
    evaluated with ``cv_score``, the mean score is reported back to the tuner,
    and the iteration's result is yielded.

    Args:
        X: Feature data forwarded to ``cv_score``.
        y: Target labels forwarded to ``cv_score``.
        blocks: Block specification used for every pipeline.
        score_func: Scoring callable forwarded to ``cv_score``.
        budget: Number of tuning iterations.
        splits: Number of cross-validation folds.

    Yields:
        Tuples ``(params, score, std)`` for each iteration, in order.
    """
    # This pipeline instance only serves to enumerate the tunable hyperparameters.
    template = MLPipeline(blocks)
    tuner = get_tuner(template)

    for _trial in range(budget):
        candidate = tuner.propose()
        mean_score, score_std = cv_score(X, y, blocks, candidate, score_func, splits)
        tuner.add(candidate, mean_score)

        yield candidate, mean_score, score_std

In [10]:
# Load the wine dataset from scikit-learn and split it into features and labels.
wine = load_wine()
X, y = wine.data, wine.target
# The pipeline consists of a single block.
blocks = ['random_forest_classifier']

# make_pipeline is a generator; list() drains it so every tuning iteration
# (default budget of 20) runs in this cell.
steps = list(make_pipeline(X, y, blocks, scorer, splits=5))



In [11]:
# Mean cross-validation score and its standard deviation for each tuning
# iteration, in the order the iterations ran.
[(score, std) for (_, score, std) in steps]

[(0.9439417568829332, 0.017855183211689985),
 (0.9437830267242031, 0.03131900835750861),
 (0.9552115981527747, 0.013268148148532872),
 (0.9439417568829332, 0.025048942948389222),
 (0.9439417568829332, 0.025048942948389222),
 (0.9772922502334268, 0.011369949981975243),
 (0.9498061927473692, 0.01982641164521094),
 (0.9828478057889823, 0.014014239478014388),
 (0.9830065359477125, 0.013887927686846917),
 (0.9828478057889823, 0.014014239478014388),
 (0.9606170035581801, 0.01388337756569935),
 (0.9661725591137357, 0.011683743470801295),
 (0.9771335200746967, 0.021389639749015147),
 (0.9828478057889823, 0.014014239478014388),
 (0.988562091503268, 0.014018046136945897),
 (0.9664814394226159, 0.01028551947204806),
 (0.9715779645191411, 0.01808041701803548),
 (0.9828478057889823, 0.014014239478014388),
 (0.9496560425972191, 0.020472801142455862),
 (0.9771335200746967, 0.021389639749015147)]

In [12]:
# Hyperparameters proposed in the final tuning iteration.
# NOTE(review): this is the last proposal, not necessarily the best-scoring one.
steps[-1][0]

{('rf_classifier', 'criterion'): 'entropy',
 ('rf_classifier', 'max_features'): 0.1684225701771657,
 ('rf_classifier', 'max_depth'): 6,
 ('rf_classifier', 'min_samples_split'): 2,
 ('rf_classifier', 'min_samples_leaf'): 3,
 ('rf_classifier', 'n_estimators'): 100,
 ('rf_classifier', 'n_jobs'): -1}