# Multiboosting
> How to train a multiboosting model

In [None]:
%%time
import os

import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Cache directory for the OpenML download. It can be overridden through the
# SCIKIT_LEARN_DATA environment variable; the fallback is the path that was
# originally hard-coded here, so existing setups keep using their cache.
DATA_HOME = os.environ.get("SCIKIT_LEARN_DATA", "/home/matthieu/sklearn_data")

data = fetch_openml("Fashion-MNIST", data_home=DATA_HOME)

# np.asarray keeps the reshape working whether fetch_openml returns a NumPy
# array or a pandas DataFrame (which has no .reshape method).
# Each sample is reshaped into a 28x28 image.
X = np.asarray(data["data"]).reshape(-1, 28, 28)
Y = data["target"]

# Fixed seed so the 60000-sample training split is identical on every run;
# the remaining samples form the test set.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, train_size=60000, random_state=13)

CPU times: user 23.5 s, sys: 1.36 s, total: 24.9 s
Wall time: 24.7 s


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from mlg_lib.imgfeat import HogFactory
from mlg_lib.ml_utils import make_single_feature_model, sk_train
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Feature extractor + classifier, wired together by make_single_feature_model.
pipeline = make_single_feature_model(
    HogFactory(orientations=8, pixels_per_cell=(4, 4), cells_per_block=(1, 1), multichannel=False),
    GradientBoostingClassifier(
        max_depth=2,
        max_features="log2",
        n_estimators=50,
        subsample=0.1,
        # subsample < 1 and max_features="log2" both involve random draws;
        # seed them (same seed as the train/test split) so reruns of this
        # notebook produce the same model and metrics.
        random_state=13,
    ),
)

In [None]:
%%time
# Metrics handed to sk_train as (name, function) pairs.
eval_metrics = {"cm": confusion_matrix, "acc": accuracy_score}
out = sk_train(
    xtrain,
    xtest,
    ytrain,
    ytest,
    pipeline,
    metrics=eval_metrics.items(),
)

CPU times: user 2min 18s, sys: 487 ms, total: 2min 18s
Wall time: 2min 19s


In [None]:
# Print the TrainingOutput returned by sk_train; its text form starts with
# the fitted Pipeline (feature step + GradientBoostingClassifier).
print(out)

TrainingOutput(model=Pipeline(memory=None,
         steps=[('lambdarow',
                 LambdaRow(row_func=HogFactory(orientations=8, pixels_per_cell=(4, 4), cells_per_block=(1, 1), multichannel=False))),
                ('gradientboostingclassifier',
                 GradientBoostingClassifier(criterion='friedman_mse', init=None,
                                            learning_rate=0.1, loss='deviance',
                                            max_depth=2, max_features='log2',
                                            max_leaf_nodes=None,
                                            min_impurity_decrease=0.0,
                                            min_impurity_split=None,
                                            min_samples_leaf=1,
                                            min_samples_split=2,
                                            min_weight_fraction_leaf=0.0,
                                            n_estimators=50,
                                      

In [None]:
# explicitly require this experimental feature
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
# now you can import normally from ensemble
from sklearn.ensemble import HistGradientBoostingClassifier

In [None]:
# Same feature-extractor settings as the previous model, now paired with the
# histogram-based gradient boosting classifier.
hog_features = HogFactory(
    orientations=8,
    pixels_per_cell=(4, 4),
    cells_per_block=(1, 1),
    multichannel=False,
)
hist_booster = HistGradientBoostingClassifier(max_depth=2, max_iter=50)

pipeline = make_single_feature_model(hog_features, hist_booster)

In [None]:
%%time
# Metrics handed to sk_train as (name, function) pairs.
eval_metrics = {"cm": confusion_matrix, "acc": accuracy_score}
out = sk_train(
    xtrain,
    xtest,
    ytrain,
    ytest,
    pipeline,
    metrics=eval_metrics.items(),
)

CPU times: user 7min 8s, sys: 2.2 s, total: 7min 11s
Wall time: 3min 48s


In [None]:
# Print the TrainingOutput returned by sk_train; its text form starts with
# the fitted Pipeline (feature step + HistGradientBoostingClassifier).
print(out)

TrainingOutput(model=Pipeline(memory=None,
         steps=[('lambdarow',
                 LambdaRow(row_func=HogFactory(orientations=8, pixels_per_cell=(4, 4), cells_per_block=(1, 1), multichannel=False))),
                ('histgradientboostingclassifier',
                 HistGradientBoostingClassifier(l2_regularization=0.0,
                                                learning_rate=0.1, loss='auto',
                                                max_bins=256, max_depth=2,
                                                max_iter=50, max_leaf_nodes=31,
                                                min_samples_leaf=20,
                                                n_iter_no_change=None,
                                                random_state=None, scoring=None,
                                                tol=1e-07,
                                                validation_fraction=0.1,
                                                verbose=0))],
         verbose=Fals

In [None]:
import numpy as np
from sklearn.ensemble import BaggingClassifier

# Total budget of boosting iterations across the whole ensemble.
T = 500

# Split the budget evenly: int(sqrt(T)) bagged members, each boosted for
# int(sqrt(T)) iterations (22 x 22 = 484 for T = 500).
_T = int(np.sqrt(T))

pipeline = make_single_feature_model(
    HogFactory(orientations=8, pixels_per_cell=(4, 4), cells_per_block=(1, 1), multichannel=False),
    BaggingClassifier(
        n_estimators=_T,
        base_estimator=HistGradientBoostingClassifier(max_depth=2, max_iter=_T),
        # Bagging draws random bootstrap samples; seed it (same seed as the
        # train/test split) so reruns of this notebook are reproducible.
        random_state=13,
    ),
)

In [None]:
%%time
# Metrics handed to sk_train as (name, function) pairs.
eval_metrics = {"cm": confusion_matrix, "acc": accuracy_score}
out = sk_train(
    xtrain,
    xtest,
    ytrain,
    ytest,
    pipeline,
    metrics=eval_metrics.items(),
)

CPU times: user 15min 24s, sys: 6.47 s, total: 15min 30s
Wall time: 6min


In [None]:
# Print the TrainingOutput returned by sk_train; its text form starts with
# the fitted Pipeline (feature step + BaggingClassifier).
print(out)

TrainingOutput(model=Pipeline(memory=None,
         steps=[('lambdarow',
                 LambdaRow(row_func=HogFactory(orientations=8, pixels_per_cell=(4, 4), cells_per_block=(1, 1), multichannel=False))),
                ('baggingclassifier',
                 BaggingClassifier(base_estimator=HistGradientBoostingClassifier(l2_regularization=0.0,
                                                                                 learning_rate=0.1,
                                                                                 loss='auto',
                                                                                 max_bins=256,
                                                                                 max_depth=2,
                                                                                 max_iter=22,
                                                                                 max_leaf_nodes=31,
                                                                          