# How to train a Rocket-like classifier
> Details

In [1]:
%%time
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

data = fetch_openml("Fashion-MNIST", data_home="/home/matthieu/sklearn_data")

X = data["data"].reshape(-1,28,28)
Y = data["target"]

xtrain, xtest, ytrain, ytest = train_test_split(X, Y, train_size=60000, random_state=13)

CPU times: user 21.3 s, sys: 1.28 s, total: 22.6 s
Wall time: 22.4 s


In [5]:
from sklearn.ensemble import ExtraTreesClassifier
from mlg_lib.imgfeat import ConvolutionFeature
from mlg_lib.ml_utils import make_single_feature_model, sk_train, LambdaRow
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.pipeline import make_pipeline
import numpy as np

In [3]:
kernel = 0.2* np.random.randn(3,3,1024)

pipeline = make_single_feature_model(
    ConvolutionFeature(kernel),
    ExtraTreesClassifier(n_estimators=100, max_features="log2")
)

In [15]:
pipeline = make_pipeline(
        LambdaRow(ConvolutionFeature(kernel), use_joblib=True, n_jobs=4),
        ExtraTreesClassifier(n_estimators=100, max_features="log2", n_jobs=4)
)

In [19]:
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, train_size=60000, random_state=13)

In [20]:
%%time
out = sk_train(xtrain, xtest, ytrain, ytest, pipeline, metrics=dict(cm=confusion_matrix, acc=accuracy_score).items())

CPU times: user 2min 37s, sys: 8.98 s, total: 2min 46s
Wall time: 49min 1s


In [21]:
out

TrainingOutput(model=Pipeline(memory=None,
         steps=[('lambdarow',
                 LambdaRow(row_func=ConvolutionFeature(kernel=array([[[ 0.14719296, -0.19030036,  0.05962824, ...,  0.1291832 ,
         -0.13140789,  0.06986491],
        [-0.10300972, -0.15590834,  0.30330397, ..., -0.1913684 ,
         -0.13388783, -0.00087435],
        [-0.07295761,  0.13729776, -0.04655148, ...,  0.22858822,
          0.21206639,  0.1428331 ]],

       [[ 0.09615466, -0.09682852, -0.13381...
                ('extratreesclassifier',
                 ExtraTreesClassifier(bootstrap=False, class_weight=None,
                                      criterion='gini', max_depth=None,
                                      max_features='log2', max_leaf_nodes=None,
                                      min_impurity_decrease=0.0,
                                      min_impurity_split=None,
                                      min_samples_leaf=1, min_samples_split=2,
                                    

In [36]:
from skimage.filters import gabor_kernel

# prepare filter bank kernels
kernels = []
for theta in range(8):
    theta = theta / 4. * np.pi
    for sigma in (1, 3):
        for frequency in (0.05, 0.125, 0.25, 0.5, 1.):
            kernel = np.real(gabor_kernel(frequency, theta=theta,
                                          sigma_x=sigma, sigma_y=sigma))
            kernels.append(kernel)

In [62]:
padded_kernels = list()

for k in kernels:
    if k.shape==(7,7):
        padded_kernels.append(pad(k, 6, mode="constant"))
    elif k.shape==(15,15):
        padded_kernels.append(pad(k, 2, mode="constant"))
    else:
         padded_kernels.append(k)   

In [73]:
gabor_kernels = np.dstack(padded_kernels)

In [74]:
pipeline = make_pipeline(
        LambdaRow(ConvolutionFeature(gabor_kernels), use_joblib=True, n_jobs=4),
        ExtraTreesClassifier(n_estimators=100, max_features="log2", n_jobs=4)
)

In [75]:
%%time
out = sk_train(xtrain, xtest, ytrain, ytest, pipeline, metrics=dict(cm=confusion_matrix, acc=accuracy_score).items())

CPU times: user 1min 5s, sys: 2.76 s, total: 1min 8s
Wall time: 10min 25s


In [76]:
out

TrainingOutput(model=Pipeline(memory=None,
         steps=[('lambdarow',
                 LambdaRow(row_func=ConvolutionFeature(kernel=array([[[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00...
                ('extratreesclassifier',
                 ExtraTreesClassifier(bootstrap=False, class_weight=None,
                                      criterion='gini', max_depth=None,
                                      max_features='log2', max_leaf_nodes=None,
                                      min_impurity_decrease=0.0,
                                      min_impurity_split=None,
                                      min_samples_leaf=1, min_samples_split=2,
                                      min_weig