# How to train a patch-based ML model on Fashion-MNIST (FMNIST)
> Testing patch-based approaches on FMNIST

In [None]:
%%time
import os

import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Dataset cache directory. scikit-learn's SCIKIT_LEARN_DATA environment variable
# takes precedence so the notebook can run on other machines; the original
# location is kept as the fallback, so behaviour is unchanged when it is unset.
data_home = os.environ.get("SCIKIT_LEARN_DATA", "/home/matthieu/sklearn_data")
data = fetch_openml("Fashion-MNIST", data_home=data_home)

# np.asarray keeps the reshape working whether fetch_openml hands back NumPy
# arrays (older scikit-learn) or pandas objects (newer default).
X = np.asarray(data["data"]).reshape(-1, 28, 28)
Y = np.asarray(data["target"])

# Fixed seed so the 60000-sample training split is reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, train_size=60000, random_state=13)

CPU times: user 21.7 s, sys: 1.2 s, total: 22.9 s
Wall time: 23 s


In [None]:
from mlg_lib.ml_utils import PatchTransform, LambdaRow
from mlg_lib.imgfeat import flatten
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomTreesEmbedding, RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from mlg_lib.ml_utils import sk_train
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Baseline experiment: flatten -> standardise -> 8-component PCA inside
# PatchTransform, followed by a random forest.
# NOTE(review): PatchTransform/LambdaRow are project code; presumably the inner
# pipeline is applied to patch_size x patch_size image patches — confirm.
pipeline = make_pipeline(
    PatchTransform(
        transformer=make_pipeline(
            LambdaRow(flatten),
            StandardScaler(),
            PCA(n_components=8),
        ),
        patch_size=8,
        stride=8,
        max_patches=10,
    ),
    # random_state reuses the train/test split seed so the forest itself is
    # reproducible; n_jobs=-1 matches the other forests in this notebook.
    RandomForestClassifier(
        max_depth=None,
        max_features="log2",
        n_estimators=100,
        n_jobs=-1,
        random_state=13,
    ),
)

In [None]:
# Run the project's sk_train helper on the current pipeline, requesting a
# confusion matrix ("cm") and accuracy ("acc") as metrics.
out = sk_train(
    xtrain,
    xtest,
    ytrain,
    ytest,
    pipeline,
    metrics={"cm": confusion_matrix, "acc": accuracy_score}.items(),
)

In [None]:
# Show the TrainingOutput returned by sk_train for the PCA pipeline
# (its printed form starts with the fitted model).
print(out)

TrainingOutput(model=Pipeline(memory=None,
         steps=[('patchtransform',
                 PatchTransform(max_patches=10, patch_size=8, stride=8,
                                transformer=Pipeline(memory=None,
                                                     steps=[('lambdarow',
                                                             LambdaRow(row_func=<function flatten at 0x7f75a09d0378>)),
                                                            ('standardscaler',
                                                             StandardScaler(copy=True,
                                                                            with_mean=True,
                                                                            with_std=True)),
                                                            ('pca',
                                                             PCA(copy=True,
                                                                 iterated_power='auto',
       

In [None]:
%%time

# Second experiment: replace the PCA features with a dense random-trees
# embedding (32 trees of depth 2) computed inside PatchTransform.
# Both stochastic estimators reuse the train/test split seed so that they are
# reproducible across reruns.
pipeline = make_pipeline(
    PatchTransform(
        transformer=make_pipeline(
            LambdaRow(flatten),
            RandomTreesEmbedding(
                max_depth=2,
                n_estimators=32,
                sparse_output=False,
                random_state=13,
            ),
        ),
        patch_size=8,
        stride=8,
        max_patches=4,
    ),
    RandomForestClassifier(
        max_depth=None,
        max_features="log2",
        n_estimators=100,
        n_jobs=-1,
        random_state=13,
    ),
)

CPU times: user 525 µs, sys: 26 µs, total: 551 µs
Wall time: 562 µs


In [None]:
%%time
# Run sk_train on the random-trees-embedding pipeline with the same two
# metrics as before: confusion matrix ("cm") and accuracy ("acc").
out = sk_train(
    xtrain,
    xtest,
    ytrain,
    ytest,
    pipeline,
    metrics={"cm": confusion_matrix, "acc": accuracy_score}.items(),
)

CPU times: user 11min 53s, sys: 2.56 s, total: 11min 56s
Wall time: 11min 56s


In [None]:
# Show the TrainingOutput returned by sk_train for the random-trees-embedding
# pipeline (its printed form starts with the fitted model).
print(out)

TrainingOutput(model=Pipeline(memory=None,
         steps=[('patchtransform',
                 PatchTransform(max_patches=4, patch_size=8, stride=8,
                                transformer=Pipeline(memory=None,
                                                     steps=[('lambdarow',
                                                             LambdaRow(row_func=<function flatten at 0x7f99d85cd950>)),
                                                            ('randomtreesembedding',
                                                             RandomTreesEmbedding(max_depth=2,
                                                                                  max_leaf_nodes=None,
                                                                                  min_impurity_decrease=0.0,
                                                                                  min_impurity_split=None,
                                                                                  min_sample

In [None]:
%%time

from sklearn.preprocessing import StandardScaler, Normalizer, FunctionTransformer
from sklearn.cluster import KMeans

def _relu(x):
    x[x<0]=0
    return x

# Third experiment: L2-normalise the flattened input, run it through a
# 100-cluster KMeans step, then standardise and rectify (negatives -> 0).
# KMeans and the forest reuse the train/test split seed so that both are
# reproducible across reruns.
pipeline = make_pipeline(
    PatchTransform(
        transformer=make_pipeline(
            LambdaRow(flatten),
            Normalizer(norm="l2"),
            KMeans(n_clusters=100, n_init=3, max_iter=100, random_state=13),
            StandardScaler(),
            # validate=False: pass the array to _relu without input checking.
            FunctionTransformer(_relu, validate=False),
        ),
        patch_size=8,
        stride=8,
        max_patches=4,
    ),
    RandomForestClassifier(
        max_depth=None,
        max_features="log2",
        n_estimators=100,
        n_jobs=-1,
        random_state=13,
    ),
)

CPU times: user 394 µs, sys: 10 µs, total: 404 µs
Wall time: 411 µs


In [None]:
%%time
# Run sk_train on the KMeans-feature pipeline with the same two metrics as
# the earlier experiments: confusion matrix ("cm") and accuracy ("acc").
out = sk_train(
    xtrain,
    xtest,
    ytrain,
    ytest,
    pipeline,
    metrics={"cm": confusion_matrix, "acc": accuracy_score}.items(),
)

CPU times: user 6min 24s, sys: 1min 26s, total: 7min 51s
Wall time: 4min 47s


In [None]:
# Show the TrainingOutput returned by sk_train for the KMeans-feature
# pipeline (its printed form starts with the fitted model).
print(out)

TrainingOutput(model=Pipeline(memory=None,
         steps=[('patchtransform',
                 PatchTransform(max_patches=4, patch_size=8, stride=8,
                                transformer=Pipeline(memory=None,
                                                     steps=[('lambdarow',
                                                             LambdaRow(row_func=<function flatten at 0x7f99d85cd950>)),
                                                            ('normalizer',
                                                             Normalizer(copy=True,
                                                                        norm='l2')),
                                                            ('kmeans',
                                                             KMeans(algorithm='auto',
                                                                    copy_x=True,
                                                                    init='k-means++',
                        