# How to train a patch-based ML model on Fashion-MNIST
> Testing patch-based approaches on Fashion-MNIST (FMNIST)

In [None]:
%%time
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

import numpy as np

# Download (or read from the local cache) the Fashion-MNIST dataset from OpenML.
# NOTE(review): data_home is a machine-specific absolute path; change it when
# running this notebook on another machine.
data = fetch_openml("Fashion-MNIST", data_home="/home/matthieu/sklearn_data")

# Depending on the scikit-learn version, data["data"] is either a NumPy array
# or a pandas DataFrame (which has no .reshape). np.asarray gives an ndarray in
# both cases, so each flat 784-value row can be viewed as a 28x28 image.
X = np.asarray(data["data"]).reshape(-1, 28, 28)
Y = data["target"]

# Hold out everything beyond the first 60000 sampled rows for testing; the
# fixed random_state keeps the split identical between runs.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, train_size=60000, random_state=13)

CPU times: user 7 µs, sys: 1 µs, total: 8 µs
Wall time: 14.3 µs


In [None]:
from mlg_lib.ml_utils import PatchTransform, LambdaRow
from mlg_lib.imgfeat import flatten
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomTreesEmbedding, RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from mlg_lib.ml_utils import sk_train
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# First variant: each patch is flattened, standardized and reduced to 8 PCA
# components by the inner pipeline; a random forest is the final estimator.
# NOTE(review): PatchTransform comes from mlg_lib; it presumably applies the
# inner transformer to 8x8 patches taken with stride 8 — confirm there.
pipeline = make_pipeline(
    PatchTransform(
        patch_size=8,
        stride=8,
        max_patches=10,
        transformer=make_pipeline(
            LambdaRow(flatten),
            StandardScaler(),
            PCA(n_components=8),
        ),
    ),
    RandomForestClassifier(
        n_estimators=100,
        max_features="log2",
        max_depth=None,
    ),
)

In [None]:
# Run the project helper sk_train on the train/test split with the current
# pipeline. Metrics are handed over as (name, function) pairs.
out = sk_train(
    xtrain,
    xtest,
    ytrain,
    ytest,
    pipeline,
    metrics={"cm": confusion_matrix, "acc": accuracy_score}.items(),
)

In [None]:
# Print the TrainingOutput returned by sk_train as plain text (it includes the
# fitted pipeline).
print(out)

TrainingOutput(model=Pipeline(memory=None,
         steps=[('patchtransform',
                 PatchTransform(max_patches=10, patch_size=8, stride=8,
                                transformer=Pipeline(memory=None,
                                                     steps=[('lambdarow',
                                                             LambdaRow(row_func=<function flatten at 0x7f75a09d0378>)),
                                                            ('standardscaler',
                                                             StandardScaler(copy=True,
                                                                            with_mean=True,
                                                                            with_std=True)),
                                                            ('pca',
                                                             PCA(copy=True,
                                                                 iterated_power='auto',
       

In [None]:
%%time

from sklearn.decomposition import TruncatedSVD

# Second variant: patches are flattened, embedded with shallow random trees and
# reduced to 8 components with TruncatedSVD; a random forest is the final
# estimator.
# NOTE(review): this rebinds `pipeline`, replacing the PCA-based variant above.
pipeline = make_pipeline(
    PatchTransform(transformer=make_pipeline(
                                LambdaRow(flatten),
                                # The keyword must be `n_estimators`; the previous
                                # misspelling made the constructor raise TypeError.
                                RandomTreesEmbedding(max_depth=2, n_estimators=32, sparse_output=False),
                                TruncatedSVD(n_components=8)
                            ),
               patch_size=8,
               stride=8,
               max_patches=10
              ),
    RandomForestClassifier(max_depth=None, max_features="log2", n_estimators=100)
)