In [None]:
# default_exp fashion_bench

# fashion_bench

> Utilities for benchmarking ML on FMNIST

In [None]:
import numpy as np
from mlg_lib.sk_utils import LambdaRow
from numpy import ndarray
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from skimage.feature import hog
from skimage.feature import daisy

In [None]:
def flatten(arr: ndarray)->ndarray:
    """Collapse ``arr`` into a one-dimensional array.

    Delegates to ``ndarray.flatten``, which returns a new 1-D array (a copy),
    so writing to the result does not modify ``arr``.

    Args:
        arr: Array to collapse.

    Returns:
        A 1-D array holding every element of ``arr``.
    """
    flat = arr.flatten()
    return flat


def hog_features(arr: ndarray)->ndarray:
    """Compute a HOG (histogram of oriented gradients) descriptor for ``arr``.

    Thin wrapper around ``skimage.feature.hog`` with fixed settings:
    8 orientation bins, 8x8-pixel cells, 2x2-cell blocks, "L2" block
    normalisation and the square-root transform enabled. ``feature_vector=True``
    asks ``hog`` for a flat feature vector.

    Args:
        arr: Image to describe.
            NOTE(review): presumably a single 2-D grayscale image — confirm
            against the caller.

    Returns:
        Whatever ``hog`` returns for these settings (a flat feature vector).
    """
    hog_options = dict(
        orientations=8,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm="L2",
        transform_sqrt=True,
        feature_vector=True,
    )
    return hog(arr, **hog_options)


def daisy_features(x):
    """Compute DAISY descriptors for ``x`` and return them as one flat vector.

    Thin wrapper around ``skimage.feature.daisy`` with fixed settings
    (step 13, radius 7, 2 rings, 6 histograms per ring, 8 orientations).
    The array returned by ``daisy`` is flattened to 1-D.

    Args:
        x: Image to describe.
            NOTE(review): presumably a single 2-D grayscale image — confirm
            against the caller.

    Returns:
        1-D array containing all DAISY descriptor values for ``x``.
    """
    daisy_options = {
        "step": 13,
        "radius": 7,
        "rings": 2,
        "histograms": 6,
        "orientations": 8,
    }
    descriptors = daisy(x, **daisy_options)
    return descriptors.flatten()

## Loading data

In [None]:
# Location of the Fashion-MNIST archive.
# NOTE(review): machine-specific absolute path — adjust it to your environment.
FMNIST_PATH = "/home/matthieu/Workspace/data/fashion_mnist.npz"

# np.load on a .npz returns a lazy NpzFile that keeps the archive open.
# Indexing it materialises each array, so everything needed is read inside the
# `with` block and the file handle is released on exit.
with np.load(FMNIST_PATH) as data:
    xtrain, xtest, ytrain, ytest = data["xtrain"], data["xtest"], data["ytrain"], data["ytest"]

## Raw pixel values

### Fitting classifier

In [None]:
# Baseline pipeline: raw pixel values fed to a random forest.
# NOTE(review): LambdaRow presumably applies `flatten` to every sample — confirm
# in mlg_lib.sk_utils.
# random_state is pinned so that re-running the notebook rebuilds the same forest
# and the scores are comparable across feature sets; outputs saved from earlier
# unseeded runs may differ slightly.
pipe = make_pipeline(
    LambdaRow(flatten),
    RandomForestClassifier(max_depth=None, max_features="log2", n_estimators=100, random_state=0)
)

pipe.fit(xtrain, ytrain)

Pipeline(memory=None,
         steps=[('lambdarow',
                 LambdaRow(row_func=<function flatten at 0x7fc422ac46a8>)),
                ('randomforestclassifier',
                 RandomForestClassifier(bootstrap=True, class_weight=None,
                                        criterion='gini', max_depth=None,
                                        max_features='log2',
                                        max_leaf_nodes=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=100, n_jobs=None,
                                        oob_score=False, random_state=None,
                                        verbose=0, warm_start=False))],
         verbose=False)

### Evaluation

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are true
# labels, columns are predicted labels.
# NOTE: output saved from the earlier swapped call is the transpose of this matrix.
confusion_matrix(ytrain, pipe.predict(xtrain))

array([[6000,    0,    0,    0,    0,    0,    0,    0,    0,    0],
       [   0, 6000,    0,    0,    0,    0,    0,    0,    0,    0],
       [   0,    0, 6000,    0,    0,    0,    0,    0,    0,    0],
       [   0,    0,    0, 6000,    0,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 6000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0, 6000,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0,    0, 6000,    0,    0,    0],
       [   0,    0,    0,    0,    0,    0,    0, 6000,    0,    0],
       [   0,    0,    0,    0,    0,    0,    0,    0, 6000,    0],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 6000]])

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are true
# labels, columns are predicted labels.
# NOTE: output saved from the earlier swapped call is the transpose of this matrix.
confusion_matrix(ytest, pipe.predict(xtest))

array([[863,   3,  14,  19,   0,   0, 164,   0,   0,   0],
       [  1, 957,   0,   3,   0,   0,   1,   0,   2,   0],
       [ 11,   3, 810,   9, 100,   0, 124,   0,   7,   0],
       [ 33,  24,  10, 908,  37,   0,  29,   0,   2,   0],
       [  3,   6, 111,  27, 810,   0,  96,   0,   5,   0],
       [  1,   0,   0,   0,   0, 956,   0,  13,   2,   8],
       [ 74,   5,  51,  31,  50,   0, 563,   0,   4,   0],
       [  0,   0,   0,   0,   0,  30,   0, 949,   4,  45],
       [ 14,   2,   4,   3,   3,   1,  23,   0, 973,   2],
       [  0,   0,   0,   0,   0,  13,   0,  38,   1, 945]])

In [None]:
# Arguments follow scikit-learn's documented (y_true, y_pred) order. Accuracy is
# the fraction of matching labels, so the score is the same as with the
# arguments swapped.
accuracy_score(ytest, pipe.predict(xtest))

0.8718

## HOG

In [None]:
# HOG pipeline: HOG descriptors fed to a random forest.
# NOTE(review): LambdaRow presumably applies `hog_features` to every sample —
# confirm in mlg_lib.sk_utils.
# random_state is pinned so that re-running the notebook rebuilds the same forest
# and the scores are comparable across feature sets; outputs saved from earlier
# unseeded runs may differ slightly.
pipe = make_pipeline(
    LambdaRow(hog_features),
    RandomForestClassifier(max_depth=None, max_features="log2", n_estimators=100, random_state=0)
)

pipe.fit(xtrain, ytrain)

Pipeline(memory=None,
         steps=[('lambdarow',
                 LambdaRow(row_func=<function hog_features at 0x7fc422ac4598>)),
                ('randomforestclassifier',
                 RandomForestClassifier(bootstrap=True, class_weight=None,
                                        criterion='gini', max_depth=None,
                                        max_features='log2',
                                        max_leaf_nodes=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=100, n_jobs=None,
                                        oob_score=False, random_state=None,
                                        verbose=0, warm_start=False))],
         verbose=False)

### Evaluation

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are true
# labels, columns are predicted labels.
# NOTE: output saved from the earlier swapped call is the transpose of this matrix.
confusion_matrix(ytrain, pipe.predict(xtrain))

array([[6000,    0,    0,    0,    0,    0,    0,    0,    0,    0],
       [   0, 6000,    0,    0,    0,    0,    0,    0,    0,    0],
       [   0,    0, 6000,    0,    0,    0,    0,    0,    0,    0],
       [   0,    0,    0, 6000,    0,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 6000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0, 6000,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0,    0, 6000,    0,    0,    0],
       [   0,    0,    0,    0,    0,    0,    0, 6000,    0,    1],
       [   0,    0,    0,    0,    0,    0,    0,    0, 6000,    0],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 5999]])

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are true
# labels, columns are predicted labels.
# NOTE: output saved from the earlier swapped call is the transpose of this matrix.
confusion_matrix(ytest, pipe.predict(xtest))

array([[846,   0,  16,  19,   2,   0, 179,   0,   4,   1],
       [  2, 949,   0,  10,   0,   0,   2,   0,   1,   0],
       [ 38,   5, 743,  25, 100,   0, 106,   0,   7,   0],
       [ 33,  33,   8, 864,  40,   1,  38,   0,   8,   0],
       [  5,   4, 128,  37, 765,   0, 122,   0,   6,   0],
       [  0,   0,   0,   0,   0, 948,   1,  63,   3,  11],
       [ 61,   6,  97,  39,  86,   0, 531,   0,   8,   0],
       [  0,   0,   0,   0,   0,  41,   0, 902,   2,  41],
       [ 15,   3,   8,   6,   7,   1,  21,   0, 960,   1],
       [  0,   0,   0,   0,   0,   9,   0,  35,   1, 946]])

In [None]:
# Arguments follow scikit-learn's documented (y_true, y_pred) order. Accuracy is
# the fraction of matching labels, so the score is the same as with the
# arguments swapped.
accuracy_score(ytest, pipe.predict(xtest))

0.8454

## Daisy

In [None]:
# DAISY pipeline: DAISY descriptors fed to a random forest.
# NOTE(review): LambdaRow presumably applies `daisy_features` to every sample —
# confirm in mlg_lib.sk_utils.
# random_state is pinned so that re-running the notebook rebuilds the same forest
# and the scores are comparable across feature sets; outputs saved from earlier
# unseeded runs may differ slightly.
pipe = make_pipeline(
    LambdaRow(daisy_features),
    RandomForestClassifier(max_depth=None, max_features="log2", n_estimators=100, random_state=0)
)

pipe.fit(xtrain, ytrain)

Pipeline(memory=None,
         steps=[('lambdarow',
                 LambdaRow(row_func=<function daisy_features at 0x7fc422ac48c8>)),
                ('randomforestclassifier',
                 RandomForestClassifier(bootstrap=True, class_weight=None,
                                        criterion='gini', max_depth=None,
                                        max_features='log2',
                                        max_leaf_nodes=None,
                                        min_impurity_decrease=0.0,
                                        min_impurity_split=None,
                                        min_samples_leaf=1, min_samples_split=2,
                                        min_weight_fraction_leaf=0.0,
                                        n_estimators=100, n_jobs=None,
                                        oob_score=False, random_state=None,
                                        verbose=0, warm_start=False))],
         verbose=False)

### Evaluation

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are true
# labels, columns are predicted labels.
# NOTE: output saved from the earlier swapped call is the transpose of this matrix.
confusion_matrix(ytrain, pipe.predict(xtrain))

array([[6000,    0,    0,    0,    0,    0,    0,    0,    0,    0],
       [   0, 6000,    0,    0,    0,    0,    0,    0,    0,    0],
       [   0,    0, 6000,    0,    0,    0,    0,    0,    0,    0],
       [   0,    0,    0, 6000,    0,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0, 6000,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0, 6000,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0,    0, 6000,    0,    0,    0],
       [   0,    0,    0,    0,    0,    0,    0, 6000,    0,    0],
       [   0,    0,    0,    0,    0,    0,    0,    0, 6000,    0],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0, 6000]])

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are true
# labels, columns are predicted labels.
# NOTE: output saved from the earlier swapped call is the transpose of this matrix.
confusion_matrix(ytest, pipe.predict(xtest))

array([[860,   1,  11,  15,   2,   0, 146,   0,   1,   0],
       [  0, 960,   0,   6,   0,   0,   1,   0,   1,   0],
       [ 17,   4, 827,   9,  81,   0,  95,   0,   3,   0],
       [ 39,  23,   9, 888,  41,   0,  30,   0,   4,   0],
       [  3,   4,  82,  38, 818,   0, 105,   0,   5,   0],
       [  0,   0,   0,   0,   0, 963,   0,  20,   2,  11],
       [ 67,   6,  66,  37,  56,   0, 610,   0,   3,   0],
       [  0,   0,   0,   0,   0,  28,   0, 951,   3,  37],
       [ 14,   2,   5,   7,   2,   2,  13,   0, 977,   1],
       [  0,   0,   0,   0,   0,   7,   0,  29,   1, 951]])

In [None]:
# Arguments follow scikit-learn's documented (y_true, y_pred) order. Accuracy is
# the fraction of matching labels, so the score is the same as with the
# arguments swapped.
accuracy_score(ytest, pipe.predict(xtest))

0.8805