In [30]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import matplotlib
import matplotlib.pyplot as plt
import time

from sklearn.metrics import confusion_matrix, plot_confusion_matrix, accuracy_score, plot_roc_curve,\
                             precision_recall_curve, plot_precision_recall_curve, f1_score, average_precision_score,\
                             hinge_loss, precision_score, recall_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.metrics import make_scorer, accuracy_score, average_precision_score, f1_score,\
                            log_loss, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import label_binarize, LabelBinarizer, LabelEncoder, OneHotEncoder
from sklearn.datasets import fetch_openml
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import resample

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import RMSprop
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import to_categorical

from catboost import CatBoostClassifier

# Data load

In [9]:
fmnist = fetch_openml("Fashion-MNIST", data_home="./fmnist", cache=True)
classes = [str(x) for x in range(0, 10)]
num_classes = len(classes)

def mk_dataset(total, fmnist=fmnist, classes=classes):
    samples = int(fmnist.data.shape[0]*total)
    return resample(fmnist.data, fmnist.target, n_samples=samples)

def plot_imgs(x, y, w=28, h=28):
    plt.figure(figsize=(10,10))
    for i in range(min(25, x.shape[0])):
        plt.subplot(5,5,i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        img = x[i]
        img = img.reshape((w, h))
        plt.imshow(img)
        plt.xlabel(y[i])
    plt.show()    

# Model evaluation

In [10]:
def binarized_scorer(metric, **kwargs):
    lb = LabelBinarizer()
    def score(y_test, y_pred, metric=metric, lb=lb, kwargs=kwargs):
        lb.fit(y_test)
        y_test = lb.transform(y_test)
        y_pred = lb.transform(y_pred)
        return metric(y_test, y_pred, **kwargs)
    return make_scorer(score)

def mk_test(clf, name, gpu=False):
    def run_test(X, Y, clf=clf, name=name):
        scoring = {
            "accuracy":     binarized_scorer(accuracy_score), 
            "f1_score":     binarized_scorer(f1_score, average='macro'), 
            "log_loss":     binarized_scorer(log_loss), 
            "precision":    binarized_scorer(precision_score, average='macro'), 
            "recall":       binarized_scorer(recall_score, average='macro'), 
            "roc_auc":      binarized_scorer(roc_auc_score, average='macro'),
            # to je pole pod Precision-Recall, albo jakaś średnia. nie wiem.
            "prec_rec_auc": binarized_scorer(average_precision_score, average='macro') 
        }
        n_jobs = None if gpu else 8
        scores = cross_validate(clf, X, Y, cv=5, n_jobs=n_jobs, scoring=scoring)
        del scores['fit_time']
        del scores['score_time']
        keys = list(scores.keys())
        for key in keys:
            scores[key.replace('test_', '')] = [np.mean(scores.pop(key))]
        df = pd.DataFrame.from_dict(scores)
        df.insert(loc=0, column='Name', value=name)
        return df
    return run_test

# Models

In [11]:
def mk_adaboost(depth=5, n=100, seed=1):
    return AdaBoostClassifier(
        base_estimator=DecisionTreeClassifier(max_depth=depth),
        n_estimators=n,
        random_state=seed)

def mk_catboost():
    return CatBoostClassifier(iterations=1000, task_type="GPU")

class MyLittleKerasClassifier(KerasClassifier):
    # predict() nie zwracal 
    def predict(self, X):
        y_pred = KerasClassifier.predict(self, X)
        return to_categorical(y_pred, num_classes)

def mk_mlp(epochs=10):
    def build():
        model = Sequential()
        # starannie dobrane wartosci, wiem co robie
        model.add(Dense(256, activation='relu', input_shape=(28*28,)))
        model.add(Dropout(0.2))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model
    return MyLittleKerasClassifier(build_fn=build, epochs=epochs)

def mk_cnn(epochs=10):
    def build():
        model = Sequential()
        model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=(28,28, 1), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(32, kernel_size = 5, strides=2, padding='same', activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))
        model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(64, kernel_size = 5, strides=2, padding='same', activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))
        model.add(Conv2D(128, kernel_size = 4, activation='relu'))
        model.add(BatchNormalization())
        model.add(Flatten())
        model.add(Dropout(0.4))
        model.add(Dense(10, activation='softmax'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model
    return MyLittleKerasClassifier(build_fn=build, epochs=epochs)

In [53]:
def test_cnn(data_sz, epochs):
    name = 'CNN | {}% of data'.format(data_sz*100.0)
    name += ' | {} epochs'.format(epochs) if epochs else ''
    x, y = mk_dataset(data_sz)
    y = to_categorical(y, num_classes)
    x /= 255.0
    x = x.reshape((x.shape[0], 28, 28, 1))
    return mk_test(mk_cnn(epochs=epochs), name, gpu=True)(x, y)

def test_mlp(data_sz, epochs):
    name = 'MLP | {}% of data'.format(data_sz*100.0)
    name += ' | {} epochs'.format(epochs) if epochs else ''
    x, y = mk_dataset(data_sz)
    y = to_categorical(y, num_classes)
    x /= 255.0
    return mk_test(mk_mlp(epochs=epochs), name, gpu=True)(x, y)

# Pomiar z użyciem time.time() to nie jest czas CPU
# ale już tam nic to. timeit() było zbyt irytujące
def test_adaboost(data_sz, n=100, duration=False):
    name = 'AdaBoost | {}% of data'.format(data_sz*100.0)
    start_time = time.time()
    result = mk_test(mk_adaboost(n=n), name)(*mk_dataset(data_sz))
    elapsed = time.time()-start_time
    if duration:
        result['Name'] += ' | %d secs' % elapsed
    return result

# Pomiar metryk w zależności od ilości dostępnych danych

In [None]:
tests_data = pd.concat([method(data_sz, None) for method in [test_adaboost, test_mlp, test_cnn] for data_sz in [0.1, 0.5, 1.0]])
tests_data

# Pomiar metryk w zależności od czasów treningu

## Dla Adaboosta - czas rzeczywisty

In [51]:
tests_time = [test_adaboost(0.5, n=n, duration=True) for n in [50, 150, 500]]
pd.concat(tests_time)

Unnamed: 0,Name,accuracy,f1_score,log_loss,precision,recall,roc_auc,prec_rec_auc
0,AdaBoost | 50.0% of data | 212 secs,0.688314,0.688106,10.765243,0.697314,0.688081,0.826725,0.550447
0,AdaBoost | 50.0% of data | 627 secs,0.753543,0.752674,8.512328,0.75593,0.753249,0.862933,0.628641
0,AdaBoost | 50.0% of data | 1785 secs,0.789543,0.789982,7.268932,0.792207,0.790228,0.883419,0.672362


## Dla sieci - liczba epok

In [54]:
tests_time += [method(0.5, epochs) for method in [test_mlp, test_cnn] for epochs in [5, 15, 25]]
tests_time = pd.concat(tests_time)
tests_time.to_pickle("3b.pkl")
tests_time

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15


Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25


Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5


Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15


Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25


Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


Unnamed: 0,Name,accuracy,f1_score,log_loss,precision,recall,roc_auc,prec_rec_auc
0,AdaBoost | 50.0% of data | 212 secs,0.688314,0.688106,10.765243,0.697314,0.688081,0.826725,0.550447
0,AdaBoost | 50.0% of data | 627 secs,0.753543,0.752674,8.512328,0.75593,0.753249,0.862933,0.628641
0,AdaBoost | 50.0% of data | 1785 secs,0.789543,0.789982,7.268932,0.792207,0.790228,0.883419,0.672362
0,MLP | 50.0% of data| 5 epochs,0.8588,0.858106,4.876875,0.863362,0.858452,0.921385,0.764586
0,MLP | 50.0% of data| 15 epochs,0.8824,0.880851,4.06176,0.882991,0.881708,0.934326,0.798184
0,MLP | 50.0% of data| 25 epochs,0.885657,0.884865,3.949262,0.887206,0.885287,0.936292,0.804582
0,CNN | 50.0% of data| 5 epochs,0.8966,0.896143,3.571309,0.89999,0.896879,0.942695,0.823182
0,CNN | 50.0% of data| 15 epochs,0.9282,0.928281,2.479884,0.930066,0.928377,0.960202,0.873302
0,CNN | 50.0% of data| 25 epochs,0.943486,0.943669,1.951934,0.944031,0.943767,0.968742,0.898537
