In [301]:
import pandas as pd
import numpy as np
import seaborn as sns
import random
import matplotlib
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix, plot_confusion_matrix, accuracy_score, plot_roc_curve,\
                             precision_recall_curve, plot_precision_recall_curve, f1_score, average_precision_score,\
                             hinge_loss, precision_score, recall_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.metrics import make_scorer, accuracy_score, average_precision_score, f1_score,\
                            log_loss, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import label_binarize, LabelBinarizer
from sklearn.datasets import fetch_openml
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import resample

# Data load

In [346]:
fmnist = fetch_openml("Fashion-MNIST", data_home="./fmnist", cache=True)
classes = [str(x) for x in range(0, 10)]
def mk_dataset(total, fmnist=fmnist, classes=classes):
    samples = int(fmnist.data.shape[0]*total)
    return resample(fmnist.data, fmnist.target, n_samples=samples)

# Model evaluation

In [358]:
def binarized_scorer(metric, **kwargs):
    lb = LabelBinarizer()
    def score(y_test, y_pred, metric=metric, lb=lb, kwargs=kwargs):
        lb.fit(y_test)
        y_test = lb.transform(y_test)
        y_pred = lb.transform(y_pred)
        return metric(y_test, y_pred, **kwargs)
    return make_scorer(score)

def mk_test(clf, name):
    def run_test(X, Y, clf=clf, name=name):
        scoring = {
            "accuracy":          binarized_scorer(accuracy_score), 
            "f1":                binarized_scorer(f1_score, average='macro'), 
            "neg_log_loss":      binarized_scorer(log_loss), 
            "precision":         binarized_scorer(precision_score, average='macro'), 
            "recall":            binarized_scorer(recall_score, average='macro'), 
            "roc_auc":           binarized_scorer(roc_auc_score, average='macro'),
            # to je pole pod Precision-Recall, albo jakaś średnia. nie wiem.
            "average_precision": binarized_scorer(average_precision_score, average='macro') 
        }
        scores = cross_validate(clf, X, Y, cv=4, n_jobs=8, scoring=scoring)
        return scores
    return run_test

# AdaBoost

In [359]:
def mk_adaboost(depth=5, n=100, seed=1):
    return AdaBoostClassifier(
        base_estimator=DecisionTreeClassifier(max_depth=depth),
        n_estimators=n,
        random_state=seed)

def mk_knn():
    return KNeighborsClassifier()

In [360]:
test = mk_test(mk_adaboost(depth=10), "AdaBoost")
test(*mk_dataset(0.1))

{'fit_time': array([123.0119493 , 123.00694561, 122.40339684, 124.1129477 ]),
 'score_time': array([0.43439364, 0.42738676, 0.41037226, 0.37333918]),
 'test_accuracy': array([0.84857143, 0.85257143, 0.84114286, 0.86171429]),
 'test_f1': array([0.84910824, 0.85383553, 0.84183597, 0.86415286]),
 'test_neg_log_loss': array([5.23015757, 5.09200246, 5.48673134, 4.77621936]),
 'test_precision': array([0.84986438, 0.85522527, 0.8426891 , 0.86592101]),
 'test_recall': array([0.84937377, 0.85355064, 0.8419653 , 0.86298085]),
 'test_roc_auc': array([0.91626236, 0.9185702 , 0.91214405, 0.92379401]),
 'test_average_precision': array([0.75151053, 0.75900108, 0.73946241, 0.77189013])}