In [None]:
import gzip
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, roc_curve, auc, make_scorer
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score, KFold, cross_val_predict
from sklearn.preprocessing import label_binarize

In [None]:
def _read_idx_gz(path, header_bytes):
    """Return the uint8 payload of a gzip-compressed IDX file, skipping its header."""
    with gzip.open(path, 'rb') as idx_file:
        return np.frombuffer(idx_file.read(), dtype=np.uint8, offset=header_bytes)


def load_fashion_mnist(data_dir='.'):
    """
    Loads Fashion MNIST dataset.

    Adapted from: https://github.com/zalandoresearch/fashion-mnist/blob/master/utils/mnist_reader.py

    Parameters
    ----------
    data_dir : str or path-like, default '.'
        Directory holding the four ``*-ubyte.gz`` files. The default reads
        from the current working directory, exactly as before this
        parameter existed.

    Returns
    -------
    train_images, train_labels, test_images, test_labels : numpy.ndarray
        uint8 arrays. Labels are 1-D; images are reshaped to
        ``(number_of_labels, 784)``, i.e. one flattened image per row.

    Raises
    ------
    FileNotFoundError
        If one of the four files is missing from ``data_dir``.
    ValueError
        If an image payload cannot be reshaped to ``(number_of_labels, 784)``.
    """
    import os  # local import so this cell does not depend on the import cell

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    # Label files: the first 8 bytes are header, the rest is one byte per label.
    train_labels = _read_idx_gz(os.path.join(data_dir, TRAIN_LABELS), 8)
    test_labels = _read_idx_gz(os.path.join(data_dir, TEST_LABELS), 8)

    # Image files: the first 16 bytes are header, the rest is one byte per pixel.
    # The label count fixes the number of rows, so a mismatched pair of files
    # fails loudly in reshape instead of producing misaligned data.
    train_images = _read_idx_gz(os.path.join(data_dir, TRAIN_IMAGES), 16).reshape(len(train_labels), 784)
    test_images = _read_idx_gz(os.path.join(data_dir, TEST_IMAGES), 16).reshape(len(test_labels), 784)

    return train_images, train_labels, test_images, test_labels

# Load the four arrays once at notebook scope; the cells below read these names.
train_images, train_labels, test_images, test_labels = load_fashion_mnist()

In [None]:
# Alias the training labels as the target vector `y`.
y = train_labels
# Show the shape of the label array as the cell output.
y.shape

In [None]:
# Rescale features using min-max scaling.
scaler = MinMaxScaler()
# Learn the per-feature minimum and maximum from the training images only.
X = scaler.fit_transform(train_images)
# train_scaled is X
# Apply the *training* min/max to the test images. Refitting on the test set
# would map train and test features through different scalings (and leak
# test-set statistics into preprocessing). As a consequence, test values may
# fall slightly outside [0, 1] where a test pixel exceeds the training range.
test_scaled = scaler.transform(test_images)

In [None]:
# Keep only the first 6000 samples; features and labels are sliced in lockstep.
small_X, small_y = X[:6000], y[:6000]

In [None]:
# Create a nearest neighbor classifier: 3 neighbours, neighbour search via a KD-tree.
# NOTE(review): the image rows have 784 features; KD-trees usually lose their
# advantage in high dimensions -- consider benchmarking algorithm='auto' or 'brute'.
clf = KNeighborsClassifier(n_neighbors=3, algorithm='kd_tree')
# Display the configured estimator (it has not been fitted in this cell).
clf

In [None]:
# 5-fold cross-validated predictions for the 6000-sample subset.
y_pred = cross_val_predict(estimator=clf, X=small_X, y=small_y, cv=5)

In [None]:
# Display the cross-validated predictions.
y_pred

In [None]:
# Fit on the subset, then predict those very same samples (resubstitution).
clf.fit(small_X, small_y)
y_predict = clf.predict(small_X)

In [None]:
# Display the predictions made on the samples the classifier was fitted on.
y_predict

In [None]:
# Confusion matrix of the cross-validated predictions, with classes ordered 0..9.
cm1 = confusion_matrix(y_true=small_y, y_pred=y_pred, labels=list(range(10)))
cm1

In [None]:
# Confusion matrix of the predictions made on the fitting samples, classes ordered 0..9.
cm2 = confusion_matrix(y_true=small_y, y_pred=y_predict, labels=list(range(10)))
cm2

In [None]:
# One-vs-rest true/false positive rates for every class, computed from cm1 in
# one vectorised pass (rows are treated as the true class, columns as the
# predicted class).
n_classes = cm1.shape[0]
hits = np.diag(cm1)                      # diagonal: correctly classified per class
misses = cm1.sum(axis=1) - hits          # row total minus the diagonal entry
false_alarms = cm1.sum(axis=0) - hits    # column total minus the diagonal entry
rejections = cm1.sum() - hits - misses - false_alarms
# Keep plain lists so the following cells can use sum()/len() on them.
tpr = list(hits / (hits + misses))
fpr = list(false_alarms / (false_alarms + rejections))

In [None]:
# Unweighted mean of the per-class TPR values currently held in `tpr`
# (every class counts equally, regardless of how many samples it has).
tpr_ave1 = sum(tpr)/len(tpr)
# Display the average as the cell output.
tpr_ave1

In [None]:
# Plot the per-class operating points in ROC space.
# `fpr`/`tpr` hold one (FPR, TPR) pair per class, obtained from hard label
# predictions. They are drawn as separate markers: joining them with a line in
# class-index order would suggest a threshold sweep that was never performed.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot([0, 1], [0, 1], 'k--', label='Chance level')
ax.plot(fpr, tpr, 'bo', label='Per-class operating point')
for class_idx, (fpr_k, tpr_k) in enumerate(zip(fpr, tpr)):
    # Tag each marker with its position in the fpr/tpr lists.
    ax.annotate(str(class_idx), (fpr_k, tpr_k), textcoords='offset points', xytext=(5, 5))
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Space: Per-Class Operating Points')
ax.legend(loc='lower right')
plt.show()

In [None]:
# One-vs-rest true/false positive rates per class for cm2, the confusion
# matrix of the predictions made on the fitting samples. This cell used to
# re-read cm1, which made these rates (and the average/plot derived from them)
# an exact repeat of the cross-validated ones.
n_classes = cm2.shape[0]
tpr = []
fpr = []
for i in range(n_classes):
    tp = cm2[i, i]                   # class i predicted as class i
    fn = np.sum(cm2[i, :]) - tp      # rest of row i: class i predicted as something else
    fp = np.sum(cm2[:, i]) - tp      # rest of column i: other classes predicted as i
    tn = np.sum(cm2) - tp - fn - fp  # everything not involving class i
    tpr_i = tp / (tp + fn)
    fpr_i = fp / (fp + tn)
    tpr.append(tpr_i)
    fpr.append(fpr_i)

In [None]:
# Unweighted mean of the per-class TPR values currently held in `tpr`
# (every class counts equally, regardless of how many samples it has).
tpr_ave2 = sum(tpr)/len(tpr)
# Display the average as the cell output.
tpr_ave2

In [None]:
# Plot the per-class operating points in ROC space.
# `fpr`/`tpr` hold one (FPR, TPR) pair per class, obtained from hard label
# predictions. They are drawn as separate markers: joining them with a line in
# class-index order would suggest a threshold sweep that was never performed.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot([0, 1], [0, 1], 'k--', label='Chance level')
ax.plot(fpr, tpr, 'bo', label='Per-class operating point')
for class_idx, (fpr_k, tpr_k) in enumerate(zip(fpr, tpr)):
    # Tag each marker with its position in the fpr/tpr lists.
    ax.annotate(str(class_idx), (fpr_k, tpr_k), textcoords='offset points', xytext=(5, 5))
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Space: Per-Class Operating Points')
ax.legend(loc='lower right')
plt.show()