Step 1: Import relevant packages

In [1]:
import numpy as np
np.set_printoptions(threshold=np.inf)
np.set_printoptions(linewidth=np.inf)
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier

from tensorflow.keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split

Step 2: Load the Fashion-MNIST dataset

In [2]:
# Fetch Fashion-MNIST; the loader returns it already partitioned into a
# training pair and a test pair.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Stack both partitions along the sample axis (axis 0) so the complete dataset
# is available as a single (X, y) pair for the custom split in the next step.
X = np.concatenate((x_train, x_test), axis=0)
y = np.concatenate((y_train, y_test), axis=0)

Step 3: Take a random subset of the dataset (3,000 samples for training and 1,000 for testing)

In [3]:
# Draw a 3,000-sample training set and a disjoint 1,000-sample test set from
# the combined data. random_state pins the shuffle so that Restart & Run All
# selects the same images every time; without it each run samples a different
# subset and the reported metrics cannot be reproduced.
# Note: this rebinds y_train / y_test, replacing the loader's original labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=3000, test_size=1000, random_state=42
)

Step 4: Flatten each 28×28 image into a 784-element feature vector, since the classifiers expect 2-D (samples × features) input

In [4]:
# Flatten every image into one feature vector: the estimators take 2-D
# (n_samples, n_features) input. The sample count is read from the array
# itself and the feature count is inferred (-1) instead of repeating the
# 3000 / 1000 / 28*28 literals, so this cell keeps working if the subset sizes
# chosen in the split step are changed. Re-running the cell is harmless: an
# already-flattened array reshapes to itself.
X_train = X_train.reshape((X_train.shape[0], -1))
X_test = X_test.reshape((X_test.shape[0], -1))

Step 5: Initialise the classifier models

In [5]:
# Three classifiers to be compared on the same train/test split.

# k-NN: 5 neighbours, p=1 (Manhattan) distance, votes weighted by inverse
# distance. Deterministic, so no seed is needed.
neigh = KNeighborsClassifier(weights="distance", n_neighbors=5, p=1)

# Decision tree: entropy criterion, depth capped at 10. Features are permuted
# at each split even with splitter="best", so the seed is fixed to make the
# fitted tree (and its scores) reproducible across runs.
clf = DecisionTreeClassifier(
    criterion="entropy", max_depth=10, splitter="best", random_state=42
)

# Linear classifier trained by SGD with hinge loss and an L2 penalty. Training
# shuffles the data each epoch, so the seed is fixed here as well.
# NOTE(review): SGD is sensitive to feature scale and no scaling step is
# visible in this notebook -- consider standardising the inputs.
sgd = SGDClassifier(loss="hinge", penalty="l2", random_state=42)

Step 6: Fit each model to the training data

In [6]:
# Train all three estimators, in order, on the same training split. Each name
# is rebound to whatever its own fit() call returns, exactly as when the three
# assignments are written out one per line.
neigh, clf, sgd = [
    estimator.fit(X_train, y_train) for estimator in (neigh, clf, sgd)
]

Step 7: Use the fitted models to predict labels for the test data

In [7]:
# Predict test-set labels with each fitted estimator, in the same order as
# they were trained: k-NN, decision tree, SGD.
y_neight, y_clf, y_sgd = [
    estimator.predict(X_test) for estimator in (neigh, clf, sgd)
]

Step 8: Report the performance of each classifier

In [8]:

from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
# from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

def report(y_true, y_pred, title):
    """Print a plain-text performance summary for one classifier.

    Prints, in order: the title, overall accuracy, precision, recall and F1
    (each computed with ``average=None``, i.e. not averaged across classes),
    and the confusion matrix followed by blank lines as a separator.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the classifier, aligned with ``y_true``.
        title: Heading printed above the metrics.

    Returns:
        None. The summary is written to standard output only.
    """
    overall_accuracy = accuracy_score(y_true, y_pred)
    # average=None keeps the scores separate per class instead of collapsing
    # them into a single number.
    class_precision = precision_score(y_true, y_pred, average=None)
    class_recall = recall_score(y_true, y_pred, average=None)
    class_f1 = f1_score(y_true, y_pred, average=None)
    conf_matrix = confusion_matrix(y_true, y_pred)

    # One print call per entry, so the emitted text matches a line-by-line
    # sequence of prints.
    output_lines = (
        title,
        "  accuracy: {}".format(overall_accuracy),
        "  precision: {}".format(class_precision),
        "  recall: {}".format(class_recall),
        "  f1: {}".format(class_f1),
        "  confusion matrix: \n{}\n\n\n".format(conf_matrix),
    )
    for entry in output_lines:
        print(entry)
    
# Summarise each classifier against the same held-out labels, in the order
# the models were trained: k-NN, decision tree, SGD.
report(
    y_true=y_test,
    y_pred=y_neight,
    title="K-Nearest Neighbours (KNN) Algorithm",
)
report(
    y_true=y_test,
    y_pred=y_clf,
    title="Decision Tree (DT) Algorithm",
)
report(
    y_true=y_test,
    y_pred=y_sgd,
    title="Stochastic Gradient Descent (SGD) Algorithm",
)

K-Nearest Neighbours (KNN) Algorithm
  accuracy: 0.809
  precision: [0.74311927 0.96226415 0.62886598 0.8411215  0.64444444 0.97979798 0.58064516 0.87619048 0.93103448 0.86915888]
  recall: [0.81       0.95327103 0.64893617 0.8490566  0.65909091 0.85087719 0.5046729  0.93877551 0.92045455 0.94897959]
  f1: [0.77511962 0.95774648 0.63874346 0.84507042 0.65168539 0.91079812 0.54       0.90640394 0.92571429 0.90731707]
  confusion matrix: 
[[ 81   0   2   6   1   0   8   0   2   0]
 [  1 102   1   2   0   0   1   0   0   0]
 [  4   0  61   1   9   0  19   0   0   0]
 [  2   3   2  90   8   0   1   0   0   0]
 [  0   1  17   3  58   0   9   0   0   0]
 [  0   0   0   0   0  97   0   7   2   8]
 [ 21   0  13   4  13   0  54   0   2   0]
 [  0   0   0   0   0   1   0  92   0   5]
 [  0   0   1   1   1   0   1   2  81   1]
 [  0   0   0   0   0   1   0   4   0  93]]



Decision Tree (DT) Algorithm
  accuracy: 0.744
  precision: [0.74712644 0.87719298 0.625      0.71171171 0.63736264 0.8285714