The goal of this lab is to implement and compare a K-Nearest Neighbours (KNN) classifier, a Decision
Tree (DT) classifier, and a Stochastic Gradient Descent (SGD) classifier. Below we provide a brief
overview of these classifiers before specifying the task for this lab.

In [13]:
# Import relevant packages
import tensorflow as tf
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [25]:
# Step 2
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

In [30]:
# Take 5000 samples for training and 1000 for testing
training_size = 5000
testing_size = 1000

x_train = x_train[0:training_size]
y_train = y_train[0:training_size]

x_test = x_test[0:testing_size]
y_test = y_test[0:testing_size]

x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

KNN = KNeighborsClassifier(
    n_neighbors=3,
    algorithm='auto',
    leaf_size=30,
    metric='minkowski',
    p=1,
    metric_params=None,
    n_jobs=None
)

DT = DecisionTreeClassifier(
)

SGD = SGDClassifier (
    max_iter=250
)


KNN.fit(x_train, y_train)
result = KNN.predict(x_test)
print("KNN Classifier")
print('Accuracy: ', metrics.accuracy_score(y_test, result))
print('Precision: ', metrics.precision_score(y_test, result, average='weighted'))
print('Recall: ', metrics.recall_score(y_test, result, average='weighted'))
print('F1 Score: ', metrics.f1_score(y_test, result, average='weighted'))
metrics.confusion_matrix(y_test, result)

DT.fit(x_train, y_train)
result = DT.predict(x_test)
print("\nDT Classifier")
print('Accuracy: ', metrics.accuracy_score(y_test, result))
print('Precision: ', metrics.precision_score(y_test, result, average='weighted'))
print('Recall: ', metrics.recall_score(y_test, result, average='weighted'))
print('F1 Score: ', metrics.f1_score(y_test, result, average='weighted'))
metrics.confusion_matrix(y_test, result)

SGD.fit(x_train, y_train)
result = SGD.predict(x_test)
print("\nSGD Classifier")
print('Accuracy: ', metrics.accuracy_score(y_test, result))
print('Precision: ', metrics.precision_score(y_test, result, average='weighted'))
print('Recall: ', metrics.recall_score(y_test, result, average='weighted'))
print('F1 Score: ', metrics.f1_score(y_test, result, average='weighted'))
metrics.confusion_matrix(y_test, result)


KNN Classifier
Accuracy:  0.82
Precision:  0.824949282236487
Recall:  0.82
F1 Score:  0.819827968618585

DT Classifier
Accuracy:  0.737
Precision:  0.7416607146608223
Recall:  0.737
F1 Score:  0.7382467461119219

SGD Classifier
Accuracy:  0.775
Precision:  0.8028296791923686
Recall:  0.775
F1 Score:  0.7684543229386854


array([[ 97,   1,   5,   0,   0,   0,   3,   0,   1,   0],
       [  2, 100,   0,   3,   0,   0,   0,   0,   0,   0],
       [  3,   1,  98,   0,   3,   0,   6,   0,   0,   0],
       [ 20,   4,  10,  43,   8,   0,   7,   0,   1,   0],
       [  1,   0,  49,   0,  58,   0,   6,   1,   0,   0],
       [  0,   0,   2,   0,   0,  73,   0,   4,   2,   6],
       [ 18,   0,  30,   0,   8,   0,  38,   0,   3,   0],
       [  0,   0,   0,   0,   0,   1,   0,  88,   1,   5],
       [  0,   0,   2,   0,   1,   0,   2,   0,  90,   0],
       [  0,   0,   0,   0,   0,   1,   0,   4,   0,  90]], dtype=int64)