In [None]:
import pickle
import struct
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn import neighbors
from sklearn import svm, metrics
from sklearn.ensemble import BaggingClassifier
from sklearn.multiclass import OneVsRestClassifier
%matplotlib inline

In [None]:
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    The plot is drawn on the current pyplot figure; the function does not
    call ``plt.show()``.

    Parameters
    ----------
    cm : 2-D array-like
        Confusion matrix. Rows are labelled 'True label' and columns
        'Predicted label' on the plot.
    classes : sequence
        Tick labels, one per class, in matrix order.
    normalize : bool, default False
        When True, every row is divided by its row sum so that cells show
        per-row fractions. Rows whose sum is zero are left as all zeros
        instead of producing NaN.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Guarded division: a class with no rows in `cm` would otherwise
        # yield 0/0 = NaN, which also poisons cm.max() and the text colours.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Fractions get two decimals, raw counts are printed as integers.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
def read_idx(filename):
    """Load an IDX-format file into a NumPy array.

    The header is four bytes (two zero bytes, a data-type code, and the
    number of dimensions) followed by one big-endian uint32 per dimension;
    the rest of the file is the array payload.

    Parameters
    ----------
    filename : str or path-like
        Path of the (uncompressed) IDX file.

    Returns
    -------
    numpy.ndarray
        Read-only array with the shape given in the header. The dtype follows
        the header's type code; multi-byte types keep their big-endian dtype.

    Raises
    ------
    ValueError
        If the header does not start with two zero bytes, the type code is
        unknown, or the payload size does not match the declared shape.
    struct.error
        If the file is too short to contain the header.
    """
    # IDX type code -> dtype. Multi-byte values are stored most significant
    # byte first, hence the explicit '>' byte order.
    idx_dtypes = {
        0x08: np.dtype(np.uint8),
        0x09: np.dtype(np.int8),
        0x0B: np.dtype('>i2'),
        0x0C: np.dtype('>i4'),
        0x0D: np.dtype('>f4'),
        0x0E: np.dtype('>f8'),
    }
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if zero != 0:
            # Typical cause: the file is still gzip-compressed.
            raise ValueError(
                "%r is not an IDX file (header does not start with two zero bytes)"
                % (filename,))
        if data_type not in idx_dtypes:
            raise ValueError(
                "%r has unsupported IDX data type code 0x%02X" % (filename, data_type))
        # All dimension sizes in one read instead of one unpack per dimension.
        shape = struct.unpack('>' + 'I' * dims, f.read(4 * dims))
        return np.frombuffer(f.read(), dtype=idx_dtypes[data_type]).reshape(shape)

In [None]:
# Load the training and test images/labels from IDX files in the working
# directory and flatten every image into one feature row.
raw_train = read_idx("train-images-idx3-ubyte")
# The sample count comes from the file itself rather than a hard-coded 60000;
# -1 lets NumPy infer the number of pixels per image.
train_data = raw_train.reshape(raw_train.shape[0], -1)
train_label = read_idx("train-labels-idx1-ubyte")
raw_test = read_idx("t10k-images-idx3-ubyte")
test_data = raw_test.reshape(raw_test.shape[0], -1)
test_label = read_idx("t10k-labels-idx1-ubyte")

# Every image needs exactly one label, otherwise fitting/scoring is meaningless.
assert len(train_data) == len(train_label), "train images/labels count mismatch"
assert len(test_data) == len(test_label), "test images/labels count mismatch"

In [None]:
# Fit a 3-nearest-neighbour classifier with the Manhattan metric on the
# training set and report the wall-clock time of the fit.
# NOTE(review): the following cell rebinds `knn` to a different model, so this
# one is discarded when the notebook is run top to bottom.
start = time.time()
X, Y = train_data, train_label
knn = neighbors.KNeighborsClassifier(n_neighbors=3, metric='manhattan')
knn.fit(X, Y)
end = time.time()
print("KNN Time: ", end - start, "s")

In [None]:
# Fit a 2-nearest-neighbour classifier (default metric) on the training set
# and report the wall-clock time of the fit.
start = time.time()
X, Y = train_data, train_label
knn = neighbors.KNeighborsClassifier(n_neighbors=2)
knn.fit(X, Y)
end = time.time()
print("KNN Train Time: ", end - start, "s")

In [None]:
# Persist the fitted classifier to disk with pickle.
pkl_filename = "pickle_knn_model.pkl"
with open(pkl_filename, mode='wb') as pkl_file:
    pickle.dump(knn, pkl_file)

In [None]:
# Restore the classifier from the pickle file, rebinding `knn`.
# Unpickling executes arbitrary code: only load a file this notebook wrote.
pkl_filename = "pickle_knn_model.pkl"
with open(pkl_filename, mode='rb') as pkl_file:
    knn = pickle.load(pkl_file)

In [None]:
# Predict labels for the test set and report how long prediction takes.
start = time.time()
x_test, y_true = test_data, test_label
y_pred = knn.predict(x_test)
end = time.time()
print("KNN Predict Time: ", end - start, "s")

In [None]:
# Confusion matrix of the test predictions, drawn with digit tick labels "0".."9".
cm = metrics.confusion_matrix(y_true, y_pred)
plot_confusion_matrix(
    cm,
    [str(digit) for digit in range(10)],
)  # add normalize=True to show per-row fractions instead of counts

In [None]:
# Score the classifier on the test set and print it as a percentage.
# The timed span includes the score print, exactly as measured here.
start = time.time()
score = knn.score(x_test, y_true)
print("Test score: {0:.2f} %".format(score * 100))
end = time.time()
print("KNN Score Time: ", end - start, "s")

In [None]:
# Show every test image whose true label is 8 but was predicted as 3,
# stacked in a single column.
idx = np.flatnonzero((y_true == 8) & (y_pred == 3))
fig = plt.figure(figsize=(5, 30))
for i, sample in enumerate(idx):
    ax = fig.add_subplot(len(idx), 1, i + 1)
    # Each feature row is reshaped to 28x28 for display.
    imgplot = ax.imshow(x_test[sample].reshape(28, 28), cmap=mpl.colormaps['Greys'])
    imgplot.set_interpolation("nearest")
plt.show()

In [None]:
# Show every test image whose true label is 7 but was predicted as 1,
# stacked in a single column.
idx = np.flatnonzero((y_true == 7) & (y_pred == 1))
fig = plt.figure(figsize=(5, 30))
for i, sample in enumerate(idx):
    ax = fig.add_subplot(len(idx), 1, i + 1)
    # Each feature row is reshaped to 28x28 for display.
    imgplot = ax.imshow(x_test[sample].reshape(28, 28), cmap=mpl.colormaps['Greys'])
    imgplot.set_interpolation("nearest")
plt.show()

In [None]:
# Show every test image whose true label is 4 but was predicted as 9,
# stacked in a single column.
idx = np.flatnonzero((y_true == 4) & (y_pred == 9))
fig = plt.figure(figsize=(5, 30))
for i, sample in enumerate(idx):
    ax = fig.add_subplot(len(idx), 1, i + 1)
    # Each feature row is reshaped to 28x28 for display.
    imgplot = ax.imshow(x_test[sample].reshape(28, 28), cmap=mpl.colormaps['Greys'])
    imgplot.set_interpolation("nearest")
plt.show()