## Setup

In [1]:
# Common imports
import numpy as np

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

## Get MNIST

In [2]:
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')
mnist

{'COL_NAMES': ['label', 'data'],
 'DESCR': 'mldata.org dataset: mnist-original',
 'data': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 'target': array([0., 0., 0., ..., 9., 9., 9.])}

## Seperate Train & Test Dataset

In [None]:
train_dataset_counts = 60000
X, y = mnist['data'], mnist['target']
X_train, X_test, y_train, y_test = X[:train_dataset_counts], X[train_dataset_counts:], y[:train_dataset_counts], y[train_dataset_counts:]

random_indexes = np.random.permutation(train_dataset_counts)
X_train = X_train[random_indexes]
y_train = y_train[random_indexes]

## Fine Tune KNeighborsClassifier Model

In [4]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

param_grid = [{"weights":["uniform"], "n_neighbors":[4]}]

knn_cls_no_tune = KNeighborsClassifier()
knn_cls_no_tune.fit(X_train, y_train)

knn_grid = GridSearchCV(KNeighborsClassifier(), param_grid=param_grid, cv=5, verbose=3)
knn_grid.fit(X_train, y_train)

## Performance Evaluation on Training Datasets

In [5]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

y_predict_no_tune = knn_cls_no_tune.predict(X_train)
print("Precision without tune", precision_score(y_predict_no_tune, y_train))
print("Recall without tune", recall_score(y_predict_no_tune, y_train))
print("F1 score without tune", f1_score(y_predict_no_tune, y_train))
print("Accuracy score without tune", accuracy_score(y_predict_no_tune, y_train))

y_predict_tuned = knn_grid.predict(X_train)
print("Precision without tune", precision_score(y_predict_tuned, y_train))
print("Recall without tune", recall_score(y_predict_tuned, y_train))
print("F1 score without tune", f1_score(y_predict_tuned, y_train))
print("Accuracy score without tune", accuracy_score(y_predict_tuned, y_train))


NameError: name 'knn_cls_no_tune' is not defined

## Performance Evaluation on Test Datasets

In [None]:
y_predict_no_tune = knn_cls_no_tune.predict(X_test)
print("Precision without tune", precision_score(y_predict_no_tune, y_test))
print("Recall without tune", recall_score(y_predict_no_tune, y_test))
print("F1 score without tune", f1_score(y_predict_no_tune, y_test))
print("Accuracy score without tune", accuracy_score(y_predict_no_tune, y_test))

y_predict_tuned = knn_grid.predict(X_test)
print("Precision without tune", precision_score(y_predict_tuned, y_test))
print("Recall without tune", recall_score(y_predict_tuned, y_test))
print("F1 score without tune", f1_score(y_predict_tuned, y_test))
print("Accuracy score without tune", accuracy_score(y_predict_tuned, y_test))

## Augment Training Dataset by Shifting

In [None]:
from scipy.ndimage.interpolation import shift
import matplotlib
import matplotlib.pyplot as plt

def shift_image(image, dx, dy):
    return shift(image.reshape((28,28)), (dy, dx)).reshape(784)
    
def plot_image(image):
    plt.imshow(image.reshape((28,28)), cmap=matplotlib.cm.binary)
    
plt.subplot(131)
plot_image(X_train[50])
plt.subplot(132)
plot_image(shift_image(X_train[50], -3,-5))

In [None]:
print(X_train.shape)
print(y_train.shape)
X_train_augmented = X_train.copy()
y_train_augmented = y_train.copy()

for dx, dy in [(1, 0), (-1, 0), (0, 1), (0, -1)]:
    for image, label in zip(X_train, y_train):
        X_train_augmented = np.append(X_train_augmented, [shift_image(image, dx, dy)], axis=0)
        y_train_augmented = np.append(y_train_augmented, [label])

print(X_train_augmented.shape)
print(y_train_augmented.shape)

plt.subplot(191)
plot_image(X_train_augmented[0])
plt.subplot(192)
plot_image(X_train_augmented[60000])
plt.subplot(193)
plot_image(X_train_augmented[120000])