Try to build a classifier for the MNIST dataset that achieves over 97% accuracy
on the test set. Hint: the KNeighborsClassifier works quite well for this task;
you just need to find good hyperparameter values (for example, with a grid search over the `weights` and `n_neighbors` hyperparameters).

In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict, train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV

In [None]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML.
mnist = fetch_openml(name='mnist_784', version=1)

In [None]:
# Show which fields the fetched dataset object exposes
# ('data' and 'target' are the ones used below).
mnist.keys()

In [None]:
# Pull the features (X) and the labels (y) out of the dataset object.
X, y = mnist['data'], mnist['target']

In [None]:
# Convert the labels to unsigned 16-bit integers.
y = y.astype(dtype=np.uint16)

In [None]:
# Shuffled, label-stratified hold-out split with a fixed seed.
# No test_size is passed, so scikit-learn's default proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    shuffle=True,
    random_state=42,
)

In [None]:
# Convert the test labels to a plain NumPy array. np.asarray is used instead
# of the pandas-only `.values` attribute so the cell is idempotent: running it
# again on the already-converted array is a no-op rather than an
# AttributeError.
y_test = np.asarray(y_test)

In [None]:
# Fit a StandardScaler on the training split and apply it to that same split.
# NOTE(review): X_train_scaled is not referenced by any later cell shown here
# (the grid search below is fitted on the unscaled X_train) — confirm intent.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)

In [None]:
# Hyperparameter candidates: three neighbourhood sizes crossed with both
# weighting schemes (6 combinations in total).
param_grid = dict(
    n_neighbors=[1, 3, 6],
    weights=['distance', 'uniform'],
)

# Instantiate the base estimator; the grid search sets its hyperparameters.
knn = KNeighborsClassifier()

In [None]:
# Search over every combination in param_grid, ranking candidates by accuracy.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
)

In [None]:
# Run the search on the (unscaled) training split.
grid_search.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out test split and report the accuracy of those
# predictions — the exercise's target (> 97%) is defined on the test set,
# not on the cross-validation score.
y_pred = grid_search.predict(X_test)
accuracy_score(y_test, y_pred)

In [None]:
# Cross-validated accuracy of the best parameter combination. It is computed
# on the training data only, so it is not the test-set accuracy.
grid_search.best_score_

Write a function that can shift an MNIST image in any direction (left, right, up, or down) by 3 pixels.
Then, for each image in the training set, create four shifted copies (one per direction) and add them to the training set.
Finally, train your best model on this expanded training set and measure its accuracy on the test set.
You should observe that your model performs even better now! This technique of
artificially growing the training set is called data augmentation or training set expansion.

In [None]:
from scipy.ndimage import shift
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd

In [None]:
# Take one sample (row 12 of X) as a single-row array and view it as a
# 28x28 image for the shifting demos below.
few_digit = np.asarray(X[12:13]).astype(np.uint16)
some_digits_image = few_digit.reshape((28, 28))
few_digit.shape

In [None]:
# Shift the sample image down by 3 pixels (positive offset along the row axis).
image_shifted_down = shift(some_digits_image, shift=(3, 0))
# Flatten back to a single 784-value row.
image_shifted_down_resharped = image_shifted_down.reshape(1, -1)

# Other interpolation options for rendering include 'nearest', 'bicubic' and
# 'bilinear'; see the matplotlib imshow documentation for details.
plt.imshow(
    image_shifted_down,
    cmap=mpl.cm.binary,
    interpolation='antialiased',
)
plt.axis('on')
plt.show()

In [None]:
# Shift the sample image up by 3 pixels (negative offset along the row axis).
image_shifted_up = shift(some_digits_image, shift=(-3, 0))
# Flatten back to a single 784-value row.
image_shifted_up_resharped = image_shifted_up.reshape(1, -1)

# Other interpolation options for rendering include 'nearest', 'bicubic' and
# 'bilinear'; see the matplotlib imshow documentation for details.
plt.imshow(
    image_shifted_up,
    cmap=mpl.cm.binary,
    interpolation='antialiased',
)
plt.axis('on')
plt.show()

In [None]:
# Shift the sample image right by 3 pixels (positive offset along the column
# axis), then flatten it back to a single 784-value row.
image_shifted_right = shift(some_digits_image, (0, 3))
image_shifted_right_resharped = image_shifted_right.reshape(1, 784)

plt.imshow(image_shifted_right, cmap=mpl.cm.binary, interpolation="antialiased")
plt.axis('on')
# plt.show has to be *called*: the bare attribute `plt.show` does nothing and
# only echoes the function object as the cell's output.
plt.show()

In [None]:
# Shift the sample image left by 3 pixels (negative offset along the column
# axis), then flatten it back to a single 784-value row.
image_shifted_left = shift(some_digits_image, (0, -3))
image_shifted_left_resharped = image_shifted_left.reshape(1, 784)

plt.imshow(image_shifted_left, cmap=mpl.cm.binary, interpolation="antialiased")
plt.axis('on')
# plt.show has to be *called*: the bare attribute `plt.show` does nothing and
# only echoes the function object as the cell's output.
plt.show()

In [None]:
def shift_image(image, dx, dy):
    """Return a flattened 28x28 image translated by (dx, dy) pixels.

    Parameters
    ----------
    image : array-like holding 784 values
        One flattened 28x28 image.
    dx : int
        Horizontal offset in pixels; positive moves the image to the right.
    dy : int
        Vertical offset in pixels; positive moves the image down.

    Returns
    -------
    numpy.ndarray of shape (784,)
        The shifted image, flattened. Pixels shifted in from outside the
        frame are filled with 0.
    """
    pixels = np.asarray(image).reshape(28, 28)
    # scipy's shift takes offsets in (row, column) order, hence (dy, dx).
    return shift(pixels, (dy, dx), cval=0, mode="constant").reshape(-1)


# Augment the *training split only*: every training image contributes four
# shifted copies (right, left, down, up). Sampling from the full X instead
# could copy test images into the training data.
SHIFT_PIXELS = 3
SHIFT_OFFSETS = (            # (dx, dy)
    (SHIFT_PIXELS, 0),       # right
    (-SHIFT_PIXELS, 0),      # left
    (0, SHIFT_PIXELS),       # down
    (0, -SHIFT_PIXELS),      # up
)

X_train_array = np.asarray(X_train)
y_train_array = np.asarray(y_train)

augmented_images = [X_train_array]
augmented_labels = [y_train_array]
for dx, dy in SHIFT_OFFSETS:
    augmented_images.append(
        np.array([shift_image(image, dx, dy) for image in X_train_array])
    )
    # A shifted digit keeps the label of the image it was derived from.
    augmented_labels.append(y_train_array)

X_train_new = np.concatenate(augmented_images, axis=0)
y_train_new = np.concatenate(augmented_labels, axis=0)

# Sanity check: original set plus four shifted copies, features and labels aligned.
assert len(X_train_new) == len(y_train_new) == 5 * len(X_train_array)
X_train_new.shape, y_train_new.shape

In [None]:
# Build the classifier from the hyperparameters the grid search selected
# rather than hard-coding them, so this cell cannot drift out of sync with
# the search results.
knn_new = KNeighborsClassifier(**grid_search.best_params_)

knn_new.fit(X_train_new, y_train_new)

In [None]:
# Accuracy on the held-out test split. knn_new was fitted on a plain NumPy
# array, so X_test is converted as well; passing a DataFrame here would make
# scikit-learn warn about feature names the model was never fitted with.
knn_new.score(np.asarray(X_test), y_test)