In [1]:
# Preliminaries
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from scipy.ndimage.interpolation import shift

In [2]:
# Preparing for training and test set
mnist = fetch_mldata('MNIST original')

X, y = mnist["data"], mnist["target"]
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
shuffle_index = np.random.permutation(60000)
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

### 1. Try to build a classifier for the MNIST dataset that achieves over 97% accuracy on the test set. Hint: the `KNeighborsClassifier` works quite well for this task; you just need to find good hyperparameter values (try a grid search on the `weights` and `n_neighbors` hyperparameters).

In [3]:
knn_clf = KNeighborsClassifier(n_jobs=-1, weights='distance', n_neighbors=4)
knn_clf.fit(X_train, y_train)
y_knn_pred = knn_clf.predict(X_test)
accuracy_score(y_test, y_knn_pred)

0.9714

In [None]:
# Warning:
# this cell is really time-consuming ...
# it takes days of time to search the optimal

param_grid = [
    {'weights': ['distance', 'uniform'], 'n_neighbors': [4, 5, 6]}
]

knn_clf = KNeighborsClassifier()
knn_class = GridSearchCV(knn_clf, param_grid)
knn_class.fit(X_train, y_train)

### 2. Write a function that can shift an MNIST image in any direction (left, right, up, or down) by one pixel.5 Then, for each image in the training set, create four shifted copies (one per direction) and add them to the training set. Finally, train your best model on this expanded training set and measure its accuracy on the test set. You should observe that your model performs even better now! This technique of artificially growing the training set is called *data augmentation* or *training set expansion*.

In [5]:
def shift_image_by_pixel(image, dx, dy):
    image = image.reshape((28, 28))
    shifted = shift(image, [dy, dx], cval=0)
    return shifted.reshape([-1])

def shift_image_by_direction(image, direction):
    if direction == 'left':
        return shift_image_by_pixel(image, -1, 0)
    elif direction == 'right':
        return shift_image_by_pixel(image, 1, 0)
    elif direction == 'up':
        return shift_image_by_pixel(image, 0, -1)
    else:
        return shift_image_by_pixel(image, 0, 1)

In [21]:
X_train_aug = [image for image in X_train]
y_train_aug = [label for label in y_train]

for image, label in zip(X_train_aug, y_train_aug):
    X_train_aug.append(shift_image_by_direction(image, 'left'))
    X_train_aug.append(shift_image_by_direction(image, 'right'))
    X_train_aug.append(shift_image_by_direction(image, 'up'))
    X_train_aug.append(shift_image_by_direction(image, 'down'))
    y_train_aug.append(label)
    
X_train_aug = np.array(X_train_aug)
y_train_aug = np.array(y_train_aug)

shuffle_index = np.random.permutation(len(X_train_aug))
X_train_aug = X_train_aug[shuffle_index]
y_train_aug = y_train_aug[shuffle_index]

KeyboardInterrupt: 

In [4]:
def shift_image(image, dx, dy):
    image = image.reshape((28, 28))
    shifted_image = shift(image, [dy, dx], cval=0, mode="constant")
    return shifted_image.reshape([-1])

In [5]:
X_train_augmented = [image for image in X_train]
y_train_augmented = [label for label in y_train]

for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):
    for image, label in zip(X_train, y_train):
        X_train_augmented.append(shift_image(image, dx, dy))
        y_train_augmented.append(label)

X_train_augmented = np.array(X_train_augmented)
y_train_augmented = np.array(y_train_augmented)

In [6]:
shuffle_idx = np.random.permutation(len(X_train_augmented))
X_train_augmented = X_train_augmented[shuffle_idx]
y_train_augmented = y_train_augmented[shuffle_idx]

In [7]:
knn_clf = KNeighborsClassifier(n_jobs=-1, weights='distance', n_neighbors=4)
knn_clf.fit(X_train_augmented, y_train_augmented)
y_pred = knn_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.9763