#### Fetching Data from OpenML

In [1]:
import numpy as np

from sklearn.datasets import fetch_openml
# Fetch OpenML dataset 'mnist_784' (version 1); as_frame=False asks for
# NumPy arrays instead of a pandas DataFrame
mnist = fetch_openml(
    name='mnist_784',
    version=1,
    as_frame=False,
)




#### Extracting data and label from dictionary

In [2]:
# List the fields available on the fetched dataset object
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [3]:
# The feature matrix and the labels are stored under separate keys
X = mnist['data']
Y = mnist['target']

#### Understanding dataset

In [4]:
# Peek at the feature matrix (NumPy abbreviates the repr of large arrays)
X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [5]:
# (number of samples, number of features per sample)
X.shape

(70000, 784)

In [6]:
# Peek at the labels
Y

array(['5', '0', '4', ..., '4', '5', '6'], dtype=object)

In [7]:
# There should be exactly one label per row of X
Y.shape

(70000,)

In [8]:
# Check how the labels are stored before converting them
Y.dtype

dtype('O')

In [9]:
# The labels arrive as strings held in an object array (e.g. '5'),
# so cast them to 32-bit integers

Y = Y.astype(np.int32)

#### Splitting the dataset into training and testing data

In [10]:
# MNIST ships pre-split: the first 60,000 rows form the training set and
# the remaining rows form the test set

X_train, X_test = X[:60_000], X[60_000:]
Y_train, Y_test = Y[:60_000], Y[60_000:]

#### Enlarging the training dataset by shifting each image one pixel in each of the four directions

In [11]:
from scipy.ndimage import shift

# Creating function to shift the image in given direction

def shift_image(image, dx, dy, shape=(28, 28)):
    """Return a flattened copy of ``image`` translated by ``(dx, dy)`` pixels.

    Parameters
    ----------
    image : array-like
        Flattened image holding ``shape[0] * shape[1]`` values.
    dx : float
        Shift along the column axis; positive values move content right.
    dy : float
        Shift along the row axis; positive values move content down.
    shape : tuple of int, optional
        2-D shape the flat image is viewed in before shifting. Defaults to
        ``(28, 28)``, the layout this notebook uses for MNIST digits.

    Returns
    -------
    numpy.ndarray
        1-D array with as many elements as ``image``. Pixels shifted in from
        outside the frame are filled with 0.
    """
    # Accept any array-like (not just ndarrays) and restore the 2-D layout
    image_2d = np.asarray(image).reshape(shape)
    # scipy takes one shift per axis: rows (dy) first, then columns (dx)
    shifted_image = shift(image_2d, [dy, dx], cval=0, mode='constant')
    return shifted_image.reshape(-1)

In [12]:
# Start from the original training images and labels
X_train_augmented = list(X_train)
Y_train_augmented = list(Y_train)

# Add one shifted copy of every image per direction
# (one pixel right, left, down and up); the label is unchanged by a shift

for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):
    X_train_augmented.extend(shift_image(img, dx, dy) for img in X_train)
    Y_train_augmented.extend(Y_train)

In [13]:
# Stack the Python lists into NumPy arrays so they can be indexed with an index array
X_train_augmented = np.array(X_train_augmented)
Y_train_augmented = np.array(Y_train_augmented)

# Shuffle so the samples are no longer grouped by shift direction.
# The same permutation is applied to images and labels to keep each pair aligned,
# and the seed is fixed so the ordering is identical on every fresh run.

shuffle_index = np.random.default_rng(42).permutation(len(X_train_augmented))

X_train_augmented = X_train_augmented[shuffle_index]
Y_train_augmented = Y_train_augmented[shuffle_index]


#### Creating the model and testing its accuracy

In [14]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier: 4 neighbours, votes weighted by distance
knc_clf = KNeighborsClassifier(
    weights='distance',
    n_neighbors=4,
)

In [16]:
# Fit on the augmented (original + shifted) training set
knc_clf.fit(X_train_augmented, Y_train_augmented)

KNeighborsClassifier(n_neighbors=4, weights='distance')

In [17]:
from sklearn.metrics import accuracy_score

# Predict a label for every test image
Y_test_pred = knc_clf.predict(X_test)

# Fraction of test images whose predicted label matches the true label
accuracy_score(Y_test, Y_test_pred)

0.9763

#### Thank You!!!