In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

### Dataset Loading Functions

In [6]:
def mnist_images_load(file):
    """Read an IDX image file into a list of flattened pixel lists.

    The header is read as four big-endian 32-bit integers: magic number,
    image count, rows and cols. Each image then follows as ``rows * cols``
    single-byte pixel values.

    Parameters
    ----------
    file : str or path-like
        Path to the uncompressed image file.

    Returns
    -------
    list of list of int
        One list per image, each holding ``rows * cols`` values in 0..255.

    Raises
    ------
    ValueError
        If the file ends before all images announced by the header were read.
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(file, 'rb') as f:
        f.read(4)  # magic number: consumed to advance past it, not validated
        images_count = int.from_bytes(f.read(4), "big")
        rows = int.from_bytes(f.read(4), "big")
        cols = int.from_bytes(f.read(4), "big")
        pixels_per_image = rows * cols

        images = []
        for n in range(images_count):
            raw = f.read(pixels_per_image)
            if len(raw) != pixels_per_image:
                raise ValueError(
                    "truncated image file: image %d has %d of %d bytes"
                    % (n, len(raw), pixels_per_image)
                )
            # Iterating a bytes object yields ints, so list() gives the pixels.
            images.append(list(raw))
    return images

In [7]:
def mnist_labels_load(file):
    """Read an IDX label file into a list of integer labels.

    The header is read as two big-endian 32-bit integers: magic number and
    label count. The labels follow as one unsigned byte each.

    Parameters
    ----------
    file : str or path-like
        Path to the uncompressed label file.

    Returns
    -------
    list of int
        One value in 0..255 per label, in file order.

    Raises
    ------
    ValueError
        If the file holds fewer label bytes than the header announces.
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(file, 'rb') as f:
        f.read(4)  # magic number: consumed to advance past it, not validated
        labels_count = int.from_bytes(f.read(4), "big")
        raw = f.read(labels_count)

    # A short read must be an error: decoding missing bytes one at a time
    # would turn each of them into a bogus label 0.
    if len(raw) != labels_count:
        raise ValueError(
            "truncated label file: expected %d labels, found %d"
            % (labels_count, len(raw))
        )
    # Iterating a bytes object yields unsigned ints, one per label.
    return list(raw)

### Load dataset

In [8]:
# Pixel vectors for the training and test splits, read from the IDX image files.
train_x, test_x = (
    mnist_images_load("dataset/train-images.idx3-ubyte"),
    mnist_images_load("dataset/t10k-images.idx3-ubyte"),
)
# Matching labels for the same two splits, read from the IDX label files.
train_y, test_y = (
    mnist_labels_load("dataset/train-labels.idx1-ubyte"),
    mnist_labels_load("dataset/t10k-labels.idx1-ubyte"),
)

## KNN

In [9]:
from sklearn.neighbors import KNeighborsClassifier

In [10]:
# k-nearest-neighbours classifier: 5 neighbours with uniform weights, using the
# Minkowski metric with p=2 (i.e. Euclidean distance). Every argument is
# spelled out so the configuration is visible in one place.
classifier = KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    algorithm='auto',
    leaf_size=30,
    p=2,
    metric='minkowski',
    metric_params=None,
    n_jobs=None,
)

In [11]:
# Fit the classifier on the training pixel vectors and their labels.
classifier.fit(train_x, train_y)

KNeighborsClassifier()

In [12]:
# Score the classifier on the test split: compare every prediction with its
# true label element-wise, count the matches, and divide by the number of
# test labels to get the fraction classified correctly.
prediction = classifier.predict(test_x)
matches = prediction == np.asarray(test_y)
hits = np.sum(matches)
hit_rate = hits / len(test_y)
hit_rate

0.9688

In [13]:
# Number of values in the first training image, i.e. its flattened pixel count.
len(train_x[0])

784

## Save model

In [15]:
from joblib import dump, load

In [16]:
# Persist the fitted classifier to 'knn.joblib' (path relative to the working directory).
dump(classifier, 'knn.joblib') 

['knn.joblib']

In [17]:
# Inspect the raw pixel values of the training image at index 3.
# NOTE(review): this displays every element of the list; a slice such as
# train_x[3][:20] would keep the saved output short.
train_x[3]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 124,
 253,
 255,
 63,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 96,
 244,
 251,
 253,
 62,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 127,
 251,
 251,
 253,
 62,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 68,
 236,
 251,

In [45]:
# Print the first ten training digits as ASCII art: "#" marks any non-zero
# pixel and "." marks a zero pixel, one text line per image row, with a blank
# line after each digit.
#
# The glyph grid is built in a separate array, so train_x keeps its numeric
# pixel lists and the cell can be re-run safely. The loop walks the reshaped
# array directly instead of relying on `rows`/`cols`, which no visible cell
# defines at notebook scope.
for n in range(10):
    glyphs = np.where(np.array(train_x[n]).reshape(28, 28) != 0, "#", ".")
    for glyph_row in glyphs:
        # Each glyph is followed by a space (including the last one on a line).
        print(" ".join(glyph_row), end=" \n")
    print()

. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . # # # # # # # # # # # # . . . . 
. . . . . . . . # # # # # # # # # # # # # # # # . . . . 
. . . . . . . # # # # # # # # # # # # # # # # . . . . . 
. . . . . . . # # # # # # # # # # # . . . . . . . . . . 
. . . . . . . . # # # # # # # . # # . . . . . . . . . . 
. . . . . . . . . # # # # # . . . . . . . . . . . . . . 
. . . . . . . . . . . # # # # . . . . . . . . . . . . . 
. . . . . . . . . . . # # # # . . . . . . . . . . . . . 
. . . . . . . . . . . . # # # # # # . . . . . . . . . . 
. . . . . . . . . . . . . # # # # # # . . . . . . . . . 
. . . . . . . . . . . . . . # # # # # # . . . . . . . . 
. . . . . . . . . . . . . . . # # # # # . . . . . . . . 
. . . . . . . . . . . . . . . .

. . . . . . . . . . . . . # # # # # . . . . . . . . . . 
. . . . . . . . . . . . . # # # # # . . . . . . . . . . 
. . . . . . . . . . . . . . # # # # # . . . . . . . . . 
. . . . . . . . . . . . . . # # # # # . . . . . . . . . 
. . . . . . . . . . . . . . # # # # # . . . . . . . . . 
. . . . . . . . . . . . . . # # # # # . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 

. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . # # # # # # # # # # # . . . . . . 
. . . . . . . . . # # # # # # # # # # # # # # . . . . . 
. . . . . . . . . # # # # # # 