In [70]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

In [6]:
# Read the saved NumPy array; the path is relative to the notebook's working directory.
data = np.load(file='Datasets/mnist_train_small.npy')

In [13]:
# Column 0 is used as the label; every remaining column is a feature.
y = data[:, 0]
X = data[:, 1:]

In [23]:
# Hold out 33% of the rows for evaluation; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [33]:
# Reference model: scikit-learn's classifier using a single nearest neighbour.
knn = KNeighborsClassifier(
    n_neighbors=1,
)

In [50]:
# Fit the reference model on the training portion of the split.
knn.fit(X=X_train, y=y_train)

In [60]:
# Show the dimensions of the held-out feature matrix.
np.shape(X_test)

(6600, 784)

In [72]:
# Predict labels for the first 100 held-out rows only.
ans = knn.predict(X=X_test[:100])

In [103]:
# Accuracy of the reference model on the first 100 held-out rows.
knn.score(X=X_test[:100], y=y_test[:100])

0.95

## Custom KNN

In [93]:
class CustomKNN:
    """Brute-force k-nearest-neighbours classifier.

    Neighbours are ranked by squared Euclidean distance to the query point.
    Ties in distance are broken by the smaller label, and ties in the vote
    are also resolved in favour of the smaller label.

    Parameters
    ----------
    n_neighbours : int, default 5
        Number of closest training points that vote on each prediction.

    Raises
    ------
    ValueError
        If ``n_neighbours`` is smaller than 1.
    """

    def __init__(self, n_neighbours=5):
        if n_neighbours < 1:
            raise ValueError("n_neighbours must be at least 1")
        self.n_neighbours = n_neighbours

    @staticmethod
    def _as_numeric(values):
        """Return ``values`` as an int64 array (bool/integer input) or a float64 array (otherwise)."""
        arr = np.asarray(values)
        # Integers are widened so that subtracting unsigned values cannot wrap
        # around; real-valued data stays floating point so it is not truncated
        # to whole numbers.
        if arr.dtype.kind in "bui":
            return arr.astype(np.int64)
        return arr.astype(np.float64)

    # training function
    def fit(self, X, y):
        """Store the training set; no model is built ahead of time.

        Parameters
        ----------
        X : array-like, one row per training sample
        y : array-like, one label per row of ``X``

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        features = self._as_numeric(X)
        labels = np.asarray(y)
        if len(features) != len(labels):
            raise ValueError(
                "X and y must contain the same number of samples "
                f"(got {len(features)} and {len(labels)})"
            )
        self._X = features
        self._y = labels

    def predict_point(self, point):
        """Predict the label of a single sample.

        Parameters
        ----------
        point : array-like with one value per feature column of the training data

        Returns
        -------
        numpy.ndarray
            Zero-dimensional integer array holding the winning label.
        """
        query = self._as_numeric(point)
        # Squared distance from the query to every training row in one shot;
        # the square root is skipped because it does not change the ranking.
        sq_dists = ((self._X - query) ** 2).sum(axis=1)
        # lexsort orders by its LAST key first: distance, then label on ties.
        nearest = np.lexsort((self._y, sq_dists))[:self.n_neighbours]
        items, counts = np.unique(self._y[nearest], return_counts=True)
        # np.unique returns sorted labels and argmax takes the first maximum,
        # so a tied vote goes to the smallest label.
        return np.array(items[np.argmax(counts)], dtype=int)

    # predict function
    def predict(self, X):
        """Predict a label for every row of ``X`` and return them as one array."""
        return np.array([self.predict_point(point) for point in X])

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        hits = np.count_nonzero(self.predict(X) == np.asarray(y))
        return hits / len(y)
    
            
    

In [94]:
# Switch to the hand-written classifier (5 neighbours, its default) and give
# it the same training data as the reference model.
knn = CustomKNN(n_neighbours=5)
knn.fit(X=X_train, y=y_train)

In [98]:
# Spot-check predictions on the first 10 held-out rows.
knn.predict(X=X_test[:10])

array([1, 7, 0, 9, 4, 5, 4, 6, 9, 2])

In [100]:
# Accuracy of the hand-written classifier on the same 100 rows used for the reference model.
knn.score(X=X_test[:100], y=y_test[:100])

0.95