In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

### Custom Implementation of K-Nearest-Neighbours (KNN)

In [2]:
class CustomKNN:
    """Brute-force k-nearest-neighbours classifier (squared Euclidean distance).

    Training only stores the data; all work happens at prediction time, where
    the query is compared against every stored sample and the most common
    label among the closest ones wins.

    Parameters
    ----------
    n_neighnhours : int, default 5
        Number of nearest training samples that vote on the label.
        (The spelling is kept as-is so existing callers keep working.)

    Raises
    ------
    ValueError
        If ``n_neighnhours`` is smaller than 1.
    """

    # Constructor
    def __init__(self, n_neighnhours=5):
        if n_neighnhours < 1:
            raise ValueError("n_neighnhours must be at least 1")
        self.n_neighnhours = n_neighnhours

    @staticmethod
    def _as_numeric(values):
        """Return `values` as int64 (integer/bool input) or float64 (anything else).

        Widening small unsigned types such as uint8 avoids wrap-around when two
        samples are subtracted, while float input keeps its fractional part
        instead of being truncated to integers.
        """
        arr = np.asarray(values)
        if arr.dtype.kind in "biu":
            return arr.astype(np.int64)
        return arr.astype(np.float64)

    # Training function
    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array-like, shape (n_samples, ...)
            Training samples; each sample is flattened to a 1-D feature vector.
        y : array-like, n_samples entries
            Label of each training sample.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` disagree on the sample count.
        """
        features = self._as_numeric(X)
        labels = np.asarray(y).ravel()

        if features.ndim == 0 or len(features) == 0:
            raise ValueError("X must contain at least one training sample")
        if len(features) != len(labels):
            raise ValueError(
                "X and y must contain the same number of samples "
                f"(got {len(features)} and {len(labels)})"
            )

        # One row per sample, so the distance computation can be vectorised.
        self._X = features.reshape(len(features), -1)
        self._y = labels

    # Predict a single point:
    # given one sample, tell which class it belongs to.
    def predict_point(self, point):
        """Return the majority label among the nearest training samples.

        Distance ties are broken by the smaller label, and vote ties are
        resolved in favour of the smallest label among the tied ones.
        """
        query = self._as_numeric(point).reshape(-1)

        # Squared Euclidean distance from the query to every training sample.
        diff = self._X - query
        dists = (diff * diff).sum(axis=1)

        # lexsort uses the LAST key as the primary one: order by distance,
        # then by label when distances are equal.
        nearest = np.lexsort((self._y, dists))[: self.n_neighnhours]

        # Majority vote; np.unique returns labels sorted, argmax takes the first maximum.
        items, counts = np.unique(self._y[nearest], return_counts=True)
        return items[np.argmax(counts)]

    def predict(self, X):
        """Predict a label for every sample in ``X``; returns an integer array."""
        return np.array([self.predict_point(point) for point in X], dtype=int)

    # Score to measure accuracy
    def score(self, X, y):
        """Return the fraction of samples in ``X`` whose prediction equals ``y``.

        Raises
        ------
        ValueError
            If ``y`` is empty or its length differs from the number of samples.
        """
        expected = np.asarray(y).ravel()
        if expected.size == 0:
            raise ValueError("score requires at least one sample")

        predicted = self.predict(X)
        if predicted.shape != expected.shape:
            raise ValueError(
                "X and y must contain the same number of samples "
                f"(got {predicted.shape[0]} and {expected.shape[0]})"
            )
        return np.mean(predicted == expected)

In [3]:
# Load the dataset array from a .npy file (relative path).
data= np.load('../datasets/mnist_train_small.npy')

In [4]:
# Display the dimensions of the loaded array.
data.shape

(19999, 785)

In [5]:
# The first column holds the number (class label); the remaining columns hold the flattened image data.
X = data[:, 1:]
y = data[:, 0]

In [6]:
# Preview the feature matrix and the label vector.
X, y

(array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 array([5, 7, 9, ..., 2, 9, 5], dtype=uint8))

In [7]:
# Hold out 33% of the samples as a test set; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [8]:
# Shapes of the training features and training labels.
X_train.shape, y_train.shape

((13399, 784), (13399,))

In [9]:
# Shapes of the test features and test labels.
X_test.shape, y_test.shape

((6600, 784), (6600,))

In [10]:
# Create the classifier with its default neighbour count (n_neighnhours=5).
cust_model = CustomKNN()

In [11]:
# Fit the model on the training split.
cust_model.fit(X_train, y_train)

In [12]:
# Predict labels for the first 10 test samples.
cust_model.predict(X_test[:10])

array([1, 7, 0, 9, 4, 5, 4, 6, 9, 2])

In [13]:
# True labels of the first 10 test samples, for comparison with the predictions.
y_test[:10]

array([7, 7, 0, 9, 4, 5, 4, 6, 9, 2], dtype=uint8)

In [14]:
# Accuracy on the first 100 test samples only — presumably a subset to keep the run time manageable.
cust_model.score(X_test[:100], y_test[:100])

0.95