# *K-Nearest Neighbours Implementation*

In [None]:
#Importing useful Libraries
import numpy as np
from scipy import stats
from collections import Counter
import pandas as pd

In [None]:
#Distance Function
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both arguments are converted to floating-point arrays before the
    subtraction. This lets plain lists/tuples be passed in, and prevents
    unsigned-integer inputs (for example raw uint8 pixel values) from
    wrapping around when one is subtracted from the other.

    Args:
        x1: First point; an array-like of numbers.
        x2: Second point; an array-like broadcastable against ``x1``.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

In [None]:
class KNN:
    """K-nearest-neighbours classifier (Euclidean distance, majority vote).

    ``fit`` memorises the training samples; ``predict`` labels each query
    sample with the most common label among its ``k`` closest training
    samples.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among ``k`` neighbours.

        Args:
            k: Number of nearest neighbours consulted per prediction.

        Raises:
            ValueError: If ``k`` is not a positive integer.
        """
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError("k must be a positive integer")
        self.k = k

    @staticmethod
    def _as_float_array(values):
        """Return ``values`` as an ndarray with a floating-point dtype."""
        arr = np.asarray(values)
        # Integer dtypes (notably unsigned ones such as uint8) would wrap
        # around on subtraction, so promote anything non-floating to float.
        if not np.issubdtype(arr.dtype, np.floating):
            arr = arr.astype(float)
        return arr

    def fit(self, X, y):
        """Store the training samples and their labels.

        Args:
            X: Array-like of training samples, one sample per first-axis entry.
            y: Array-like of labels, one per training sample.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        features = self._as_float_array(X)
        labels = np.asarray(y)
        if features.ndim == 0 or features.shape[0] == 0:
            raise ValueError("X must contain at least one training sample")
        if labels.ndim == 0 or labels.shape[0] != features.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        # Flatten every sample to a row so distances can be computed with a
        # single vectorised expression regardless of the per-sample shape.
        self.X_train = features.reshape(features.shape[0], -1)
        self.y_train = labels

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Args:
            X: Array-like of query samples, one sample per first-axis entry.

        Returns:
            A NumPy array holding one predicted label per query sample.
        """
        y_pred = [self._predict(x) for x in self._as_float_array(X)]
        return np.array(y_pred)

    def _predict(self, x):
        """Return the majority label among the ``k`` neighbours nearest to ``x``."""
        sample = self._as_float_array(x).ravel()
        # Euclidean distance from the query to every training sample at once.
        distances = np.sqrt(np.sum((self.X_train - sample) ** 2, axis=1))
        # A stable sort makes the choice among equidistant samples
        # deterministic (the lower training index wins).
        k_idx = np.argsort(distances, kind="stable")[:self.k]
        k_neighbor_labels = self.y_train[k_idx].tolist()
        # Counter acts as the mode of the neighbour labels; on a tied vote
        # the label encountered first (i.e. the nearer neighbour's) is kept.
        most_common = Counter(k_neighbor_labels).most_common(1)
        return most_common[0][0]

In [None]:
#Accuracy Function
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Inputs are converted to NumPy arrays so the comparison is always
    element-wise; comparing two plain lists with ``==`` would otherwise
    produce a single boolean instead of one result per label.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        Number of matching labels divided by the total number of labels.

    Raises:
        ValueError: If the inputs have different shapes or are empty.
    """
    true_labels = np.asarray(y_true)
    predicted_labels = np.asarray(y_pred)
    if true_labels.shape != predicted_labels.shape:
        raise ValueError("y_true and y_pred must have the same shape")
    if true_labels.size == 0:
        raise ValueError("accuracy is undefined for empty inputs")
    return np.sum(true_labels == predicted_labels) / true_labels.size

In [None]:
# Load the MNIST training split: column 0 is used as the label and
# columns 1..784 as the features.
df = pd.read_csv('/content/sample_data/mnist_train_small.csv', header=None)
nV = df.iloc[:, 0]
V = df.iloc[:, 1:785]
# Per-column mean-centring scaled by the column range; the small constant
# keeps the division defined for columns whose max equals their min.
V = (V - V.mean()) / (V.max() - V.min() + 0.000001)
X = np.array(V)
y = np.array(nV)
# Shape tuple is evaluated but not displayed (only the last expression of a
# notebook cell is rendered).
np.shape(X), np.shape(y)
V.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,-0.014564,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,-0.014564,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,-0.014564,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,0.044259,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,-0.014564,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0


In [None]:
# Load the MNIST test split: column 0 is used as the label and
# columns 1..784 as the features.
df1 = pd.read_csv('/content/sample_data/mnist_test.csv', header=None)
V1 = df1.iloc[:, 1:785]
# Normalise the test features with the TRAINING statistics (taken from `df`,
# the training frame loaded earlier) so both splits share one feature scale
# and no information from the test set leaks into preprocessing.
train_features = df.iloc[:, 1:785]
V1 = (V1 - train_features.mean()) / (train_features.max() - train_features.min() + 0.000001)
nV1 = df1.iloc[:, 0]
y1 = np.array(nV1)
X1 = np.array(V1)
# Shape tuple is evaluated but not displayed (only the last expression of a
# notebook cell is rendered).
np.shape(X1), np.shape(y1)
V1.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,-0.014564,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,-0.014564,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,-0.014564,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,0.044259,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-5e-05,-5e-05,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.4e-05,-0.000134,-0.00023,-0.000285,-0.0005,-0.000754,...,-0.014564,-0.010302,-0.006739,-0.004,-0.002169,-0.000974,-0.000383,-0.000154,-5e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5e-05,-8.1e-05,-0.000156,-0.000349,-0.000444,-0.000626,-0.001086,-0.001592,-0.002144,-0.002243,-0.00273,-0.002634,-0.002141,-0.001436,-0.00085,-0.000345,-0.000144,-0.000173,-0.000128,-9.7e-05,0.0,0.0,0.0,0.0


In [None]:
# Fit the custom classifier on (X, y), predict labels for X1 and report the
# fraction of predictions that match y1.
k = 3
clf = KNN(k=k)
clf.fit(X, y)
predictions = clf.predict(X1)
score = accuracy(y1, predictions)
print("custom KNN classification accuracy", score)

custom KNN classification accuracy 0.98455
