# K-Nearest Neighbours Classifier
- This notebook implements the K-Nearest Neighbours (KNN) classification algorithm from scratch with NumPy, then trains and evaluates it on the Iris dataset.

## Importing Libraries

In [7]:
# data manipulation
import numpy as np
import pandas as pd

# preprocessing
from sklearn.preprocessing import StandardScaler

# mathematics
import statistics

# dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

## Custom Class

In [12]:
class KNNClassifier:
  """K-nearest-neighbours classifier using Euclidean distance and majority vote.

  Parameters
  ----------
  n_neighbours : int, default=5
    Number of closest training samples that vote on each prediction. When it
    exceeds the number of stored training samples, all of them vote.

  Attributes
  ----------
  X_ : ndarray of shape (n_samples, n_features)
    Training samples stored by `fit`.
  y_ : ndarray of shape (n_samples,)
    Training labels stored by `fit`.
  """

  def __init__(self, n_neighbours=5):
    self.n_neighbours = n_neighbours


  def _check_n_neighbours(self):
    """Raise ValueError unless `n_neighbours` is a positive integer."""
    k = self.n_neighbours
    if not isinstance(k, (int, np.integer)) or k < 1:
      raise ValueError(f"n_neighbours must be a positive integer, got {k!r}")


  def _as_samples(self, X):
    """Convert `X` to an array with one row per sample.

    Lists and other array-likes are accepted. A 1-D input is treated as
    single-feature samples, and any trailing dimensions are flattened into
    one feature axis.
    """
    X = np.asarray(X)
    if X.ndim == 0:
      raise ValueError("expected an array of samples, got a scalar")
    if X.ndim == 2:
      return X
    # np.prod(()) is 1, so a 1-D input becomes a single feature column
    return X.reshape(X.shape[0], int(np.prod(X.shape[1:])))


  def _euclidean_distance(self, a, b):
    """Return the Euclidean distance between `a` and `b` along the last axis.

    Two 1-D vectors give a scalar. The operands are broadcast against each
    other, so a 2-D array and a 1-D vector give one distance per row.
    """
    diff = np.atleast_1d(np.asarray(a) - np.asarray(b))
    return np.sqrt(np.sum(diff ** 2, axis=-1))


  def fit(self, X, y):
    """Store the training data; all distance work happens in `predict`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
      Training samples.
    y : array-like of shape (n_samples,)
      Label of each training sample.

    Returns
    -------
    self : KNNClassifier
      The fitted instance, so calls can be chained.

    Raises
    ------
    ValueError
      If `n_neighbours` is not a positive integer, if `X` is empty, if `y`
      is not 1-D, or if `X` and `y` disagree on the number of samples.
    """
    self._check_n_neighbours()
    X = self._as_samples(X)
    y = np.asarray(y)
    if X.shape[0] == 0:
      raise ValueError("X must contain at least one sample")
    if y.ndim != 1:
      raise ValueError(f"y must be 1-D, got shape {y.shape}")
    if X.shape[0] != y.shape[0]:
      raise ValueError(
        f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
      )
    self.X_ = X
    self.y_ = y
    return self


  def predict(self, X):
    """Predict a label for every sample in `X`.

    Each sample receives the most common label among its `n_neighbours`
    closest training samples. Labels are handed to `statistics.mode` ordered
    from nearest to farthest; on Python 3.8+ a tie in the vote is therefore
    resolved in favour of the label encountered first in that order.

    Parameters
    ----------
    X : array-like of shape (n_queries, n_features)
      Samples to classify.

    Returns
    -------
    ndarray of shape (n_queries,)
      Predicted labels, with the same dtype as the training labels.

    Raises
    ------
    AttributeError
      If called before `fit`.
    ValueError
      If `n_neighbours` is not a positive integer or the number of features
      differs from the training data.
    """
    if not hasattr(self, "X_") or not hasattr(self, "y_"):
      raise AttributeError(
        "This KNNClassifier instance is not fitted yet; call fit() first"
      )
    self._check_n_neighbours()
    X = self._as_samples(X)
    if X.shape[1] != self.X_.shape[1]:
      raise ValueError(
        f"X has {X.shape[1]} features, but the classifier was fitted "
        f"with {self.X_.shape[1]}"
      )

    # Reuse the label dtype so string and float labels survive unchanged
    result = np.empty(X.shape[0], dtype=self.y_.dtype)
    for i, x in enumerate(X):
      # One broadcast gives the distance from x to every training sample
      distances = self._euclidean_distance(self.X_, x)
      # Stable sort: equally distant samples keep their training order
      neighbours = np.argsort(distances, kind="stable")[:self.n_neighbours]
      result[i] = statistics.mode(self.y_[neighbours])
    return result


  def score(self, X, y):
    """Return the accuracy of `predict(X)` against the true labels `y`.

    Parameters
    ----------
    X : array-like of shape (n_queries, n_features)
      Samples to classify.
    y : array-like of shape (n_queries,)
      True label of each sample.

    Returns
    -------
    float
      Fraction of samples whose predicted label equals the true label.

    Raises
    ------
    ValueError
      If `y` does not have one label per sample in `X`.
    """
    y_true = np.asarray(y)
    y_pred = self.predict(X)
    # Guard against silent broadcasting, e.g. a column-vector y
    if y_true.shape != y_pred.shape:
      raise ValueError(
        f"y has shape {y_true.shape}, expected {y_pred.shape}"
      )
    return (y_true == y_pred).mean()

## Getting the Data

In [13]:
# Load the iris data as a (features, target) pair
X, y = load_iris(return_X_y=True)

# Hold out 20% of the samples for testing; the split is stratified on `y`
# and seeded so that it is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=7
)

# Shapes of the features and labels of each split
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(120, 4) (120,)
(30, 4) (30,)


## Scaling the Data

In [14]:
# Fit the scaler on the training split only; the test split is not used here
scaler = StandardScaler()
scaler.fit(X_train)

In [15]:
# Transform both splits with the same `scaler` object, which was fitted on X_train
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Training the Model

In [16]:
# Build the custom classifier with 5 voting neighbours and give it the scaled
# training split; `fit` returns the instance itself, so the cell shows its repr
knn = KNNClassifier(n_neighbours=5)
knn.fit(X_train_scaled, y_train)

<__main__.KNNClassifier at 0x7a1b33e848e0>

In [20]:
# Predict a class label for every row of the scaled test split
knn.predict(X_test_scaled)

array([2, 0, 0, 1, 2, 1, 2, 0, 2, 2, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 2, 0,
       1, 0, 2, 2, 2, 1, 0, 2])

## Evaluating the Model

In [18]:
# Accuracy on the training split: fraction of rows whose predicted label matches y_train
knn.score(X_train_scaled, y_train)

0.9583333333333334

In [19]:
# Accuracy on the held-out test split: fraction of rows whose predicted label matches y_test
knn.score(X_test_scaled, y_test)

1.0