<a href="https://colab.research.google.com/github/Aniket-Mahindrakar/ML_algorithms/blob/main/K_Nearest_Neighbour.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# K - Nearest Neighbour

In [1]:
# Import classes
import math
import random
from collections import Counter

In [2]:
# K Nearest Neighbour Model Implementation
class KNN:
  def init(self):
    self.x = None
    self.y = None
    self.verbose = False
    self.algorithm_type = "regression"

  def fit(self, x, y, verbose = False, algorithm_type = "regression"):
    self.x = x
    self.y = y
    self.verbose = verbose
    self.algorithm_type = algorithm_type

    if(self.verbose):
      print("Train Inputs = ", (self.x, self.y))

  def predict(self, x, k):
    distance_label = [
        (self.distance(x, train_point), train_label)
        for train_point, train_label in
        zip(self.x, self.y)
    ]

    if(self.verbose):
      print("Neighbour distance labels = ", distance_label)

    neighbours = sorted(distance_label)[:k]

    if(self.verbose):
      print(k, "closest neighbour distances = ", [item for item, _ in neighbours])
      print(k, "closest neighbour labels = ", [item for _, item in neighbours])

    if(self.algorithm_type == "regression"):
      # Average of neighbour classes
      return sum(label for _, label in neighbours) / k

    elif(self.algorithm_type == "classification"):
      # Majority of neighbour classes
      neighbour_labels = [label for dist, label in neighbours]
      return Counter(neighbour_labels).most_common()[0][0]

  def distance(self, x_test, x_train, algorithm_type = "regression"):
    if(self.algorithm_type == "regression"):
      # Euclidean distance
      distance = [(xte - xtr)**2 for xte, xtr in zip(x_test, x_train)]
      distance = math.sqrt(sum(distance))

    if(self.algorithm_type == "classification"):
      # Cosine Similarity
      sum_xTest_xTest, sum_xTest_xTrain, sum_xTrain_xTrain = 0, 0, 0
      for i in range(len(x_test)):
          x = x_test[i]; y = x_train[i]
          sum_xTest_xTest += x*x
          sum_xTrain_xTrain += y*y
          sum_xTest_xTrain += x*y

      distance = sum_xTest_xTrain / math.sqrt(sum_xTest_xTest * sum_xTrain_xTrain)

    return distance

In [3]:
# Train Inputs
x_train = []

for i in [1, 2, 3]:
  for j in [4, 5, 6]:
    for k in [7, 8, 9]:
      x_train.append([i, j, k])

verbose = True

In [4]:
# Regression
# Train Inputs
y_train = random.sample([r/100 for r in range(1, 900, 1)], len(x_train))

algorithm_type = "regression"

# Test Inputs
x_test = [1, 4, 9]
k = 2

# Model
model = KNN()

# Model training
model.fit(x_train, y_train, verbose, algorithm_type)

# Model testing
prediction = model.predict(x_test, k)

print("\nAverage prediction =", prediction)

Train Inputs =  ([[1, 4, 7], [1, 4, 8], [1, 4, 9], [1, 5, 7], [1, 5, 8], [1, 5, 9], [1, 6, 7], [1, 6, 8], [1, 6, 9], [2, 4, 7], [2, 4, 8], [2, 4, 9], [2, 5, 7], [2, 5, 8], [2, 5, 9], [2, 6, 7], [2, 6, 8], [2, 6, 9], [3, 4, 7], [3, 4, 8], [3, 4, 9], [3, 5, 7], [3, 5, 8], [3, 5, 9], [3, 6, 7], [3, 6, 8], [3, 6, 9]], [2.22, 6.91, 5.67, 8.0, 5.26, 4.31, 7.25, 3.94, 3.46, 7.3, 2.17, 6.0, 1.29, 5.33, 5.08, 6.73, 0.21, 2.98, 7.42, 3.27, 4.03, 4.47, 2.21, 0.95, 7.36, 5.0, 5.37])
Neighbour distance labels =  [(2.0, 2.22), (1.0, 6.91), (0.0, 5.67), (2.23606797749979, 8.0), (1.4142135623730951, 5.26), (1.0, 4.31), (2.8284271247461903, 7.25), (2.23606797749979, 3.94), (2.0, 3.46), (2.23606797749979, 7.3), (1.4142135623730951, 2.17), (1.0, 6.0), (2.449489742783178, 1.29), (1.7320508075688772, 5.33), (1.4142135623730951, 5.08), (3.0, 6.73), (2.449489742783178, 0.21), (2.23606797749979, 2.98), (2.8284271247461903, 7.42), (2.23606797749979, 3.27), (2.0, 4.03), (3.0, 4.47), (2.449489742783178, 2.21), (

In [5]:
# Classification
# Train Inputs
y_train = random.choices(range(1, 9), k=len(x_train))

algorithm_type = "classification"

# Test Inputs
x_test = [1, 4, 9]
k = 3

# Model
model = KNN()

# Model training
model.fit(x_train, y_train, verbose, algorithm_type)

# Model testing
prediction = model.predict(x_test, k)

print("\nMost frequent prediction =", prediction)

Train Inputs =  ([[1, 4, 7], [1, 4, 8], [1, 4, 9], [1, 5, 7], [1, 5, 8], [1, 5, 9], [1, 6, 7], [1, 6, 8], [1, 6, 9], [2, 4, 7], [2, 4, 8], [2, 4, 9], [2, 5, 7], [2, 5, 8], [2, 5, 9], [2, 6, 7], [2, 6, 8], [2, 6, 9], [3, 4, 7], [3, 4, 8], [3, 4, 9], [3, 5, 7], [3, 5, 8], [3, 5, 9], [3, 6, 7], [3, 6, 8], [3, 6, 9]], [3, 3, 3, 2, 6, 2, 4, 3, 3, 6, 8, 6, 5, 2, 3, 1, 7, 6, 3, 3, 5, 2, 3, 1, 5, 1, 5])
Neighbour distance labels =  [(0.9947294626039876, 3), (0.9989286273905196, 3), (1.0, 3), (0.9797958971132712, 2), (0.9902586757499069, 6), (0.9960823508073664, 2), (0.958562442090658, 4), (0.9749851691262617, 3), (0.9857160402833807, 3), (0.9850254141636948, 6), (0.9919501068991623, 8), (0.9950879561185557, 6), (0.9722066481023147, 5), (0.9846306294290864, 2), (0.9920369404816276, 3), (0.9529760045804524, 1), (0.970725343394151, 7), (0.9826029297007869, 6), (0.9629083542170928, 3), (0.9743911956946198, 3), (0.9811468853477787, 5), (0.9535563601534947, 2), (0.9693877551020408, 3), (0.9796509890