<a href="https://colab.research.google.com/github/ReutFarkash/useful/blob/main/K_Nearest_Neighbor_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[K-Nearest Neighbor from Scratch - Machine Learning Python](https://www.youtube.com/watch?v=QzAaRuDskyc&list=PLhhyoLH6IjfxpLWyOgBt1sBzIapdRKZmj&ab_channel=AladdinPersson)

In [1]:
import numpy as np

In [3]:
class KNearestNeighbor():
  """k-nearest-neighbour classifier using Euclidean (L2) distance.

  The training step only stores the data; all work happens in `predict`,
  which computes a (num_test, num_train) distance matrix and then takes a
  majority vote among the `k` closest training labels for each test row.

  Three interchangeable distance implementations are provided (two loops,
  one loop, fully vectorized); they differ only in speed.
  """

  def __init__(self, k):
    """Store the number of neighbours `k` that take part in each vote."""
    self.k = k

  def train(self, X, y):
    """Memorise the training set.

    Args:
      X: 2-D array, one training sample per row.
      y: 1-D array of labels aligned with the rows of `X`. Labels are cast
        to int and counted with `np.bincount` at prediction time, so they
        must be non-negative integer values.
    """
    self.X_train = X
    self.y_train = y

  def predict(self, X_test, num_loops=2):
    """Predict a label for every row of `X_test`.

    Args:
      X_test: 2-D array with the same number of columns as the training data.
      num_loops: selects the distance implementation: 2 -> two Python loops,
        1 -> one Python loop, any other value -> fully vectorized.

    Returns:
      1-D float array of length `X_test.shape[0]` holding the predicted labels.
    """
    # The branches must be mutually exclusive: with two independent `if`
    # statements the `else` would also fire for num_loops == 2 and replace
    # the two-loop result with the vectorized one.
    if num_loops == 2:
      distances = self.compute_distance_two_loops(X_test)
    elif num_loops == 1:
      distances = self.compute_distance_one_loop(X_test)
    else:
      distances = self.compute_distance_vectorized(X_test)
    return self.predict_labels(distances)

  def compute_distance_two_loops(self, X_test):
    """Return the L2 distance matrix using a loop over every (test, train) pair.

    Naive reference implementation; entry [i, j] is the distance between
    test row i and training row j.
    """
    num_test = X_test.shape[0]
    num_train = self.X_train.shape[0]
    distances = np.zeros((num_test, num_train))
    for i in range(num_test):
      for j in range(num_train):
        diff = X_test[i, :] - self.X_train[j, :]
        distances[i, j] = np.sqrt(np.sum(diff**2))
    return distances

  def compute_distance_one_loop(self, X_test):
    """Return the L2 distance matrix using one loop over the test rows.

    Each iteration broadcasts a single test row against the whole training
    matrix, filling one row of the result at a time.
    """
    num_test = X_test.shape[0]
    num_train = self.X_train.shape[0]
    distances = np.zeros((num_test, num_train))
    for i in range(num_test):
      diff = self.X_train - X_test[i, :]
      distances[i, :] = np.sqrt(np.sum(diff**2, axis=1))
    return distances

  def compute_distance_vectorized(self, X_test):
    """Return the L2 distance matrix without any Python-level loop.

    Uses the expansion
      ||a - b||^2 = ||a||^2 - 2 * a.b + ||b||^2
    so the whole matrix comes from one matrix product plus broadcasting.
    """
    X_test_squared = np.sum(X_test**2, axis=1, keepdims=True)             # (num_test, 1)
    X_train_squared = np.sum(self.X_train**2, axis=1, keepdims=True)      # (num_train, 1)
    cross_term = np.dot(X_test, self.X_train.T)                           # (num_test, num_train)
    squared = X_test_squared - 2 * cross_term + X_train_squared.T
    # Floating-point cancellation can leave tiny negative values where the
    # true squared distance is ~0 (e.g. a point compared with itself);
    # clamp them so the square root yields 0 instead of NaN.
    return np.sqrt(np.maximum(squared, 0))

  def predict_labels(self, distances):
    """Majority-vote a label for each row of a distance matrix.

    Args:
      distances: (num_test, num_train) array as returned by the
        `compute_distance_*` methods.

    Returns:
      1-D float array with one predicted label per test row. Vote ties go
      to the smallest label, because `np.argmax` returns the first maximum.
    """
    num_test = distances.shape[0]
    y_pred = np.zeros(num_test)

    for i in range(num_test):
      # Training indices ordered from nearest to farthest.
      y_indices = np.argsort(distances[i, :])
      k_closest_classes = self.y_train[y_indices[:self.k]].astype(int)
      # bincount tallies votes per label; argmax picks the most frequent one.
      y_pred[i] = np.argmax(np.bincount(k_closest_classes))

    return y_pred



# Fetch the example data: a comma-delimited feature matrix and a label vector.
X = np.loadtxt(
    'https://raw.githubusercontent.com/aladdinpersson/Machine-Learning-Collection/master/ML/algorithms/knn/example_data/data.txt',
    delimiter=',',
)
y = np.loadtxt(
    'https://raw.githubusercontent.com/aladdinpersson/Machine-Learning-Collection/master/ML/algorithms/knn/example_data/targets.txt'
)

# Build a 3-nearest-neighbour classifier and hand it the full data set.
KNN = KNearestNeighbor(k=3)
KNN.train(X, y)

# num_loops=0 is neither 1 nor 2, so the vectorized distance computation is
# the one whose result is used. Note that predictions are made on the very
# samples the classifier was trained on, so the printed figure is a
# training-set accuracy rather than a held-out estimate.
y_pred = KNN.predict(X, num_loops=0)

# Fraction of samples whose predicted label equals the true label.
print(f'Accuracy: {sum(y_pred==y)/y.shape[0]}')

Accuracy: 0.9111111111111111


