# Building a K-Nearest Neighbors classifier from scratch

In [3]:
import numpy as np
from collections import Counter

In [2]:
def euclidean_distance(x1,x2):
  """Return the Euclidean (L2) distance between two points.

  Parameters
  ----------
  x1, x2 : array-like of numbers
    Coordinates of the two points. Lists, tuples and numpy arrays are
    accepted; the shapes must be equal or broadcastable.

  Returns
  -------
  numpy.float64
    sqrt(sum((x1 - x2) ** 2)) taken over every element.
  """
  # Cast to float before subtracting: plain Python lists do not support `-`,
  # and unsigned-integer arrays would wrap around (e.g. uint8 0 - 20 == 236)
  # and overflow when squared, producing a wrong distance.
  diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
  return np.sqrt(np.sum(diff ** 2))

In [36]:
class KNN:
  """k-nearest-neighbors classifier (Euclidean distance, majority vote).

  Parameters
  ----------
  k : int, default=3
    Number of nearest training samples that vote on each prediction.
    If k exceeds the number of training samples, every sample votes.

  Raises
  ------
  TypeError
    If k is not an integer.
  ValueError
    If k is smaller than 1.
  """

  def __init__(self,k=3):
    if not isinstance(k, (int, np.integer)):
      raise TypeError("k must be an integer, got {!r}".format(k))
    # k=0 would select no neighbors at all, and a negative k would make the
    # [:k] slice silently drop neighbors from the far end instead.
    if k < 1:
      raise ValueError("k must be at least 1, got {!r}".format(k))
    self.k = k

  def fit(self,X,y):
    """Store the training set; all real work happens at prediction time.

    Parameters
    ----------
    X : array-like of numbers, one entry per training sample
    y : array-like of hashable labels, same length as X

    Raises
    ------
    ValueError
      If X and y differ in length or are empty.
    """
    # Convert to plain arrays so that later indexing is always positional
    # (a pandas Series would otherwise be looked up by index label).
    X = np.asarray(X, dtype=float)
    y = np.asarray(y)
    if X.ndim == 0 or y.ndim == 0 or len(X) != len(y):
      raise ValueError("X and y must contain the same number of samples")
    if len(X) == 0:
      raise ValueError("cannot fit on an empty training set")
    self.X_train = X
    self.y_train = y

  def predict(self,X):
    """Return a numpy array holding one predicted label per sample in X.

    Raises AttributeError if called before fit().
    """
    # Iterate over rows of an array, not over whatever the container yields
    # (iterating a DataFrame directly would yield its column names).
    samples = np.asarray(X, dtype=float)
    return np.array([self._predict(x) for x in samples])

  def _predict(self,x):
    """Return the majority label among the k training samples nearest to x."""
    # Euclidean distance from x to every training sample in one vectorized
    # step; each sample is flattened so scalar and vector features both work.
    diff = self.X_train - x
    distances = np.sqrt(np.sum(diff.reshape(len(diff), -1) ** 2, axis=1))
    # indices of the k nearest samples, closest first
    k_indices = np.argsort(distances)[:self.k]
    k_nearest_labels = [self.y_train[i] for i in k_indices]
    # Majority vote; on equal counts Counter keeps first-seen order, so the
    # label of the closer neighbor wins.
    most_common = Counter(k_nearest_labels).most_common(1)
    return most_common[0][0]

## Testing the performance of our KNN classifier on the Iris dataset

In [37]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [38]:
# Load the iris dataset bundled with scikit-learn, then pull out the
# feature matrix and the class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [39]:
# Hold out 20% of the samples for testing; the fixed random_state makes
# the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

In [44]:
# Instantiate the from-scratch classifier with k=5 neighbors and give it
# the training split to store.
clf = KNN(k=5)
clf.fit(X_train,y_train)

In [45]:
# Predict a class label for every sample in the held-out test split.
predictions = clf.predict(X_test)

In [46]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same shape as ``y_true``.

    Returns
    -------
    numpy.float64
        Number of matching positions divided by ``len(y_true)``.

    Raises
    ------
    ValueError
        If the inputs differ in shape or are empty.
    """
    # Compare as arrays: `==` on two Python lists yields a single bool, which
    # would collapse the score to either 1/len or 0.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                y_true.shape, y_pred.shape
            )
        )
    if y_true.size == 0:
        raise ValueError("accuracy is undefined for empty inputs")
    return np.sum(y_true == y_pred) / len(y_true)

## Checking Accuracy

In [50]:
# Report the share of correct test-set predictions, formatted to four decimal places.
print("Accuracy : {:.4f}".format(accuracy(y_test, predictions)))

Accuracy : 0.9667
