**K-Nearest Neighbors from Scratch on the Iris Dataset**

In [2]:
#Import libraries
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.manifold import TSNE

In [3]:
#Load the Iris dataset bundled with scikit-learn; its .data (features) and .target (labels) are used by the split below
iris_dataset = datasets.load_iris()

In [4]:
#Split into train (70%) and test (30%) sets.
#random_state pins the shuffle so a fresh "Restart & Run All" reproduces the same
#partition, and therefore the same accuracies, on every run.
X_train, X_test, y_train, y_test = train_test_split(
  iris_dataset.data,
  iris_dataset.target,
  test_size=0.3,
  random_state=42,
)

In [5]:
#Euclidean (L2) distance between two points
def euclidean_distance(x1, x2):
  """Return the Euclidean distance between ``x1`` and ``x2``.

  The inputs are subtracted element-wise, so they must support ``x1 - x2``
  (e.g. NumPy arrays of matching shape). The squared differences are summed
  over all elements and the square root of that total is returned.
  """
  delta = x1 - x2
  squared_total = np.square(delta).sum()
  return np.sqrt(squared_total)

In [6]:
#Manhattan (L1) distance between two points
def manhattan_distance(x1, x2):
  """Return the Manhattan distance between ``x1`` and ``x2``.

  The inputs are subtracted element-wise, so they must support ``x1 - x2``
  (e.g. NumPy arrays of matching shape). The result is the sum of the
  absolute differences over all elements.
  """
  gap = x1 - x2
  return np.absolute(gap).sum()

In [7]:
#Minkowski distance of order p between two points
def minkowski_distance(x1, x2, p):
  """Return the Minkowski distance of order ``p`` between ``x1`` and ``x2``.

  ``p=1`` gives the Manhattan distance and ``p=2`` the Euclidean distance.
  ``p=np.inf`` gives the Chebyshev distance (largest absolute difference).

  Args:
    x1, x2: Operands supporting element-wise ``x1 - x2`` (e.g. NumPy arrays
      of matching shape).
    p: Order of the distance; must be strictly positive, may be ``np.inf``.

  Returns:
    The distance as a NumPy scalar.

  Raises:
    ValueError: If ``p`` is zero or negative.
  """
  # 1 / p is undefined for p == 0 and a negative order is not a distance,
  # so reject both explicitly instead of failing later or returning nonsense.
  if p <= 0:
    raise ValueError(f"p must be positive, got {p!r}")

  abs_diff = np.abs(x1 - x2)

  # The general formula degenerates for an infinite order (x ** inf, then
  # ** 0), so use the limit directly. initial=0 keeps empty input at 0, the
  # same value the sum-based formula yields.
  if np.isinf(p):
    return np.max(abs_diff, initial=0)

  return np.sum(abs_diff ** p) ** (1 / p)

In [8]:
#Find the training points closest to a query point
def get_nearest_neighbors(x0, num_neighbors):
  """Return indices of the ``num_neighbors`` rows of ``X_train`` nearest to ``x0``.

  Distances are Minkowski distances of order 2 (Euclidean), computed with
  ``minkowski_distance``. The training matrix is read from the notebook-level
  ``X_train`` variable, so the train/test split cell must have been run first.

  A training row identical to ``x0`` is not special-cased: its distance is 0,
  so it ranks first among the neighbors. This is why a training point counts
  as its own neighbor when the model is evaluated on the training set.

  Args:
    x0: Query point with the same number of features as a row of ``X_train``.
    num_neighbors: Maximum number of neighbor indices to return.

  Returns:
    Integer array of row indices into ``X_train``, ordered from nearest to
    farthest. It is shorter than ``num_neighbors`` if ``X_train`` has fewer rows.
  """
  # dtype=float keeps the result a float array even when X_train is empty.
  distances = np.array(
    [minkowski_distance(x0, x, 2) for x in X_train],
    dtype=float,
  )

  return np.argsort(distances)[:num_neighbors]

In [9]:
#Classify a sample by majority vote among its nearest training neighbors
def predict(x0, num_neighbors):
  """Return the class label that wins the vote among ``x0``'s nearest neighbors.

  The neighbor indices come from ``get_nearest_neighbors``; their labels are
  looked up in the notebook-level ``y_train``. Every distinct label in
  ``y_train`` is a candidate, and the candidate that matches the most
  neighbor labels is returned. When several candidates tie, the one that
  comes first in the sorted unique labels wins.
  """
  neighbor_labels = y_train[get_nearest_neighbors(x0, num_neighbors)]
  candidates = np.unique(y_train)

  # One tally per candidate, in the same (sorted) order as `candidates`.
  tallies = [np.sum(neighbor_labels == label) for label in candidates]

  # argmax returns the first maximum, which gives the tie-break described above.
  return candidates[np.argmax(tallies)]

In [10]:
#Sanity check: classify a single four-feature sample using its 3 nearest neighbors
predict(x0=np.array([5.1, 3.5, 1.4, 0.2]), num_neighbors=3)

0

In [13]:
#Score the from-scratch KNN classifier on a labelled set
def evaluate(num_neighbors, X, y):
  """Return the accuracy of the scratch KNN classifier on ``(X, y)``.

  Each row of ``X`` is classified with ``predict`` and the predictions are
  compared against ``y`` with ``accuracy_score``.

  Args:
    num_neighbors: Number of neighbors that vote on each prediction.
    X: Iterable of samples, one per row.
    y: True labels, one per row of ``X``.

  Returns:
    The value returned by ``accuracy_score(y, y_pred)``.

  Raises:
    ValueError: If ``X`` and ``y`` do not have the same number of entries.
  """
  # Without this check a short X would leave some labels with no prediction,
  # and a long X would run past the end of y.
  if len(X) != len(y):
    raise ValueError(
      f"X and y must have the same length, got {len(X)} and {len(y)}"
    )

  # Collect predictions in their native label dtype instead of writing them
  # into a float buffer, which would coerce (or reject) the class labels.
  y_pred = np.array([predict(x, num_neighbors) for x in X])

  return accuracy_score(y, y_pred)

In [14]:
#Accuracy of the scratch model with 7 neighbors on both splits
train_accuracy = evaluate(7, X_train, y_train)
test_accuracy = evaluate(7, X_test, y_test)

print("Evaluation on training set", train_accuracy)
print("Evaluation on testing set", test_accuracy)

Evaluation on training set 0.9904761904761905
Evaluation on testing set 0.9555555555555556


**KNN with scikit-learn**

In [15]:
from sklearn.neighbors import KNeighborsClassifier
#Reference model: scikit-learn KNN with 7 neighbors and the Euclidean metric,
#the same settings the scratch implementation is evaluated with
classifier = KNeighborsClassifier(n_neighbors=7, metric='euclidean').fit(X_train, y_train)

y_pred = classifier.predict(X_test)
y_train_pred = classifier.predict(X_train)

print('Accuracy on training', accuracy_score(y_train, y_train_pred))
print('Accuracy on testing', accuracy_score(y_test, y_pred))

Accuracy on training 0.9904761904761905
Accuracy on testing 0.9555555555555556
