# K-Nearest Neighbours

**This notebook contains an implementation of the k-nearest neighbours algorithm written from scratch, and a second one using the scikit-learn Python library**

**Step 1:** Import necessary libraries

In [135]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
%matplotlib inline

# Default size (width, height) for every figure drawn in this notebook.
plt.rcParams.update({'figure.figsize': (20.0, 10.0)})

**Step 2:** Load the Dataset

In [136]:
# Read the Iris table from the CSV file next to this notebook,
# then preview its first five rows.
data = pd.read_csv("Iris.csv")
data.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


**Step 3:** Split the dataset into training data and testing data using the *train_test_split* function from scikit-learn

In [137]:
from sklearn.model_selection import train_test_split

In [138]:
# Features are the four flower measurements; the target is the species label.
X = data[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
Y = data[['Species']]

# Hold out 25% of the rows for testing. The seed is fixed so that the split, and
# therefore every accuracy reported below, is the same after Restart & Run All.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.25, random_state=42
)
m = x_train.shape[0]  # number of training rows
n = x_train.shape[1]  # number of feature columns

# Work with plain NumPy arrays from here on.
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

# kNN Implementation from Scratch

In [139]:
def euclid_dist(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    The element-wise differences ``x1 - x2`` are squared, summed with
    ``np.sum`` and the square root of that total is returned.
    """
    squared_diffs = (x1 - x2) ** 2
    return np.sum(squared_diffs) ** 0.5

In [140]:
def get_neighbours(x_train, x_i, k):
    """Return the row indices of the ``k`` training samples closest to ``x_i``.

    Parameters
    ----------
    x_train : array whose first axis indexes the training samples
        (``x_train.shape[0]`` rows are scanned).
    x_i : the query point, compared against each row with ``euclid_dist``.
    k : number of neighbours to return.

    Returns
    -------
    list of int
        Indices into ``x_train`` ordered from nearest to farthest. Rows at
        exactly the same distance are ordered by ascending index. If ``k``
        exceeds the number of rows, every index is returned.
    """
    # Store (distance, index) pairs instead of a dict keyed by distance: with a
    # dict, two rows at the same distance overwrite each other, so equidistant
    # neighbours were silently dropped and fewer than k (or farther) points
    # could be returned.
    ranked = [
        (euclid_dist(x_train[i], x_i), i)
        for i in range(x_train.shape[0])
    ]
    # Tuples sort by distance first, then by index, which makes ties deterministic.
    ranked.sort()
    return [index for _, index in ranked[:k]]

In [141]:
import operator
def get_response(neighbours, y_train):
    """Return the label that occurs most often among the given neighbours.

    ``neighbours`` is an iterable of indices into ``y_train``. Each index
    contributes one vote for ``y_train[index]``. When several labels share the
    highest count, the one that was encountered first wins. An empty
    ``neighbours`` raises ``IndexError``.
    """
    tally = {}
    for idx in neighbours:
        label = y_train[idx]
        tally[label] = tally.get(label, 0) + 1
    # The sort is stable, so labels with equal counts keep their first-seen order.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    winner, _ = ranked[0]
    return winner

In [142]:
from sklearn.metrics import accuracy_score

In [143]:
# Y was selected as a one-column frame, so the label arrays are reshaped to
# 1-D (one entry per sample) before being indexed for individual labels.
y_train = y_train.reshape((len(y_train),))
y_test = y_test.reshape((len(y_test),))

In [144]:
# Classify each test sample: find its k nearest training samples, then take
# the most frequent label among them as the prediction.
y_pred = []
k = 5
for x in x_test:
    neighbours = get_neighbours(x_train, x, k)
    y_pred.append(get_response(neighbours, y_train))

# Score the from-scratch predictions against the true test labels.
accuracy_score(y_test, y_pred)

0.9736842105263158

# kNN Implementation in scikit-learn

In [145]:
from sklearn import preprocessing

In [146]:
# Learn the label -> integer mapping from the training labels only, then apply
# that same mapping to the test labels. Calling fit_transform on y_test would
# refit the encoder, so if the test split lacked a species the same label would
# get different integers in train and test. transform() instead raises on a
# label it never saw during fitting.
le = preprocessing.LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

In [147]:
from sklearn.neighbors import KNeighborsClassifier

In [148]:
# Fit scikit-learn's k-NN classifier (5 neighbours) on the training split;
# fit() returns the estimator itself, so construction and fitting are chained.
clf = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train_encoded)

# Predict encoded labels for the held-out samples.
y_predicted = clf.predict(x_test)

In [149]:
# Score the scikit-learn predictions against the encoded test labels.
accuracy_score(y_true=y_test_encoded, y_pred=y_predicted)

0.9736842105263158