### Importing the Libraries

In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from math import sqrt

### Loading the data

In [16]:
# Read the dataset from the local CSV file into a DataFrame.
data = pd.read_csv('data.csv')
# Bare last expression: the notebook renders the DataFrame as the cell output.
data

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [39]:
# Inspect the column types: the distance computation below subtracts feature
# values, so every feature column has to be numeric.
data.dtypes

Sepal Length    float64
Sepal Width     float64
Petal Length    float64
Petal Width     float64
Species          object
dtype: object

### Euclidean Distance

$\large{d(p, q) = \sqrt{\sum_{i=1}^{n}(q_i - p_i)^2}}$

In [70]:
def euclidean_distance(obs1, obs2, has_label=True):
    """Return the Euclidean distance between two observations.

    Parameters
    ----------
    obs1 : sequence
        First observation. By default its last element is treated as a class
        label and is left out of the distance.
    obs2 : sequence
        Second observation. Only its leading elements are read, one for every
        compared element of ``obs1``; anything after that is ignored.
    has_label : bool, default True
        Whether the last element of ``obs1`` is a label rather than a feature.
        Pass ``False`` when ``obs1`` holds features only; otherwise its last
        feature would be silently dropped from the distance.

    Returns
    -------
    float
        Square root of the summed squared differences of the compared
        elements.

    Raises
    ------
    IndexError
        If ``obs2`` has fewer elements than are compared from ``obs1``.
    """
    # The default keeps the original behaviour: the trailing label of a
    # training row must never enter the arithmetic.
    num_features = len(obs1) - 1 if has_label else len(obs1)
    return sqrt(sum((obs1[i] - obs2[i]) ** 2 for i in range(num_features)))

### Returning the Neighbors
Return the `numNeighbors` training observations closest to the test observation, as `(distance, label)` pairs sorted by increasing distance.

In [8]:
def return_neighbors(train, testObs, numNeighbors):
    """Return the closest training rows to ``testObs``.

    Each row of ``train`` is expected to carry its label as the last element.
    The result is a list of ``(distance, label)`` tuples ordered by increasing
    distance and cut to the first ``numNeighbors`` entries.
    """
    # Pair every training row's distance to the test observation with its label.
    scored = [(euclidean_distance(row, testObs), row[-1]) for row in train]

    # Rank by distance only; the sort is stable, so ties keep training order.
    ranked = sorted(scored, key=lambda pair: pair[0])

    return ranked[:numNeighbors]

### Classification
Return the class label that occurs most often among the nearest neighbors (majority vote).

In [69]:
def make_prediction(train, testObs, numNeighbors):
    """Predict the label of ``testObs`` by majority vote of its neighbors.

    The labels of the ``numNeighbors`` nearest training rows are collected and
    the most frequent one is returned. On a tie, the tied label that appears
    first in distance order wins.
    """
    nearest = return_neighbors(train, testObs, numNeighbors)
    labels = [pair[-1] for pair in nearest]

    # One vote count per neighbor; the first position holding the top count
    # decides, which resolves ties in favour of the closest neighbor.
    votes = [labels.count(label) for label in labels]
    return labels[votes.index(max(votes))]

### Accuracy

In [11]:
def accuracy(predictions, testVals):
    """Return the percentage of predictions that equal the true labels.

    Parameters
    ----------
    predictions : sequence
        Predicted labels.
    testVals : sequence
        True labels, in the same order as ``predictions``.

    Returns
    -------
    float
        Share of matching positions, scaled to the range 0-100.

    Raises
    ------
    ValueError
        If ``predictions`` is empty, or if the two inputs differ in length.
    """
    total = len(predictions)
    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("accuracy() needs at least one prediction")
    if len(testVals) != total:
        # A length mismatch would otherwise score only a prefix of the labels.
        raise ValueError(
            "predictions and testVals must have the same length "
            f"(got {total} and {len(testVals)})"
        )

    # Pair values by position. Indexing with [i] would be a label-based lookup
    # on a pandas Series, which is wrong once the index has been shuffled.
    correct = sum(
        1 for predicted, actual in zip(predictions, testVals) if predicted == actual
    )
    return (correct / total) * 100

### KNN

In [74]:
def knn(train, test, testVals, numNeighbors):
    """Classify every observation in ``test`` and report the accuracy.

    Each test observation is labelled by ``make_prediction`` using the rows of
    ``train``. The accuracy against ``testVals`` is printed, and the list of
    predicted labels is returned.
    """
    predictions = [
        make_prediction(train, observation, numNeighbors) for observation in test
    ]

    score = accuracy(predictions, testVals)
    print("Accuracy:- ", score, "%")
    return predictions

In [19]:
# Separate the Species label from the feature columns.
Y = data['Species']
X = data.drop(columns=['Species'])

# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=42
)

# Re-attach the label as the last column of the training rows: the neighbor
# search reads each row's label from its final position.
train = pd.concat([X_train, Y_train], axis=1)

In [75]:
# Number of nearest neighbors that vote on each prediction.
numNeighbors = 5

# Training rows carry the label as their last column; the test rows hold
# features only, with their true labels passed separately for scoring.
predictions = knn(
    train=train.values,
    test=X_test.values,
    testVals=Y_test.values,
    numNeighbors=numNeighbors,
)

Accuracy:-  100.0 %
