In [22]:
import numpy as np
import pandas as pd
from math import sqrt
from sklearn.model_selection import train_test_split

In [23]:
# Euclidean distance between two vectors
def get_distance(v1, v2):
    """Return the Euclidean distance between ``v1`` and ``v2``.

    The vectors are compared component by component over the first
    ``len(v1)`` positions; any further components of ``v2`` are not read.

    Parameters
    ----------
    v1, v2 : indexable sequences of numbers
        ``v2`` must have at least ``len(v1)`` components.

    Returns
    -------
    float
        Square root of the summed squared component differences
        (``0.0`` when ``v1`` is empty).
    """
    squared_sum = 0.0
    for idx in range(len(v1)):
        diff = v1[idx] - v2[idx]
        squared_sum += diff ** 2
    return sqrt(squared_sum)

# Get the k nearest neighbours of a data point
def get_neighbours(X_train, y_train, x_test_val, k):
    """Return the ``k`` training samples closest to ``x_test_val``.

    Parameters
    ----------
    X_train : iterable of feature vectors
        Training samples.
    y_train : iterable of labels
        Labels aligned *by position* with ``X_train``.
    x_test_val : feature vector
        The query point.
    k : int
        Number of neighbours requested.

    Returns
    -------
    list
        Entries of the form ``[distance, x_train_val, y_train_val]`` sorted
        by increasing distance. Holds ``k`` entries, or every training
        sample when ``k`` exceeds the training-set size, and is empty when
        ``k <= 0``.

    Raises
    ------
    IndexError
        If ``y_train`` has fewer entries than ``X_train``.
    """
    # Materialise the labels positionally. Indexing ``y_train[i]`` directly is
    # a *label-based* lookup on a pandas Series, which pairs samples with the
    # wrong labels (or raises KeyError) once the Series index has been
    # shuffled, e.g. by a train/test split.
    labels = list(y_train)

    neigh_dist = [
        [get_distance(x_train_val, x_test_val), x_train_val, labels[position]]
        for position, x_train_val in enumerate(X_train)
    ]

    # Sort in increasing order of distance from the test point. The sort is
    # stable, so equidistant samples keep their training-set order.
    neigh_dist.sort(key=lambda entry: entry[0])

    # Slicing (rather than indexing 0..k-1) tolerates k larger than the number
    # of training samples; max() keeps a negative k from slicing off the tail.
    return neigh_dist[:max(k, 0)]

# KNN algorithm
def knn(X_train, y_train, X_test, k):
    """Classify every sample in ``X_test`` by majority vote of its neighbours.

    Parameters
    ----------
    X_train : iterable of feature vectors
        Training samples.
    y_train : iterable of hashable labels
        Labels aligned by position with ``X_train``.
    X_test : iterable of feature vectors
        Samples to classify.
    k : int
        Number of nearest neighbours that vote; must be at least 1.

    Returns
    -------
    list
        One predicted label per test sample, in ``X_test`` order. When
        several classes tie for the most votes, the class of the nearest
        neighbour among the tied classes is chosen.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer, got %r" % (k,))

    predictions = []
    for test_val in X_test:
        neighbours = get_neighbours(X_train, y_train, test_val, k)

        # Tally votes in order of increasing distance. Dicts keep insertion
        # order, so the first key seen belongs to the nearest neighbour.
        votes = {}
        for neighbour in neighbours:
            label = neighbour[2]
            votes[label] = votes.get(label, 0) + 1

        # max() returns the first maximal key, so ties are resolved
        # deterministically in favour of the nearest neighbour's class.
        # Voting over set(labels) would instead depend on set iteration
        # order, which is arbitrary for string labels.
        predictions.append(max(votes, key=votes.get))
    return predictions

In [24]:
# Load the Iris dataset and discard the "Id" column, which is not used
# as a feature or a label further down
iris = pd.read_csv('Iris.csv').drop(columns="Id")

In [25]:
# Features are the first four columns; labels are the fifth column
X = iris.iloc[:, :4].values
y = iris.iloc[:, 4].values

# Hold out 20% of the samples as a test set (random_state fixed at 0)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [26]:
# Number of nearest neighbours considered
k = 3

# Predict a label for every test sample
predictions = knn(X_train, y_train, X_test, k)

# Count correct predictions. Both sides are converted to arrays explicitly so
# the comparison is element-wise whatever container y_test is stored in
# (a bare `list == list` would collapse to a single bool).
correct = int(np.sum(np.asarray(predictions) == np.asarray(y_test)))

# Accuracy = number of correct predictions / total number of predictions
accuracy = (correct / len(predictions)) * 100
print('Accuracy: %.3f%%' % accuracy)

Accuracy: 96.667%
