In [60]:
# Custom functions needed for K-NN

import pandas as pd
import numpy as np
from math import sqrt

def get_euclidean_distance(x1, y1, x2, y2):
    """Return the straight-line distance between points (x1, y1) and (x2, y2)."""
    delta_x = x1 - x2
    delta_y = y1 - y2
    return sqrt(delta_x ** 2 + delta_y ** 2)

def get_k_nearest_neighbours(k, df, x, y):
    """Find the k rows of ``df`` closest to the query point (x, y).

    Closeness is the Euclidean distance between (x, y) and each row's
    ('Sepal Length', 'Sepal Width') pair.

    Parameters
    ----------
    k : int
        Number of neighbours requested. When k exceeds the number of rows,
        every row is returned instead of raising IndexError; k <= 0 yields
        an empty list.
    df : pandas.DataFrame
        Must provide the columns 'Sepal Length', 'Sepal Width' and 'Species'.
    x, y : number
        Coordinates of the query point (sepal length, sepal width).

    Returns
    -------
    list of dict
        Neighbours ordered from nearest to farthest. Each dict has the keys
        'index' (0-based row position in ``df``, not the index label),
        'type' (the row's 'Species' value) and 'distance'.
    """
    # Extract each column once; calling df.iloc[i] per field would build a
    # fresh row Series three times for every row.
    sepal_lengths = df['Sepal Length'].tolist()
    sepal_widths = df['Sepal Width'].tolist()
    species = df['Species'].tolist()

    neighbours = [
        dict(
            index=position,
            type=label,
            distance=get_euclidean_distance(x, y, length, width),
        )
        for position, (length, width, label)
        in enumerate(zip(sepal_lengths, sepal_widths, species))
    ]

    # list.sort is stable, so equidistant rows keep their original row order.
    neighbours.sort(key=lambda neighbour: neighbour['distance'])

    # Slicing tolerates k > len(df); max() keeps a negative k from slicing
    # off the tail and preserves the original "empty result" behaviour.
    return neighbours[:max(k, 0)]

def get_types(nearest_neighbours):
    """Return the 'type' value of each neighbour record, in input order."""
    return [neighbour['type'] for neighbour in nearest_neighbours]

def get_majority(types):
    """Return the label that occurs most often in the list ``types``.

    Ties go to the label that appears earliest in the list, because max()
    returns the first maximal element it encounters. An empty list raises
    ValueError (propagated from max()).
    """
    return max(types, key=lambda label: types.count(label))


In [67]:
# Sample implementation on the IRIS dataset

# Load the IRIS data and keep only the columns the classifier reads
# (the two sepal measurements plus the species label).
iris_df = pd.read_csv('./resources/iris.csv')
preprocessed_df = iris_df.drop(columns=['Petal Length', 'Petal Width'])

# Each query is (k, sepal length, sepal width). For every query, print the
# species of the k nearest rows followed by the majority vote.
for query_k, query_x, query_y in [(3, 4.2, 9.1), (10, 9.2, 9.1)]:
    nearest_neighbours = get_k_nearest_neighbours(query_k, preprocessed_df, query_x, query_y)
    closest_types = get_types(nearest_neighbours)
    print(closest_types)
    print(get_majority(closest_types))


['Iris-setosa', 'Iris-setosa', 'Iris-setosa']
Iris-setosa
['Iris-virginica', 'Iris-virginica', 'Iris-virginica', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-virginica', 'Iris-virginica', 'Iris-versicolor', 'Iris-virginica']
Iris-virginica
