### Packages

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
import torch


In [2]:
# Packages needed for KNN
from csv import reader
from sys import exit
from math import sqrt
from operator import itemgetter


### KNN

In [9]:
def get_classes(training_set):
    """Return the distinct class labels found in ``training_set``.

    Each row of ``training_set`` is expected to carry its class label in the
    last position (``row[-1]``).

    The labels are returned in a deterministic order: sorted when they can be
    compared with each other, otherwise in order of first appearance. A plain
    ``list(set(...))`` yields hash order, which for string labels changes
    between interpreter runs and therefore makes vote tie-breaking (which
    picks the first class with the highest count) non-reproducible.

    Parameters
    ----------
    training_set : iterable of sequences
        Rows whose last element is a hashable class label.

    Returns
    -------
    list
        The unique labels, without duplicates.
    """
    # dict keys keep insertion order, so this de-duplicates deterministically.
    unique_labels = dict.fromkeys(row[-1] for row in training_set)
    try:
        return sorted(unique_labels)
    except TypeError:
        # Labels of mixed, mutually unorderable types: keep first-seen order.
        return list(unique_labels)


In [10]:
def find_neighbors(distances, k):
    """Return the first ``k`` entries of ``distances``.

    No sorting happens here: the caller is expected to pass ``distances``
    already ordered from nearest to farthest, so that the leading ``k``
    entries are the nearest neighbours. If ``distances`` holds fewer than
    ``k`` entries, all of them are returned.
    """
    nearest = distances[:k]
    return nearest


In [11]:
def find_response(neighbors, classes):
    """Pick the class that receives the most votes among ``neighbors``.

    Every neighbour is a sequence whose second-to-last element is its class
    label (the last element is the distance appended by ``knn``).

    Returns
    -------
    tuple
        ``(index, votes)`` where ``index`` is the position of the winning
        class in ``classes`` and ``votes`` is how many neighbours carry that
        label. On a tie the class that appears first in ``classes`` wins.
    """
    # One counter per class, in the same order as `classes`.
    tally = [
        sum(1 for neighbor in neighbors if neighbor[-2] == label)
        for label in classes
    ]
    # max() keeps the first maximal item, which gives the tie-break above.
    return max(enumerate(tally), key=itemgetter(1))


In [31]:
def knn(training_set, x_test, y_test, k):
    """Classify ``x_test`` with k-nearest-neighbour voting and score it.

    For every test sample the Euclidean distance to every training row is
    computed, the ``k`` closest rows vote with their class label, and the
    winning label becomes the prediction. The predictions are then compared
    with ``y_test``.

    Parameters
    ----------
    training_set : sequence of sequences
        Training rows; each row holds the feature values followed by the
        class label in the last position.
    x_test : iterable of sequences
        Feature values of the samples to classify.
    y_test : array-like
        True labels of ``x_test``, in the same order.
    k : int
        Number of nearest neighbours that take part in the vote.

    Returns
    -------
    float
        Percentage (0-100) of test samples whose predicted label equals the
        true label.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, or if the number of predictions does not
        match the shape of ``y_test``.

    Notes
    -----
    Errors raised while computing distances are no longer caught and
    printed: swallowing them left a partial prediction list that was then
    scored against the full ``y_test``, producing a meaningless accuracy.
    """
    if k < 1:
        raise ValueError('k must be at least 1, got %r' % (k,))

    # Everything but the trailing label is a feature.
    n_features = len(training_set[0]) - 1

    # generate response classes from training data
    classes = get_classes(training_set)

    label_predict = []
    for test_instance in x_test:
        distances = []
        for row in training_set:
            squared = sum(
                (x - y) * (x - y)
                for x, y in zip(row[:n_features], test_instance)
            )
            # list(row) so tuple / ndarray rows are extended, not added to.
            distances.append(list(row) + [sqrt(squared)])

        # The distance is always the last element of each extended row.
        distances.sort(key=itemgetter(-1))

        # find k nearest neighbors
        neighbors = find_neighbors(distances, k)

        # get the class with maximum votes
        index, _ = find_response(neighbors, classes)
        label_predict.append(classes[index])

    # Compare as arrays: with a plain-list y_test, `list == list` would
    # collapse to a single bool and report either 0% or 100%.
    predicted = np.asarray(label_predict)
    expected = np.asarray(y_test)
    if predicted.shape != expected.shape:
        raise ValueError(
            'y_test has shape %s but %d predictions were made'
            % (expected.shape, len(label_predict))
        )

    # Testing accuracy, as a percentage
    test_acc = np.mean(predicted == expected) * 100
    return test_acc


### Load Data

In [32]:
# Read the cleaned CSV and convert the resulting frame to a NumPy array.
dataset = np.array(pd.read_csv('CleanedData.csv'))

# Every column except the last goes to `data`; the last column is `target`.
data, target = dataset[:, :-1], dataset[:, -1]

In [33]:
# Experiment settings, named so they can be tuned in one place.
SEEDS = range(40, 51)   # one train/test split per random seed
TEST_SIZE = 0.33        # fraction of the rows held out by each split
N_EVAL = 100            # only the first N_EVAL held-out rows are scored
                        # (presumably to keep the pure-Python KNN fast - TODO confirm)
K = 5                   # number of neighbours that vote

accuracy_list = []
for random_seed in SEEDS:
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=TEST_SIZE, random_state=random_seed)
    # knn() reads the label from the last position of each training row.
    train_set = np.column_stack((x_train, y_train)).tolist()
    test_acc = knn(train_set, x_test[:N_EVAL], y_test[:N_EVAL], k=K)
    accuracy_list.append(test_acc)

print(accuracy_list)

[74.0, 83.0, 76.0, 71.0, 73.0, 72.0, 82.0, 69.0, 73.0, 80.0, 70.0]


In [34]:
np.mean(accuracy_list)

74.81818181818181