In [1]:
import pandas as pd
import numpy as np
import random
from collections import Counter
import warnings
from sklearn.preprocessing import LabelEncoder

In [12]:
def KNN(data, predict, k=3):
    """Classify ``predict`` by majority vote among its k nearest training points.

    Parameters
    ----------
    data : dict
        Maps each class label to a list of feature vectors for that class.
    predict : sequence of numbers
        Feature vector to classify; it is subtracted element-wise from every
        training vector, so the lengths must match.
    k : int, default 3
        Number of nearest neighbours that take part in the vote.

    Returns
    -------
    tuple
        ``(label, confidence)`` where ``label`` is the most common class among
        the neighbours and ``confidence`` is the fraction of the votes actually
        cast that went to that class.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or ``data`` contains no training points.
    """
    # A non-positive k would silently slice the wrong neighbours (e.g. [:-1])
    # or leave no votes at all, so reject it up front.
    if k < 1:
        raise ValueError('k must be a positive integer')
    # Warn when the number of class groups is at least k.
    if len(data) >= k:
        warnings.warn('k set to a value lesser than voting groups')

    # Convert the query point once instead of once per training vector.
    target = np.array(predict)
    distances = []
    for group in data:
        for features in data[group]:
            euclidean_distance = np.linalg.norm(np.array(features) - target)
            distances.append([euclidean_distance, group])

    if not distances:
        raise ValueError('data contains no training points')

    # Pairs sort by distance first; equal distances fall back to comparing labels.
    votes = [group for _, group in sorted(distances)[:k]]
    vote_results, top_count = Counter(votes).most_common(1)[0]
    # Divide by the votes actually cast: with fewer than k training points,
    # dividing by k would understate the winning share.
    confidence = top_count / len(votes)
    return vote_results, confidence

In [13]:
# Read the table from 'car.data' (path is relative to the working directory).
df = pd.read_csv('car.data')

# Rebuild the frame with every column replaced by its integer label codes.
# One encoder object is refit for each column in turn, so after this cell
# `le` only remembers the mapping of the last column it was fitted on.
le = LabelEncoder()
df = pd.DataFrame(
    {column: le.fit_transform(df[column]) for column in df.columns},
    index=df.index,
)
df

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,4,4,0,0,3,1,3
1,4,4,0,0,3,2,3
2,4,4,0,0,3,0,3
3,4,4,0,0,2,1,3
4,4,4,0,0,2,2,3
...,...,...,...,...,...,...,...
1724,2,1,3,2,2,2,2
1725,2,1,3,2,2,0,4
1726,2,1,3,2,0,1,3
1727,2,1,3,2,0,2,2


In [14]:
# Split the encoded rows into per-class train and test buckets.
# Each row is a list of floats whose last element is the class label.
full_data = df.astype(float).values.tolist()

# Seed the shuffle so Restart & Run All reproduces the same split and accuracy.
SEED = 42
random.seed(SEED)
random.shuffle(full_data)

test_size = 0.2
train_set = {0:[], 1:[], 2:[], 3:[], 4:[]}
test_set =  {0:[], 1:[], 2:[], 3:[], 4:[]}

# Compute the boundary once as a positive index. Slicing with a negative count
# breaks when the count rounds to 0: [:-0] is empty and [-0:] is the whole list.
n_test = int(test_size * len(full_data))
split_at = len(full_data) - n_test
train_data = full_data[:split_at]
test_data = full_data[split_at:]

#filling up train and test sets with train and test data
# The label is a float (e.g. 3.0); it hashes and compares equal to the int key 3.
for row in train_data:
    train_set[row[-1]].append(row[:-1])
for row in test_data:
    test_set[row[-1]].append(row[:-1])

In [16]:
# Score the classifier on the held-out rows: each test sample is classified
# against the training buckets with k=7, and the confidence of every wrong
# prediction is printed before the overall accuracy.
correct = 0
total = 0
for true_label, samples in test_set.items():
    for sample in samples:
        vote, confidence = KNN(train_set, sample, k=7)
        total += 1
        if vote == true_label:
            correct += 1
            continue
        # Misclassified: show how large the winning vote share was.
        print(confidence)
print('accuracy ', correct/total)

0.5714285714285714
0.5714285714285714
0.7142857142857143
0.5714285714285714
0.5714285714285714
0.5714285714285714
0.42857142857142855
0.42857142857142855
0.5714285714285714
0.42857142857142855
0.5714285714285714
0.5714285714285714
0.7142857142857143
0.5714285714285714
0.42857142857142855
0.5714285714285714
0.5714285714285714
0.7142857142857143
0.7142857142857143
0.5714285714285714
0.5714285714285714
0.7142857142857143
0.7142857142857143
0.5714285714285714
0.5714285714285714
0.42857142857142855
0.7142857142857143
0.7142857142857143
0.5714285714285714
0.5714285714285714
0.5714285714285714
0.42857142857142855
0.2857142857142857
0.42857142857142855
0.42857142857142855
0.42857142857142855
0.5714285714285714
0.42857142857142855
0.7142857142857143
0.42857142857142855
accuracy  0.8840579710144928
