In [14]:
import numpy as np 
import pandas as pd 
from sklearn import neighbors, metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [15]:
data = pd.read_csv("car_evaluation.csv")
data.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [16]:
# Select features
X = data[['buying', 'maint', 'safety']]
X.head()

Unnamed: 0,buying,maint,safety
0,vhigh,vhigh,low
1,vhigh,vhigh,med
2,vhigh,vhigh,high
3,vhigh,vhigh,low
4,vhigh,vhigh,med


In [17]:
# Select the label
y = data['class']
y.head()

0    unacc
1    unacc
2    unacc
3    unacc
4    unacc
Name: class, dtype: object

In [18]:
X = X.values # NumPy array
X

array([['vhigh', 'vhigh', 'low'],
       ['vhigh', 'vhigh', 'med'],
       ['vhigh', 'vhigh', 'high'],
       ...,
       ['low', 'low', 'low'],
       ['low', 'low', 'med'],
       ['low', 'low', 'high']], dtype=object)

In [19]:
""" 
Now we have the problem: our data consists of strings, we need to convert into nums with LabelEncoder
"""
# X conversion
print(X.shape)

for i in range(X.shape[1]): # 3
    print(X[:, i]) # Selects the first element for 3 columns

LE = LabelEncoder()
for i in range(len(X[0])):
    X[:, i] = LE.fit_transform(X[:, i])

X[:5] # vhigh=3, med=2, low=1, high=0

(1728, 3)
['vhigh' 'vhigh' 'vhigh' ... 'low' 'low' 'low']
['vhigh' 'vhigh' 'vhigh' ... 'low' 'low' 'low']
['low' 'med' 'high' ... 'low' 'med' 'high']


array([[3, 3, 1],
       [3, 3, 2],
       [3, 3, 0],
       [3, 3, 1],
       [3, 3, 2]], dtype=object)

In [21]:
# y conversion
label_mapping = {
    'unacc':0,
    'acc':1,
    'good':2,
    'vgood':3,
}

y = y.map(label_mapping)
y = np.array(y)
y

array([0, 0, 0, ..., 0, 2, 3])

In [29]:
# KNN Model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) # 20% to test set

knn = neighbors.KNeighborsClassifier(n_neighbors=23, weights='uniform')
knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=23)

In [30]:
predictions = knn.predict(X_test)
accuracy = metrics.accuracy_score(y_test, predictions)
accuracy

0.7485549132947977

In [31]:
predictions

array([0, 1, 1, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 2, 0, 1, 2, 2, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 2, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 2, 0, 0, 2, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 1,
       1, 2, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 2, 0, 0, 0, 0, 2, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1, 1, 0, 1, 0, 0, 2,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       2, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,

In [None]:
# For KNN regressor you take the average of n_neighbors = 23 nearest neighbours
# For KNN classifier you take the mood of n_neighbors = 23 nearest neighbours