In [2]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn import neighbors, metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [26]:
# Reading dataset
data = pd.read_csv('car.data')

x = data[[
    'buying',
    'maint',
    'safety'
]].values

y = data['class']

print(data.head(5))

  buying  maint doors person lug_boot safety  class
0  vhigh  vhigh     2      2    small    low  unacc
1  vhigh  vhigh     2      2    small    med  unacc
2  vhigh  vhigh     2      2    small   high  unacc
3  vhigh  vhigh     2      2      med    low  unacc
4  vhigh  vhigh     2      2      med    med  unacc


In [27]:
print(x)
print()
print(y)
print()
print(x.shape)
print(y.shape)

[['vhigh' 'vhigh' 'low']
 ['vhigh' 'vhigh' 'med']
 ['vhigh' 'vhigh' 'high']
 ...
 ['low' 'low' 'low']
 ['low' 'low' 'med']
 ['low' 'low' 'high']]

0       unacc
1       unacc
2       unacc
3       unacc
4       unacc
        ...  
1723     good
1724    vgood
1725    unacc
1726     good
1727    vgood
Name: class, Length: 1728, dtype: object

(1728, 3)
(1728,)


In [28]:
# Converting x (strings) to numbers.
le = LabelEncoder()
for i in range(len(x[0])):
    x[:, i] = le.fit_transform(x[:, i])

print("LabelEncoder: ", x, sep='\n')
print()


LabelEncoder: 
[[3 3 1]
 [3 3 2]
 [3 3 0]
 ...
 [1 1 1]
 [1 1 2]
 [1 1 0]]



In [29]:
# Converting y (labels) to numbers.
label_mapping = {
    'unacc': 0,
    'acc': 1,
    'good': 2,
    'vgood': 3
}

y = y.map(label_mapping)

print(y)

0       0
1       0
2       0
3       0
4       0
       ..
1723    2
1724    3
1725    0
1726    2
1727    3
Name: class, Length: 1728, dtype: int64


In [30]:
# Creating test and train variables.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(1382, 3)
(346, 3)
(1382,)
(346,)


In [24]:
# Creating a model.
model = neighbors.KNeighborsClassifier(n_neighbors=30, weights='uniform')

print(model)

KNeighborsClassifier(n_neighbors=30)


In [31]:
# Training the model.
model.fit(x_train, y_train)

In [32]:
# Predictions.
prediction = model.predict(x_test)

print("y_test values: ", y_test.values, sep='\n')
print()
print("Predictions: ", prediction, sep='\n')

y_test values: 
[0 2 0 1 0 0 1 1 0 0 0 0 0 1 3 1 0 3 3 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0
 0 1 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 2 0 1 0 0 2 2 0 0 1 0 3 0 0 0
 1 0 0 0 0 0 0 0 1 1 1 2 0 0 1 1 0 0 0 0 0 0 0 0 0 3 0 1 0 0 1 1 0 0 1 0 1
 2 2 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 2 0 3 0 0 0
 1 1 1 0 0 1 1 0 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 3 0 1 0 0 0 1 1 0 2 0 0 0 1
 0 0 0 0 3 0 0 0 0 0 0 2 0 0 0 1 0 0 0 0 0 3 0 0 0 0 2 0 1 0 1 0 0 0 0 1 0
 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 3 1 0 0 0 0 0 0 0 0 1 0 3 1 1 0 0 0 3 0
 0 0 0 1 1 3 3 0 0 2 0 1 1 0 0 0 1 0 1 0 0 0 1 0 3 1 0 0 0 0 0 0 1 0 0 0 1
 1 1 2 0 0 2 0 0 0 0 0 0 3 3 0 0 0 0 0 0 0 1 1 1 0 1 0 1 0 1 0 0 3 0 0 0 0
 0 1 0 0 1 0 1 1 0 2 0 0 0]

Predictions: 
[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 0 1
 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 

In [33]:
# Seeing the accuracy.
acc = metrics.accuracy_score(y_test, prediction)

print(acc)

0.7398843930635838
