In [1]:
import pandas as pd
import numpy  as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import preprocessing
from matplotlib import pyplot as pt

In [3]:
# Dataset source: https://archive.ics.uci.edu/ml/datasets/car+evaluation
# Goal of this notebook: classify the acceptability of a car from its attributes.
car_df = pd.read_csv(filepath_or_buffer="car.data")
car_df.head(n=5)  # preview the first 5 rows loaded from car.data

Unnamed: 0,buying,maint,door,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [4]:
# One LabelEncoder instance is refit on every column in turn, so after this
# cell it only holds the mapping learned from the last column fitted ("class").
preprocessed_data = preprocessing.LabelEncoder()

# fit_transform() fits the encoder on a column and returns its values as
# integer codes. The generator is consumed left to right, so the columns are
# encoded in exactly the order they are listed.
(
    buying,
    maintainance,
    number_of_doors,
    number_of_people,
    lug_boot_size,
    safety_measure,
    cls,
) = (
    preprocessed_data.fit_transform(list(car_df[column_name]))
    for column_name in ("buying", "maint", "door", "persons", "lug_boot", "safety", "class")
)

# Show each encoded column on its own line.
print(
    buying,
    maintainance,
    number_of_doors,
    number_of_people,
    lug_boot_size,
    safety_measure,
    cls,
    sep="\n",
)

[3 3 3 ... 1 1 1]
[3 3 3 ... 1 1 1]
[0 0 0 ... 3 3 3]
[0 0 0 ... 2 2 2]
[2 2 2 ... 0 0 0]
[1 2 0 ... 1 2 0]
[2 2 2 ... 2 1 3]


In [5]:
# Features: one tuple of encoded attribute values per car.
X = [
    *zip(
        buying,
        maintainance,
        number_of_doors,
        number_of_people,
        lug_boot_size,
        safety_measure,
    )
]
# Labels: the encoded acceptability class of each car.
Y = [*cls]

In [14]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=20,
)

In [15]:
# k-nearest-neighbours classifier that votes among the 3 closest training samples.
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X=x_train, y=y_train)

In [16]:
# Encoded class predicted for every held-out sample.
predicted = knn_model.predict(X=x_test)

In [17]:
# Mean accuracy on the held-out set, expressed as a percentage with 2 decimals.
accuracy = round(knn_model.score(x_test, y_test) * 100, 2)
print(f"Accuracy of the model is: {accuracy}%")

Accuracy of the model is: 86.9%


In [18]:

# LabelEncoder numbers the classes in sorted (alphabetical) order of their
# names, so the lookup list must follow that order:
#   0 = acc, 1 = good, 2 = unacc, 3 = vgood
# (The first rows of the dataset are "unacc" and are encoded as 2, which the
# previous ordering ["unacc", "acc", "good", "vgood"] decoded as "good".)
targets = ["acc", "good", "unacc", "vgood"]

# print out test data, prediction, and actual value as encoded numeric values
for predicted_code, features, actual_code in zip(predicted, x_test, y_test):
  print("Predicted: ", predicted_code, "Data: ", features, "Actual: ", actual_code)

# print out test data, prediction, and actual value as decoded class names
for predicted_code, features, actual_code in zip(predicted, x_test, y_test):
  print("Predicted: ", targets[predicted_code], "Data: ", features, "Actual: ", targets[actual_code])

Predicted:  2 Data:  (3, 2, 2, 1, 2, 2) Actual:  2
Predicted:  2 Data:  (3, 2, 3, 1, 2, 2) Actual:  2
Predicted:  0 Data:  (1, 0, 0, 1, 0, 0) Actual:  3
Predicted:  3 Data:  (1, 1, 2, 2, 0, 0) Actual:  3
Predicted:  2 Data:  (3, 0, 0, 2, 2, 0) Actual:  2
Predicted:  0 Data:  (3, 2, 1, 1, 2, 0) Actual:  0
Predicted:  2 Data:  (1, 2, 2, 0, 2, 1) Actual:  2
Predicted:  0 Data:  (1, 0, 3, 1, 2, 0) Actual:  0
Predicted:  2 Data:  (3, 1, 3, 0, 2, 0) Actual:  2
Predicted:  1 Data:  (1, 1, 1, 2, 2, 0) Actual:  1
Predicted:  2 Data:  (0, 3, 2, 2, 1, 0) Actual:  2
Predicted:  2 Data:  (0, 3, 3, 0, 2, 2) Actual:  2
Predicted:  2 Data:  (2, 3, 1, 2, 2, 2) Actual:  2
Predicted:  0 Data:  (0, 1, 3, 1, 1, 0) Actual:  0
Predicted:  1 Data:  (1, 1, 2, 2, 0, 2) Actual:  1
Predicted:  0 Data:  (2, 2, 1, 2, 1, 2) Actual:  0
Predicted:  2 Data:  (3, 0, 3, 1, 0, 1) Actual:  2
Predicted:  0 Data:  (2, 3, 1, 2, 0, 1) Actual:  2
Predicted:  2 Data:  (0, 2, 1, 0, 2, 1) Actual:  2
Predicted:  2 Data:  (1, 3, 2, 