In [17]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score

#### Load the dataset and display the first 10 rows

In [18]:
# Read the car-evaluation CSV into a DataFrame and preview its first ten rows.
df = pd.read_csv(filepath_or_buffer="car_simple.csv")
df.head(n=10)

Unnamed: 0,buying,maint,doors,persons,lugboot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,high,2,2,small,high,acc
3,high,high,3,4,med,low,unacc
4,high,med,3,4,med,med,acc
5,high,low,3,4,big,high,good
6,med,med,4,4,small,low,unacc
7,med,low,4,4,small,med,acc
8,low,low,4,4,small,high,vgood
9,low,low,5,more,med,high,vgood


#### Encode the categorical features using LabelEncoder

In [19]:
# One independent LabelEncoder per column, keyed by column name, so each
# column's integer codes can be mapped back to the original labels later.
label_encoders = {column: LabelEncoder() for column in df.columns}

# Fit every encoder on its own column and overwrite that column with the codes.
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

df.head(1)

Unnamed: 0,buying,maint,doors,persons,lugboot,safety,class
0,3,3,0,0,2,1,2


#### Split the data into training and testing sets

In [20]:
# Select features and target by column name rather than by position, so the
# split stays correct even if the CSV carries an extra leading index column
# (e.g. "Unnamed: 0"), which would otherwise shift every positional slice.
FEATURE_COLUMNS = ["buying", "maint", "doors", "persons", "lugboot", "safety"]
TARGET_COLUMN = "class"

X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Train the Naive Bayes (CategoricalNB) model & calculate accuracy

In [21]:
# Every column was label-encoded on the full dataset, so its codes run from 0
# up to the column maximum; the category count per feature is that maximum + 1.
n_categories = (pd.concat([X_train, X_test]).max() + 1).to_numpy()

# Without min_categories, CategoricalNB infers the number of categories from
# the training split alone, and predict() can raise an IndexError when the test
# split contains a code higher than any seen during fit.
nb = CategoricalNB(min_categories=n_categories)
nb.fit(X_train, y_train)

# Accuracy on the held-out split.
y_pred_nb = nb.predict(X_test)
nb_accuracy = accuracy_score(y_test, y_pred_nb)

#### Train the KNN model (k = 3) & calculate accuracy

In [22]:
# Fit a 3-nearest-neighbours classifier on the training split
# (fit returns the estimator itself, so the call can be chained).
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Accuracy on the held-out split.
y_pred_knn = knn.predict(X_test)
knn_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_knn)

#### Compare Accuracies

In [23]:
# Report both held-out accuracies, one model per line.
for label, score in (
    ("KNN Accuracy:", knn_accuracy),
    ("Naive Bayes Accuracy:", nb_accuracy),
):
    print(label, score)

KNN Accuracy: 0.4
Naive Bayes Accuracy: 0.8


#### 1. Which model achieved higher accuracy?

*Naive Bayes*

#### 2. Why do you think that model performed better?

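*Naive Bayes (CategoricalNB) treats each feature value as a separate category, so the integer codes are used only as labels and their numeric size does not matter.*

*KNN, by contrast, relies on numerical distances between samples. After LabelEncoder encoding, the integer codes no longer represent real distances between categories, which may reduce its accuracy.*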