In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Step 1: Create the dataset
# Toy data set: 16 purchase records with two nominal features
# ('Company', 'Car Model') and a Yes/No target ('Buy').
data = {
    'Company': ['Toyota', 'Toyota', 'Honda', 'Ford', 'Ford', 'Ford', 'Honda', 'Toyota', 'Toyota', 'Ford', 
                'Ford', 'Toyota', 'Honda', 'Honda', 'Honda', 'Ford'],
    'Car Model': ['SUV', 'Sedan', 'Sedan', 'Truck', 'Hatchback', 'Hatchback', 'Hatchback', 'Truck', 'SUV', 
                  'SUV', 'SUV', 'Truck', 'Sedan', 'Sedan', 'SUV', 'Sedan'],
    'Buy': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']
}

df = pd.DataFrame(data)

# Step 2: Encode the categorical variables
# One encoder per column so that each mapping can be reused on new inputs
# (features) or inverted back to text (target) later on.
# NOTE(review): LabelEncoder turns nominal categories into integer codes, so
# the distances KNN computes between codes imply an ordering between
# companies / car models that has no real meaning. One-hot encoding would
# avoid this, but it would change the model's predictions, so it is flagged
# here rather than changed.
label_encoder_company = LabelEncoder()
label_encoder_car_model = LabelEncoder()
label_encoder_buy = LabelEncoder()
df['Company'] = label_encoder_company.fit_transform(df['Company'])
df['Car Model'] = label_encoder_car_model.fit_transform(df['Car Model'])
df['Buy'] = label_encoder_buy.fit_transform(df['Buy'])

# Step 3: Separate features and target variable
X = df[['Company', 'Car Model']]
y = df['Buy']

# Step 4: Split the data into training and testing sets
# random_state is fixed so the split (and therefore every metric) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train the KNN model
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Step 6: Predict for a new instance (Honda, Sedan)
# The query values are named once so that the encoded input and the printed
# label below cannot drift apart.
query_company = 'Honda'
query_car_model = 'Sedan'
new_data = pd.DataFrame([[label_encoder_company.transform([query_company])[0], 
                          label_encoder_car_model.transform([query_car_model])[0]]], 
                        columns=['Company', 'Car Model'])
predicted_buy = knn.predict(new_data)
# Map the numeric prediction back to its original 'Yes' / 'No' text.
predicted_buy_text = label_encoder_buy.inverse_transform(predicted_buy)[0]

# Step 7: Evaluate the model using confusion matrix
y_pred = knn.predict(X_test)

# Take the class labels and their display names from the fitted target
# encoder instead of hard-coding ['No', 'Yes']: the names then always match
# the encoder's own ordering, and passing `labels` explicitly keeps the
# matrix / report shape stable even if the small test split happens to
# contain only one class.
class_names = label_encoder_buy.classes_.tolist()
class_labels = list(range(len(class_names)))

conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
# zero_division=0 reports 0.0 for a class that is never predicted (the same
# value the default produces) without emitting UndefinedMetricWarning.
class_report = classification_report(
    y_test,
    y_pred,
    labels=class_labels,
    target_names=class_names,
    zero_division=0,
)

# Display the results
print(f"Prediction for Company={query_company} and Car Model={query_car_model}:", predicted_buy_text)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", class_report)

Prediction for Company=Honda and Car Model=Sedan: Yes

Confusion Matrix:
 [[0 3]
 [0 1]]

Classification Report:
               precision    recall  f1-score   support

          No       0.00      0.00      0.00         3
         Yes       0.25      1.00      0.40         1

    accuracy                           0.25         4
   macro avg       0.12      0.50      0.20         4
weighted avg       0.06      0.25      0.10         4



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
