In [1]:
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Dataset: fruit classification toy example (10 samples, two categorical features).
color = ['Red', 'Red', 'Orange', 'Red', 'Orange', 'Orange', 'Red', 'Red', 'Orange', 'Red']
size = ['Small', 'Medium', 'Medium', 'Large', 'Small', 'Medium', 'Large', 'Small', 'Medium', 'Large']
fruit = ['Apple', 'Apple', 'Orange', 'Apple', 'Orange', 'Orange', 'Apple', 'Apple', 'Orange', 'Apple']  # Target variable

# Tunable settings, gathered in one place instead of being inlined as magic numbers.
TEST_SIZE = 0.2     # fraction of samples held out for evaluation
RANDOM_STATE = 42   # fixed seed so the train/test split is reproducible
N_NEIGHBORS = 3     # number of neighbours that vote in the KNN classifier

# One encoder per column so each keeps its own string <-> integer mapping;
# the same fitted encoders are reused below to encode the ad-hoc query and to
# decode predictions back to fruit names.
color_le = preprocessing.LabelEncoder()
size_le = preprocessing.LabelEncoder()
fruit_le = preprocessing.LabelEncoder()

# Fit each encoder on its column and convert the strings to integer codes.
color_encoded = color_le.fit_transform(color)
size_encoded = size_le.fit_transform(size)
fruit_encoded = fruit_le.fit_transform(fruit)

# Combine encoded features into (color_code, size_code) pairs, one per sample.
features = list(zip(color_encoded, size_encoded))

# Split data into training and test sets.
features_train, features_test, label_train, label_test = train_test_split(
    features, fruit_encoded, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# K-Nearest Neighbors model trained on the encoded training features.
model = KNeighborsClassifier(n_neighbors=N_NEIGHBORS, metric='euclidean')
model.fit(features_train, label_train)

# Prediction for "Red" and "Medium": encode the query with the fitted encoders
# so it uses the same integer codes the model was trained on.
test_data = [[color_le.transform(["Red"])[0], size_le.transform(["Medium"])[0]]]
predicted = model.predict(test_data)
print("Prediction for Red and Medium:", fruit_le.inverse_transform(predicted)[0])

# Predictions on the held-out test set.
predicted_test = model.predict(features_test)
print("Test Set Prediction:", fruit_le.inverse_transform(predicted_test))

# Confusion matrix and accuracy.
# Pin `labels` to every encoded class so the matrix always has one row/column
# per fruit, in fruit_le.classes_ order. Without it, a tiny test set that
# happens to contain a single class would silently yield a smaller matrix.
class_ids = fruit_le.transform(fruit_le.classes_)
conf_mat = confusion_matrix(label_test, predicted_test, labels=class_ids)
print("Confusion Matrix:\n", conf_mat)

accuracy = accuracy_score(label_test, predicted_test)
print("Accuracy:", accuracy)


Prediction for Red and Medium: Apple
Test Set Prediction: ['Orange' 'Apple']
Confusion Matrix:
 [[1 0]
 [0 1]]
Accuracy: 1.0
