In [3]:
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Dataset
weather = ['Sunny', 'Overcast', 'Rainy', 'Sunny', 'Rainy', 'Overcast', 'Sunny', 'Overcast', 'Rainy', 'Sunny']
temperature = ['Hot', 'Hot', 'Mild', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Mild', 'Mild']
play = ['No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No']  # Target variable

# Encoding categorical data consistently
weather_le = preprocessing.LabelEncoder()
temperature_le = preprocessing.LabelEncoder()
play_le = preprocessing.LabelEncoder()

# Fit and transform the data
weather_encoded = weather_le.fit_transform(weather)
temperature_encoded = temperature_le.fit_transform(temperature)
play_encoded = play_le.fit_transform(play)

# Combine encoded features
features = list(zip(weather_encoded, temperature_encoded))

# Split data into training and test sets
features_train, features_test, label_train, label_test = train_test_split(
    features, play_encoded, test_size=0.2, random_state=42
)

# K-Nearest Neighbors model
model = KNeighborsClassifier(n_neighbors=3, metric='euclidean')
model.fit(features_train, label_train)

# Prediction for "Overcast" and "Mild"
# Using the same encoder for prediction
test_data = [[weather_le.transform(["Overcast"])[0], temperature_le.transform(["Mild"])[0]]]
predicted = model.predict(test_data)
print("Prediction for Overcast and Mild:", "Yes" if predicted[0] == 1 else "No")

# Predictions on the test set and evaluation
predicted_test = model.predict(features_test)
print("Test Set Prediction:", predicted_test)

# Confusion Matrix and Accuracy
conf_mat = confusion_matrix(label_test, predicted_test)
print("Confusion Matrix:\n", conf_mat)

accuracy = accuracy_score(label_test, predicted_test)
print("Accuracy:", accuracy)


Prediction for Overcast and Mild: Yes
Test Set Prediction: [1 1]
Confusion Matrix:
 [[2]]
Accuracy: 1.0


