In [5]:
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Toy "play outside?" dataset: one entry per observed day, aligned by index.
weather = ['sunny', 'sunny', 'overcast', 'rainy', 'rainy', 'overcast', 'sunny']
feature1 = ['hot', 'hot', 'hot', 'mild', 'cool', 'cool', 'mild']
feature2 = ['high', 'high', 'high', 'normal', 'normal', 'high', 'normal']
play = ['no', 'no', 'yes', 'yes', 'yes', 'yes', 'no']

# One encoder per column so each keeps its own string <-> integer mapping
# (le_play is needed later to turn predictions back into 'yes'/'no').
le_weather = preprocessing.LabelEncoder()
le_feature1 = preprocessing.LabelEncoder()
le_feature2 = preprocessing.LabelEncoder()
le_play = preprocessing.LabelEncoder()

weather_encoded = le_weather.fit_transform(weather)
feature1_encoded = le_feature1.fit_transform(feature1)
feature2_encoded = le_feature2.fit_transform(feature2)
play_encoded = le_play.fit_transform(play)

# NOTE(review): weather_encoded is computed above but is NOT part of the
# feature matrix; the model only sees feature1 and feature2. Confirm this
# omission is intentional before relying on the reported scores.
features = list(zip(feature1_encoded, feature2_encoded))

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
features_train, features_test, label_train, label_test = train_test_split(
    features, play_encoded, test_size=0.3, random_state=42
)

model = KNeighborsClassifier(n_neighbors=3, metric='euclidean')
model.fit(features_train, label_train)

predicted = model.predict(features_test)

# Map the integer predictions back to the original 'yes'/'no' strings.
predicted_labels = le_play.inverse_transform(predicted)

print("Prediction:", predicted_labels)

# Pin the label set explicitly. Without `labels`, confusion_matrix only uses
# the classes that occur in label_test or predicted, so a tiny test split
# containing a single class would yield a 1x1 matrix instead of one row and
# column per class. Rows/columns follow the order of le_play.classes_.
class_ids = le_play.transform(le_play.classes_)
conf_mat = confusion_matrix(label_test, predicted, labels=class_ids)

print("Confusion Matrix:")
print(conf_mat)

accuracy = accuracy_score(label_test, predicted)
print("Accuracy:", accuracy)

Prediction: ['yes' 'yes' 'yes']
Confusion Matrix:
[[0 2]
 [0 1]]
Accuracy: 0.3333333333333333


In [6]:
import numpy as np
from collections import Counter

# Labelled reference points, one tuple per sample: (X1, X2, class label).
training_samples = [
    (7, 7, 'Bad'), (7, 4, 'Bad'),
    (3, 4, 'Good'), (1, 4, 'Good'),
]

# Unlabelled point (X1, X2) whose class is to be predicted.
query_instance = (3, 7)

def euclidean_distance(p, q):
    """Return the Euclidean (straight-line) distance between two points.

    Works for points of any dimension, not only 2-D.

    Args:
        p: Sequence of numeric coordinates.
        q: Sequence of numeric coordinates with the same length as ``p``.

    Returns:
        The square root (via ``np.sqrt``) of the sum of squared
        coordinate differences between ``p`` and ``q``.

    Raises:
        ValueError: If ``p`` and ``q`` do not have the same number of
            coordinates.
    """
    if len(p) != len(q):
        # zip() would silently drop the extra coordinates; fail loudly instead.
        raise ValueError(
            f"points must have the same number of coordinates, got {len(p)} and {len(q)}"
        )
    return np.sqrt(sum((b - a) ** 2 for a, b in zip(p, q)))

# Build (distance-to-query, class label) pairs and order them nearest first.
distances = sorted(
    (
        (euclidean_distance(query_instance, sample[:2]), sample[2])
        for sample in training_samples
    ),
    key=lambda pair: pair[0],
)

# Keep only the K closest samples.
K = 3
nearest_neighbors = distances[:K]

# Labels of those neighbours, nearest first.
classes = [label for _, label in nearest_neighbors]

# The most frequent label among the neighbours is the prediction.
predicted_class, _ = Counter(classes).most_common(1)[0]

# Report every distance, then the neighbours used and the final prediction.
print("Distances and Classifications:")
for distance, classification in distances:
    print(f"Distance: {distance:.2f}, Classification: {classification}")

print(f"\nNearest Neighbors (K={K}): {nearest_neighbors}")
print(f"Predicted Classification for the query instance {query_instance}: {predicted_class}")

Distances and Classifications:
Distance: 3.00, Classification: Good
Distance: 3.61, Classification: Good
Distance: 4.00, Classification: Bad
Distance: 5.00, Classification: Bad

Nearest Neighbors (K=3): [(3.0, 'Good'), (3.605551275463989, 'Good'), (4.0, 'Bad')]
Predicted Classification for the query instance (3, 7): Good
