In [None]:
import numpy as np
from collections import Counter

# Toy "play outside?" dataset: every record is [outlook, temperature, play].
data = [
    ['Sunny', 'Hot', 'No'],
    ['Sunny', 'Hot', 'No'],
    ['Overcast', 'Hot', 'Yes'],
    ['Rainy', 'Mild', 'Yes'],
    ['Rainy', 'Cool', 'Yes'],
    ['Rainy', 'Cool', 'No'],
    ['Overcast', 'Cool', 'Yes'],
    ['Sunny', 'Mild', 'No'],
    ['Sunny', 'Cool', 'Yes'],
    ['Rainy', 'Mild', 'Yes'],
    ['Sunny', 'Mild', 'Yes'],
    ['Overcast', 'Mild', 'Yes'],
    ['Overcast', 'Hot', 'Yes'],
]

# Integer codes for each categorical value; a value's position in the tuple
# is its code (e.g. 'Sunny' -> 0, 'Overcast' -> 1, 'Rainy' -> 2).
weather_map = {name: code for code, name in enumerate(('Sunny', 'Overcast', 'Rainy'))}
temperature_map = {name: code for code, name in enumerate(('Hot', 'Mild', 'Cool'))}
play_map = {name: code for code, name in enumerate(('No', 'Yes'))}

# Encode the records: X holds one [outlook, temperature] code pair per row,
# y holds the matching play code.
X = np.array([[weather_map[row[0]], temperature_map[row[1]]] for row in data])
y = np.array([play_map[row[2]] for row in data])

# k-nearest-neighbours (KNN) helper functions
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    a, b : array-like
        Coordinates of the two points. Plain lists/tuples are accepted as
        well as NumPy arrays; the shapes must be broadcast-compatible.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    # Coerce to float arrays first: list operands do not support `-`, and
    # unsigned-integer arrays would wrap around on a negative difference.
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    return np.sqrt(np.sum((a - b) ** 2))

def knn_predict(X_train, y_train, X_test, k=3):
    """Classify one query point by majority vote among its k nearest neighbours.

    Parameters
    ----------
    X_train : array-like
        Training points, one sample per row.
    y_train : sequence
        Class label of each training sample; must have one entry per row of
        ``X_train``. Labels must be hashable.
    X_test : array-like
        The single query point to classify.
    k : int, default 3
        Number of neighbours that vote. A value larger than the number of
        training samples simply lets every sample vote.

    Returns
    -------
    The label that occurs most often among the k nearest training samples.
    Samples at equal distance keep their training order, and a tied vote goes
    to the label whose first supporter is nearest.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, the training set is empty, or
        ``X_train`` and ``y_train`` differ in length.
    """
    # Reject k <= 0 explicitly: slicing with 0 yields no voters and slicing
    # with a negative k would silently drop neighbours from the far end.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    train_points = np.asarray(X_train, dtype=float)
    if train_points.ndim == 0 or train_points.shape[0] == 0:
        raise ValueError("X_train must contain at least one sample")

    n_samples = train_points.shape[0]
    if len(y_train) != n_samples:
        raise ValueError(
            f"X_train has {n_samples} samples but y_train has {len(y_train)} labels"
        )

    # One flattened row per sample, so all Euclidean distances to the query
    # are computed in a single vectorised expression.
    flat_train = train_points.reshape(n_samples, -1)
    query = np.asarray(X_test, dtype=float).reshape(-1)
    distances = np.sqrt(np.sum((flat_train - query) ** 2, axis=1))

    # A stable sort keeps equally distant samples in training order.
    nearest = np.argsort(distances, kind='stable')[:k]

    # Counter lists equal counts in first-seen order, i.e. nearest first.
    votes = Counter(y_train[i] for i in nearest)
    return votes.most_common(1)[0][0]

# Query point, written with the same integer codes as the training data
test_instance = np.array([1, 1])  # Overcast, Mild
k = 3  # Number of neighbors

# Classify the query, then translate the numeric class back to its Yes/No label
prediction = knn_predict(X, y, test_instance, k)
if prediction == 1:
    predicted_play = 'Yes'
else:
    predicted_play = 'No'

print(f'Prediction for weather Overcast and temperature Mild: {predicted_play}')

Prediction for weather Overcast and temperature Mild: Yes


In [None]:
import math

# Query point and labelled training samples, all given as (x, y) coordinates
query_instance = (3, 7)
training_samples = [(7, 7), (7, 4), (3, 4), (1, 4)]
labels = ["Bad", "Bad", "Good", "Good"]
n_neighbors = 3  # how many nearest samples take part in the vote

# zip() would silently drop unmatched entries, so check the pairing up front
assert len(training_samples) == len(labels), "each training sample needs exactly one label"

# Pair each sample's Euclidean distance to the query with the sample's label
distances = [
    (math.sqrt((sx - query_instance[0]) ** 2 + (sy - query_instance[1]) ** 2), label)
    for (sx, sy), label in zip(training_samples, labels)
]

# Sort by distance (stable, so equal distances keep input order) and keep the closest ones
distances.sort(key=lambda pair: pair[0])
nearest_neighbors = distances[:n_neighbors]

# Majority class among the nearest neighbours; on a tied vote max() returns
# the label that appears first, i.e. the one belonging to the nearest sample
neighbor_labels = [label for _, label in nearest_neighbors]
predicted_label = max(neighbor_labels, key=neighbor_labels.count)

# Show the (distance, label) pairs that took part in the vote
nearest_neighbors


[(3.0, 'Good'), (3.605551275463989, 'Good'), (4.0, 'Bad')]