In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Step 1: Create the dataset
data = {
    'Weather': ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy'],
    'Temperature': ['Hot', 'Hot', 'Mild', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild'],
    'Play': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes']
}

df = pd.DataFrame(data)

# Step 2: One-hot encode categorical features
X = df[['Weather', 'Temperature']]
y = df['Play']

# Updated OneHotEncoder usage
encoder = OneHotEncoder(sparse_output=False)  # Use sparse_output instead of sparse
X_encoded = encoder.fit_transform(X)

# Step 3: Train the KNN model
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_encoded, y)

# Step 4: Predict for new instance (Weather = Overcast, Temperature = Mild)
new_instance = pd.DataFrame({
    'Weather_Rainy': [0],
    'Weather_Sunny': [0],
    'Weather_Overcast': [1],
    'Temperature_Hot': [0],
    'Temperature_Mild': [1],
    'Temperature_Cool': [0]
})

# Step 5: Make prediction
prediction = knn.predict(new_instance)
print(f"Prediction for Overcast and Mild: {prediction[0]}")

# Step 6: Evaluate the model
y_pred = knn.predict(X_encoded)
cm = confusion_matrix(y, y_pred)
print("Confusion Matrix:")
print(cm)

print("Classification Report:")
print(classification_report(y, y_pred))


Prediction for Overcast and Mild: No
Confusion Matrix:
[[3 1]
 [1 5]]
Classification Report:
              precision    recall  f1-score   support

          No       0.75      0.75      0.75         4
         Yes       0.83      0.83      0.83         6

    accuracy                           0.80        10
   macro avg       0.79      0.79      0.79        10
weighted avg       0.80      0.80      0.80        10





In [5]:
import numpy as np
import pandas as pd

# Training data
training_data = {
    'X1': [4, 5, 7, 8],
    'X2': [5, 6, 8, 7],
    'Y': ['A', 'A', 'B', 'B']
}

df_train = pd.DataFrame(training_data)
query_instance = np.array([3, 7])

# Calculate distances
distances = []
for index, row in df_train.iterrows():
    distance = np.sqrt((row['X1'] - query_instance[0])**2 + (row['X2'] - query_instance[1])**2)
    distances.append((distance, row['Y']))

# Sort by distance and take the nearest 3
distances.sort(key=lambda x: x[0])
nearest_neighbors = distances[:3]

# Gather classes of the nearest neighbors
classes = [neighbor[1] for neighbor in nearest_neighbors]
prediction = max(set(classes), key=classes.count)

print(f"Distances: {distances}")
print(f"Nearest Neighbors: {nearest_neighbors}")
print(f"Predicted Classification: {prediction}")


Distances: [(2.23606797749979, 'A'), (2.23606797749979, 'A'), (4.123105625617661, 'B'), (5.0, 'B')]
Nearest Neighbors: [(2.23606797749979, 'A'), (2.23606797749979, 'A'), (4.123105625617661, 'B')]
Predicted Classification: A
