# 🌸 K-Nearest Neighbors (KNN) Classification on the Iris Dataset

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from matplotlib.colors import ListedColormap


In [None]:

# Step 1: Read the CSV and drop the 'Id' column so it is not used as a feature
df = pd.read_csv("Iris.csv").drop(columns='Id')
print(df.head())

# Step 2: 'Species' is the label; every remaining column is a predictor
y = df['Species']
X = df.drop(columns='Species')

# Step 3: Hold out 30% of the rows for testing (seeded so the split repeats)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 4: Standardize the features. The scaler is fitted on the training
# split only, and that same fitted transform is applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:

# Step 5: Fit a 3-nearest-neighbors classifier on the scaled training data
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)

# Step 6: Predict the held-out rows, compute each metric, then report them
y_pred = knn.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("\n✅ Accuracy:", accuracy)
print("\n🧩 Confusion Matrix:\n", conf_matrix)
print("\n📊 Classification Report:\n", report)


In [None]:

# ✅ Experiment with Different K Values
# For each k in 1..20, fit a fresh classifier on the scaled training data and
# record its accuracy on the scaled test data.
# NOTE(review): every k is scored on the same held-out test split; if this
# curve is used to choose k, consider cross-validating on the training data.
k_values = range(1, 21)
accuracy_scores = [
    accuracy_score(
        y_test,
        KNeighborsClassifier(n_neighbors=k)
        .fit(X_train_scaled, y_train)
        .predict(X_test_scaled),
    )
    for k in k_values
]

# Plot Accuracy vs. K
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(k_values, accuracy_scores, marker='o', color='darkorange')
ax.set_title('Accuracy vs. K Value')
ax.set_xlabel('K')
ax.set_ylabel('Accuracy')
ax.grid(True)
plt.show()


In [None]:

# ✅ Visualize Decision Boundaries (Using 2 Features)
# Only the two petal columns are kept so the classifier's regions can be
# drawn on a flat plot.
X_vis = df[['PetalLengthCm', 'PetalWidthCm']].values
y_vis = df['Species'].values

# Encode the species labels as integers so they can serve as numeric contour
# values and scatter colors when plotting.
le = LabelEncoder()
y_vis_encoded = le.fit_transform(y_vis)

X_train_vis, X_test_vis, y_train_vis, y_test_vis = train_test_split(
    X_vis, y_vis_encoded, test_size=0.3, random_state=42
)

# Use a dedicated scaler here. Refitting the shared `scaler` on these two
# columns would overwrite the statistics it learned from all feature columns,
# so re-running an earlier cell that calls scaler.transform(X_test) would
# then fail on a feature-count mismatch.
scaler_vis = StandardScaler()
X_train_vis = scaler_vis.fit_transform(X_train_vis)
X_test_vis = scaler_vis.transform(X_test_vis)

# Separate 5-neighbor model trained on the two scaled petal features only
knn_vis = KNeighborsClassifier(n_neighbors=5)
knn_vis.fit(X_train_vis, y_train_vis)


In [None]:

# Function to plot decision boundaries
def plot_decision_boundaries(X, y, model, title, *, h=0.02,
                             xlabel="Petal Length (scaled)",
                             ylabel="Petal Width (scaled)"):
    """Plot a fitted classifier's decision regions over two features.

    Calls ``model.predict`` on a regular grid spanning the range of ``X``
    (padded by 1 unit on each side), fills the predicted regions, overlays
    the samples in ``X`` colored by ``y``, and shows the figure.

    Args:
        X: 2-D array indexed as ``X[:, 0]`` (horizontal axis) and
            ``X[:, 1]`` (vertical axis). The grid is built in these same
            units and passed to ``model.predict``, so ``X`` must be in the
            feature space the model expects.
        y: Numeric label per row of ``X``; used as the scatter colors.
        model: Object with a ``predict`` method accepting two-column input.
        title: Figure title.
        h: Grid step along both axes; smaller values give smoother
            boundaries but more grid points to predict. Must be positive.
        xlabel: Label for the horizontal axis.
        ylabel: Label for the vertical axis.

    Raises:
        ValueError: If ``h`` is not positive.
    """
    if h <= 0:
        raise ValueError(f"h must be positive, got {h!r}")

    # Pad the data range by one unit so points are not drawn on the border
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    # Predict every grid node in a single call, then restore the grid shape
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Three-color palettes: light shades for the background regions and
    # saturated shades for the sample points
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, cmap=cmap_light)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=50)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.show()

# Draw the K=5 model's regions with the scaled test points overlaid
plot_decision_boundaries(
    X=X_test_vis,
    y=y_test_vis,
    model=knn_vis,
    title="KNN Decision Boundary (K=5)",
)
