## Load and Normalize the iris data

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Fetch the Iris bunch and pull out the feature matrix and integer class ids
iris = load_iris()
X = iris.data
y = iris.target

# Swap every integer class id for its display name
# ("Arctica", "Harlequin", "Caroliniana")
target_names = {0: "Arctica", 1: "Harlequin", 2: "Caroliniana"}
y = np.array(list(map(target_names.__getitem__, y)))

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Min-Max scaling: the scaler learns its ranges from the training split only
# and is then applied, unchanged, to both splits (no test-set leakage)
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print("Dataset loaded and preprocessed successfully!")


Dataset loaded and preprocessed successfully!


## kNN Implementation

In [2]:
from collections import Counter

# Function to compute Euclidean distance
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : array-like
        Coordinates of the two points. NumPy arrays, lists and tuples are
        accepted; the two inputs must have the same shape or be
        broadcastable against each other.

    Returns
    -------
    NumPy scalar
        Square root of the sum of the squared coordinate differences.
    """
    # Coerce to arrays first so plain Python sequences can be subtracted
    # element-wise (``list - list`` raises TypeError).
    diff = np.asarray(x1) - np.asarray(x2)
    return np.sqrt(np.sum(diff ** 2))

# kNN Classifier implementation
class KNNClassifier:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k=3):
        # Fail fast: a non-positive or non-integer k would otherwise only
        # surface as an obscure IndexError/TypeError inside predict().
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        # Filled in by fit(); None marks the model as not fitted yet.
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """Store the training samples and their labels.

        Parameters
        ----------
        X_train : array-like
            Training samples, one sample per row.
        y_train : array-like
            Label of each training sample, in the same order as ``X_train``.

        Raises
        ------
        ValueError
            If ``X_train`` is empty or its length differs from ``y_train``.
        """
        # Convert to plain arrays so that rows are iterated/indexed by
        # position. A pandas DataFrame iterates over column labels and a
        # Series indexes by label, both of which would silently break predict().
        samples = np.asarray(X_train)
        labels = np.asarray(y_train)

        if samples.ndim == 0 or samples.shape[0] == 0:
            raise ValueError("X_train must contain at least one sample")
        if labels.ndim == 0 or labels.shape[0] != samples.shape[0]:
            raise ValueError(
                "X_train and y_train must contain the same number of samples"
            )

        self.X_train = samples
        self.y_train = labels

    def predict(self, X_test):
        """Predict a label for every sample in ``X_test``.

        Parameters
        ----------
        X_test : array-like
            Samples to classify, one sample per row.

        Returns
        -------
        numpy.ndarray
            One predicted label per test sample. When several labels share
            the highest vote count, the label met first while walking the
            neighbours from nearest to farthest wins.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNNClassifier.predict() called before fit()")

        n_train = self.X_train.shape[0]
        predictions = []
        for test_sample in np.asarray(X_test):
            # Distances to all training points in one vectorised step. The
            # reshape keeps exactly one row per training sample, so 1-D
            # training data (one scalar per sample) works as well.
            squared_diffs = (self.X_train - test_sample) ** 2
            distances = np.sqrt(squared_diffs.reshape(n_train, -1).sum(axis=1))

            # Stable sort: equidistant neighbours are taken in training
            # order, which makes the result deterministic.
            k_indices = np.argsort(distances, kind="stable")[:self.k]

            # Labels of the k nearest neighbours, nearest first
            k_nearest_labels = [self.y_train[i] for i in k_indices]

            # Majority vote for prediction
            most_common = Counter(k_nearest_labels).most_common(1)[0][0]
            predictions.append(most_common)

        return np.array(predictions)


## Training and testing with different k values

In [3]:
from sklearn.metrics import accuracy_score

# Neighbourhood sizes to compare
k_values = [3, 5, 7]

for k in k_values:
    print(f"\n🔹 Evaluating kNN for k = {k}")

    # Fresh classifier for this k; fitting stores the training split on it
    knn = KNNClassifier(k=k)
    knn.fit(X_train, y_train)

    # Classify the held-out samples
    y_pred = knn.predict(X_test)

    # Fraction of test labels predicted correctly
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    print(f"✅ Accuracy for k = {k}: {accuracy:.4f}")



🔹 Evaluating kNN for k = 3
✅ Accuracy for k = 3: 1.0000

🔹 Evaluating kNN for k = 5
✅ Accuracy for k = 5: 1.0000

🔹 Evaluating kNN for k = 7
✅ Accuracy for k = 7: 1.0000


## We get 100% accuracy on the held-out test set with Euclidean distance for k = 3, 5, and 7