# Step-by-Step KNN Implementation

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
from collections import Counter


In [2]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between *x1* and *x2*.

    The inputs are subtracted element-wise, so they must support
    ``x1 - x2`` (e.g. NumPy arrays of matching shape).
    """
    delta = x1 - x2
    squared_total = np.sum(delta * delta)
    return np.sqrt(squared_total)

def manhattan_distance(x1, x2):
    """Return the Manhattan (L1) distance between *x1* and *x2*.

    This is the sum of the absolute element-wise differences.
    """
    absolute_gaps = np.absolute(x1 - x2)
    return np.sum(absolute_gaps)

def cosine_distance(x1, x2):
    """Return the cosine distance ``1 - cos(angle)`` between *x1* and *x2*.

    The result is 0 for vectors pointing the same way, 1 for orthogonal
    vectors and 2 for opposite vectors.

    If either vector has zero norm the angle is undefined; in that case
    1.0 is returned (the vectors are treated as having no similarity)
    instead of dividing by zero and producing NaN.
    """
    norm_product = np.linalg.norm(x1) * np.linalg.norm(x2)
    if norm_product == 0:
        # Zero-length vector: avoid 0/0 -> NaN, which would poison any
        # sort or comparison done on the resulting distances.
        return 1.0
    return 1 - np.dot(x1, x2) / norm_product


# KNN Classifier From Scratch

In [3]:
class KNN:
    """Brute-force k-nearest-neighbours classifier.

    Parameters
    ----------
    k : int
        Number of nearest training samples that vote on each prediction.
        Must be at least 1.
    distance_func : str or callable
        Either a key of the module-level ``distance_func_map`` or a
        callable ``f(a, b)`` returning the distance between two samples.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    KeyError
        If ``distance_func`` is a name missing from ``distance_func_map``.
    """

    def __init__(self, k=3, distance_func='euclidean'):
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k
        if callable(distance_func):
            self.distance_func = distance_func
        else:
            # Resolved at construction time, so the map only has to exist
            # by the time an instance is created.
            self.distance_func = distance_func_map[distance_func]

    def fit(self, X, y):
        """Store the training samples *X* and their labels *y*.

        Both are converted to NumPy arrays so that later positional
        indexing and row-wise iteration behave the same for lists,
        arrays and pandas objects.
        """
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)
        if len(self.X_train) != len(self.y_train):
            raise ValueError(
                "X and y must contain the same number of samples: "
                f"{len(self.X_train)} != {len(self.y_train)}"
            )

    def predict(self, X):
        """Return a list with one predicted label per row of *X*."""
        return [self._predict(x) for x in np.asarray(X)]

    def _predict(self, x):
        """Return the majority label among the k training samples nearest to *x*."""
        distances = [self.distance_func(x, x_train) for x_train in self.X_train]
        # A stable sort keeps equidistant neighbours in training order, so
        # the selected neighbours are deterministic. If k exceeds the number
        # of training samples the slice simply keeps all of them.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_labels = [self.y_train[i] for i in k_indices]
        # Majority vote; on a tied count Counter returns the label that was
        # encountered first, i.e. the one belonging to the nearer neighbour.
        most_common = Counter(k_labels).most_common(1)
        return most_common[0][0]


In [4]:
# Lookup table from a metric name to the function implementing it.
distance_func_map = dict(
    euclidean=euclidean_distance,
    manhattan=manhattan_distance,
    cosine=cosine_distance,
)


In [5]:
# Load the dataset once and expose its feature matrix and target vector
# under the module-level names X and y.
data = load_iris()
X = data.data
y = data.target


In [6]:
def evaluate_model(X, y, k_list=None, distance_types=None, n_splits=5):
    """Cross-validate the KNN classifier and print per-setting reports.

    For every distance metric and every value of k, the model is trained
    and evaluated with shuffled K-fold cross-validation. The predictions
    of all folds are pooled, and a confusion matrix and classification
    report are printed for the pooled result. Nothing is returned.

    Parameters
    ----------
    X : array-like
        Feature matrix, one sample per row.
    y : array-like
        Labels, one per sample.
    k_list : iterable of int, optional
        Values of k to evaluate. Defaults to ``[1, 3, 5, 7, 10]``.
        ``len(X) - 1`` is always appended as an extra setting.
    distance_types : iterable of str, optional
        Metric names passed to ``KNN``. Defaults to
        ``['euclidean', 'manhattan', 'cosine']``.
    n_splits : int
        Number of cross-validation folds.
    """
    # None sentinels instead of mutable list defaults.
    if k_list is None:
        k_list = [1, 3, 5, 7, 10]
    if distance_types is None:
        distance_types = ['euclidean', 'manhattan', 'cosine']

    # Arrays guarantee that the positional fold indices below select rows.
    X = np.asarray(X)
    y = np.asarray(y)

    # The extra value len(X) - 1 is larger than any training fold, so for
    # that setting every training sample of the fold takes part in the vote.
    k_values = list(k_list) + [len(X) - 1]

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    # With a fixed random_state the splits are the same on every call, so
    # compute them once and reuse them for every (distance, k) pair.
    folds = list(kf.split(X))

    for dist in distance_types:
        print(f"\n=== Distance: {dist.upper()} ===")
        for k_val in k_values:
            print(f"\n--- K = {k_val} ---")
            all_y_true = []
            all_y_pred = []

            for train_index, test_index in folds:
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]

                model = KNN(k=k_val, distance_func=dist)
                model.fit(X_train, y_train)
                preds = model.predict(X_test)

                # Pool predictions so the report covers every sample once.
                all_y_true.extend(y_test)
                all_y_pred.extend(preds)

            print("Confusion Matrix:")
            print(confusion_matrix(all_y_true, all_y_pred))
            print("Classification Report:")
            print(classification_report(all_y_true, all_y_pred, zero_division=0))
