# Exercise 3: k-Nearest Neighbors (k-NN)

✅ Part 1: Implement k-NN from scratch (using only NumPy).

✅ Part 2: Do it with scikit-learn for comparison.


### ✅ Part 1: Implement k-NN from scratch (using only NumPy).
---

### 1. Import Libraries

In [2]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import numpy as np

### 2. Load Digits Subset (first 100 samples of scikit-learn's 8×8 digits dataset, for simplicity)

In [12]:
# Fetch the handwritten-digits dataset that ships with scikit-learn
digits = load_digits()

# Keep only the first 100 samples (features and their labels)
X = digits.data[:100]
y = digits.target[:100]

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (80, 64), y_train shape: (80,)
X_test shape: (20, 64), y_test shape: (20,)


### 3. Define k-NN from Scratch

In [4]:
def euclidean_distance(a, b):
    """
    Calculate the Euclidean distance between two points of any dimension.

    distance = sqrt(sum_i (a_i - b_i)^2)

    a: First point (array-like of numbers)
    b: Second point (array-like, same shape as or broadcastable against a)

    Returns the distance as a floating-point scalar.
    """
    # Cast to float before subtracting: with unsigned or small integer
    # dtypes (e.g. uint8 pixel data) the difference and its square would
    # silently wrap around and produce a wrong distance.
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

def knn_predict(X_train, y_train, X_test, k=3):
    """
    Predict the class labels for the test set using k-NN.

    Each test point is assigned the label that occurs most often among its
    k nearest training points (Euclidean distance). When several labels are
    tied for the most votes, the smallest label (in sorted order) wins.

    X_train: Training data, one row per sample
    y_train: Training labels, one per training sample (any sortable type:
             integers, including negative ones, strings, ...)
    X_test: Test data, one row per sample
    k: Number of neighbors to consider (must be >= 1). If k exceeds the
       number of training samples, all training samples vote.

    Returns a NumPy array with one predicted label per test sample.
    Raises ValueError if k < 1, if X_train is empty, or if X_train and
    y_train have different lengths.
    """
    # Work in floating point so that integer feature dtypes (e.g. uint8
    # pixel values) cannot overflow when differences are squared.
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    y_train = np.asarray(y_train)

    # Treat 1-D inputs as a column of single-feature samples.
    if X_train.ndim == 1:
        X_train = X_train[:, np.newaxis]
    if X_test.ndim == 1:
        X_test = X_test[:, np.newaxis]

    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(X_train) != len(y_train):
        raise ValueError(
            f"X_train and y_train must have the same length, "
            f"got {len(X_train)} and {len(y_train)}"
        )

    preds = []

    for test_point in X_test:
        # Distances from the test point to every training point at once
        distances = np.sqrt(np.sum((X_train - test_point) ** 2, axis=1))

        # Indices of the k nearest neighbors; a stable sort keeps the
        # original training order among equidistant points.
        k_indices = np.argsort(distances, kind="stable")[:k]

        # Majority vote; np.unique handles arbitrary label types, unlike
        # np.bincount which only accepts non-negative integers.
        labels, counts = np.unique(y_train[k_indices], return_counts=True)
        preds.append(labels[counts.argmax()])

    return np.array(preds)

### 4. Run k-NN on Test Set

In [5]:
# One call is enough: knn_predict returns one label per test sample, so
# y_pred lines up element-wise with y_test. Repeating the call only
# recomputed the same predictions.
y_pred = knn_predict(X_train, y_train, X_test, k=3)

# Accuracy = fraction of test samples whose predicted label matches the true one
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

### ✅ Part 2: k-NN with scikit-learn
---

### 1. Train and Evaluate on the Same Data

In [6]:
from sklearn.neighbors import KNeighborsClassifier

# Build a 3-neighbor classifier and fit it on the same training split
# used in Part 1 (fit returns the estimator itself).
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict labels for the held-out test samples
y_pred_sklearn = knn.predict(X_test)

# Fraction of test samples predicted correctly
accuracy_sklearn = (y_pred_sklearn == y_test).mean()
print("k-NN Accuracy (scikit-learn):", accuracy_sklearn)