In [3]:
import numpy as np
from collections import Counter

class KNearestNeighbors:
    """
    K-Nearest Neighbors (KNN) estimator from scratch.

    Supports both classification (majority vote among the nearest
    neighbors) and regression (mean of the nearest neighbors' targets).
    Distances are Euclidean and are computed against every stored training
    sample for each query point.
    """

    # Accepted values for ``task``; checked once per ``predict`` call.
    _VALID_TASKS = ("classification", "regression")

    def __init__(self, n_neighbors=3, task="classification"):
        """
        Parameters:
        -----------
        n_neighbors : int
            Number of neighbors to use. Must be a positive integer; this is
            checked when ``predict`` is called.
        task : str, "classification" or "regression"
            Determines type of prediction. Checked when ``predict`` is called.
        """
        self.n_neighbors = n_neighbors
        self.task = task
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """
        Store the training data.

        Parameters:
        -----------
        X : array-like
            Training data (n_samples, n_features)
        y : array-like
            Target values (n_samples,)

        Raises:
        -------
        ValueError
            If X is not 2-dimensional, if the training set is empty, or if
            X and y do not contain the same number of samples.
        """
        # Convert up front so plain Python lists work with the fancy
        # indexing and broadcasting used in predict().
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set.")
        if y.ndim < 1 or y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples.")

        self.X_train = X
        self.y_train = y

    def _euclidean_distance(self, a, b):
        """Return the Euclidean distance from each row of ``a`` to ``b``."""
        return np.sqrt(np.sum((a - b) ** 2, axis=1))

    def predict(self, X):
        """
        Predict the class (or value) for each sample in X.

        Parameters:
        -----------
        X : array-like
            Test data (n_samples, n_features)

        Returns:
        --------
        numpy.ndarray
            Predicted classes/values (n_samples,)

        Raises:
        -------
        ValueError
            If ``task`` is not "classification" or "regression", or if
            ``n_neighbors`` is not a positive integer.
        RuntimeError
            If called before ``fit``.
        """
        # Validate configuration once, before the loop, so that a bad setting
        # is reported even when X contains no samples.
        if self.task not in self._VALID_TASKS:
            raise ValueError("Unknown task type. Use 'classification' or 'regression'.")

        k = self.n_neighbors
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError("n_neighbors must be a positive integer.")

        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNearestNeighbors is not fitted yet. Call fit() first.")

        # asarray is a no-op for arrays; it also covers attributes that were
        # assigned directly instead of through fit().
        X_train = np.asarray(self.X_train)
        y_train = np.asarray(self.y_train)
        X = np.asarray(X)

        predictions = []
        for x in X:
            # Compute distances to all training points
            distances = self._euclidean_distance(X_train, x)
            # Stable sort: equidistant training points keep their original
            # order, so the selected neighbors are deterministic. If k exceeds
            # the number of training samples the slice simply keeps them all.
            neighbors_idx = np.argsort(distances, kind="stable")[:k]
            neighbor_labels = y_train[neighbors_idx]

            if self.task == "classification":
                # Majority vote
                most_common = Counter(neighbor_labels).most_common(1)[0][0]
                predictions.append(most_common)
            else:
                # Mean value
                predictions.append(np.mean(neighbor_labels))
        return np.array(predictions)

> ## Example usage:

In [4]:
# Toy 2-D data shared by both demos below.
X_train = np.array([
    [1, 2], [2, 3], [3, 4],
    [6, 7], [7, 8], [8, 9],
])
X_test = np.array([[2, 2], [7, 7]])

# --- Classification: majority vote over the 3 nearest neighbors ---
y_train = np.array([0, 0, 0, 1, 1, 1])
knn = KNearestNeighbors(task="classification", n_neighbors=3)
knn.fit(X_train, y_train)
print("Classification predictions:", knn.predict(X_test))

# --- Regression: mean target of the 2 nearest neighbors ---
y_train_reg = np.array([1.0, 1.5, 2.0, 5.0, 5.5, 6.0])
knn_reg = KNearestNeighbors(task="regression", n_neighbors=2)
knn_reg.fit(X_train, y_train_reg)
print("Regression predictions:", knn_reg.predict(X_test))

Classification predictions: [0 1]
Regression predictions: [1.25 5.25]
