In [6]:
import numpy as np
from typing import Dict, List, Any, Tuple
from numpy.typing import NDArray

In [27]:
class KNearestNeighbor:
    def __init__(self, k: int = 3, distance: str = "euclidean"):
        self.k: int = k
        self.distance: str = distance
        self.X_train: NDArray[np.float_] | None = None
        self.y_train: NDArray[np.float_] | None = None
    
    def _compute_distance(self, x1: NDArray[np.float_], x2: NDArray[np.float_]) -> float:
        if self.distance == 'euclidean':
            return float(np.sqrt(np.sum((x1 - x2)**2)))
        elif self.distance == "manhattan":
            return float(np.abs(x1-x2))
        else:
            raise ValueError("Unsupported distance type")
    
    def _get_neighbors(self, x: NDArray[np.float_]) -> NDArray[np.float_]:
        distances = [self._compute_distance(x, x_train) for x_train in self.X_train]
        top_neighbors_ind = np.argsort(distances)[:self.k]
        return top_neighbors_ind
    
    def _counter(self, Data: List[Any]) -> Dict[Any, int]:
        freq: Dict[Any, int] = {}
        for item in Data:
            freq[item] = freq.get(item, 0) + 1
        return freq
    
    def _most_common(self, freq: Dict[Any, int], n: int = 1) -> List[Tuple[Any, int]]:
        return sorted(freq.items(), key=lambda kv: (-kv[1], kv[0]))[:1]
    
    def fit(self, X_train: List[float] | NDArray[np.float_], y_train: List[float] | NDArray[np.float_]) -> None:
        self.X_train = np.asarray(X_train, dtype= float)
        self.y_train = np.asarray(y_train)
        if self.X_train.shape[0] != self.y_train.shape[0]:
            raise ValueError("X and y must have the same shape")
    
    def predict_one(self, x: NDArray[np.float_]):
        neighbors_ind = self._get_neighbors(x)
        neighbors_labels = [self.y_train[i] for i in neighbors_ind]
        freq = self._counter(neighbors_labels)
        top = self._most_common(freq)
        if not top:
            return neighbors_labels[0]
        label, _ = top[0]
        return label

    def predict(self, X_train:NDArray[np.float_]) -> NDArray[Any]:
        X_train = np.asarray(X_train, dtype=float)
        return np.array([self.predict_one(x_train) for x_train in X_train])


In [28]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the Iris data set as a feature matrix X and a label vector y.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# From-scratch classifier defined earlier in this notebook.
my_knn = KNearestNeighbor(k=5)
my_knn.fit(X_train, y_train)
y_pred_scratch = my_knn.predict(X_test)

# scikit-learn reference with the same number of neighbours.
sk_knn = KNeighborsClassifier(n_neighbors=5)
sk_knn.fit(X_train, y_train)
y_pred_lib = sk_knn.predict(X_test)


# Label each score with the model that produced it so the two lines can be told apart.
print("Scratch KNN Accuracy:", accuracy_score(y_test, y_pred_scratch))
print("Sklearn KNN Accuracy:", accuracy_score(y_test, y_pred_lib))

Scratch KNN Accuracy: 1.0
Scratch KNN Accuracy: 1.0
