In [1]:
import numpy as np
from collections import Counter

class KNN():
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int
        Number of nearest training samples that vote on each prediction.
        Must be at least 1.

    Raises
    ------
    TypeError
        If ``k`` is not an integer.
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k):
        # Reject bad k up front: k == 0 would leave no voters (IndexError at
        # predict time) and a negative k would silently slice "all but the
        # last |k|" neighbours instead of the nearest ones.
        if not isinstance(k, (int, np.integer)):
            raise TypeError(f"k must be an integer, got {type(k).__name__}")
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k

    def fit(self, X, y):
        """Store the training set; no other computation happens here.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features. Converted to a float array so that integer
            (in particular unsigned) inputs cannot wrap around when
            differences are taken later.
        y : array-like of shape (n_samples,)
            Training labels, one per row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` is not 1-D with one
            label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Positional array: avoids label-based lookup for index-carrying
        # containers and accepts plain lists.
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got {X.ndim}-D"
            )
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one training sample")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError(
                "y must be 1-D with one label per row of X "
                f"(X has {X.shape[0]} rows, y has shape {y.shape})"
            )

        self.X = X
        self.y = y

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : iterable of array-like, each of shape (n_features,)
            Samples to classify.

        Returns
        -------
        numpy.ndarray
            One predicted label per sample, in input order.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        """
        if not hasattr(self, "X") or not hasattr(self, "y"):
            raise AttributeError(
                "KNN has not been fitted; call fit(X, y) before predict(X)"
            )

        predictions = [self._predict_sample(x) for x in X]
        return np.array(predictions)

    def _predict_sample(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        distances = self._euclidean_distance(self.X, x)
        # A stable sort keeps equidistant neighbours in training order, so the
        # selected neighbours are deterministic. If k exceeds the number of
        # training samples, the slice simply yields all of them.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_nearest_labels = [self.y[i] for i in k_indices]
        # Counter.most_common orders equal counts by first occurrence, so a
        # tied vote goes to the label whose first voter is nearest.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

    def _euclidean_distance(self, X, x):
        """Return the Euclidean distance from ``x`` to every row of ``X``.

        Both operands are viewed as float before subtracting so that integer
        inputs cannot overflow or wrap around.
        """
        diff = np.asarray(X, dtype=float) - np.asarray(x, dtype=float)
        return np.sqrt(np.sum(diff ** 2, axis=1))
    

In [2]:
# Test
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset and separate features from class labels.
data = datasets.load_iris()
X = data.data
y = data.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Fit the classifier with k neighbours and label the held-out samples.
k = 3
clf = KNN(k)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

# Report the fraction of held-out samples that were labelled correctly.
accuracy = accuracy_score(y_test, predictions)
print("KNN classification accuracy", accuracy)

KNN classification accuracy 0.9666666666666667
