## Minimal K-Nearest Neighbors Classifier Implementation

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.base import BaseEstimator
from scipy.stats import mode

In [3]:
class KNeighborsClassifier(BaseEstimator):
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Each query sample is assigned the most frequent label among its
    ``n_neighbors`` closest training samples.

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of nearest training samples that vote on each prediction.
        If it exceeds the number of training samples, all of them vote.

    Attributes
    ----------
    X_ : ndarray of shape (n_samples, n_features)
        Copy of the training samples, set by :meth:`fit`.
    y_ : ndarray
        Copy of the training labels, set by :meth:`fit`; its first
        dimension has length ``n_samples``.
    """

    def __init__(self, n_neighbors=5):
        # Only store the argument: ``BaseEstimator`` derives get_params /
        # set_params from this signature, so validation is deferred to
        # ``fit`` and ``predict``.
        self.n_neighbors = n_neighbors

    def _check_n_neighbors(self):
        """Return ``n_neighbors`` as an ``int`` or raise ``ValueError``."""
        k = self.n_neighbors
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(
                f"n_neighbors must be a positive integer, got {k!r}."
            )
        return int(k)

    def fit(self, X, y):
        """Store a copy of the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like whose first dimension has length n_samples
            Training labels.

        Returns
        -------
        self : KNeighborsClassifier
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``n_neighbors`` is not a positive integer, ``X`` is not a
            non-empty 2-D array, or ``X`` and ``y`` disagree on the number
            of samples.
        """
        self._check_n_neighbors()

        # Copy so that later mutation of the caller's arrays cannot change
        # the fitted model.
        X = np.copy(X)
        y = np.copy(y)

        # Explicit exceptions instead of ``assert``: assertions are removed
        # when Python runs with ``-O``.
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got {X.ndim}-D."
            )
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set.")
        if y.ndim == 0 or y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: "
                f"X has {X.shape[0]}, y has shape {y.shape}."
            )

        self.X_ = X
        self.y_ = y
        return self

    @staticmethod
    def _most_frequent(arr):
        """Return the most frequent value in ``arr``.

        Ties are resolved in favour of the smallest value: ``np.unique``
        returns the values sorted and ``argmax`` picks the first maximum.
        This is the same tie-break ``scipy.stats.mode`` applies, without
        relying on the shape of its return value, which changed across
        SciPy releases.
        """
        values, counts = np.unique(arr, return_counts=True)
        return values[np.argmax(counts)]

    def _calculate_distances(self, X):
        """Return Euclidean distances between queries and training samples.

        ``self.X_[:, None]`` has shape (n_train, 1, n_features), so the
        subtraction broadcasts to (n_train, n_queries, n_features). The
        norm over the last axis followed by the transpose yields an array
        of shape (n_queries, n_train). The intermediate difference array
        holds n_train * n_queries * n_features elements.
        """
        return np.linalg.norm(X - self.X_[:, None], axis=-1).T

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_queries, n_features)
            Samples to classify; ``n_features`` must match the training data.

        Returns
        -------
        ndarray
            One predicted label per query row.

        Raises
        ------
        ValueError
            If ``n_neighbors`` is not a positive integer, or ``X`` is not
            2-D with the same number of features as the training data.
        """
        k = self._check_n_neighbors()

        X = np.asarray(X)
        if X.ndim != 2 or X.shape[1] != self.X_.shape[1]:
            raise ValueError(
                f"X must have shape (n_queries, {self.X_.shape[1]}), "
                f"got {X.shape}."
            )

        # ``np.apply_along_axis`` rejects zero-length iteration axes, so an
        # empty batch is answered directly.
        if X.shape[0] == 0:
            return np.empty((0,) + self.y_.shape[1:], dtype=self.y_.dtype)

        distances = self._calculate_distances(X)
        # Truncate to the k closest indices *before* gathering labels, so the
        # label lookup is (n_queries, k) rather than (n_queries, n_train).
        nearest_idx = distances.argsort(axis=1)[:, :k]
        nearest_labels = self.y_[nearest_idx]

        # Majority vote per query row.
        return np.apply_along_axis(self._most_frequent, arr=nearest_labels, axis=1)

In [4]:
from sklearn.datasets import make_classification

# Synthetic 3-class problem: 2,000 samples, 10 features, 8 of them informative.
# No random_state is passed, so a different dataset is drawn on every run.
X, y = make_classification(
    n_samples=2_000,
    n_features=10,
    n_informative=8,
    n_classes=3,
)

In [5]:
# Build the custom classifier with 5 voting neighbours and fit it on the full dataset.
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X=X, y=y)

In [6]:
from sklearn.metrics import accuracy_score

# Accuracy on the same samples the classifier was fitted on: every query point
# is itself part of the stored training data, so this figure is optimistic.
accuracy_score(y_true=y, y_pred=clf.predict(X))

0.91

In [7]:
from sklearn.model_selection import cross_val_score
from scipy.stats import hmean

# Harmonic mean of the per-fold accuracies from 10-fold cross-validation of the
# custom classifier; n_jobs=-1 asks scikit-learn to use all available cores.
hmean(
    cross_val_score(
        estimator=KNeighborsClassifier(n_neighbors=5),
        X=X,
        y=y,
        scoring='accuracy',
        cv=10,
        n_jobs=-1,
    )
)

0.8689377684520952

In [8]:
from sklearn.neighbors import KNeighborsClassifier as sk_clf

# Same 10-fold protocol with scikit-learn's reference classifier at its default
# settings, for comparison with the custom implementation.
hmean(
    cross_val_score(
        estimator=sk_clf(),
        X=X,
        y=y,
        scoring='accuracy',
        cv=10,
        n_jobs=-1,
    )
)

0.8659958285808481