In [1]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.base import BaseEstimator
from sklearn.model_selection import StratifiedKFold
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the iris features and labels with a single dataset call, then
# standardise the features column-wise.
# NOTE: the scaler is fitted on every sample here, i.e. before any
# cross-validation split is made further down the notebook.
X_train, y_train = load_iris(return_X_y=True)
X_train = StandardScaler().fit_transform(X_train)

In [3]:
# Display the shapes of the feature matrix and the label vector as a quick sanity check.
X_train.shape, y_train.shape

((150, 4), (150,))

In [4]:
def performance(model):
    """Return the mean cross-validated accuracy of ``model``.

    The estimator is evaluated on the notebook-level ``X_train`` / ``y_train``
    arrays with 10 stratified folds. The folds are shuffled with a fixed
    ``random_state=0`` so every estimator is scored on the same splits.

    Parameters
    ----------
    model : estimator
        Any object accepted by ``cross_val_score`` (must implement ``fit``
        and ``predict``).

    Returns
    -------
    float
        Mean of the per-fold accuracy scores.
    """
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    fold_scores = cross_val_score(
        model, X_train, y_train, cv=folds, scoring='accuracy'
    )
    return fold_scores.mean()

In [5]:
class KNearestNeighbor(BaseEstimator):
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` memorises the training data; ``predict`` labels each query point
    with the most frequent label among its ``n_neighbors`` closest training
    samples. When several labels share the highest vote count, the label whose
    first supporter is closest to the query wins.

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of training samples that vote on each prediction.

    Attributes
    ----------
    X_ : ndarray or None
        Copy of the training features as floats; ``None`` before ``fit``.
    y_ : ndarray or None
        Copy of the training labels; ``None`` before ``fit``.
    """

    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors
        # Placeholders until fit() stores the training data; None marks "not fitted".
        self.X_ = None
        self.y_ = None

    def fit(self, X, y):
        """Store private copies of the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features; converted to a float ndarray.
        y : array-like of shape (n_samples,)
            Training labels.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        # np.array copies by default, so later changes to the caller's data
        # cannot alter the fitted model.
        X_arr = np.array(X, dtype=float)
        y_arr = np.array(y)
        if X_arr.shape[:1] != y_arr.shape[:1]:
            raise ValueError(
                "X and y must contain the same number of samples, got "
                f"{X_arr.shape[:1]} and {y_arr.shape[:1]}"
            )
        self.X_ = X_arr
        self.y_ = y_arr
        return self

    def _check_fitted(self):
        """Raise ``NotFittedError`` when no training data has been stored yet."""
        if self.X_ is None or self.y_ is None:
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This KNearestNeighbor instance is not fitted yet; "
                "call fit(X, y) before predicting."
            )

    def calculate_distance(self, x1, x2):
        """Euclidean distance between ``x1`` and ``x2`` along the last axis.

        For two 1-D vectors this is a single scalar. The inputs may also
        broadcast against each other (e.g. a 2-D matrix and a 1-D vector), in
        which case one distance per row is returned.
        """
        return np.linalg.norm(x1 - x2, axis=-1)

    def find_neighbors_major(self, n, x):
        """Return the majority label among the ``n`` training samples nearest to ``x``.

        Parameters
        ----------
        n : int
            Number of neighbours that vote; must be at least 1.
        x : array-like of shape (n_features,)
            Query point.

        Raises
        ------
        ValueError
            If ``n`` is smaller than 1 or there are no training samples.
        NotFittedError
            If ``fit`` has not been called.
        """
        self._check_fitted()
        if n < 1:
            raise ValueError(f"n_neighbors must be at least 1, got {n}")
        X, y = self.X_, self.y_
        distances = self.calculate_distance(X, np.asarray(x, dtype=float))
        # Select neighbours by index rather than by distance value: looking a
        # distance back up would return the same first sample for every
        # equidistant point. The stable sort keeps equidistant samples in
        # training order, so the result is deterministic.
        nearest = np.argsort(distances, kind="stable")[:n]
        votes = {}
        for label in y[nearest]:
            votes[label] = votes.get(label, 0) + 1
        if not votes:
            raise ValueError("cannot vote: the model holds no training samples")
        # max() returns the first key with the highest count; dict insertion
        # order is nearest-first, so vote ties go to the closest label.
        return max(votes, key=votes.get)

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_queries, n_features)

        Returns
        -------
        ndarray of shape (n_queries,)
            Predicted labels.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called.
        """
        self._check_fitted()
        queries = np.asarray(X, dtype=float)
        return np.array(
            [self.find_neighbors_major(self.n_neighbors, x) for x in queries]
        )

In [6]:
# Cross-validated accuracy of the hand-written classifier with its default n_neighbors=5.
performance(KNearestNeighbor())

0.9466666666666667

In [7]:
from sklearn.neighbors import KNeighborsClassifier

In [8]:
# Same evaluation for scikit-learn's KNeighborsClassifier with default settings, as a reference score.
performance(KNeighborsClassifier())

0.9466666666666667