In [7]:
import numpy as np

In [8]:
class _kNN_classifier:
    
    def __init__(self, k, X_train, y_train, X_test):
        self.k = k
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.n_samples = X_test.shape[0]
        
    def _predict(self):
        y_pred = np.zeros(self.n_samples)
        
        for test_index, test_object in enumerate(self.X_test):
            distance_label = {}
            
            for train_index, train_object in enumerate(self.X_train):
                dist = np.linalg.norm(test_object - train_object)
                distance_label[dist] = self.y_train[train_index]
                
            labels_appearance = {}
            
            for index, key in enumerate(sorted(distance_label)):
                label = distance_label[key]
                if label not in labels_appearance: labels_appearance[label] = 1
                else: labels_appearance[label] += 1
                
                if index == self.k: break
                    
            for key in sorted(labels_appearance, key=labels_appearance.get, reverse=True):
                y_pred[test_index] = key
                break
            
        return y_pred

In [9]:
from sklearn.datasets import load_digits

digits = load_digits()
data = digits.data
target = digits.target

data.shape

(1797, 64)

In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.25)

print(f"Train data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")

Train data shape: (1347, 64)
Test data shape: (450, 64)


In [11]:
from sklearn.metrics import accuracy_score

In [12]:
%%time

_knn_custom = _kNN_classifier(5, X_train, y_train, X_test)
y_pred_custom = _knn_custom._predict()
accuracy_custom = accuracy_score(y_test, y_pred_custom)

print(f"Accuracy of custom model: {accuracy_custom}")

Accuracy of custom model: 0.9822222222222222
Wall time: 7.64 s


In [13]:
%%time

from sklearn.neighbors import KNeighborsClassifier

knn_sklearn = KNeighborsClassifier(n_neighbors=5)
knn_sklearn.fit(X_train, y_train)
y_pred_sklearn = knn_sklearn.predict(X_test)
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)

print(f'Accuracy of sklearn model: {accuracy_sklearn}')

Accuracy of sklearn model: 0.9822222222222222
Wall time: 290 ms
