In [None]:
import numpy as np
from collections import Counter
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [3]:
# Load scikit-learn's breast-cancer dataset and pull out the feature matrix
# and the target vector.
dataset = datasets.load_breast_cancer()
X = dataset.data
y = dataset.target

# Hold out 25% of the samples for testing. The split is stratified on the
# labels and seeded so that it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

In [55]:
class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int
        Number of nearest training samples that vote on each prediction.
        Must be a positive integer. If ``k`` exceeds the number of training
        samples, every training sample votes.

    Attributes
    ----------
    k : int
        The neighbourhood size passed to the constructor.
    X : np.ndarray or None
        Training samples stored by :meth:`fit` (``None`` before fitting).
    y : np.ndarray or None
        Training labels stored by :meth:`fit` (``None`` before fitting).

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k):
        # Reject bad values up front instead of failing later inside predict()
        # with an unrelated IndexError / TypeError.
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        self.X = None
        self.y = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Memorise the training set.

        Parameters
        ----------
        X : array-like
            Training samples, one per entry along the first axis. Inputs that
            are not 2-D are reshaped to ``(n_samples, n_features)``.
        y : array-like of shape (n_samples,)
            Label of each training sample.

        Returns
        -------
        KNN
            ``self``, so that calls can be chained.

        Raises
        ------
        ValueError
            If the training set is empty, ``y`` is not 1-D, or ``X`` and ``y``
            hold a different number of samples.
        """
        X = self._as_sample_matrix(X)
        y = np.asarray(y)  # lists/tuples must support fancy indexing in predict()

        if y.ndim != 1:
            raise ValueError(f"y must be 1-D, got an array with {y.ndim} dimensions")
        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y disagree on the number of samples: "
                f"{X.shape[0]} != {y.shape[0]}"
            )

        self.X = X
        self.y = y
        return self

    def predict(self, X: np.ndarray):
        """Predict a label for every sample in ``X``.

        For each query sample the Euclidean distance to every training sample
        is computed, the ``k`` closest training samples are selected, and the
        most frequent label among them is returned.

        Parameters
        ----------
        X : array-like
            Query samples, one per entry along the first axis. Inputs that are
            not 2-D are reshaped to ``(n_queries, n_features)``.

        Returns
        -------
        np.ndarray
            One predicted label per query sample (an empty array when ``X``
            holds no samples).

        Raises
        ------
        RuntimeError
            If called before :meth:`fit`.
        ValueError
            If the number of features differs from the training data.
        """
        if self.X is None or self.y is None:
            raise RuntimeError("KNN.predict() called before fit()")

        # Normalise again in case the attributes were assigned directly.
        train_X = self._as_sample_matrix(self.X)
        train_y = np.asarray(self.y)

        X = self._as_sample_matrix(X)
        if X.shape[0] == 0:
            return np.array([])
        if X.shape[1] != train_X.shape[1]:
            raise ValueError(
                f"X has {X.shape[1]} features, but the model was fitted "
                f"with {train_X.shape[1]}"
            )

        predictions = []
        for x in X:
            # One vectorised call yields the distance to every training row.
            distances = self._euclidian_distance(train_X, x)
            # A stable sort makes ties deterministic: among equidistant
            # neighbours the one that appears first in the training set wins.
            nearest_idxs = np.argsort(distances, kind="stable")[:self.k]
            predictions.append(self._most_common_label(train_y[nearest_idxs]))

        return np.array(predictions)

    @staticmethod
    def _as_sample_matrix(X):
        """Return ``X`` as an array of shape ``(n_samples, n_features)``.

        2-D input is returned unchanged; 1-D input becomes a single-feature
        column; higher-dimensional samples are flattened per sample.
        """
        X = np.asarray(X)
        if X.ndim == 0:
            raise ValueError("X must be a sequence of samples, got a scalar")
        if X.ndim != 2:
            n_features = int(np.prod(X.shape[1:]))  # 1 when X is 1-D
            X = X.reshape(X.shape[0], n_features)
        return X

    def _euclidian_distance(self, a1: np.ndarray, a2: np.ndarray):
        """Euclidean distance between ``a1`` and ``a2`` along the last axis.

        The arguments are broadcast against each other, so passing a matrix
        and a vector returns one distance per matrix row, while two vectors
        give a single scalar distance.
        """
        # atleast_1d keeps scalar inputs working with the axis=-1 reduction.
        return np.sqrt(np.sum(np.square(np.atleast_1d(a1 - a2)), axis=-1))

    def _most_common_label(self, y: np.ndarray):
        """Return the most frequent value in ``y``.

        When several labels share the highest count, the one that occurs
        first in ``y`` is returned.
        """
        return Counter(y).most_common(1)[0][0]

In [58]:
# Try every k from 2 to 20 and keep the first one that reaches the highest
# accuracy, together with the text classification report obtained for it.
#
# NOTE(review): k is chosen using X_test / y_test, and the stored report is
# computed on that same split, so the reported score is an optimistic
# estimate. Consider selecting k on a separate validation split or with
# cross-validation on the training data.
clf_report = None
best_k = None
best_acc = 0

for k in np.arange(2, 21):
    clf = KNN(k=k)
    clf.fit(X_train, y_train)
    y_preds = clf.predict(X_test)

    # Only the overall accuracy is read from the dict-form report.
    acc = classification_report(
        y_test,
        y_preds,
        output_dict=True,
    )['accuracy']

    # Strict comparison: on equal accuracy the smaller, earlier k is kept.
    if acc > best_acc:
        best_acc, best_k = acc, k
        clf_report = classification_report(y_test, y_preds)

In [59]:
# Show the selected k, then the classification report stored for it.
print(f'Best k: {best_k}', clf_report, sep='\n')

Best k: 8
              precision    recall  f1-score   support

           0       0.96      0.91      0.93        53
           1       0.95      0.98      0.96        90

    accuracy                           0.95       143
   macro avg       0.95      0.94      0.95       143
weighted avg       0.95      0.95      0.95       143

