In [44]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from collections import Counter

In [45]:
class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    The classifier keeps no fitted state: the training data is passed to
    `predict` on every call, and each query is compared against every
    training sample.

    Parameters
    ----------
    n_neighbors : int, default=2
        Number of nearest training samples that vote on each prediction.
        Must be at least 1.

    Raises
    ------
    ValueError
        If `n_neighbors` is smaller than 1.
    """

    def __init__(self, n_neighbors=2):
        # A non-positive k would either produce an empty vote (k == 0) or,
        # through negative slicing, silently vote over almost the whole
        # training set (k < 0). Reject it up front.
        if n_neighbors < 1:
            raise ValueError(
                f"n_neighbors must be a positive integer, got {n_neighbors!r}")
        self.n_neighbors = n_neighbors

    def predict(self, X, y, queries):
        """Predict a label for every row of `queries` by majority vote.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples.
        y : array-like, shape (n_samples,)
            Label of each training sample.
        queries : iterable of array-like, each of shape (n_features,)
            Points to classify.

        Returns
        -------
        numpy.ndarray
            One predicted label per query, in query order.

        Raises
        ------
        ValueError
            If `X` is empty, if `X` and `y` differ in length, or if
            `self.n_neighbors` is smaller than 1.
        """
        # Convert once, outside the loop, so plain Python lists are accepted
        # and fancy indexing on `y` works.
        X = np.asarray(X)
        y = np.asarray(y)

        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("X must contain at least one training sample")
        if len(y) != n_samples:
            raise ValueError(
                f"X and y must have the same length, got {n_samples} and {len(y)}")

        # Never ask for more neighbours than there are training samples.
        k = min(self.n_neighbors, n_samples)
        if k < 1:
            # Guards against `n_neighbors` being reassigned after construction.
            raise ValueError(
                f"n_neighbors must be a positive integer, got {self.n_neighbors!r}")

        y_preds = []
        for query in queries:
            distances = self._calculate_distance(X, query)

            # A stable sort keeps equidistant samples in training order, so
            # the selected neighbours (and hence the vote) are deterministic.
            nearest_idx = distances.argsort(kind="stable")[:k]

            # Majority vote. On a tie in counts, Counter returns the label
            # that was encountered first, i.e. the one whose nearest
            # representative is closest to the query.
            labels = y[nearest_idx]
            y_preds.append(Counter(labels).most_common(1)[0][0])

        return np.array(y_preds)

    def _calculate_distance(self, X, query):
        """Return the Euclidean distance from `query` to every sample in `X`.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Reference samples. A 1-D input is treated as `n_samples`
            single-feature samples.
        query : array-like, shape (n_features,)
            The point to measure from.

        Returns
        -------
        numpy.ndarray, shape (n_samples,)
            Distance of each sample to `query`.
        """
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        # Broadcast the query against all samples at once and reduce over
        # every non-sample axis instead of looping over rows in Python.
        feature_axes = tuple(range(1, X.ndim))
        diff = X - np.asarray(query)
        return np.sqrt(np.sum(np.square(diff), axis=feature_axes))

In [46]:
# Load scikit-learn's breast-cancer dataset and unpack it into the
# feature matrix `X` and the target vector `y`.
data = datasets.load_breast_cancer()
X = data.data
y = data.target

In [47]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=24,
)

In [48]:
# Sweep k over 2..9, score each setting on the held-out split, and remember
# the best one.
# NOTE: k is chosen using the same test split that the accuracy is reported
# on, so the "best accuracy" below is an optimistic estimate; a separate
# validation split or cross-validation would be needed for an unbiased one.
best_accuracy = 0
optimal_n_neighbors = 0

for n_neighbors in range(2, 10):
    clf = KNN(n_neighbors)
    predictions = clf.predict(X_train, y_train, X_test)

    # Fraction of test samples whose predicted label matches the true one.
    accuracy = np.mean(predictions == y_test)
    print(f"Accuracy {accuracy}, N neighbors: {n_neighbors}")

    # Strict comparison: on equal accuracy the smaller k seen first is kept.
    if accuracy > best_accuracy:
        best_accuracy, optimal_n_neighbors = accuracy, n_neighbors

print(f"\nBest accuracy {best_accuracy}, N neighbors {optimal_n_neighbors}")

Accuracy 0.9298245614035088, N neighbors: 2
Accuracy 0.9473684210526315, N neighbors: 3
Accuracy 0.956140350877193, N neighbors: 4
Accuracy 0.956140350877193, N neighbors: 5
Accuracy 0.956140350877193, N neighbors: 6
Accuracy 0.956140350877193, N neighbors: 7
Accuracy 0.956140350877193, N neighbors: 8
Accuracy 0.956140350877193, N neighbors: 9

Best accuracy 0.956140350877193, N neighbors 4
