In [1]:
import numpy as np

In [None]:
class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Each query point is labelled by majority vote among the ``k`` closest
    training points. Votes are counted with ``np.bincount``, so training
    labels must be non-negative integers; when several labels tie, the
    smallest label wins (``np.argmax`` returns the first maximum).

    Parameters
    ----------
    k : int, default 3
        Number of neighbours that vote. Must satisfy
        ``1 <= k <= number of training samples`` by the time ``predict``
        is called.
    """

    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        """Store the training data; no model is built ahead of time.

        Parameters
        ----------
        X : array-like, one row per training sample
        y : array-like of non-negative integer labels, one per row of ``X``

        Returns
        -------
        KNN
            ``self``, so that ``KNN(k).fit(X, y)`` can be chained.
        """
        # asarray is a no-op for ndarrays and lets callers pass nested lists.
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)
        return self

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Parameters
        ----------
        X : array-like, one row per query sample, with the same number of
            columns as the training data.

        Returns
        -------
        numpy.ndarray
            1-D float array of length ``len(X)`` holding the predicted labels.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1 or larger than the number of
            training samples.
        """
        n_train = len(self.X_train)
        if not 1 <= self.k <= n_train:
            raise ValueError(
                f"k must be between 1 and the number of training samples "
                f"({n_train}), got {self.k}"
            )

        X = np.asarray(X)
        y_pred = np.zeros(len(X))
        # argpartition's kth is a zero-based index: partitioning around
        # position k - 1 guarantees the first k entries are the k smallest
        # distances, and stays in bounds even when k == n_train.
        kth = self.k - 1
        for i, x in enumerate(X):
            distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))
            k_indices = np.argpartition(distances, kth)[:self.k]
            k_nearest_labels = self.y_train[k_indices]
            # Majority vote; ties resolve to the smallest label.
            y_pred[i] = np.argmax(np.bincount(k_nearest_labels))
        return y_pred

In [3]:
import numpy as np


# --- Synthetic data: two Gaussian blobs, one per class ----------------------
rng = np.random.default_rng(0)
n_per_class = 50
# Draw class 0 first, then class 1, so the seeded random stream is consumed
# in a fixed order.
X0 = rng.normal(loc=[0, 0], scale=0.5, size=(n_per_class, 2))
X1 = rng.normal(loc=[3, 3], scale=0.5, size=(n_per_class, 2))
X = np.vstack((X0, X1))
y = np.repeat([0, 1], n_per_class)

# --- Shuffle features and labels with one shared permutation ----------------
perm = rng.permutation(len(X))
X = X[perm]
y = y[perm]

# --- Hold-out split: first `split` rows train, the remainder test -----------
split = 70
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# --- Fit on the training rows and predict the held-out rows -----------------
clf = KNN(k=3)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# --- Report accuracy and a side-by-side sample of predictions ---------------
is_correct = y_pred == y_test
acc = is_correct.mean()
print(f"Test accuracy: {acc:.3f}")
print("First 10 predictions vs truth:")
print(np.column_stack((y_pred[:10], y_test[:10])))


Test accuracy: 1.000
First 10 predictions vs truth:
[[1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [0. 0.]
 [1. 1.]
 [1. 1.]]
