In [13]:
import numpy as np
from sklearn.model_selection import train_test_split

In [6]:
def create_synthetic_data(n: int = 1000, d: int = 5, noise: float = 0.1):
    X = np.random.randn(n, d)

    true_w = np.random.randint(0, 20, d)
    y = X @ true_w + noise * np.random.randn(n)

    return X, y, true_w

In [11]:
class KNNBase:
    def __init__(self, k: int = 3, metric: str = 'euclidean'):
        self.k = k
        if metric not in ('euclidean', 'manhattan'):
            raise ValueError('Unsupported distance metric')
        self.metric = metric
    
    def _distances(self, X):
        match self.metric:
            case 'euclidean': return np.sqrt(np.sum((X[:, None, :] - self.X_train[None, :, :])**2, axis=2))
            case 'manhattan': return np.sum(np.abs(X[:, None, :] - self.X_train[None, :, :], axis=2))

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y
    
class KNNClassifier(KNNBase):
    """k-nearest-neighbour classifier using a majority vote."""

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Each query point gets the most frequent label among its ``k``
        nearest training points. Ties go to the smallest label in sort
        order.

        Returns:
            1-D array with one predicted label per query row.

        Raises:
            ValueError: If ``k`` is not between 1 and the number of
                training samples.
        """
        distances = self._distances(X)
        n_train = distances.shape[1]
        if not 1 <= self.k <= n_train:
            raise ValueError(
                f'k must be between 1 and the number of training samples ({n_train}), got {self.k}'
            )

        # Partitioning around position k - 1 puts the k smallest distances in
        # the first k columns. Using k itself is out of bounds when k equals
        # the number of training samples.
        idx_for_knn = np.argpartition(distances, self.k - 1, axis=1)[:, :self.k]
        neighbor_labels = np.asarray(self.y_train)[idx_for_knn]

        # np.unique returns sorted labels, so argmax over the counts picks the
        # smallest label on a tie. Unlike np.bincount it also accepts float,
        # negative and string labels.
        preds = []
        for row in neighbor_labels:
            labels, counts = np.unique(row, return_counts=True)
            preds.append(labels[counts.argmax()])

        return np.array(preds)

class KNNRegressor(KNNBase):
    """k-nearest-neighbour regressor that averages neighbour targets."""

    def predict(self, X):
        """Predict a target for every row of ``X``.

        Each prediction is the unweighted mean of the targets of the ``k``
        nearest training points.

        Returns:
            1-D array with one predicted value per query row.

        Raises:
            ValueError: If ``k`` is not between 1 and the number of
                training samples.
        """
        distances = self._distances(X)
        n_train = distances.shape[1]
        if not 1 <= self.k <= n_train:
            raise ValueError(
                f'k must be between 1 and the number of training samples ({n_train}), got {self.k}'
            )

        # Partitioning around position k - 1 puts the k smallest distances in
        # the first k columns. Using k itself is out of bounds when k equals
        # the number of training samples.
        idx_for_knn = np.argpartition(distances, self.k - 1, axis=1)[:, :self.k]
        neighbor_vals = np.asarray(self.y_train)[idx_for_knn]

        return neighbor_vals.mean(axis=1)
       

In [14]:
# Draw a synthetic linear-regression dataset and hold out 30% of it for testing.
# NOTE(review): create_synthetic_data draws from NumPy's global RNG and no seed
# is set in the visible cells, so X/y (and the values printed below) change from
# run to run - only the train/test split is pinned via random_state=0.
X, y, true_w = create_synthetic_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Fit a 5-nearest-neighbour regressor (default Euclidean metric) on the training split.
knn = KNNRegressor(k=5)
knn.fit(X_train, y_train)

# Predict the held-out targets and show the first ten as a quick sanity check.
preds = knn.predict(X_test)
print(preds[:10])


[-15.62189653  39.23053484  22.90202139  11.63286229 -31.07975524
 -17.56142049  21.81469677  19.7263316   10.68693275   5.871639  ]
