In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_regression
import random
from sklearn.model_selection import train_test_split

In [24]:
# Synthetic regression data: 50 samples, 5 features (2 informative), noise=5,
# generated with a fixed seed so the notebook is reproducible.
X, y = make_regression(
    n_samples=50,
    n_features=5,
    n_informative=2,
    noise=5,
    random_state=42,
)
# Wrap in pandas containers; feature columns are named col_0 ... col_{n-1}.
X = pd.DataFrame(X, columns=[f'col_{i}' for i in range(X.shape[1])])
y = pd.Series(y)
# Hold out 20% of the rows for testing (seeded split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [30]:
def euclidean(X_train, X_test):
    """Euclidean (L2) distance between two 1-D numeric vectors.

    Both arguments must support elementwise arithmetic (e.g. NumPy arrays
    or pandas Series). Returns the square root of the summed squared
    coordinate differences.
    """
    delta = X_test - X_train
    squared_total = sum(delta ** 2)
    return np.sqrt(squared_total)

In [31]:
def manhattan(X_train, X_test):
    """Manhattan (L1) distance between two 1-D numeric vectors.

    Both arguments must support elementwise arithmetic (e.g. NumPy arrays
    or pandas Series). Returns the sum of the absolute coordinate
    differences.
    """
    abs_delta = abs(X_test - X_train)
    return sum(abs_delta)

In [32]:
def chebyshev(X_train, X_test):
    """Chebyshev (L-infinity) distance between two 1-D numeric vectors.

    Both arguments must support elementwise arithmetic (e.g. NumPy arrays
    or pandas Series). Returns the largest absolute coordinate difference.
    """
    abs_delta = abs(X_train - X_test)
    return max(abs_delta)

In [33]:
def cosine(X_train, X_test):
    """Cosine distance between two 1-D numeric vectors.

    Computes ``1 - (a . b) / (||a|| * ||b||)``: 0 for vectors pointing in
    the same direction, 1 for orthogonal vectors, 2 for opposite vectors.

    Both arguments must support elementwise arithmetic (e.g. NumPy arrays
    or pandas Series). There is no special handling for an all-zero
    vector, for which the denominator is zero.
    """
    norm_train = np.sqrt(sum(X_train ** 2))
    norm_test = np.sqrt(sum(X_test ** 2))
    dot_product = sum(X_train * X_test)

    # Cosine similarity divides by the PRODUCT of the two norms; dividing
    # by their sum does not give 0 for identical vectors.
    return 1 - dot_product / (norm_train * norm_test)

In [39]:
class MyKNNReg:
    """Brute-force k-nearest-neighbours regressor.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training rows whose targets are averaged.
    metric : str or callable, default 'euclidean'
        Either the name of a distance function present in this module's
        global namespace (resolved with ``globals()``; an unknown name
        raises ``KeyError``), or a callable ``metric(a, b)`` returning a
        number for two rows.
    """

    def __init__(self, k=3, metric='euclidean'):
        self.k = k
        # A callable is used directly; a string is resolved by name.
        self.metric = metric if callable(metric) else globals()[metric]

    def __str__(self):
        return f"MyKNNReg class: k={self.k}"

    def fit(self, X, y):
        """Store the training data; no computation happens here.

        ``X`` must expose ``.shape`` and ``.iloc`` (e.g. a pandas
        DataFrame) and ``y`` must expose ``.iloc`` (e.g. a pandas Series),
        because ``predict`` indexes both positionally.
        """
        self.X = X
        self.y = y
        self.train_size = X.shape

    def predict(self, X):
        """Predict a target for every row of ``X``.

        For each query row the distance to every stored training row is
        computed with ``self.metric(query_row, train_row)``, and the mean
        target of the ``k`` closest training rows is returned.

        Returns a 1-D NumPy array with one prediction per row of ``X``.
        """
        n_samples = X.shape[0]
        n_train = self.train_size[0]
        y_pred = np.ones(n_samples)
        for i in range(n_samples):
            # Fetch the query row once instead of once per training row.
            query = X.iloc[i]
            distances = np.zeros(n_train)
            for j in range(n_train):
                distances[j] = self.metric(query, self.X.iloc[j])
            nearest = np.argsort(distances)[:self.k]
            y_pred[i] = self.y.iloc[nearest].mean()
        return y_pred

In [40]:
# Three neighbours, default metric.
knn = MyKNNReg(k=3)

In [41]:
# Store the training split inside the model.
knn.fit(X=X_train, y=y_train)

In [42]:
# Predictions for the held-out rows (displayed as the cell output).
knn.predict(X=X_test)

array([  9.64639385,  52.39652796,  50.52284562, -17.86069339,
       -71.97584191,  -3.20141951,  22.59876192,   7.71650566,
       -10.29661187,  53.88765247])