In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [2]:
# Load the Boston housing features and targets with a single dataset read
# (the original called load_boston() twice to pull two fields of one bunch).
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so
# this cell requires scikit-learn < 1.2.
X, y = load_boston(return_X_y=True)

# Hold out 10% of the samples as the final test set. The remaining 90% is
# kept under the *_fixed names and split once more in the next cell.
X_fixed, X_test, Y_fixed, Y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=1234
)

In [3]:
# Split the non-test samples 70/30: the larger part stays as the "fixed"
# reference set that neighbours are drawn from, the smaller part becomes the
# training set used to fit W and b.
# NOTE: X_fixed / Y_fixed are overwritten here, so running this cell a second
# time without re-running the previous one splits the already reduced set.
X_fixed, X_train, Y_fixed, Y_train = train_test_split(
    X_fixed,
    Y_fixed,
    test_size=0.3,
    shuffle=True,
    random_state=1234,
)

In [4]:
# Sanity check: print the (features, targets) shapes of each partition in the
# order fixed, train, test.
for features, targets in ((X_fixed, Y_fixed), (X_train, Y_train), (X_test, Y_test)):
    print(features.shape, targets.shape)

(318, 13) (318,)
(137, 13) (137,)
(51, 13) (51,)


### REGRESSION EXPERIMENT

In [5]:
# Number of nearest "fixed" neighbours whose targets feed each prediction;
# also the length of the weight vector W and the 1/K scaling factor.
K = 6

# Step size applied to the W and b gradient updates.
learning_rate = 1e-2

# Intended number of passes over the training set.
# NOTE(review): no visible cell reads this value; training runs a single pass.
epochs = 10

In [6]:
def distance(X1, X2):
    """Row-wise Euclidean distance between two 2-D arrays.

    X1 and X2 are subtracted with normal NumPy broadcasting, so either may
    have a single row that is compared against every row of the other.

    Returns an array of shape (n_rows, 1) holding the distance for each row
    of the broadcast difference.
    """
    delta = X1 - X2
    squared_norm = np.square(delta).sum(axis=1, keepdims=True)
    return np.sqrt(squared_norm)

In [7]:
def mse(predictions, ground_truth):
    """Squared-error loss per row, scaled by 1/K.

    Both arguments must be 2-D arrays of broadcast-compatible shape (the
    reduction runs over axis 1). K is the module-level neighbour count.

    The residuals are squared *before* they are summed. The previous version
    squared the summed residual, which let errors of opposite sign cancel
    whenever a row held more than one value. For the (1, 1) inputs used in
    the training loop the result is unchanged.

    Returns an array of shape (n_rows, 1).
    """
    residual = ground_truth - predictions
    loss = 1/K * np.sum(residual**2, axis=1, keepdims=True)
    return loss

### Defining the Weights and the Bias

In [8]:
# One weight per neighbour rank (nearest first) plus a scalar bias.
# The weights come from a seeded generator so that Restart & Run All
# reproduces the same starting point; the seed matches the random_state
# used for the data splits.
W = np.random.RandomState(1234).randn(1, K)
b = np.zeros(shape=(1, 1))

In [9]:
# Step 1: for every training sample, collect the targets of its K nearest
#         "fixed" samples, ordered from nearest to farthest.
# Step 2: predict from the weighted sum of those targets and take one
#         gradient-descent step on W and b per sample.
#
# NOTE: this is a single pass over X_train; the `epochs` setting is not used.

for (x_train, y_train) in zip(X_train, Y_train):
    x_train = x_train.reshape(1, len(x_train))

    # Distances to all fixed samples in one broadcasted call:
    # (1, n_features) against (n_fixed, n_features) -> (n_fixed,)
    all_dist = distance(x_train, X_fixed).ravel()

    # Indices of the K smallest distances. Indexing by position (instead of a
    # dict keyed by the distance value) keeps samples that are tied at the
    # same distance, so exactly K neighbours are always selected. The stable
    # sort keeps ties in their original order.
    nearest = np.argsort(all_dist, kind="stable")[:K]

    # Neighbour targets as a (K, 1) column, nearest first.
    D = Y_fixed[nearest].reshape(K, 1)

    # Forward pass: weighted sum of neighbour targets, scaled by 1/K.
    predictions = 1/K * np.dot(W, D) + b
    y_train = np.array(y_train).reshape(1, 1)
    loss = mse(predictions, y_train)
    #print(loss)

    # Gradient computation.
    # NOTE(review): predictions carries a 1/K factor that W_grad does not
    # include; this only rescales the effective step size for W.
    error = y_train - predictions
    W_grad = -2/K * np.dot(error, D.T)
    b_grad = -2/K * np.sum(error, axis=1, keepdims=True)

    # In-place SGD update of the shared parameters.
    W -= learning_rate * W_grad
    b -= learning_rate * b_grad

In [10]:
# Predict every held-out sample with the learned W and b, using the same
# forward pass as training: 1/K * W . (targets of the K nearest fixed samples) + b.

predictions = []
for x_test in X_test:
    x_test = x_test.reshape(1, len(x_test))

    # Distances to all fixed samples in one broadcasted call -> (n_fixed,)
    all_dist = distance(x_test, X_fixed).ravel()

    # Positions of the K nearest fixed samples. Indexing by position keeps
    # samples tied at the same distance; a dict keyed by distance drops them.
    nearest = np.argsort(all_dist, kind="stable")[:K]

    # Neighbour targets for THIS test sample as a (K, 1) column, nearest first.
    D_test = Y_fixed[nearest].reshape(K, 1)

    # Use D_test here: the previous code multiplied W by `D`, the neighbour
    # vector left over from the last training sample, so every test sample
    # received the same prediction.
    pred_test = 1/K * np.dot(W, D_test) + b

    # .item() extracts the scalar from the (1, 1) array without relying on
    # the deprecated float() conversion of a multi-dimensional array.
    predictions.append(pred_test.item())