In [1]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [2]:
X, y = load_boston()['data'], load_boston()['target']
X_fixed, X_test, y_fixed, y_test = train_test_split(X, y, test_size=0.1, shuffle=True, random_state=1234)

In [3]:
X_fixed, X_train, y_fixed, y_train = train_test_split(X_fixed, y_fixed, test_size=0.3, shuffle=True, random_state=1234)

In [4]:
print(X_fixed.shape, y_fixed.shape)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(318, 13) (318,)
(137, 13) (137,)
(51, 13) (51,)


### REGRESSION EXPERIMENT

In [5]:
K = 6
epochs = 100

In [6]:
def distance(X1, X2):
    dist = np.sum((X1 - X2)**2, axis = 1, keepdims=True)
    dist = np.sqrt(dist)
    return dist

In [7]:
def mse(predictions, ground_truth):
    m = len(predictions)
    loss = 1/(2*m) * (np.sum((ground_truth - predictions), axis=1, keepdims=True)**2)
    return loss

In [8]:
W = np.random.randn(1, K)
b = np.zeros(shape=(1,1))

In [14]:
'''
STEPS:
1. calculate the distance (of k-nearest neighbors)
2. for each x_test: we get a distance (d) belongs to (K, 1)
                    calculate, np.dot(W, d) + b (belongs to 1,1)
                    once we got D (d of all x_test), calculate the loss (L)

3. Calculate dL/dW = dL/dD * dD/dW
'''

for epoch in range(epochs):
    Distances = []
    for x_train in X_train:
        x_train = x_train.reshape(1, len(x_train))
        distances = {}
        for (x_fixed, yf) in zip(X_fixed, y_fixed):
            x_fixed = x_fixed.reshape(1, len(x_fixed))
            dist = distance(x_train, x_fixed)
            distances[float(dist)] = yf
            
        k = 0
        k_dist = []
        for i in sorted(distances):
            k += 1
            k_dist.append(distances[i])
            if k == K:
                break
        
        all_distances = np.array(k_dist).reshape(K, 1)
        y_d = np.dot(W, all_distances) + b
        Distances.append(int(y_d))
        
    Distances = np.array(Distances)
    print(Distances)
    break

[-152 -109  -64  -47  -73  -87  -34  -27  -48  -77  -38  -56  -67  -82
  -54 -102  -72  -29  -28  -31 -155  -45  -39  -68  -27  -79  -78 -103
  -48  -50  -88  -56  -21  -48  -79 -105  -58  -93 -161  -39  -34  -64
  -58  -32  -22  -52  -64  -72  -78  -66 -143  -72  -66  -39  -30  -30
  -92 -117  -55  -59 -126  -51 -132  -56  -62  -19  -31   -5  -31  -69
 -118  -91   -4  -47  -39  -86  -62  -79  -98  -66  -59  -52 -167 -127
  -57  -76  -20  -66 -106  -87  -65  -57  -56  -66  -68 -113  -64  -55
  -31  -88  -54  -30  -25  -47  -77 -112  -58  -91  -35  -34 -119  -46
  -45  -66  -97  -55  -67  -85  -73  -55  -28  -23  -60  -61  -65  -44
  -67  -37  -35  -55 -124  -70  -55  -87  -57  -58  -33]


In [10]:
for x_train in X_train:
    x_train = x_train.reshape(1, len(x_train))
    print(x_train.shape)
    break

(1, 13)
