In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
# cost or loss function
def cost(Y, Yhat):
    return np.mean((Yhat - Y) ** 2)

In [3]:
path = '../../data/KNN_Linear_Regression/real_estate.csv'

df = pd.read_csv(path)
df.head()

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
0,1,2012.917,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833,5.0,390.5684,5,24.97937,121.54245,43.1


In [5]:
X = df.drop(['No', 'Y house price of unit area'], axis=1)
y = df['Y house price of unit area']

X_train = X[:350]
y_train = y[:350]

X_test = X[350:]
y_test = y[350:]

y_train = y_train.to_numpy().reshape(-1, 1)
y_test = y_test.to_numpy().reshape(-1, 1)

In [16]:
d0 = 6
d1 = h = 100 # size of hidden layer
d2 = C = 1
# initialize parameters randomly
W1 = 0.01*np.random.randn(d0, d1)
b1 = np.zeros((d1, 1))
W2 = 0.01*np.random.randn(d1, d2)
b2 = np.zeros((d2, 1))

N = X_train.T.shape[1]
eta = 0.0001 # learning rate

for i in range(10000):
    ## Feedforward
    Z1 = np.dot(W1.T, X_train.T) + b1
    A1 = np.where(Z1 > 0, Z1, 0.01 * Z1)  # LeakyReLU
    Z2 = np.dot(W2.T, A1) + b2
    Yhat = Z2

    # print loss after each 1000 iterations
    if i %1000 == 0:
        # compute the loss: average cross-entropy loss
        loss = cost(y_train, Yhat)
        print("iter %d, loss: %f" %(i, loss))

    # backpropagation
    E2 = (Yhat - y_train.T)/N
    dW2 = np.dot(A1, E2.T)
    db2 = np.sum(E2, axis = 1, keepdims = True)
    E1 = np.dot(W2, E2)
    E1 = np.where(Z1 > 0, E1, 0.01 * E1)  # Gradient of LeakyReLU
    dW1 = np.dot(X_train.T, E1.T)
    db1 = np.sum(E1, axis = 1, keepdims = True)
    
    # Gradient clipping (To avoid booming gradient)
    clip_value = 1.0
    dW1 = np.clip(dW1, -clip_value, clip_value)
    dW2 = np.clip(dW2, -clip_value, clip_value)
    db1 = np.clip(db1, -clip_value, clip_value)
    db2 = np.clip(db2, -clip_value, clip_value)
    
    # Gradient Descent update
    W1 += -eta*dW1
    b1 += -eta*db1
    W2 += -eta*dW2
    b2 += -eta*db2

iter 0, loss: 1580.515019
iter 1000, loss: 296.063281
iter 2000, loss: 298.367582
iter 3000, loss: 300.015600
iter 4000, loss: 301.025891
iter 5000, loss: 301.619004
iter 6000, loss: 302.042452
iter 7000, loss: 302.840697
iter 8000, loss: 303.258380
iter 9000, loss: 303.398037


In [17]:
Z1 = np.dot(W1.T, X_test.T) + b1
A1 = np.maximum(Z1, 0)
Z2 = np.dot(W2.T, A1) + b2

Z2

array([[42.36272041, 26.00452004, 22.15897056, 26.09401979, 26.94159442,
        46.94302625, 48.4689472 , 49.19492141, 51.61458603, 24.93936947,
        46.21299168, 39.93073827, 31.55270432, 45.91896775, 34.53653192,
        23.59683391, 25.3106359 , 25.14743524, 43.64810344, 23.28409893,
        45.84664385, 48.3298802 , 44.24541041, 49.76542118, 46.40541541,
        26.44950488, 25.53055835, 53.86953047, 34.6677154 , 49.85382441,
        46.31103216, 51.43211484, 19.87828203, 38.00179028, 18.3421388 ,
        49.95885591, 51.48293723, 24.19852181, 24.78509459, 43.31970896,
        40.2017502 , 26.25578651, 36.08043044, 31.59230941, 15.44436471,
        39.85027454, 27.25366249, 28.71234636, 25.53055835, 48.71917004,
        39.16460494, 25.42280531, 48.36093636, 45.08676644, 45.73619211,
        47.4128385 , 47.8445637 , 24.99835794, 23.60682087, 18.70156707,
        52.8768314 , 43.23649026, 51.5886255 , 52.67279926]])

In [18]:
print("MSE:", mean_squared_error(y_test, Z2[0]))

MSE: 54.25336396419151


In [19]:
linR = LinearRegression()

linR.fit(X_train, y_train)

y_pred = linR.predict(X_test)

print("MSE:", mean_squared_error(y_test, y_pred))

MSE: 65.18991450477287
