In [56]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_boston
# Load the Boston housing data as a feature matrix X and a target vector y.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- pin the scikit-learn version or switch datasets before re-running.
boston = load_boston()
X, y = boston.data, boston.target

# m = number of samples (rows), n = number of features (columns)
m, n = X.shape[0], X.shape[1]

# Every sample must come with exactly one target value.
assert y.shape[0] == m


In [57]:
# Standardize / normalize the features with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset *before* the
# train/test split below, so test-set statistics influence the scaling
# (data leakage). Consider fitting on the training rows only.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [58]:
# Split Data: 70% training rows, 30% held-out test rows.
from sklearn.model_selection import train_test_split

# A fixed random_state makes the partition (and therefore every MSE reported
# further down) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Features and targets must stay aligned row-for-row within each partition.
assert len(X_train) == len(y_train)
assert len(X_test) == len(y_test)

In [59]:
# Prepend a column of ones to X so that theta[0] acts as the intercept term.
# NOTE: X_train / X_test are overwritten, so re-running this cell prepends
# yet another column of ones -- run it exactly once per kernel session.
intercept = np.ones((len(X_train), 1))
X_train = np.hstack((intercept, X_train))

intercept = np.ones((len(X_test), 1))
X_test = np.hstack((intercept, X_test))

In [60]:
# Batch Gradient Descent Algorithm

# Mean Square Error
def MSE(yhat, y):
    """Return the sum of squared differences between yhat and y, divided by
    the length of yhat's first axis.

    yhat -- array of predictions
    y    -- array of true values (must broadcast against yhat)
    """
    residual = yhat - y
    n_rows = yhat.shape[0]
    return np.sum(np.square(residual)) / n_rows

# y hat
def hx(X, theta):
    """Return the linear-model prediction, i.e. the matrix product X @ theta.

    X     -- design matrix
    theta -- parameter vector
    """
    prediction = np.matmul(X, theta)
    return prediction

def gradient(X, error):
    """Return X transposed times error.

    No 1/m factor is applied here, so the magnitude of the result grows with
    the number of rows in X; callers compensate through the learning rate.
    """
    X_transposed = X.T
    return np.matmul(X_transposed, error)

In [61]:
from time import time

# Batch gradient descent: every parameter update uses the whole training set.
iter_stop = 0        # number of iterations actually executed
max_iter = 1000      # hard cap on the number of updates
loss_old = 10000     # presumably a deliberately large "previous loss" so the first diff exceeds tol
tol = 0.0001         # stop once the loss changes by less than this between iterations
alpha = 0.0001       # learning rate (small because gradient() is not divided by m)
theta = np.zeros(X_train.shape[1])

start = time()
for i in range(max_iter):

    yhat = hx(X_train, theta)
    error = yhat - y_train
    grad = gradient(X_train, error)

    theta = theta - alpha * grad

    # The loss is evaluated on yhat, i.e. for the parameters *before* this update.
    loss_new = MSE(yhat, y_train)
    diff = np.abs(loss_new - loss_old)

    # Record progress on every pass so that a run which exhausts max_iter
    # reports max_iter instead of the initial 0 (matches the SGD cell).
    iter_stop = i + 1
    if diff < tol:
        break
    loss_old = loss_new

time_taken = time() - start

yhat_test = hx(X_test, theta)
mse = MSE(yhat_test, y_test)
print("*** Batch Gradient Descent ***")
print("MSE: ", mse)
print("Stop at iteration: ", iter_stop)
print("Time used: ", time_taken)

*** Batch Gradient Descent ***
MSE:  21.193367515504956
Stop at iteration:  758
Time used:  0.01263117790222168


In [83]:
# Stochastic gradient descent: every parameter update uses ONE randomly
# chosen training sample.
iter_stop = 0        # number of iterations actually executed
max_iter = 1000      # hard cap on the number of updates
loss_old = 10000     # presumably a deliberately large "previous loss" so the first diff exceeds tol
tol = 0.0001         # stop once the loss changes by less than this between iterations
alpha = 0.01         # learning rate
# Size theta from X_train: X_rand does not exist yet at this point, so the
# original X_rand.shape[1] only worked with leftover kernel state.
theta = np.zeros(X_train.shape[1])

# Dedicated, seeded generator so the sampled rows (and the reported MSE) are
# reproducible without touching numpy's global random state.
rng = np.random.RandomState(42)

start = time()
for i in range(max_iter):

    # Draw one training row uniformly at random (with replacement across iterations).
    rand = rng.randint(0, X_train.shape[0])
    X_rand = X_train[rand, :].reshape(1, -1)
    y_rand = y_train[rand].reshape(1)

    yhat = hx(X_rand, theta)
    error = yhat - y_rand
    grad = gradient(X_rand, error)

    theta = theta - alpha * grad

    # NOTE(review): the loss is measured on a single, different sample each
    # iteration, so this stopping test is noisy and may fire by chance.
    loss_new = MSE(yhat, y_rand)
    diff = np.abs(loss_new - loss_old)

    iter_stop = i + 1
    if diff < tol:
        break
    loss_old = loss_new

# Measure this cell's own run time; previously the stale value left over from
# the batch gradient descent cell was printed.
time_taken = time() - start

yhat_test = hx(X_test, theta)
mse = MSE(yhat_test, y_test)
print("*** Stochastic Gradient Descent ***")
print("MSE: ", mse)
print("Stop at iteration: ", iter_stop)
print("Time used: ", time_taken)

*** Stochastic Gradient Descent ***
MSE:  20.5522044520857
Stop at iteration:  1000
Time used:  0.010968685150146484
