In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_boston
boston = load_boston()

X = boston.data
y = boston.target
m = X.shape[0]  #number of samples
n = X.shape[1]  #number of features

assert m == y.shape[0]


In [2]:
# Standardize/Normallize Data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [3]:
# Split Data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)
assert len(X_train)  == len(y_train)
assert len(X_test) == len(y_test)

In [4]:
# Insert Intercept (Theta0) to X
intercept = np.ones((X_train.shape[0], 1))
X_train = np.concatenate((intercept, X_train), axis=1)

intercept = np.ones((X_test.shape[0], 1))
X_test = np.concatenate((intercept, X_test), axis=1)

In [5]:
# Mean Square Error
def MSE(yhat, y):
    return (((yhat - y)**2).sum()) / yhat.shape[0]

# y hat
def hx(X, theta):
    return X @ theta

def gradient(X, error):
    return X.T @ error

In [101]:
# Batch Gradient Descent Algorithm
from time import time

iter_stop = 0
max_iter = 1000
loss_old = 10000
tol = 0.0001
alpha = 0.0001
theta = np.zeros(X_train.shape[1])

start = time()
for i in range(max_iter):

    yhat = hx(X_train, theta)
    error = yhat - y_train
    grad = gradient(X_train, error)

    theta = theta - alpha * grad

    loss_new = MSE(yhat, y_train)
    diff = np.abs(loss_new - loss_old)

    if diff < tol:
        iter_stop = i+1
        break
    else:
        loss_old = loss_new
        
time_taken = time() - start

yhat_test = hx(X_test, theta)
mse = MSE(yhat_test, y_test)
print("*** Batch Gradient Descent ***")
print("MSE: ", mse)
print("Stop at iteration: ", iter_stop)
print("Time used: ", time_taken)

*** Batch Gradient Descent ***
MSE:  25.266071259034653
Stop at iteration:  726
Time used:  0.01485753059387207


In [104]:
# Stochastic Gradient Descent Algorithm
iter_stop = 0
max_iter = 10000
loss_old = 10000
tol = 0.001
alpha = 0.001
theta = np.zeros(X_train.shape[1])

start = time()
for i in range(max_iter):

    rand = np.random.randint(0,X_train.shape[0])
    X_rand = X_train[rand,:].reshape(1,-1)
    y_rand = y_train[rand].reshape(1)

    yhat = hx(X_rand, theta)
    error = yhat - y_rand
    grad = gradient(X_rand, error)

    theta = theta - alpha * grad

    loss_new = MSE(yhat, y_rand)
    diff = np.abs(loss_new - loss_old)
    
    iter_stop = i+1
    if diff < tol:
        break
    else:
        loss_old = loss_new
time_taken = time() - start

yhat_test = hx(X_test, theta)
mse = MSE(yhat_test, y_test)
print("*** Stochastic Gradient Descent ***")
print("MSE: ", mse)
print("Stop at iteration: ", iter_stop)
print("Time used: ", time_taken)

*** Stochastic Gradient Descent ***
MSE:  27.176585879899196
Stop at iteration:  3592
Time used:  0.07401156425476074


In [11]:
print(X_train.shape)

(354, 14)


In [98]:
# Mini-Batch Gradient Descent Algorithm
iter_stop = 0
max_iter = 10000
loss_old = 10000
tol = 0.01
alpha = 0.001
theta = np.zeros(X_train.shape[1])

Batch_size = 10
print(X_train.shape)

start = time()
for i in range(max_iter):
    #X_train.shape[0]/Batch_size
    j = i%((X_train.shape[0]//Batch_size)+1)

    if j == (X_train.shape[0]//Batch_size):
        remain = X_train.shape[0] - (X_train.shape[0]//Batch_size)*Batch_size
        X_mini = X_train[(j*Batch_size):(j*Batch_size)+remain,:].reshape(remain,-1)
        y_mini = y_train[(j*Batch_size):(j*Batch_size)+remain].reshape(-1)
    else:
        X_mini = X_train[(j*Batch_size):(j+1)*Batch_size,:].reshape(Batch_size,-1)
        y_mini = y_train[(j*Batch_size):(j+1)*Batch_size].reshape(-1)

    yhat = hx(X_mini, theta)
    error = yhat - y_mini
    grad = gradient(X_mini, error)

    theta = theta - alpha * grad

    loss_new = MSE(yhat, y_mini)
    diff = np.abs(loss_new - loss_old)
    
    iter_stop = i+1
    if diff < tol:
        break
    else:
        loss_old = loss_new
time_taken = time() - start

yhat_test = hx(X_test, theta)
mse = MSE(yhat_test, y_test)
print("*** Mini-Batch Gradient Descent ***")
print("MSE: ", mse)
print("Stop at iteration: ", iter_stop)
print("Time used: ", time_taken)

(354, 14)
*** Mini-Batch Gradient Descent ***
MSE:  25.250858546151928
Stop at iteration:  1364
Time used:  0.0269315242767334


In [178]:
# Linear Regression Class

class LinearRegression:
    import numpy as np
    from time import time    
    def __init__(self, method="batch", max_iter=10000, 
            tol=0.001, alpha=0.0001):
        self.method = method
        self.max_iter = max_iter
        self.tol = tol
        self.alpha = alpha

    def set_batchsize(self, batch_size):
        self.batch_size = batch_size

    def fit(self, X_train, y_train):
        assert len(X_train)  == len(y_train)
        assert len(X_test) == len(y_test)
        loss_old = 10000
        self.iter_stop = 0
        self.theta = np.zeros(X_train.shape[1])
        start = time()
        for i in range(self.max_iter):
            if self.method == "batch":
                self.X_train = X_train
                self.y_train = y_train
            elif self.method == "sto":
                rand = np.random.randint(0,X_train.shape[0])
                self.X_train = X_train[rand,:].reshape(1,-1)
                self.y_train = y_train[rand].reshape(1)
            elif self.method == "mini":
                j = i%((X_train.shape[0]//self.batch_size)+1)
                if j == (X_train.shape[0]//self.batch_size):
                    remain = X_train.shape[0] - (X_train.shape[0]//self.batch_size)*self.batch_size
                    self.X_train = X_train[(j*self.batch_size):(j*self.batch_size)+remain,:].reshape(remain,-1)
                    self.y_train = y_train[(j*self.batch_size):(j*self.batch_size)+remain].reshape(-1)
                else:
                    self.X_train = X_train[(j*self.batch_size):(j+1)*self.batch_size,:].reshape(self.batch_size,-1)
                    self.y_train = y_train[(j*self.batch_size):(j+1)*self.batch_size].reshape(-1)
            else:
                print("method is not correct")
                break
            
            yhat = self.hx(self.X_train, self.theta)
            error = yhat - self.y_train
            grad = self.gradient(self.X_train, error)

            if i>0 and i<4:
                pass
                #print(yhat.shape, self.y_train.shape)

            self.theta = self.theta - self.alpha * grad

            loss_new = self.MSE(yhat, self.y_train)
            diff = abs(loss_new - loss_old)
    
            self.iter_stop = i+1
            if diff < self.tol:
                break
            else:
                loss_old = loss_new
        self.time_taken = time() - start

    def evalute(self, X_test, y_test):
        yhat_test = self.hx(X_test, self.theta)
        mse = self.MSE(yhat_test, y_test)
        return mse

    def iter_stop(self):
        return self.iter_stop

    def time_taken(self):
        return self.time_take

    def hx(self, X, theta):
        return X @ theta

    def MSE(self, yhat, y):
        return (((yhat - y)**2).sum()) / yhat.shape[0]

    def gradient(self, X, error):
        return X.T @ error

In [179]:
# Batch
model = LinearRegression()
model.fit(X_train, y_train)
print("MSE: ", model.evalute(X_test, y_test))
print("Stop at iteration: ", model.iter_stop)
print("Time used: ", model.time_taken)

MSE:  25.405786319954665
Stop at iteration:  335
Time used:  0.0059833526611328125


In [180]:
# Mini-Batch
model2 = LinearRegression(method="mini", tol=0.01, alpha=0.001)
model2.set_batchsize(batch_size=10)
model2.fit(X_train, y_train)
print("MSE: ", model2.evalute(X_test, y_test))
print("Stop at iteration: ", model2.iter_stop)
print("Time used: ", model2.time_taken)

MSE:  25.250858546151928
Stop at iteration:  1364
Time used:  0.01880335807800293


In [181]:
# Stochastic
model3 = LinearRegression(method="sto", tol=0.01, alpha=0.001)
model3.set_batchsize(batch_size=10)
model3.fit(X_train, y_train)
print("MSE: ", model3.evalute(X_test, y_test))
print("Stop at iteration: ", model3.iter_stop)
print("Time used: ", model3.time_taken)

MSE:  42.34359804200725
Stop at iteration:  1807
Time used:  0.0397946834564209
