In [1]:
# 6620422014 Theeratorn Phuphaisan
# SGD

import numpy as np
np.set_printoptions(precision=2)
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

x = np.array([[0,1],[2,6],[3,8]]) #x1, x2
y = np.array([1,1,4])
x_b = np.c_[np.ones((x.shape[0],1)),x]
xtest = np.array([[4,9]])

def cost_function(theta, x, y, N):
    y_hat = x.dot(theta)
    c = (1/N) * np.sum((y_hat - y)**2)
    return c

def stochastic_gradient_descent(alpha, x, y, ep=0.001, max_iter=10000):
    converged = False
    iter = 0
    N = x.shape[0]  # number of samples
    print("Num of data = ", N)

    # Initial theta
    theta = np.random.random((x.shape[1], 1))
    print("Init theta.shape = ", theta.shape)

    # Total error, J(theta)
    J = cost_function(theta, x, y, N)
    print("First J = ", J)

    # Iterate Loop
    while not converged and iter < max_iter:
        for i in range(N):
            xi = x[i:i+1]  # get the i-th sample
            yi = y[i:i+1]  # get the i-th target value

            y_hat = xi.dot(theta)
            diff = y_hat - yi
            grad = xi.T.dot(diff)

            theta = theta - alpha * grad

        # Error after one pass through the dataset
        J2 = cost_function(theta, x, y, N)

        if abs(J - J2) <= ep:
            print("       Converged, iterations: ", iter, "/", max_iter)
            converged = True

        J = J2  # update error
        iter += 1  # update iter

        if iter % 100 == 0:
            print(f"Iteration {iter}, J = {J}")

        if iter == max_iter:
            print('       Max iterations exceeded!')
            converged = True

    return theta

if __name__ == '__main__':
    print("start main")
    print(x_b.shape)
    y = y.reshape(-1, 1)
    print(y.shape)

    alpha = 0.01  # learning rate
    # Training process
    theta = stochastic_gradient_descent(alpha, x_b, y, ep=0.000000000001, max_iter=1000000)
    print("Theta = ", theta)

    # Predict trained x
    xtest = np.array([[4, 9]])
    xtest_b = np.c_[np.ones((xtest.shape[0], 1)), xtest]
    y_p = xtest_b.dot(theta)
    print("y predict = ", y_p)

start main
(3, 3)
(3, 1)
Num of data =  3
Init theta.shape =  (3, 1)
First J =  2.1073923717608825
Iteration 100, J = 0.9878704185271646
Iteration 200, J = 0.9620797434689516
Iteration 300, J = 0.9396519254363042
Iteration 400, J = 0.9174974401546911
Iteration 500, J = 0.8955915190306613
Iteration 600, J = 0.8740803256445547
Iteration 700, J = 0.853034095070302
Iteration 800, J = 0.8324747344735546
Iteration 900, J = 0.8124033668578209
Iteration 1000, J = 0.7928131054013354
Iteration 1100, J = 0.7736941842350702
Iteration 1200, J = 0.7550359251377268
Iteration 1300, J = 0.7368274766494225
Iteration 1400, J = 0.7190580876328891
Iteration 1500, J = 0.7017172058116476
Iteration 1600, J = 0.6847945107766134
Iteration 1700, J = 0.6682799225441404
Iteration 1800, J = 0.652163601053612
Iteration 1900, J = 0.6364359423632973
Iteration 2000, J = 0.6210875736977692
Iteration 2100, J = 0.6061093481506432
Iteration 2200, J = 0.5914923393413385
Iteration 2300, J = 0.577227836135747
Iteration 2400, 

In [2]:
# Mini-batch GD
# size = 2

# import numpy as np
# np.set_printoptions(precision=2)
# from sklearn.datasets import make_regression
# import matplotlib.pyplot as plt
# from sklearn.linear_model import LinearRegression

x = np.array([[0,1],[2,6],[3,8]]) #x1, x2
y = np.array([1,1,4])
x_b = np.c_[np.ones((x.shape[0],1)),x]
xtest = np.array([[4,9]])

def cost_function(theta, x, y, N):
    y_hat = x.dot(theta)
    c = (1/N) * np.sum((y_hat - y)**2)
    return c

def mini_batch_gradient_descent(alpha, x, y, batch_size=2, ep=0.001, max_iter=10000):
    converged = False
    iter = 0
    N = x.shape[0]  # number of samples
    print("Num of data = ", N)

    # Initial theta
    theta = np.random.random((x.shape[1], 1))
    print("Init theta.shape = ", theta.shape)

    # Total error, J(theta)
    J = cost_function(theta, x, y, N)
    print("First J = ", J)

    # Shuffle data indices for better mini-batch performance
    indices = np.arange(N)

    # Iterate Loop
    while not converged and iter < max_iter:
        np.random.shuffle(indices)  # Shuffle the indices

        # Process each mini-batch
        for start in range(0, N, batch_size):
            end = min(start + batch_size, N)
            mini_batch_indices = indices[start:end]

            x_batch = x[mini_batch_indices]
            y_batch = y[mini_batch_indices]

            y_hat = x_batch.dot(theta)
            diff = y_hat - y_batch
            grad = x_batch.T.dot(diff) / batch_size

            theta = theta - alpha * grad

        # Error after one pass through the dataset
        J2 = cost_function(theta, x, y, N)

        if abs(J - J2) <= ep:
            print("       Converged, iterations: ", iter, "/", max_iter)
            converged = True

        J = J2  # update error
        iter += 1  # update iter

        if iter % 100 == 0:
            print(f"Iteration {iter}, J = {J}")

        if iter == max_iter:
            print('       Max iterations exceeded!')
            converged = True

    return theta

if __name__ == '__main__':
    print("start main")
    print(x_b.shape)
    y = y.reshape(-1, 1)
    print(y.shape)

    alpha = 0.01  # learning rate
    # Training process
    theta = mini_batch_gradient_descent(alpha, x_b, y, batch_size=2, ep=0.000000000001, max_iter=1000000)
    print("Theta = ", theta)

    # Predict trained x
    xtest = np.array([[4, 9]])
    xtest_b = np.c_[np.ones((xtest.shape[0], 1)), xtest]
    y_p = xtest_b.dot(theta)
    print("y predict = ", y_p)


start main
(3, 3)
(3, 1)
Num of data =  3
Init theta.shape =  (3, 1)
First J =  3.3995786378545096
Iteration 100, J = 0.8652785878288596
Iteration 200, J = 0.8611749171013311
Iteration 300, J = 0.8483113869837076
Iteration 400, J = 0.8397048145556336
Iteration 500, J = 0.8475097175963175
Iteration 600, J = 0.8422868371923933
Iteration 700, J = 0.8316636191681722
Iteration 800, J = 0.8092462455779108
Iteration 900, J = 0.7983916923165264
Iteration 1000, J = 0.793505016835725
Iteration 1100, J = 0.7963026312817553
Iteration 1200, J = 0.7772864446119558
Iteration 1300, J = 0.7727813701097312
Iteration 1400, J = 0.7686620429728235
Iteration 1500, J = 0.7521706227222342
Iteration 1600, J = 0.7471659476395351
Iteration 1700, J = 0.7367163201362846
Iteration 1800, J = 0.7324876529793191
Iteration 1900, J = 0.7270028045722472
Iteration 2000, J = 0.7171602424907579
Iteration 2100, J = 0.720448897272209
Iteration 2200, J = 0.702615915716275
Iteration 2300, J = 0.6966096357067878
Iteration 2400, 