<a href="https://colab.research.google.com/github/RatanakamonS/DADS6003-2024-S_RTNKMN/blob/main/HW2(DADS6003)_6620422002_Ratanakamon_Somklang.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 6620422002 Ratanakamon Somklang

## 1. Implement SGD and Mini-Batch Gradient Descent, based on https://github.com/ekaratnida/Applied-machine-learning/blob/master/Week03-MLR/Lab3.ipynb

## 1.1 Stochastic Gradient Descent

In [None]:
import numpy as np

In [None]:
def cost_function(theta, x, y):
    """Return the mean squared error of the linear model ``x.dot(theta)``.

    Args:
        theta: Weight array; ``x.dot(theta)`` must be a valid product.
        x: Design matrix with one sample per row.
        y: Target values, one per sample. A 1-D vector and a column vector
            are both accepted.

    Returns:
        The sum of squared residuals divided by ``len(y)``.
    """
    y_hat = np.asarray(x).dot(theta)
    y = np.asarray(y)

    # A 1-D y against a column-shaped y_hat (or the reverse) would broadcast
    # to an (N, N) matrix and inflate the cost, so align the shapes first.
    if y.shape != y_hat.shape and y.size == y_hat.size:
        y = y.reshape(y_hat.shape)

    return np.sum((y_hat - y) ** 2) / len(y)

In [None]:
def stochastic_gradient_descent(alpha, x, y, ep=0.001, max_iter=10000):
    """Fit linear-regression weights by updating on one sample at a time.

    Every epoch walks the samples in their stored order (no shuffling) and
    applies ``theta -= alpha * xi.T.dot(xi.dot(theta) - yi)`` for each row.
    After each epoch the cost over the full data set is evaluated with
    ``cost_function`` and training stops once it drops below ``ep``.

    Args:
        alpha: Learning rate.
        x: 2-D design matrix, one sample per row. No bias column is added
            here; the caller must include one if an intercept is wanted.
        y: Targets, one per sample. A 1-D vector is treated as a column.
        ep: Cost threshold below which training stops.
        max_iter: Maximum number of epochs; ``0`` returns the random
            initial weights untouched.

    Returns:
        The weight array ``theta`` of shape ``(x.shape[1], 1)``.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if y.ndim == 1:
        # Keep targets as a column so the full-data cost does not broadcast
        # a flat y against the column-shaped predictions.
        y = y.reshape(-1, 1)

    theta = np.random.random((x.shape[1], 1))  # random initial weights

    # A bounded for-loop runs exactly max_iter epochs at most (the former
    # while-loop ran one extra) and avoids shadowing the builtin `iter`.
    for _ in range(max_iter):
        for row, target in zip(x, y):
            xi = row.reshape(1, -1)
            residual = xi.dot(theta) - target
            theta = theta - alpha * xi.T.dot(residual)

        # Converged once the cost over the whole data set is small enough.
        if cost_function(theta, x, y) < ep:
            break

    return theta

In [None]:
if __name__ == '__main__':
    # Toy regression problem: four samples with two features each.
    # The targets satisfy y = 2 * x1 + 3, and x2 is always x1 + 1.
    x = np.array([
        [1, 2],
        [2, 3],
        [3, 4],
        [4, 5],
    ])
    y = np.array([[5], [7], [9], [11]])  # targets as a column vector

    # Prepend a column of ones so the first weight acts as the intercept.
    x_b = np.c_[np.ones((len(x), 1)), x]

    # Fit with per-sample updates and show the learned weights.
    alpha = 0.01  # step size
    theta = stochastic_gradient_descent(alpha, x_b, y, ep=0.001, max_iter=10000)
    print("Theta = ", theta)

    # Score one unseen sample, using the same bias-column layout.
    xtest = np.array([[4, 9]])
    xtest_b = np.c_[np.ones((len(xtest), 1)), xtest]
    y_p = xtest_b @ theta
    print("y predict = ", y_p)

## 1.2 Mini-Batch Gradient Descent

In [None]:
import numpy as np

In [None]:
def cost_function(theta, x, y):
    """Return the mean squared error of the linear model ``x.dot(theta)``.

    Args:
        theta: Weight array; ``x.dot(theta)`` must be a valid product.
        x: Design matrix with one sample per row.
        y: Target values, one per sample. A 1-D vector and a column vector
            are both accepted.

    Returns:
        The sum of squared residuals divided by ``len(y)``.
    """
    predictions = np.asarray(x).dot(theta)
    targets = np.asarray(y)

    # Mismatched layouts (flat vs. column) would broadcast to an (N, N)
    # matrix and inflate the cost, so bring the targets to the same shape.
    if targets.shape != predictions.shape and targets.size == predictions.size:
        targets = targets.reshape(predictions.shape)

    squared_error = (predictions - targets) ** 2
    return np.sum(squared_error) / len(targets)

In [None]:
def mini_batch_gradient_descent(alpha, x, y, batch_size=20, ep=0.001, max_iter=10000):
    """Fit linear-regression weights with mini-batch gradient descent.

    Every epoch reshuffles the samples, splits them into consecutive
    batches of at most ``batch_size`` rows, and applies
    ``theta -= alpha * x_batch.T.dot(x_batch.dot(theta) - y_batch) / len(y_batch)``
    per batch. After each epoch the cost over the full data set is
    evaluated with ``cost_function`` and training stops once it drops
    below ``ep``.

    Args:
        alpha: Learning rate.
        x: 2-D design matrix, one sample per row. No bias column is added
            here; the caller must include one if an intercept is wanted.
        y: Targets, one per sample. A 1-D vector is treated as a column.
        batch_size: Number of samples per update; the last batch of an
            epoch may be smaller.
        ep: Cost threshold below which training stops.
        max_iter: Maximum number of epochs; ``0`` returns the random
            initial weights untouched.

    Returns:
        The weight array ``theta`` of shape ``(x.shape[1], 1)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step yields an empty batch range, which would burn
        # through every epoch without ever updating theta.
        raise ValueError("batch_size must be a positive integer")

    x = np.asarray(x)
    y = np.asarray(y)
    if y.ndim == 1:
        # With a flat y, `y_hat - y_batch` broadcasts to a (b, b) matrix and
        # theta silently grows extra columns; keep targets as a column.
        y = y.reshape(-1, 1)

    n_samples = x.shape[0]
    theta = np.random.random((x.shape[1], 1))  # random initial weights

    # A bounded for-loop runs exactly max_iter epochs at most (the former
    # while-loop ran one extra) and avoids shadowing the builtin `iter`.
    for _ in range(max_iter):
        # Visit the samples in a fresh random order every epoch.
        order = np.arange(n_samples)
        np.random.shuffle(order)
        x_shuffled = x[order]
        y_shuffled = y[order]

        for start in range(0, n_samples, batch_size):
            # Slicing clamps at the array end, so the final batch is
            # simply shorter when batch_size does not divide n_samples.
            x_batch = x_shuffled[start:start + batch_size]
            y_batch = y_shuffled[start:start + batch_size]

            residual = x_batch.dot(theta) - y_batch
            grad = x_batch.T.dot(residual) / len(y_batch)
            theta = theta - alpha * grad

        # Converged once the cost over the whole data set is small enough.
        if cost_function(theta, x, y) < ep:
            break

    return theta

In [None]:
if __name__ == '__main__':
    # Toy regression problem: four samples with two features each.
    # The targets satisfy y = 2 * x1 + 3, and x2 is always x1 + 1.
    x = np.array([
        [1, 2],
        [2, 3],
        [3, 4],
        [4, 5],
    ])
    y = np.array([[5], [7], [9], [11]])  # targets as a column vector

    # Prepend a column of ones so the first weight acts as the intercept.
    x_b = np.c_[np.ones((len(x), 1)), x]

    # Fit with two-sample batches and show the learned weights.
    alpha = 0.01  # step size
    batch_size = 2  # samples per gradient step
    theta = mini_batch_gradient_descent(alpha, x_b, y, batch_size=batch_size, ep=0.001, max_iter=10000)
    print("Theta = ", theta)

    # Score one unseen sample, using the same bias-column layout.
    xtest = np.array([[4, 9]])
    xtest_b = np.c_[np.ones((len(xtest), 1)), xtest]
    y_p = xtest_b @ theta
    print("y predict = ", y_p)