In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Toy dataset: five float32 points lying exactly on the line y = 2x + 1.
X = np.arange(1, 6, dtype=np.float32)  # [1, 2, 3, 4, 5]
y = 2 * X + 1                          # [3, 5, 7, 9, 11], stays float32

def predict(X, w, b):
    """Evaluate the linear model ``w * X + b``.

    Args:
        X: Input value(s); a scalar or a NumPy array.
        w: Slope of the line.
        b: Intercept of the line.

    Returns:
        The model output, with whatever shape ``w * X + b`` produces.
    """
    scaled = w * X
    return scaled + b

def mse_loss(y_true, y_pred):
    """Return half of the mean squared error between targets and predictions.

    The extra factor of 1/2 means the value is ``mean((y_true - y_pred)^2) / 2``
    rather than the plain mean squared error.

    Args:
        y_true: Target values.
        y_pred: Predicted values, broadcast-compatible with ``y_true``.

    Returns:
        The halved mean of the squared residuals.
    """
    residual = y_true - y_pred
    mean_squared = np.mean(residual ** 2)
    return mean_squared / 2

def compute_gradients(X, y, w, b):
    """Return the gradients of the halved MSE with respect to ``w`` and ``b``.

    Args:
        X: Inputs of the current batch.
        y: Targets of the current batch; its length is the averaging divisor.
        w: Current slope.
        b: Current intercept.

    Returns:
        A ``(grad_w, grad_b)`` tuple.
    """
    n_samples = len(y)
    residual = y - predict(X, w, b)
    # The leading minus comes from differentiating (y - (w*X + b))^2 / 2.
    slope_grad = -np.sum(X * residual) / n_samples
    intercept_grad = -np.sum(residual) / n_samples
    return slope_grad, intercept_grad

def sgd_linear_regression(X, y, lr=0.01, epochs=100, batch_size=1, momentum=0.0, method='stochastic', seed=None):
    """Fit ``y ~ w * X + b`` by gradient descent with optional momentum.

    Each epoch shuffles the data, walks through it in consecutive batches,
    applies one momentum update per batch, and then records the loss on the
    full dataset.

    Args:
        X: Array-like of inputs (intended for 1-D data, as used in this file).
        y: Array-like of targets with the same length as ``X``.
        lr: Learning rate; it is folded into the velocity term.
        epochs: Number of passes over the data.
        batch_size: Number of samples per update. It is only honoured when
            ``method`` is neither ``'batch'`` nor ``'stochastic'``.
        momentum: Fraction of the previous velocity carried into each update.
        method: ``'batch'`` forces the batch size to the dataset size,
            ``'stochastic'`` forces it to 1; any other value (for example
            ``'mini-batch'``) uses ``batch_size`` as given.
        seed: Optional seed for the per-epoch shuffle. ``None`` (the default)
            shuffles with the global ``np.random`` state, as before; any
            other value drives a private ``np.random.default_rng(seed)`` so
            the run is reproducible.

    Returns:
        A ``(w, b, loss_history)`` tuple where ``loss_history`` holds one
        ``mse_loss`` value per epoch, measured on the full dataset.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or if the effective
            batch size is smaller than 1.
    """
    # Accept plain Python sequences too; fancy indexing below needs arrays.
    X = np.asarray(X)
    y = np.asarray(y)
    N = len(y)
    if len(X) != N:
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {N}"
        )

    if method == 'batch':
        batch_size = N
    elif method == 'stochastic':
        batch_size = 1

    # A negative step makes range() empty, which used to skip every update
    # silently and return the untrained w = b = 0.
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size}"
        )

    # Both the np.random module and a Generator expose an in-place shuffle().
    shuffle = np.random.shuffle if seed is None else np.random.default_rng(seed).shuffle

    w, b = 0.0, 0.0
    velocity_w, velocity_b = 0.0, 0.0
    loss_history = []

    for _ in range(epochs):
        # Re-draw the sample order every epoch so the batches differ.
        indices = np.arange(N)
        shuffle(indices)
        X_shuffled = X[indices]
        y_shuffled = y[indices]

        for start in range(0, N, batch_size):
            # Slicing past the end is safe: the last batch is just shorter.
            stop = start + batch_size
            X_batch = X_shuffled[start:stop]
            y_batch = y_shuffled[start:stop]

            grad_w, grad_b = compute_gradients(X_batch, y_batch, w, b)

            # Momentum update: velocity accumulates the lr-scaled gradients.
            velocity_w = momentum * velocity_w + lr * grad_w
            velocity_b = momentum * velocity_b + lr * grad_b

            w = w - velocity_w
            b = b - velocity_b

        # Track convergence on the whole dataset, not just the last batch.
        loss_history.append(mse_loss(y, predict(X, w, b)))

    return w, b, loss_history


# Train once per batching strategy with the same learning rate and no
# momentum, keeping the fitted parameters and the loss curve of each run.
methods = ['stochastic', 'batch', 'mini-batch']
results = {}
epochs = 100

print("Training with different SGD methods:")
for method in methods:
    # 'stochastic' and 'batch' override batch_size inside the trainer, so the
    # value passed here only matters for 'mini-batch'.
    current_batch_size = 2 if method == 'mini-batch' else 1
    w, b, loss_history = sgd_linear_regression(
        X,
        y,
        lr=0.01,
        epochs=epochs,
        batch_size=current_batch_size,
        momentum=0.0,
        method=method,
    )
    results[method] = {'w': w, 'b': b, 'loss_history': loss_history}
    print(f"Method: {method} --> w = {w:.4f}, b = {b:.4f}, final loss = {loss_history[-1]:.6f}")


# Overlay the per-epoch loss curves of all methods on one figure. The plotted
# values come from mse_loss, i.e. the halved mean squared error.
plt.figure(figsize=(10, 6))
for method in methods:
    history = results[method]['loss_history']
    plt.plot(history, label=method)
plt.title("Convergence of Different SGD Methods")
plt.xlabel("Epochs")
plt.ylabel("MSE Loss")
plt.legend()
plt.show()

# Sweep every (learning rate, momentum) pair with stochastic updates and draw
# each loss curve in its own cell of a grid: one row per learning rate, one
# column per momentum value.
learning_rates = [0.001, 0.01, 0.1]
momentum_values = [0.0, 0.5, 0.9]
exp_results = {}

plt.figure(figsize=(15, 10))
plot_idx = 1
n_rows, n_cols = len(learning_rates), len(momentum_values)

for lr in learning_rates:
    for momentum in momentum_values:
        w, b, loss_history = sgd_linear_regression(
            X,
            y,
            lr=lr,
            epochs=epochs,
            batch_size=1,
            momentum=momentum,
            method='stochastic',
        )
        exp_results[(lr, momentum)] = {'w': w, 'b': b, 'loss_history': loss_history}

        # Subplot indices are 1-based and advance row by row.
        plt.subplot(n_rows, n_cols, plot_idx)
        plot_idx += 1
        plt.plot(loss_history)
        plt.title(f"lr={lr}, momentum={momentum}")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")

plt.tight_layout()
plt.show()
