<a href="https://colab.research.google.com/github/IvaroEkel/Probabilistic-Machine-Learning_Lecture/blob/main/vanilla_GD_OLS_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt

###############################
# Vanilla Gradient Descent for OLS
###############################

# Synthetic data: 100 noisy samples around the line y = 4 + 3x, x in [0, 2)
np.random.seed(42)  # fixed seed so every run draws the same points
X = np.random.rand(100, 1) * 2
y = 3 * X + 4 + np.random.randn(100, 1)

# Design matrix: a leading column of ones lets theta[0] act as the intercept
X_b = np.c_[np.ones(X.shape), X]

# Loss function (halved Mean Squared Error)
def mse_loss(theta, X, y):
    """Return the halved mean squared error of the linear model ``X.dot(theta)``.

    The value is ``(1 / (2 * m)) * sum((X.dot(theta) - y) ** 2)`` with
    ``m = len(y)``, i.e. half of the usual MSE. The extra factor 1/2 cancels
    the 2 that appears when the squared residual is differentiated.

    Args:
        theta: Parameter vector, either flat ``(n,)`` or a column ``(n, 1)``.
        X: Design matrix (NumPy array) with one row per sample.
        y: Targets, either flat ``(m,)`` or a column ``(m, 1)``.

    Returns:
        The scalar loss.

    Raises:
        ValueError: If predictions and targets differ in rank and cannot be
            reshaped onto each other (their sizes disagree).
    """
    m = len(y)
    predictions = X.dot(theta)
    targets = np.asarray(y)
    if predictions.ndim != targets.ndim:
        # A flat (m,) vector minus an (m, 1) column would broadcast into an
        # (m, m) matrix and silently inflate the loss; align the shapes instead.
        predictions = predictions.reshape(targets.shape)
    return (1/(2*m)) * np.sum((predictions - targets)**2)

# Gradient of the halved MSE with respect to theta
def mse_gradient(theta, X, y):
    """Return ``(1 / m) * X.T.dot(X.dot(theta) - y)`` with ``m = len(y)``.

    This is the gradient of the halved mean squared error
    ``(1 / (2 * m)) * sum((X.dot(theta) - y) ** 2)`` with respect to ``theta``.

    Args:
        theta: Parameter vector, either flat ``(n,)`` or a column ``(n, 1)``.
        X: Design matrix (NumPy array) with one row per sample.
        y: Targets, either flat ``(m,)`` or a column ``(m, 1)``.

    Returns:
        The gradient, with the same shape as ``theta``.

    Raises:
        ValueError: If predictions and targets differ in rank and cannot be
            reshaped onto each other (their sizes disagree).
    """
    m = len(y)
    predictions = X.dot(theta)
    targets = np.asarray(y)
    if targets.ndim != predictions.ndim:
        # Reshape the targets onto the predictions (not the other way round)
        # so the residual, and therefore the gradient, keeps theta's layout.
        # Without this a flat/column mix broadcasts to an (m, m) residual.
        targets = targets.reshape(predictions.shape)
    return (1/m) * X.T.dot(predictions - targets)

# Vanilla Gradient Descent for OLS
def vanilla_gd_ols(X, y, theta_init, lr=0.1, steps=20):
    """Run fixed-step gradient descent using ``mse_gradient`` and record every iterate.

    Args:
        X: Design matrix passed through to ``mse_gradient``.
        y: Targets passed through to ``mse_gradient``.
        theta_init: Starting parameters (array-like). It is copied, so the
            caller's object is never modified.
        lr: Step size multiplied onto the gradient.
        steps: Number of update steps to perform.

    Returns:
        Array stacking the ``steps + 1`` visited parameter vectors, starting
        with the initial one.
    """
    theta = np.array(theta_init)  # fresh copy; also accepts lists and tuples
    if theta.dtype.kind in 'biu':
        # The in-place update below produces floats, and NumPy refuses to cast
        # them back into a bool/integer array, so promote such starts to float.
        theta = theta.astype(float)
    path = [theta.copy()]
    for _ in range(steps):
        theta -= lr * mse_gradient(theta, X, y)
        path.append(theta.copy())  # copy: theta keeps being updated in place
    return np.array(path)

# Start from a random parameter column and run the descent on the OLS problem
theta_init = np.random.randn(2, 1)
path_ols = vanilla_gd_ols(X_b, y, theta_init)

# Evaluate the loss at every visited iterate and draw the resulting curve
losses = [
    mse_loss(iterate.reshape(-1, 1), X_b, y)  # each iterate as a column vector
    for iterate in path_ols
]
plt.figure(figsize=(8, 5))
plt.plot(losses)
plt.title('Vanilla Gradient Descent for OLS: Loss over Iterations')
plt.xlabel('Iteration')
plt.ylabel('MSE Loss')
plt.show()

###############################
# Loss function: simple quadratic bowl for visualization
###############################

def loss_function(theta):
    """Return the quadratic form ``0.5 * theta . (A . theta)``.

    ``A`` is read from module scope when the function is called.
    """
    a_theta = np.dot(A, theta)
    return 0.5 * np.dot(theta, a_theta)

def gradient(theta):
    """Return ``A . theta`` for the module-level matrix ``A``.

    This equals the gradient of ``0.5 * theta . (A . theta)`` whenever ``A``
    is symmetric, which holds for the diagonal ``A`` defined in this file.
    """
    return A.dot(theta)

# Curvature matrix of the 2D quadratic bowl: three times steeper along the
# first coordinate than along the second, giving elliptical level sets
A = np.array([
    [3.0, 0.0],
    [0.0, 1.0],
])

# Plot loss landscape
def plot_loss_surface():
    """Draw contour lines of the quadratic bowl ``0.5 * theta^T A theta``.

    The surface is evaluated on a 100 x 100 grid over [-4, 4] x [-4, 4] using
    the entries of the module-level 2x2 matrix ``A``, so the contours stay in
    sync with ``A`` if it is edited. A new figure is created and
    ``plt.show()`` is not called here, so further artists can be added to the
    figure afterwards.
    """
    theta1 = np.linspace(-4, 4, 100)
    theta2 = np.linspace(-4, 4, 100)
    T1, T2 = np.meshgrid(theta1, theta2)
    # Expand theta^T A theta term by term over the grid instead of
    # hard-coding the coefficients of one particular A.
    Z = 0.5 * (A[0, 0] * T1**2 + (A[0, 1] + A[1, 0]) * T1 * T2 + A[1, 1] * T2**2)

    plt.figure(figsize=(8,6))
    plt.contour(T1, T2, Z, levels=30, cmap='viridis')
    plt.xlabel(r'$\theta_1$')
    plt.ylabel(r'$\theta_2$')
    plt.title('Loss Landscape')

# Optimizers
def vanilla_gd(theta_init, lr=0.1, steps=20):
    """Run fixed-step gradient descent with ``gradient`` and record every iterate.

    Args:
        theta_init: Starting point (array-like). It is copied, so the caller's
            object is never modified.
        lr: Step size multiplied onto the gradient.
        steps: Number of update steps to perform.

    Returns:
        Array stacking the ``steps + 1`` visited points, starting with the
        initial one.
    """
    theta = np.array(theta_init)  # fresh copy; also accepts lists and tuples
    if theta.dtype.kind in 'biu':
        # The in-place update below produces floats, and NumPy refuses to cast
        # them back into a bool/integer array, so promote such starts to float.
        theta = theta.astype(float)
    path = [theta.copy()]
    for _ in range(steps):
        theta -= lr * gradient(theta)
        path.append(theta.copy())  # copy: theta keeps being updated in place
    return np.array(path)

def momentum_gd(theta_init, lr=0.1, momentum=0.9, steps=20):
    """Run gradient descent with a momentum (velocity) term and record every iterate.

    Each step updates the velocity as ``v = momentum * v + lr * gradient(theta)``
    and then moves ``theta`` by ``-v``.

    Args:
        theta_init: Starting point (array-like). It is copied, so the caller's
            object is never modified.
        lr: Step size multiplied onto the gradient.
        momentum: Fraction of the previous velocity carried into the next step.
        steps: Number of update steps to perform.

    Returns:
        Array stacking the ``steps + 1`` visited points, starting with the
        initial one.
    """
    theta = np.array(theta_init)  # fresh copy; also accepts lists and tuples
    if theta.dtype.kind in 'biu':
        # The in-place update below produces floats, and NumPy refuses to cast
        # them back into a bool/integer array, so promote such starts to float.
        theta = theta.astype(float)
    v = np.zeros_like(theta)  # velocity starts at rest
    path = [theta.copy()]
    for _ in range(steps):
        v = momentum * v + lr * gradient(theta)
        theta -= v
        path.append(theta.copy())  # copy: theta keeps being updated in place
    return np.array(path)

def stochastic_gd(theta_init, lr=0.1, steps=20, noise_scale=0.2):
    """Run gradient descent on a noise-perturbed gradient and record every iterate.

    Each step adds standard-normal noise, scaled by ``noise_scale``, to
    ``gradient(theta)`` before taking the step. The noise is drawn with
    ``np.random.randn``, i.e. from NumPy's global random state, so results
    depend on how that state was seeded beforehand.

    Args:
        theta_init: Starting point (array-like). It is copied, so the caller's
            object is never modified.
        lr: Step size multiplied onto the noisy gradient.
        steps: Number of update steps to perform.
        noise_scale: Multiplier applied to the standard-normal noise.

    Returns:
        Array stacking the ``steps + 1`` visited points, starting with the
        initial one.
    """
    theta = np.array(theta_init)  # fresh copy; also accepts lists and tuples
    if theta.dtype.kind in 'biu':
        # The in-place update below produces floats, and NumPy refuses to cast
        # them back into a bool/integer array, so promote such starts to float.
        theta = theta.astype(float)
    path = [theta.copy()]
    for _ in range(steps):
        noisy_grad = gradient(theta) + np.random.randn(*theta.shape) * noise_scale
        theta -= lr * noisy_grad
        path.append(theta.copy())  # copy: theta keeps being updated in place
    return np.array(path)

# Main visualization: overlay each optimizer's trajectory on the contour plot
theta_init = np.array([3.5, 3.5])

# Draw the landscape first; the trajectories below are added to the same figure
plot_loss_surface()

# Every optimizer starts from the same point with its default settings
paths = {}
paths['Vanilla GD'] = vanilla_gd(theta_init)
paths['Momentum GD'] = momentum_gd(theta_init)
paths['Stochastic GD'] = stochastic_gd(theta_init)

colors = {
    'Vanilla GD': 'r',
    'Momentum GD': 'b',
    'Stochastic GD': 'g',
}

for label, path in paths.items():
    # Column 0 holds theta_1 and column 1 holds theta_2 of each visited point
    plt.plot(
        path[:, 0],
        path[:, 1],
        label=label,
        color=colors[label],
        marker='o',
        markersize=3,
    )

plt.legend()
plt.show()
