In [24]:
import numpy as np
import matplotlib.pyplot as plt

In [25]:
# Global parameters
N = 100  # no. data points
N_iters = 10000  # max no. iterations before stopping
noise_std = 0.1
beta_tol = 1e-9  # beta tolerance for stopping iteration when |beta_new - beta_old| <= beta_tol

# Create data set
np.random.seed(2023)
x = np.random.rand(N, 1)
noise = np.random.normal(0, noise_std, x.shape)
y = 5 - 10*x + 2*x**2 + noise

# Analytical calculations for comparing
X = np.c_[np.ones((N, 1)), x]  # ??
beta_linreg = np.linalg.inv(X.T @ X) @ (X.T @ y)  # own inversion

H = 2/N * X.T @ X  # Hessian matrix
eig_vals, eig_vecs = np.linalg.eig(H)

Plain gradient descent (GD)

In [26]:
# GD Parameters
learn_rate = 0.4

# Iterate through and improve beta
beta = np.random.randn(2, 1)
beta_prev = 0
i = 0
while i < N_iters and all(abs(beta - beta_prev) > beta_tol):  # convergence test
    beta_prev = beta.copy()
    grad = 2/N * X.T @ (X @ beta - y)
    beta -= learn_rate * grad
iters_gd = i
beta_gd = beta

Momentum based GD

In [27]:
# Momentum GD Parameters
learn_rate = 0.4
momentum_rate = 1

# Iterate through and improve beta
beta_prev = np.random.randn(2, 1)
beta = 2/N * X.T @ (X @ beta - y)
i = 0
while i < N_iters - 1 and all(abs(beta - beta_prev) > beta_tol):  # convergence test
    beta_prev = beta.copy()
    grad = 2/N * X.T @ (X @ beta - y)
    momentum = momentum_rate * (beta - beta_prev)
    beta -= learn_rate * grad - momentum
iters_mgd = i
beta_mgd = beta

Print results

In [28]:
print(f"Own inversion:\n1/max(eigenvalues)={1/max(eig_vals):g}\nbeta={beta_linreg.ravel()}\n"
      f"beta tolerance={beta_tol}\nMax iterations={N_iters}\n")
print(f"Own GD code:\nLearning rate={learn_rate}\nbeta={beta_gd.ravel()}\nIterations={iters_gd}\n")
print(f"Own GD w/momentum code:\nLearning rate={learn_rate}\nmomentum rate={momentum_rate}\n"
      f"beta={beta_mgd.ravel()}\nIterations={iters_mgd}\n")

Own inversion:
1/max(eigenvalues)=0.401248
beta=[ 4.65168156 -8.00899928]
beta tolerance=1e-09
Max iterations=10000

Own GD code:
Learning rate=0.4
beta=[ 4.65168154 -8.00899924]
Iterations=0

Own GD w/momentum code:
Learning rate=0.4
momentum rate=1
beta=[ 4.65168154 -8.00899924]
Iterations=0
