In [45]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures

In [46]:
# Seed NumPy's global random number generator so that the random initial
# theta drawn with np.random.randn in the gradient-descent routines is the
# same on every fresh top-to-bottom run of the notebook.
np.random.seed(1016)

In [47]:
def rugnes(x):
    """Evaluate f(x) = 1 / (1 + 25 x^2), the Runge function.

    Works element-wise on NumPy arrays as well as on plain scalars.
    """
    denominator = 1 + 25 * x**2
    return 1 / denominator

In [48]:
# Sample the target function on n evenly spaced points covering [-1, 1]
n = 100
x = np.linspace(start=-1, stop=1, num=n)
y = rugnes(x)


In [None]:
def GDM_OLS(X, y, alpha=0.9, eta=0.01, n_iterations=1000, tolerance=1e-6):
    """Estimate OLS parameters with gradient descent plus momentum.

    Minimises the mean squared error (1/n) * ||X @ theta - y||^2. The starting
    point is drawn with np.random.randn, so the result depends on the state of
    NumPy's global random number generator.

    Parameters
    ----------
    X : ndarray of shape (n, p)
        Design matrix.
    y : ndarray of shape (n, 1) or (n,)
        Targets. A flat array is treated as a single column.
    alpha : float
        Momentum coefficient applied to the previous velocity.
    eta : float
        Learning rate applied to the gradient.
    n_iterations : int
        Maximum number of parameter updates.
    tolerance : float
        Stop as soon as the Euclidean norm of one update is below this value.

    Returns
    -------
    ndarray of shape (p, 1)
        The parameters after the last update that was performed.
    """
    y = np.asarray(y)
    if y.ndim == 1:
        # A flat target would broadcast (n, 1) - (n,) into an (n, n) residual
        # and silently produce a wrong-shaped theta, so force a column vector.
        y = y.reshape(-1, 1)

    # Number of datapoints and number of parameters
    n, p = X.shape

    # Random starting point and zero initial velocity
    theta = np.random.randn(p, 1)
    v = np.zeros((p, 1))

    for i in range(n_iterations):
        # Gradient of the mean squared error with respect to theta
        gradient = (2 / n) * X.T @ (X @ theta - y)

        # Momentum step: keep a fraction of the old velocity, add the new pull
        v = alpha * v - eta * gradient
        theta_new = theta + v

        # Converged once a single update barely moves the parameters
        if np.linalg.norm(theta_new - theta) < tolerance:
            print(f"GDM converged in {i+1} iterations")
            return theta_new

        theta = theta_new

    # Iteration budget exhausted without meeting the tolerance
    return theta

In [50]:
def GD_OLS(X, y, eta=0.01, n_iterations=1000, tolerance=1e-6):
    """Estimate OLS parameters with plain (fixed step size) gradient descent.

    Minimises the mean squared error (1/n) * ||X @ theta - y||^2. The starting
    point is drawn with np.random.randn, so the result depends on the state of
    NumPy's global random number generator.

    Parameters
    ----------
    X : ndarray of shape (n, p)
        Design matrix.
    y : ndarray of shape (n, 1) or (n,)
        Targets. A flat array is treated as a single column.
    eta : float
        Learning rate.
    n_iterations : int
        Maximum number of parameter updates.
    tolerance : float
        Stop as soon as the Euclidean norm of one update is below this value.

    Returns
    -------
    ndarray of shape (p, 1)
        The parameters after the last update that was performed.
    """
    y = np.asarray(y)
    if y.ndim == 1:
        # A flat target would broadcast (n, 1) - (n,) into an (n, n) residual
        # and silently produce a wrong-shaped theta, so force a column vector.
        y = y.reshape(-1, 1)

    # Number of datapoints and number of parameters
    n, p = X.shape

    # Random starting point for the descent
    theta = np.random.randn(p, 1)

    for i in range(n_iterations):
        # Gradient of the mean squared error with respect to theta
        gradient = (2 / n) * X.T @ (X @ theta - y)

        # Take one step against the gradient
        theta_new = theta - eta * gradient

        # Converged once a single update barely moves the parameters.
        # i is zero-based, so i + 1 updates have been performed at this point.
        if np.linalg.norm(theta_new - theta) < tolerance:
            print(f"GD converged in {i+1} iterations")
            return theta_new

        theta = theta_new

    # Iteration budget exhausted without meeting the tolerance
    return theta

In [57]:
def OLS_reg(x, y, degree, GD=None, test_size=0.3, alpha=0.9, eta=0.01,
            n_iterations=1000, tolerance=1e-6, random_state=10):
    """Fit a polynomial OLS model by gradient descent and report its errors.

    The inputs are expanded with PolynomialFeatures(degree), split into a
    training and a test part, and theta is estimated on the training part.

    Parameters
    ----------
    x : array-like
        Input values; reshaped to a single column.
    y : array-like
        Target values; reshaped to a single column.
    degree : int
        Polynomial degree passed to PolynomialFeatures.
    GD : str or None
        "GDM" selects gradient descent with momentum (GDM_OLS); any other
        value, including the default None, selects plain gradient descent
        (GD_OLS).
    test_size : float
        Passed to train_test_split as test_size.
    alpha : float
        Momentum coefficient; only used when GD == "GDM".
    eta : float
        Learning rate passed to the optimiser.
    n_iterations : int
        Maximum number of optimiser updates.
    tolerance : float
        Convergence tolerance passed to the optimiser.
    random_state : int
        Seed passed to train_test_split so the split is reproducible.

    Returns
    -------
    dict
        Keys "train_mse", "test_mse" and "theta".
    """
    # Column vectors are what PolynomialFeatures and the optimisers expect
    x = np.asarray(x).reshape(-1, 1)
    y = np.asarray(y).reshape(-1, 1)

    # Setting up the design matrix
    poly_features = PolynomialFeatures(degree)
    X = poly_features.fit_transform(x)

    # Splitting data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Computing theta with the selected gradient-descent variant
    if GD == "GDM":
        theta = GDM_OLS(X_train, y_train, alpha=alpha, eta=eta,
                        n_iterations=n_iterations, tolerance=tolerance)
    else:
        theta = GD_OLS(X_train, y_train, eta=eta,
                       n_iterations=n_iterations, tolerance=tolerance)

    # Making predictions
    y_train_pred = X_train @ theta
    y_test_pred = X_test @ theta

    # Results
    results = {
        "train_mse": mean_squared_error(y_train, y_train_pred),
        "test_mse": mean_squared_error(y_test, y_test_pred),
        "theta": theta,
    }

    return results

In [58]:
# Degree-5 fit with each solver, in the same order as before so that the
# draws from the global random state are consumed identically
for label, solver in (("Vanlilla GD", None), ("GDM", "GDM")):
    print(label, OLS_reg(x, y, 5, solver))

Vanlilla GD {'train_mse': 0.03292700991791657, 'test_mse': 0.0339311489780706, 'theta': array([[ 0.47484185],
       [-0.28669799],
       [-0.93430586],
       [ 1.35372758],
       [ 0.47854063],
       [-1.21093821]])}
GDM {'train_mse': 0.03823271451444472, 'test_mse': 0.04755348035185188, 'theta': array([[ 0.45892805],
       [ 0.17898987],
       [-0.53929299],
       [-0.73325115],
       [-0.09461821],
       [ 0.66029292]])}


In [None]:
def gradient_descent_momentum(X, y, lmbda=0, alpha=0.9, eta=0.01, n_iterations=1000, tolerance=1e-6, method=None):
    """Gradient descent with momentum for OLS, ridge or lasso regression.

    The data-fit term is the mean squared error (1/n) * ||X @ theta - y||^2.
    The starting point is drawn with np.random.randn, so the result depends on
    the state of NumPy's global random number generator.

    Parameters
    ----------
    X : ndarray of shape (n, p)
        Design matrix.
    y : ndarray of shape (n, 1) or (n,)
        Targets. A flat array is treated as a single column.
    lmbda : float
        Regularisation strength; ignored unless method is "ridge" or "lasso".
    alpha : float
        Momentum coefficient applied to the previous velocity.
    eta : float
        Learning rate applied to the gradient.
    n_iterations : int
        Maximum number of parameter updates.
    tolerance : float
        Stop as soon as the Euclidean norm of one update is below this value.
    method : str or None
        "ridge" adds 2 * lmbda * theta to the gradient, "lasso" adds
        lmbda * sign(theta). Any other value (including None) uses the
        unregularised OLS gradient.

    Returns
    -------
    ndarray of shape (p, 1)
        The parameters after the last update that was performed.
    """
    y = np.asarray(y)
    if y.ndim == 1:
        # A flat target would broadcast (n, 1) - (n,) into an (n, n) residual
        # and silently produce a wrong-shaped theta, so force a column vector.
        y = y.reshape(-1, 1)

    # Number of datapoints and number of parameters
    n, p = X.shape

    # Random starting point and zero initial velocity
    theta = np.random.randn(p, 1)
    v = np.zeros((p, 1))

    for i in range(n_iterations):
        # Data-fit gradient shared by all three methods
        gradient = (2 / n) * X.T @ (X @ theta - y)

        # Add the penalty term of the selected method
        if method == "ridge":
            gradient = gradient + 2 * lmbda * theta
        elif method == "lasso":
            # sign(theta) is a subgradient of the absolute-value penalty
            gradient = gradient + lmbda * np.sign(theta)

        # Momentum step: keep a fraction of the old velocity, add the new pull
        v = alpha * v - eta * gradient
        theta_new = theta + v

        # Converged once a single update barely moves the parameters
        if np.linalg.norm(theta_new - theta) < tolerance:
            print(f"GDM converged in {i+1} iterations")
            return theta_new

        theta = theta_new

    # Iteration budget exhausted without meeting the tolerance
    return theta