Lasso regression

In [2]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures

In [3]:
# Fix the global NumPy seed so every run of the notebook draws the same numbers
SEED = 1016
np.random.seed(SEED)

In [4]:
def rugnes(x):
    """Evaluate Runge's function 1 / (1 + 25 x^2).

    Works on plain numbers and, through NumPy's element-wise arithmetic,
    on arrays (as used when building the data set from ``np.linspace``).
    """
    denominator = 1 + 25 * x**2
    return 1 / denominator

In [5]:
# Sample the target function on an evenly spaced grid over [-1, 1]
n = 100
x = np.linspace(-1.0, 1.0, num=n)
y = rugnes(x)



In [6]:
def GD_lasso(X, y, lmbda=0.01, eta=0.01, n_iterations=1000, tolerance=1e-6):
    """Fit Lasso-penalised linear regression with plain (sub)gradient descent.

    Each step follows the gradient of ``(1/n) * ||y - X @ theta||^2`` plus
    ``lmbda * sign(theta)``, the subgradient of the L1 penalty
    ``lmbda * ||theta||_1``.

    Parameters
    ----------
    X : array-like of shape (n, p)
        Design matrix.
    y : array-like of shape (n,) or (n, 1)
        Targets. Always reshaped to a column vector internally.
    lmbda : float
        Strength of the L1 penalty.
    eta : float
        Learning rate (step size).
    n_iterations : int
        Maximum number of updates.
    tolerance : float
        Stop early once the Euclidean norm of one update drops below this.

    Returns
    -------
    numpy.ndarray of shape (p, 1)
        The parameter vector after the last update.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.

    Notes
    -----
    The starting point is drawn from the global ``np.random`` state, so seed
    it beforehand if reproducible results are needed.
    """
    X = np.asarray(X)
    # A 1-D y would broadcast against the (n, 1) prediction into an (n, n)
    # residual matrix, so force a column vector up front.
    y = np.asarray(y).reshape(-1, 1)

    # Number of data points and number of parameters
    n, p = X.shape
    if y.shape[0] != n:
        raise ValueError(
            f"X has {n} rows but y has {y.shape[0]} entries"
        )

    # Random starting point for the gradient descent
    theta = np.random.randn(p, 1)
    prev_theta = theta.copy()

    for _ in range(n_iterations):

        # Gradient of the squared-error term plus subgradient of the L1 term
        residual = y - X @ theta
        gradient = (-(2/n) * X.T @ residual) + (lmbda * np.sign(theta))

        # Update theta
        theta -= eta * gradient

        # Check for convergence: stop when the last step was negligible
        if np.linalg.norm(prev_theta - theta) < tolerance:
            break

        # Remember the current theta for the next convergence check
        prev_theta = theta.copy()

    return theta

In [7]:
def GDM_Lasso(X, y, lmbda=0.01, alpha=0.9, eta=0.01, n_iterations=1000, tolerance=1e-6):
    """Fit Lasso-penalised linear regression with momentum gradient descent.

    Uses the same (sub)gradient as plain gradient descent, i.e. the gradient
    of ``(1/n) * ||y - X @ theta||^2`` plus ``lmbda * sign(theta)``, but
    accumulates it in a velocity term: ``v = alpha * v - eta * gradient``
    followed by ``theta += v``.

    Parameters
    ----------
    X : array-like of shape (n, p)
        Design matrix.
    y : array-like of shape (n,) or (n, 1)
        Targets. Always reshaped to a column vector internally.
    lmbda : float
        Strength of the L1 penalty.
    alpha : float
        Momentum coefficient applied to the previous velocity.
    eta : float
        Learning rate (step size).
    n_iterations : int
        Maximum number of updates.
    tolerance : float
        Stop early once the Euclidean norm of one update drops below this.

    Returns
    -------
    numpy.ndarray of shape (p, 1)
        The parameter vector after the last update.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not have the same number of rows.

    Notes
    -----
    The starting point is drawn from the global ``np.random`` state, and a
    message is printed when the tolerance criterion is met.
    """
    X = np.asarray(X)
    # A 1-D y would broadcast against the (n, 1) prediction into an (n, n)
    # residual matrix, so force a column vector up front.
    y = np.asarray(y).reshape(-1, 1)

    # Number of data points and number of parameters
    n, p = X.shape
    if y.shape[0] != n:
        raise ValueError(
            f"X has {n} rows but y has {y.shape[0]} entries"
        )

    # Random starting point for the gradient descent
    theta = np.random.randn(p, 1)
    prev_theta = theta.copy()

    # Initial velocity: no momentum before the first step
    v = np.zeros_like(theta)

    for i in range(n_iterations):

        # Gradient of the squared-error term plus subgradient of the L1 term
        residual = y - X @ theta
        gradient = (-(2/n) * X.T @ residual) + (lmbda * np.sign(theta))

        # Update velocity: decayed previous velocity plus the new gradient step
        v = alpha * v - eta * gradient

        # Update theta
        theta += v

        # Check for convergence: stop when the last step was negligible
        if np.linalg.norm(prev_theta - theta) < tolerance:
            print(f"GDM0 converged in {i} iterations")
            break

        # Remember the current theta for the next convergence check
        prev_theta = theta.copy()

    return theta

In [8]:
def Lasso_reg(x, y, degree, GD=None, lmbda=0.01, test_size=0.3,
              eta=0.01, n_iterations=1000, tolerance=1e-6, alpha=0.9,
              random_state=10):
    """Fit a polynomial Lasso model by gradient descent and report train/test MSE.

    Pipeline: polynomial design matrix -> train/test split -> standardise the
    features (scaler fitted on the training part only) -> centre the training
    targets -> fit theta with ``GDM_Lasso`` or ``GD_lasso`` -> predict and
    add the target mean back.

    Parameters
    ----------
    x, y : array-like
        Inputs and targets; both are reshaped to column vectors.
    degree : int
        Polynomial degree passed to ``PolynomialFeatures``.
    GD : str or None
        ``"GDM"`` selects momentum gradient descent (``GDM_Lasso``); any
        other value, including the default ``None``, selects ``GD_lasso``.
    lmbda : float
        L1 penalty strength forwarded to the optimiser.
    test_size : float
        Test fraction forwarded to ``train_test_split``.
    eta, n_iterations, tolerance : float, int, float
        Learning rate, iteration cap and stopping tolerance forwarded to the
        optimiser.
    alpha : float
        Momentum coefficient; only used when ``GD == "GDM"``.
    random_state : int
        Seed for the train/test split.

    Returns
    -------
    dict
        ``"train_mse"``, ``"test_mse"`` and the fitted ``"theta"``.

    Notes
    -----
    NOTE(review): ``PolynomialFeatures(degree)`` is used with its defaults, so
    the design matrix appears to carry a constant column. After
    standardisation that column should be all zeros, which would leave
    ``theta[0]`` determined by the random start and the penalty only and
    without effect on the predictions (the intercept is carried by the target
    mean instead). Confirm, and consider ``include_bias=False`` if the shape
    of ``theta`` may change.
    """
    x = np.asarray(x).reshape(-1, 1)
    y = np.asarray(y).reshape(-1, 1)

    # Setting up the design matrix
    poly_features = PolynomialFeatures(degree)
    X = poly_features.fit_transform(x)

    # Splitting data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Scale features using statistics from the training set only
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_test_s = scaler.transform(X_test)

    # Centre y so the model needs no explicit intercept
    y_offset = np.mean(y_train)
    y_train_c = y_train - y_offset

    # Compute theta based on selected GD method
    if GD == "GDM":
        theta = GDM_Lasso(
            X_train_s, y_train_c, lmbda=lmbda, alpha=alpha, eta=eta,
            n_iterations=n_iterations, tolerance=tolerance,
        )
    else:
        theta = GD_lasso(
            X_train_s, y_train_c, lmbda=lmbda, eta=eta,
            n_iterations=n_iterations, tolerance=tolerance,
        )

    # Making predictions (add the target mean back)
    y_train_pred = X_train_s @ theta + y_offset
    y_test_pred = X_test_s @ theta + y_offset

    # Results
    results = {
        "train_mse": mean_squared_error(y_train, y_train_pred),
        "test_mse": mean_squared_error(y_test, y_test_pred),
        "theta": theta
    }

    return results

In [9]:
# Fit a degree-3 model once with each optimiser and show the results
for label, method in (("GD:", None), ("GDM:", "GDM")):
    print(label, Lasso_reg(x, y, 3, method))

GD: {'train_mse': 0.03628693291010702, 'test_mse': 0.04072213132209486, 'theta': array([[ 0.40134396],
       [ 0.00194895],
       [-0.19369259],
       [ 0.00652188]])}
GDM: {'train_mse': 0.03628619366425697, 'test_mse': 0.04070952022886588, 'theta': array([[-0.04097109],
       [ 0.00526441],
       [-0.19360106],
       [ 0.00319398]])}
