### Caveat

Unlike Ridge, LASSO does not have a closed-form solution, because the L1 penalty is not differentiable at zero.

scikit-learn's `Lasso` fits the model with coordinate descent.
PyTorch does not provide a coordinate-descent optimizer, so we train with AdamW instead.

In [None]:
import sys
import os

# Put the parent of the current working directory on the import path; the
# project-local imports used by this notebook are expected to resolve from there.
root_dir = os.path.abspath("..")
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if root_dir not in sys.path:
    sys.path.append(root_dir)

In [2]:
import torch

from data.regression import generate_regression_data
from models.lasso.regression import LASSORegression

In [3]:
# Draw the dataset; both returned objects expose `.values` (pandas-style).
X_df, y_series = generate_regression_data()

# Convert to float32 tensors; the target is reshaped into a single column.
X_tensor = torch.tensor(X_df.values, dtype=torch.float32)
y_tensor = torch.tensor(y_series.values, dtype=torch.float32).view(-1, 1)

# Sequential (unshuffled) hold-out split: the first 80% of rows train, the rest test.
train_size = int(0.8 * len(X_tensor))
X_train, y_train = X_tensor[:train_size], y_tensor[:train_size]
X_test, y_test = X_tensor[train_size:], y_tensor[train_size:]

In [6]:
# Hyperparameters for this demo, named so they are easy to find and tune.
ALPHA = 1.0  # forwarded to LASSORegression as `alpha`
LEARNING_RATE = 0.01
N_EPOCHS = 1500

model = LASSORegression(n_features=X_train.shape[1], alpha=ALPHA)

# The L1 penalty is not differentiable at 0. PyTorch autograd handles that point
# with a subgradient, so gradient-based training often leaves coefficients small
# instead of exactly zero.
#
# weight_decay is set to 0.0 explicitly: AdamW's default (1e-2) applies a
# decoupled L2-style shrinkage to every parameter on top of the loss returned by
# `lasso_loss`, which would make the fit no longer a pure LASSO objective.
optimizer = torch.optim.AdamW(
    model.parameters(), lr=LEARNING_RATE, weight_decay=0.0
)

for epoch in range(N_EPOCHS):
    # Full-batch step: forward pass, loss, backprop, parameter update.
    y_pred = model(X_train)
    loss = model.lasso_loss(y_pred, y_train)
    optimizer.zero_grad()  # clear gradients left over from the previous step
    loss.backward()  # type: ignore
    optimizer.step()  # type: ignore

In [7]:
# Switch the model to evaluation mode, then score the held-out rows with
# gradient tracking disabled.
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test)
    r2 = model.r2_score(y_test_pred, y_test)

print(f"R^2 on test set: {r2.item():.4f}")

R^2 on test set: 0.9553
