# Regularization (Ridge, Lasso, Elastic Net)

We extend the baseline to handle multicollinearity and improve generalization.
We'll compare Ridge, Lasso, and Elastic Net using time-aware cross-validation.

In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from sklearn.linear_model import RidgeCV, LassoCV, ElasticNetCV
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import TimeSeriesSplit
!wget -q https://raw.githubusercontent.com/Jihun-ust/ust-mail-557/main/Regression_Forecasting/reg_for_utils.py
import reg_for_utils as utils
# Remote location of the daily marketing dataset.
csv_path = "https://raw.githubusercontent.com/Jihun-ust/ust-mail-557/main/Regression_Forecasting/marketing_daily.csv"

# Read straight from csv_path: only reg_for_utils.py is downloaded above, so a
# bare "marketing_daily.csv" would depend on a local copy that this notebook
# never creates. Rows are sorted by date so later splits respect time order.
df = pd.read_csv(csv_path, parse_dates=["date"]).sort_values("date")

features = ["search_spend","social_spend","display_spend","promo","price_index","temp_F","rain","is_weekend"]
target = "revenue"
# NOTE(review): presumably holds out the final 90 days as the test set —
# confirm against reg_for_utils.time_train_test_split.
train, test = utils.time_train_test_split(df, "date", test_days=90)
X_train, y_train = train[features], train[target]
X_test, y_test = test[features], test[target]

# Every entry of `features` is listed in exactly one of these two groups.
numeric = ["search_spend","social_spend","display_spend","price_index","temp_F"]
categorical = ["promo","rain","is_weekend"]
# Standardize the numeric columns and one-hot encode the flag columns;
# drop="if_binary" keeps a single indicator column for two-level features.
pre = ColumnTransformer([("num", StandardScaler(), numeric), ("cat", OneHotEncoder(drop="if_binary"), categorical)])

# Splitter handed to the *CV estimators so hyper-parameter selection uses
# time-ordered folds.
tscv = TimeSeriesSplit(n_splits=5)

## Fit Ridge, Lasso, Elastic Net with CV

In [None]:
# Candidate regularization strengths: 30 log-spaced values from 1e-3 to 1e3.
alphas = np.logspace(-3, 3, 30)


def _with_preprocessing(estimator):
    """Return a pipeline that runs `pre` and then the given estimator."""
    # NOTE(review): every pipeline built here holds the same `pre` object, so
    # each fit refits that one transformer; all fits below use X_train.
    return Pipeline(steps=[("pre", pre), ("est", estimator)])


ridge = _with_preprocessing(RidgeCV(alphas=alphas, cv=tscv))
lasso = _with_preprocessing(
    LassoCV(alphas=alphas, cv=tscv, max_iter=20000, n_jobs=None)
)
enet = _with_preprocessing(
    ElasticNetCV(
        alphas=alphas,
        l1_ratio=[0.2, 0.5, 0.8, 0.95, 1.0],
        cv=tscv,
        max_iter=20000,
    )
)

# Fit in the same order as the models were declared.
ridge.fit(X_train, y_train)
lasso.fit(X_train, y_train)
enet.fit(X_train, y_train)

In [None]:
# Score the hold-out period with each fitted pipeline.
y_pred_ridge = ridge.predict(X_test)
y_pred_lasso = lasso.predict(X_test)
y_pred_enet = enet.predict(X_test)

# One actual-vs-predicted chart per model, in Ridge / Lasso / Elastic Net order.
utils.plot_series(
    test["date"], y_test, y_pred_ridge,
    title="Ridge — actual vs. predicted",
)
utils.plot_series(
    test["date"], y_test, y_pred_lasso,
    title="Lasso — actual vs. predicted",
)
utils.plot_series(
    test["date"], y_test, y_pred_enet,
    title="Elastic Net — actual vs. predicted",
)

## Compare sparsity & coefficients

In [None]:
def coef_table(pipe):
    """Tabulate a fitted pipeline's coefficients, largest magnitude first.

    Args:
        pipe: Object exposing ``named_steps`` with a ``"pre"`` step that
            provides ``get_feature_names_out()`` and an ``"est"`` step that
            provides ``coef_``.

    Returns:
        DataFrame with columns ``feature`` and ``coef``, ordered by the
        absolute value of ``coef`` in descending order. The original
        positional index is kept, so it still identifies each feature's
        position in the transformed matrix.
    """
    steps = pipe.named_steps
    table = pd.DataFrame(
        {
            "feature": steps["pre"].get_feature_names_out(),
            "coef": steps["est"].coef_,
        }
    )
    # Rank by magnitude so positive and negative effects are comparable.
    return table.sort_values(by="coef", key=lambda col: col.abs(), ascending=False)

# Coefficient table for each fitted pipeline.
coef_ridge = coef_table(ridge)
coef_lasso = coef_table(lasso)
coef_enet = coef_table(enet)

# Hold-out error metrics, one line per model.
for model_label, model_pred in (
    ('Ridge', y_pred_ridge),
    ('Lasso', y_pred_lasso),
    ('ElNet', y_pred_enet),
):
    print(
        model_label,
        "RMSE:", utils.rmse(y_test, model_pred),
        "MAE:", utils.mae(y_test, model_pred),
    )

# Side-by-side coefficients: the feature names come from the ridge table and
# the other two contribute only their `coef` column; the column-wise concat
# matches rows on the index labels.
new_column_names = ['feature', 'ridge_coef', 'lasso_coef', 'enet_coef']
coeffs = pd.concat(
    [coef_ridge, coef_lasso['coef'], coef_enet['coef']],
    axis=1,
)
coeffs.columns = new_column_names

# One scatter series per model, plotted against the row index.
for coef_column in new_column_names[1:]:
    plt.scatter(coeffs.index, coeffs[coef_column])
plt.legend(['Ridge', 'Lasso', 'ElasticNet'])
plt.title("Sparsity")
plt.show()
coeffs.head(10)