# Lab 07: Machine Learning Introduction
**BSAD 8310: Business Forecasting — University of Nebraska at Omaha**

## Objectives
1. Build a feature matrix from a raw time series (lags, rolling stats, calendar features)
2. Perform a proper three-way train/validation/test split with chronological ordering
3. Implement walk-forward baselines (Seasonal Naive, SARIMA)
4. Tune Ridge and LASSO with `TimeSeriesSplit` cross-validation
5. Visualise the bias-variance tradeoff using a synthetic experiment
6. Compare all models on horizon RMSE profiles
7. Plot LASSO coefficient paths to illustrate variable selection

## Dataset
- **RSXFS**: Advance Retail Sales — Retail Trade, i.e. retail sales excluding food services (monthly, SA, millions USD)
- Source: FRED (Federal Reserve Bank of St. Louis)
- Fallback: statsmodels macrodata (quarterly GDP)

## Key packages
`numpy`, `pandas`, `scipy`, `matplotlib`, `statsmodels`, `scikit-learn` (plus optional `pandas-datareader` for the FRED download)

In [None]:
# ── 1. Setup ──────────────────────────────────────────────────────────────────
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from pathlib import Path
from scipy import stats

# sklearn imports
from sklearn.linear_model import Ridge, Lasso, lasso_path
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error

# Seed NumPy's global random state so np.random.* draws are reproducible
np.random.seed(42)

# UNO color palette (hex strings reused by the figures in later cells)
UNO_BLUE   = '#005CA9'
UNO_RED    = '#E41C38'
UNO_GRAY   = '#525252'
UNO_GREEN  = '#15803d'
UNO_ORANGE = '#d97706'

# Global matplotlib defaults: figure resolution, hidden top/right spines,
# and font sizes for titles, axis labels and legends
plt.rcParams.update({
    'figure.dpi': 150,
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.titlesize': 11,
    'axes.labelsize': 10,
    'legend.fontsize': 9,
})

# Directory that later cells save figures into (relative to the working
# directory); created here if it does not exist yet
FIGURES = Path('../Figures')
FIGURES.mkdir(exist_ok=True)

print('Setup complete.')

In [None]:
# ── 2. Load Data ──────────────────────────────────────────────────────────────
# Preferred source: monthly RSXFS from FRED via pandas_datareader.
# Any exception raised inside the try block (import failure, download error,
# ...) switches to the statsmodels macrodata fallback below.
try:
    import pandas_datareader.data as web
    start, end = '2000-01-01', '2023-12-31'
    rsxfs  = web.DataReader('RSXFS', 'fred', start, end)
    series = rsxfs['RSXFS'].dropna()
    # Monthly PeriodIndex: later cells call .to_timestamp() and .month on it
    series.index = pd.PeriodIndex(series.index, freq='M')
    # m is the seasonal period read by the baseline model functions
    m    = 12
    FREQ = 'M'
    print(f'FRED RSXFS: {len(series)} months, {series.index[0]} to {series.index[-1]}')
except Exception as e:
    print(f'FRED unavailable ({e}); using statsmodels macrodata fallback.')
    import statsmodels.api as sm
    macro  = sm.datasets.macrodata.load_pandas().data
    series = macro['realgdp'].copy()
    # Attach a quarterly PeriodIndex starting at 1959Q1
    series.index = pd.period_range('1959Q1', periods=len(series), freq='Q')
    m    = 4
    FREQ = 'Q'
    print(f'Fallback: {len(series)} quarters, {series.index[0]} to {series.index[-1]}')

# Summary statistics of whichever series was loaded
print(series.describe().round(1))

# Quick visualisation
fig, ax = plt.subplots(figsize=(11, 3))
# PeriodIndex is converted to timestamps for the x-axis
ax.plot(series.index.to_timestamp(), series.values, color=UNO_BLUE, lw=1.2)
ax.set_title('Raw Series', fontweight='bold')
ax.set_ylabel('Value')
plt.tight_layout()
plt.show()

In [None]:
# ── 3. Feature Engineering ────────────────────────────────────────────────────
def make_features(y, lags=None, roll_windows=None, add_calendar=True):
    """
    Turn a univariate series into a supervised-learning design matrix.

    Every lag and rolling column is computed from values at least one step
    in the past (via ``shift``), so the row for period t never contains y_t.

    Parameters
    ----------
    y            : pd.Series with PeriodIndex
    lags         : list of int lag orders (default [1, 2, 3, 6, 12])
    roll_windows : list of int rolling window sizes (default [3, 6, 12])
    add_calendar : bool; when True and the index exposes ``.month``,
                   add sine/cosine encodings of the month (period 12)

    Returns
    -------
    X : pd.DataFrame of features, restricted to rows without any NaN
    y_aligned : pd.Series of targets on the same index as X
    """
    lag_orders = [1, 2, 3, 6, 12] if lags is None else lags
    windows = [3, 6, 12] if roll_windows is None else roll_windows

    features = pd.DataFrame(index=y.index)

    # Column lag_k holds y_{t-k}
    for order in lag_orders:
        features[f'lag_{order}'] = y.shift(order)

    # Rolling moments are taken over the one-step-lagged series, so the
    # window for row t covers y_{t-w} .. y_{t-1}
    lagged_once = y.shift(1)
    for width in windows:
        roller = lagged_once.rolling(width)
        features[f'roll_mean_{width}'] = roller.mean()
        features[f'roll_std_{width}'] = roller.std()

    # Cyclical month encoding (known ahead of time, so no leakage)
    if add_calendar and hasattr(y.index, 'month'):
        angle = 2 * np.pi * y.index.month / 12
        features['month_sin'] = np.sin(angle)
        features['month_cos'] = np.cos(angle)

    # Linear time index counted from the first observation of y
    features['trend'] = np.arange(len(y))

    # Keep only the rows where every feature is available
    kept = features.dropna().index
    return features.loc[kept], y.loc[kept]


# Build the full feature matrix and aligned target from the loaded series;
# rows lacking any lag / rolling value are dropped inside make_features
X_full, y_full = make_features(
    series,
    lags=[1, 2, 3, 6, 12],
    roll_windows=[3, 6, 12]
)

print(f'Feature matrix shape: {X_full.shape}')
print(f'Features: {list(X_full.columns)}')
print(X_full.head(3).round(1))

# Visualise two key features vs. the target
# (one panel per feature, with the target y_t overlaid in gray)
fig, axes = plt.subplots(1, 2, figsize=(11, 3))
for ax, feat, color in zip(axes, ['lag_1', 'roll_mean_12'],
                            [UNO_BLUE, UNO_GREEN]):
    ax.plot(X_full.index.to_timestamp(), X_full[feat],
            color=color, lw=1.0, label=feat)
    ax.plot(y_full.index.to_timestamp(), y_full.values,
            color=UNO_GRAY, lw=0.8, alpha=0.5, label='y_t')
    ax.set_title(feat, fontweight='bold')
    ax.legend(fontsize=8)
plt.suptitle('Selected Features vs. Target', fontsize=11)
plt.tight_layout()
plt.show()

In [None]:
# ── 4. Three-Way Train / Validation / Test Split ──────────────────────────────
def time_split(X, y, val_frac=0.15, test_frac=0.15):
    """
    Split X and y into consecutive train / validation / test segments.

    The validation and test sizes are floor(len(X) * frac); the training
    segment receives whatever remains at the start.  Row order is kept,
    so the three segments are chronological if the input is.

    Returns: (X_tr, y_tr), (X_va, y_va), (X_te, y_te)
    """
    total = len(X)
    test_len = int(np.floor(total * test_frac))
    val_len = int(np.floor(total * val_frac))

    # Positional boundaries between the three segments
    val_start = total - val_len - test_len
    test_start = val_start + val_len

    segments = (
        slice(None, val_start),
        slice(val_start, test_start),
        slice(test_start, None),
    )
    return tuple((X.iloc[seg], y.iloc[seg]) for seg in segments)


# Chronological 70 / 15 / 15 split of the feature matrix and target
(X_tr, y_tr), (X_va, y_va), (X_te, y_te) = time_split(
    X_full, y_full, val_frac=0.15, test_frac=0.15)

print(f'Train:      {len(X_tr):4d} obs  {y_tr.index[0]} – {y_tr.index[-1]}')
print(f'Validation: {len(X_va):4d} obs  {y_va.index[0]} – {y_va.index[-1]}')
print(f'Test:       {len(X_te):4d} obs  {y_te.index[0]} – {y_te.index[-1]}')

# Fit scaler on train only; transform val and test
# (X_tr_s / X_va_s / X_te_s are therefore all in train-standardised units)
scaler = StandardScaler()
X_tr_s = scaler.fit_transform(X_tr)
X_va_s = scaler.transform(X_va)
X_te_s = scaler.transform(X_te)

# Visualise the split as a horizontal bar
fig, ax = plt.subplots(figsize=(10, 1.4))
sizes  = [len(X_tr), len(X_va), len(X_te)]
labels = ['Train', 'Validation', 'Test']
colors = [UNO_BLUE, UNO_GREEN, UNO_RED]
# `left` is the running x-offset so the three segments sit end to end
left = 0
for size, label, color in zip(sizes, labels, colors):
    ax.barh(0, size, left=left, color=color, height=0.5)
    ax.text(left + size/2, 0, f'{label}\n({size})',
            ha='center', va='center', color='white', fontsize=9)
    left += size
ax.set_xlim(0, len(X_full))
ax.axis('off')
ax.set_title('Chronological Train / Validation / Test Split',
             fontweight='bold', fontsize=10)
plt.tight_layout()
plt.show()

In [None]:
# ── 5. Baseline: Walk-Forward Naive and SARIMA ────────────────────────────────
# Re-use walk_forward_eval from L06 (inlined here for self-containment)
from statsmodels.tsa.statespace.sarimax import SARIMAX

def walk_forward_eval(series_in, model_fn, T0, H,
                      window='expanding', window_size=None):
    """
    Walk-forward evaluation returning a long-format DataFrame of errors.

    For every origin t in range(T0, len(series_in) - H) the model is fit on
    the observations before position t and its forecast for steps 1..H is
    compared with the actual values at positions t .. t+H-1.

    Parameters
    ----------
    series_in   : pd.Series, accessed positionally via ``.iloc``
    model_fn    : callable(train) -> indexable sequence of forecasts
    T0          : first origin (positional)
    H           : number of forecast steps scored per origin
    window      : 'expanding' uses all rows before t; any other value uses
                  a trailing window
    window_size : trailing window length; when falsy, T0 is used instead

    Returns
    -------
    pd.DataFrame with columns origin, horizon, actual, forecast, error.
    If model_fn raises, all H forecasts for that origin are NaN; horizons
    beyond the length of the returned forecast are NaN as well.
    """
    rows = []
    for origin in range(T0, len(series_in) - H):
        # Left edge of the training slice for this origin
        if window == 'expanding':
            first = 0
        else:
            first = max(0, origin - (window_size or T0))
        history = series_in.iloc[first:origin]

        try:
            fc = model_fn(history)
        except Exception:
            # Best-effort: a failed fit is recorded as missing forecasts
            fc = np.full(H, np.nan)

        for step in range(1, H + 1):
            observed = float(series_in.iloc[origin + step - 1])
            predicted = float(fc[step - 1]) if step <= len(fc) else np.nan
            rows.append({'origin': origin, 'horizon': step,
                         'actual': observed, 'forecast': predicted,
                         'error': observed - predicted})
    return pd.DataFrame(rows)


# Forecast horizon (number of steps) read by the baseline model functions
H  = 12
# First origin: walk_forward_eval loops over range(T0, len(series) - H),
# so this choice produces exactly 36 origins
T0 = len(series) - 36 - H   # 36 walk-forward origins

def seasonal_naive_fn(train):
    """Forecast H steps by cycling through the last m values of *train*.

    Reads the module-level seasonal period ``m`` and horizon ``H``.
    """
    tail = train.values[-m:]
    repeats = (H // m) + 1   # enough copies to cover H steps
    return np.tile(tail, repeats)[:H]

def sarima_fn(train):
    """Fit SARIMA(1,1,1)(0,1,1)_m on *train* and return an H-step forecast.

    Reads the module-level seasonal period ``m`` and horizon ``H``.
    Stationarity and invertibility constraints are not enforced.
    """
    spec = dict(
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, m),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    fitted = SARIMAX(train.values, **spec).fit(disp=False)
    return fitted.forecast(H)

# Run both baselines over the same origins (expanding window by default).
# The prints use end=' ' so each status line is completed by the next print.
print(f'Walk-forward: T0={T0}, {len(series)-T0-H} origins, H={H}')
print('Seasonal Naive...', end=' ')
wf_naive  = walk_forward_eval(series, seasonal_naive_fn, T0, H)
print('done.  SARIMA...', end=' ')
# SARIMA is refit from scratch at every origin inside walk_forward_eval
wf_sarima = walk_forward_eval(series, sarima_fn, T0, H)
print('done.')

def horizon_rmse(df):
    """Return RMSE per forecast horizon as a Series indexed by 'horizon'.

    Rows containing any NaN are discarded before grouping; the RMSE of each
    group is computed from its 'actual' and 'forecast' columns.
    """
    def _group_rmse(block):
        residual = block['actual'] - block['forecast']
        return np.sqrt(np.mean(residual ** 2))

    complete = df.dropna()
    return complete.groupby('horizon').apply(_group_rmse)

# RMSE-by-horizon profiles for the two baselines (Series indexed by horizon)
naive_profile  = horizon_rmse(wf_naive)
sarima_profile = horizon_rmse(wf_sarima)

print('\nBaseline RMSE at h=1, 3, 12:')
for h in [1, 3, 12]:
    # Two adjacent f-strings are concatenated by the parser; the previous
    # single literal embedded stray double quotes and indentation spaces
    # in the printed line.
    print(f'  h={h:2d}:  Naive={naive_profile[h]:,.0f}  '
          f'SARIMA={sarima_profile[h]:,.0f}')

In [None]:
# ── 6. Ridge and LASSO with TimeSeriesSplit CV ────────────────────────────────
# Use the train+validation window for CV; reserve test for final evaluation
# NOTE: X_tr_s / X_va_s were already standardised with the train-only scaler,
# so X_trainval is in train-standardised units rather than raw units.
X_trainval   = np.vstack([X_tr_s, X_va_s])
y_trainval   = np.concatenate([y_tr.values, y_va.values])

# Five ordered folds with no gap between each training and validation part;
# this splitter is read as a global by cv_rmse
tscv = TimeSeriesSplit(n_splits=5, gap=0)

def cv_rmse(model_cls, alpha, X, y):
    """Average out-of-fold RMSE of ``model_cls(alpha=alpha)``.

    Folds come from the module-level ``tscv`` splitter.  Within each fold a
    fresh StandardScaler + model pipeline is fit on the training rows only
    and scored on the held-out rows.  X and y are indexed with integer
    arrays, so they are expected to be NumPy arrays.
    """
    def _fold_rmse(fit_rows, eval_rows):
        estimator = Pipeline([
            ('scaler', StandardScaler()),
            ('model',  model_cls(alpha=alpha)),
        ])
        estimator.fit(X[fit_rows], y[fit_rows])
        predicted = estimator.predict(X[eval_rows])
        return np.sqrt(mean_squared_error(y[eval_rows], predicted))

    return np.mean([_fold_rmse(fit_rows, eval_rows)
                    for fit_rows, eval_rows in tscv.split(X)])


# Log-spaced candidate grids (30 points each) for the penalty strength
ridge_alphas = np.logspace(-2, 4, 30)
lasso_alphas = np.logspace(-1, 5, 30)

# Grid search: alpha* is the candidate with the lowest mean CV RMSE
print('Tuning Ridge...', end=' ')
ridge_cv = [cv_rmse(Ridge, a, X_trainval, y_trainval) for a in ridge_alphas]
alpha_ridge_star = ridge_alphas[np.argmin(ridge_cv)]
print(f'alpha* = {alpha_ridge_star:.2f}')

print('Tuning LASSO...', end=' ')
# cv_rmse builds Lasso with its default iteration limit
lasso_cv = [cv_rmse(Lasso, a, X_trainval, y_trainval) for a in lasso_alphas]
alpha_lasso_star = lasso_alphas[np.argmin(lasso_cv)]
print(f'alpha* = {alpha_lasso_star:.2f}')

# Plot CV curves
# One panel per model: mean CV RMSE against alpha on a log x-axis, with a
# dashed vertical line at the selected alpha*
fig, axes = plt.subplots(1, 2, figsize=(11, 3.5))
for ax, alphas, cv_vals, a_star, name, color in zip(
    axes,
    [ridge_alphas, lasso_alphas],
    [ridge_cv, lasso_cv],
    [alpha_ridge_star, alpha_lasso_star],
    ['Ridge', 'LASSO'],
    [UNO_BLUE, UNO_RED]
):
    ax.semilogx(alphas, cv_vals, color=color, lw=1.8)
    ax.axvline(a_star, color='black', ls='--', lw=1.0,
               label=f'$\\alpha^* = {a_star:.2f}$')
    ax.set_xlabel('Regularisation strength $\\alpha$')
    ax.set_ylabel('Mean CV RMSE')
    ax.set_title(f'{name} — CV Tuning', fontweight='bold')
    ax.legend()
    # Thousands separators on the y-axis tick labels
    ax.yaxis.set_major_formatter(
        mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
plt.tight_layout()
plt.savefig(FIGURES / 'lecture07_cv_tuning.png', bbox_inches='tight')
plt.show()

# Refit on full train+val, evaluate on test.
# X_trainval is stacked from X_tr_s / X_va_s, i.e. it is already in
# train-standardised units.  scaler_tv re-centres / re-scales those units to
# train+val statistics, so the test matrix must enter in the SAME units:
# use X_te_s (train-standardised), not the raw X_te values.  Feeding raw
# features through scaler_tv would leave the test set on a different scale
# from the data the models were fit on.
scaler_tv = StandardScaler().fit(X_trainval)
X_tv_s    = scaler_tv.transform(X_trainval)
X_te_s2   = scaler_tv.transform(X_te_s)

# Final models at the CV-selected penalties; LASSO gets a larger iteration
# budget for the coordinate-descent solver
ridge_final = Ridge(alpha=alpha_ridge_star).fit(X_tv_s, y_trainval)
lasso_final = Lasso(alpha=alpha_lasso_star, max_iter=10000).fit(X_tv_s, y_trainval)

# One-step predictions for every test row
ridge_pred  = ridge_final.predict(X_te_s2)
lasso_pred  = lasso_final.predict(X_te_s2)

def rmse(a, f):
    """Root-mean-squared error between actuals *a* and forecasts *f*.

    Both arguments are converted with ``np.asarray`` and must be
    broadcastable against each other.
    """
    diff = np.asarray(a) - np.asarray(f)
    return np.sqrt(np.mean(np.square(diff)))

# Hold-out performance of the final Ridge / LASSO fits on the test rows
print(f'Test RMSE (direct, one-step):  Ridge={rmse(y_te.values, ridge_pred):,.0f}  '
      f'LASSO={rmse(y_te.values, lasso_pred):,.0f}')

In [None]:
# ── 7. Bias-Variance Illustration (Synthetic) ─────────────────────────────────
# True function: y = sin(2*pi*x) + epsilon;  x in [0,1]
# A dedicated Generator keeps this experiment independent of np.random.seed
rng = np.random.default_rng(42)

n_train, n_test = 30, 300
sigma_true      = 0.25   # standard deviation of the additive Gaussian noise

# Training inputs: sorted uniform draws on [0, 1]
x_train = np.sort(rng.uniform(0, 1, n_train))
y_train = np.sin(2 * np.pi * x_train) + rng.normal(0, sigma_true, n_train)

# Test inputs: an evenly spaced grid on [0, 1] with fresh noise
x_test  = np.linspace(0, 1, n_test)
y_test  = np.sin(2 * np.pi * x_test) + rng.normal(0, sigma_true, n_test)

# Model complexity is controlled by the polynomial degree
degrees = [1, 2, 3, 5, 8, 12, 15]
train_rmse_bv = []
test_rmse_bv  = []

for deg in degrees:
    # Expand x into polynomial terms; the expansion is fit on train only
    poly = PolynomialFeatures(degree=deg, include_bias=True)
    X_tr_bv = poly.fit_transform(x_train.reshape(-1, 1))
    X_te_bv = poly.transform(x_test.reshape(-1, 1))

    # Fit with Ridge (tiny alpha) to avoid singular matrix at high degree
    model = Ridge(alpha=1e-6).fit(X_tr_bv, y_train)

    y_pred_tr = model.predict(X_tr_bv)
    y_pred_te = model.predict(X_te_bv)

    train_rmse_bv.append(rmse(y_train, y_pred_tr))
    test_rmse_bv.append(rmse(y_test, y_pred_te))

# Optimal degree: where test RMSE is minimised
# (selected on the test set, which is acceptable only because this is a
# synthetic illustration rather than a model-selection procedure)
optimal_idx = int(np.argmin(test_rmse_bv))
optimal_deg = degrees[optimal_idx]

# Two panels sharing the same x-axis meaning (polynomial degree)
fig, axes = plt.subplots(1, 2, figsize=(11, 4))

# Left: train error
axes[0].plot(degrees, train_rmse_bv, color=UNO_BLUE,
             lw=2, marker='o', markersize=6)
# Dotted horizontal line at the noise level sigma_true
axes[0].axhline(sigma_true, color=UNO_RED, ls=':', lw=1.5,
                label=f'Irreducible $\\sigma = {sigma_true}$')
# Dashed vertical line at the degree with the lowest test RMSE
axes[0].axvline(optimal_deg, color=UNO_GRAY, ls='--', lw=1.2)
axes[0].set_xlabel('Polynomial degree (complexity)')
axes[0].set_ylabel('RMSE')
axes[0].set_title('Train RMSE', fontweight='bold')
axes[0].legend()

# Right: test error
axes[1].plot(degrees, test_rmse_bv, color=UNO_RED,
             lw=2, marker='o', markersize=6)
axes[1].axhline(sigma_true, color=UNO_RED, ls=':', lw=1.5,
                label=f'Irreducible $\\sigma = {sigma_true}$')
axes[1].axvline(optimal_deg, color=UNO_GRAY, ls='--', lw=1.2,
                label=f'Optimal degree = {optimal_deg}')
axes[1].set_xlabel('Polynomial degree (complexity)')
axes[1].set_ylabel('RMSE')
axes[1].set_title('Test RMSE (U-shape = bias-variance tradeoff)', fontweight='bold')
axes[1].legend()

plt.suptitle('Bias-Variance Illustration — $y = \\sin(2\\pi x) + \\varepsilon$',
             fontsize=11, y=1.02)
plt.tight_layout()
plt.savefig(FIGURES / 'lecture07_bias_variance.png', bbox_inches='tight')
plt.show()

# Text summary: first vs. last degree for train RMSE, the best test RMSE,
# and how much worse the highest-degree fit is relative to the best one
print(f'Train RMSE always decreases with complexity: '
      f'{train_rmse_bv[0]:.3f} → {train_rmse_bv[-1]:.4f}')
print(f'Test RMSE is U-shaped: minimum at degree {optimal_deg} '
      f'(RMSE = {test_rmse_bv[optimal_idx]:.3f})')
print(f'Degree-15 overfit: train={train_rmse_bv[-1]:.4f}, '
      f'test={test_rmse_bv[-1]:.3f} ({test_rmse_bv[-1]/test_rmse_bv[optimal_idx]:.1f}x worse)')

In [None]:
# ── 8. Model Comparison ───────────────────────────────────────────────────────
# Walk-forward evaluation for Ridge and LASSO (using feature matrix)
# Direct multi-step: one model per step is expensive;
# here we use a simple iterated 1-step approach on the feature matrix.

def ml_walk_forward(series_in, X_feat, y_feat, model_cls, alpha_star, T0_ml, H):
    """
    Expanding-window, one-step-ahead evaluation of a penalised linear model.

    For each origin t in range(T0_ml, len(X_feat) - 1) a StandardScaler and
    ``model_cls(alpha=alpha_star)`` are refit on rows [0, t) of the feature
    matrix and used to predict row t.  ``series_in`` and ``H`` are accepted
    but not used in the body; every record has horizon 1.

    Returns a DataFrame with columns origin, horizon, actual, forecast, error.
    """
    rows = []
    for origin in range(T0_ml, len(X_feat) - 1):
        past_X = X_feat.iloc[:origin].values
        past_y = y_feat.iloc[:origin].values

        # Scaling statistics come from the past rows only
        scaler_t = StandardScaler().fit(past_X)
        fitted = model_cls(alpha=alpha_star).fit(
            scaler_t.transform(past_X), past_y)

        # Double brackets keep the single row two-dimensional
        next_X = scaler_t.transform(X_feat.iloc[[origin]].values)
        forecast = fitted.predict(next_X)[0]
        observed = float(y_feat.iloc[origin])
        rows.append({'origin': origin, 'horizon': 1,
                     'actual': observed, 'forecast': forecast,
                     'error': observed - forecast})
    return pd.DataFrame(rows)


# T0_ml: start walk-forward at 70% of the feature matrix
# (a row position in X_full, not a position in the raw series)
T0_ml = int(len(X_full) * 0.70)

# alpha* is held fixed at the CV-selected value; only the coefficients and
# the scaler are refit at each origin
print('Ridge walk-forward...', end=' ')
wf_ridge = ml_walk_forward(
    series, X_full, y_full, Ridge, alpha_ridge_star, T0_ml, H=1)
print('done.')

print('LASSO walk-forward...', end=' ')
wf_lasso = ml_walk_forward(
    series, X_full, y_full, Lasso, alpha_lasso_star, T0_ml, H=1)
print('done.')

# Align naive and SARIMA to T0_ml origins
# NOTE(review): n_feat and feat_offset are computed here but are not
# referenced again in the code shown, so no re-alignment actually happens.
n_feat = len(X_full)
feat_offset = len(series) - n_feat   # rows dropped by NaN from lags

def rmse_series(df):
    """Overall RMSE of a walk-forward result frame.

    Rows with any NaN are discarded, then the 'actual' and 'forecast'
    columns are passed to ``rmse``.
    """
    complete = df.dropna()
    actual, forecast = complete['actual'], complete['forecast']
    return rmse(actual, forecast)

# Summary table
# Baselines report RMSE at h = 1, 3, 12; Ridge/LASSO were evaluated at h = 1
# only, so their longer-horizon cells are left empty.
# NOTE(review): baseline origins start at T0 on the raw series while the
# Ridge/LASSO origins start at T0_ml on the feature matrix, so the rows may
# not cover the same evaluation window — confirm before comparing.
summary_rows = [
    {'Model': 'Seasonal Naive',
     'RMSE(h=1)': naive_profile[1],   'RMSE(h=3)': naive_profile[3],
     'RMSE(h=12)': naive_profile[12]},
    {'Model': 'SARIMA(1,1,1)(0,1,1)',
     'RMSE(h=1)': sarima_profile[1],  'RMSE(h=3)': sarima_profile[3],
     'RMSE(h=12)': sarima_profile[12]},
    {'Model': f'Ridge (α={alpha_ridge_star:.1f})',
     'RMSE(h=1)': rmse_series(wf_ridge),
     'RMSE(h=3)': None, 'RMSE(h=12)': None},
    {'Model': f'LASSO (α={alpha_lasso_star:.1f})',
     'RMSE(h=1)': rmse_series(wf_lasso),
     'RMSE(h=3)': None, 'RMSE(h=12)': None},
]
summary_df = pd.DataFrame(summary_rows).set_index('Model').round(0)
print('\nModel comparison (RMSE):')
print(summary_df.to_string())

# Horizon profile plot (Naive + SARIMA only; Ridge/LASSO are h=1)
fig, ax = plt.subplots(figsize=(9, 4))
ax.plot(naive_profile.index,  naive_profile.values,
        color=UNO_GRAY, lw=1.5, ls='--', marker='o', markersize=4,
        label='Seasonal Naive')
ax.plot(sarima_profile.index, sarima_profile.values,
        color=UNO_BLUE, lw=1.8, marker='o', markersize=4,
        label='SARIMA(1,1,1)(0,1,1)')

# Mark Ridge and LASSO at h=1
# (single markers drawn above the lines via zorder)
ax.scatter([1], [rmse_series(wf_ridge)], color=UNO_GREEN, s=80, zorder=5,
           label=f'Ridge (α={alpha_ridge_star:.1f}), h=1')
ax.scatter([1], [rmse_series(wf_lasso)], color=UNO_RED,   s=80,
           marker='D', zorder=5,
           label=f'LASSO (α={alpha_lasso_star:.1f}), h=1')

ax.set_xlabel('Forecast horizon $h$')
ax.set_ylabel('RMSE')
ax.set_title('Horizon RMSE Profile — All Models', fontweight='bold')
ax.legend()
# Thousands separators on the y-axis tick labels
ax.yaxis.set_major_formatter(
    mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
plt.tight_layout()
plt.savefig(FIGURES / 'lecture07_horizon_profile.png', bbox_inches='tight')
plt.show()
print('Horizon profile saved.')

In [None]:
# ── 9. LASSO Coefficient Path ─────────────────────────────────────────────────
# Compute the LASSO path on the train+validation data.
# X_tv_s is already standardised (output of scaler_tv.transform), so no
# further scaling is applied in this cell.
X_path = X_tv_s.copy()
y_path = y_trainval.copy()

# Evaluate the path on a 60-point log grid spanning two decades on either
# side of the CV-selected alpha*; the third return value is discarded
alphas_path, coefs_path, _ = lasso_path(
    X_path, y_path,
    alphas=np.logspace(np.log10(alpha_lasso_star * 0.01),
                       np.log10(alpha_lasso_star * 100), 60),
    max_iter=5000
)

# Column names of the feature matrix, in the same order as the rows of
# coefs_path
feature_names = list(X_full.columns)
n_features    = len(feature_names)

# Choose colours: lag features blue, rolling green, calendar/trend gray
def feat_color(name):
    """Return the plot colour for a feature, chosen by its name prefix.

    'lag*' -> blue, 'roll_mean*' -> green, 'roll_std*' -> orange;
    anything else (calendar / trend columns) -> gray.
    """
    prefix_palette = (
        ('lag', UNO_BLUE),
        ('roll_mean', UNO_GREEN),
        ('roll_std', UNO_ORANGE),
    )
    for prefix, colour in prefix_palette:
        if name.startswith(prefix):
            return colour
    return UNO_GRAY

# Two panels: LASSO path on the left, Ridge path on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 4.5))

# Left: LASSO path (one line per feature; row j of coefs_path belongs to
# feature_names[j])
ax = axes[0]
log_alphas_path = np.log10(alphas_path)
for j, name in enumerate(feature_names):
    ax.plot(log_alphas_path, coefs_path[j],
            color=feat_color(name), lw=1.2, alpha=0.8)
ax.axvline(np.log10(alpha_lasso_star), color='black', ls='--', lw=1.2,
           label=f'$\\alpha^* = {alpha_lasso_star:.2f}$')
ax.axhline(0, color='black', lw=0.5)
ax.set_xlabel('$\\log_{10}(\\alpha)$')
ax.set_ylabel('Coefficient $\\hat{\\beta}_j$')
ax.set_title('LASSO Coefficient Path', fontweight='bold')
ax.legend(fontsize=8)

# Label the features that are clearly non-zero at alpha*.
# Take the path column whose alpha is closest (in log space) to alpha*;
# this does not depend on the order in which lasso_path returns its alphas.
# The coefficient vector is kept in feature_names order: the earlier code
# indexed the columns with a position from the reversed alpha grid and then
# reversed the feature axis, which paired coefficients with wrong names.
star_col = int(np.argmin(np.abs(log_alphas_path
                                - np.log10(alpha_lasso_star))))
coef_at_star = coefs_path[:, star_col]
for name, coef in zip(feature_names, coef_at_star):
    if abs(coef) > 1.0:
        ax.annotate(name,
                    xy=(np.log10(alpha_lasso_star), coef),
                    xytext=(np.log10(alpha_lasso_star) + 0.3, coef),
                    fontsize=6, color=feat_color(name),
                    arrowprops=dict(arrowstyle='->', lw=0.6,
                                   color=feat_color(name)))

# Right: Ridge path (smooth shrinkage for comparison)
# Ridge is refit once per alpha on the standardised train+val data; the
# transpose gives shape (n_features, n_alphas), one row per feature
alphas_ridge_path = np.logspace(-1, 5, 60)
coefs_ridge_path  = np.array([
    Ridge(alpha=a).fit(X_tv_s, y_trainval).coef_
    for a in alphas_ridge_path
]).T

ax2 = axes[1]
for j, name in enumerate(feature_names):
    ax2.plot(np.log10(alphas_ridge_path), coefs_ridge_path[j],
             color=feat_color(name), lw=1.2, alpha=0.8)
# Dashed vertical line at the CV-selected Ridge penalty
ax2.axvline(np.log10(alpha_ridge_star), color='black', ls='--', lw=1.2,
            label=f'$\\alpha^* = {alpha_ridge_star:.2f}$')
ax2.axhline(0, color='black', lw=0.5)
ax2.set_xlabel('$\\log_{10}(\\alpha)$')
ax2.set_ylabel('Coefficient $\\hat{\\beta}_j$')
ax2.set_title('Ridge Coefficient Path (smooth shrinkage)', fontweight='bold')
ax2.legend(fontsize=8)

plt.suptitle('LASSO vs. Ridge Shrinkage: Feature Selection vs. Smooth Shrinkage',
             fontsize=11, y=1.02)
plt.tight_layout()
plt.savefig(FIGURES / 'lecture07_lasso_path.png', bbox_inches='tight')
plt.show()

# Active features at alpha_star
# Refit LASSO at the selected penalty and keep the features whose
# coefficient is not exactly zero
final_lasso_coef = Lasso(alpha=alpha_lasso_star, max_iter=10000)\
    .fit(X_tv_s, y_trainval).coef_
active = [(name, coef) for name, coef
          in zip(feature_names, final_lasso_coef) if coef != 0]
print(f'LASSO active features at alpha*={alpha_lasso_star:.2f} '
      f'({len(active)}/{len(feature_names)}):'
)
# Listed from largest to smallest absolute coefficient
for name, coef in sorted(active, key=lambda x: abs(x[1]), reverse=True):
    print(f'  {name:<18s}  β = {coef:+.2f}')

# Discussion prompts printed at the end of the lab
print('\n── Discussion questions ──')
questions = [
    '1. Which LASSO coefficient hits zero first as alpha increases?',
    '   What does that tell you about this feature\'s predictive value?',
    '2. Ridge test RMSE vs. LASSO: which is lower? Does LASSO still have value?',
    '3. Increase TimeSeriesSplit n_splits from 5 to 10. Does alpha* change?',
    '4. The polynomial degree-15 model has near-zero train RMSE.',
    '   Would you deploy it? Why?',
    '5. Which feature has the highest Ridge coefficient?',
    '   Is that consistent with what SARIMA selected implicitly?',
]
for q in questions:
    print(q)