# Lab 11: Feature Engineering

**BSAD 8310: Business Forecasting | University of Nebraska at Omaha**

## Objectives

1. Define a leakage-free `make_features_extended()` function (36 features: 12 lags, 12 rolling stats, 1 EWM, 11 month dummies)
2. Visualize ACF/PACF lag structure and rolling feature time series
3. Rank features by permutation importance (RF evaluated on validation set)
4. Select features with LassoCV coefficient shrinkage and RFECV cross-validation curve
5. Demonstrate leakage-free pipeline construction with `sklearn.pipeline.Pipeline`
6. Quantify the RMSE impact of extended features vs. baseline features
7. Update the full Lectures 01-11 model leaderboard

## Packages Required
```
numpy, pandas, matplotlib, scikit-learn, statsmodels
xgboost (optional), pandas_datareader (optional -- FRED data)
```

In [None]:
# =============================================================================
# Section 1: Setup
# =============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LassoCV, ElasticNet
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFECV
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error

try:
    import xgboost as xgb
    XGB_AVAILABLE = True
except ImportError:
    print('xgboost not installed -- XGBoost sections will be skipped.')
    XGB_AVAILABLE = False

# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
np.random.seed(42)

# UNO color palette: hex codes keyed by name, reused by the figures below.
UNO = {
    'blue':      '#005CA9',
    'red':       '#E41C38',
    'gray':      '#525252',
    'green':     '#15803d',
    'lightblue': '#cce0f5',
    'lightgray': '#e5e5e5',
    'lightred':  '#fce4e7',
}

# Global matplotlib defaults: 150 dpi figures, no top/right spines,
# 11 pt base font and 13 pt axes titles.
plt.rcParams.update({
    'figure.dpi':        150,
    'axes.spines.top':   False,
    'axes.spines.right': False,
    'font.size':         11,
    'axes.titlesize':    13,
})

# Folder that receives every saved figure (relative to the working directory).
# It is created up front so the later plt.savefig() calls have a target.
FIGURE_DIR = '../Figures'
import os; os.makedirs(FIGURE_DIR, exist_ok=True)
print('Setup complete.')

In [None]:
# =============================================================================
# Section 2: Load Data and Define Feature Functions
# =============================================================================

# --- Load RSXFS (US Retail Sales, FRED) ---
# Best-effort download. Any failure (pandas_datareader not installed, no
# network, FRED error) switches to a dataset bundled with statsmodels so the
# rest of the lab still runs. The failure reason is printed so the switch is
# never silent: the fallback series is QUARTERLY real GDP, so downstream
# titles and comments that say "monthly retail sales" do not describe it.
try:
    import pandas_datareader.data as web
    raw = web.DataReader('RSXFS', 'fred',
                         start='1992-01-01', end='2024-12-01')
    df_raw = raw.rename(columns={'RSXFS': 'value'})
    # PeriodIndex with monthly frequency; the feature functions read .month
    # from it via to_timestamp().
    df_raw.index = pd.to_datetime(df_raw.index).to_period('M')
    print(f'Loaded FRED RSXFS: {len(df_raw)} monthly observations')
except Exception as exc:
    # Deliberately broad: this is the notebook's top-level fallback boundary.
    print(f'FRED download unavailable ({type(exc).__name__}: {exc})')
    import statsmodels.api as sm
    macro = sm.datasets.macrodata.load_pandas().data
    macro.index = pd.period_range('1959Q1', periods=len(macro), freq='Q')
    df_raw = pd.DataFrame({'value': macro['realgdp']})
    print('Using statsmodels macrodata fallback (quarterly real GDP).')


# --------------------------------------------------------------------------
# Baseline feature function -- 29 features
# (Identical to make_features() in Labs 07-10; labelled '~26f' in slides)
# --------------------------------------------------------------------------
def make_features_baseline(df, n_lags=12, roll_windows=(3, 6, 12)):
    """Build the baseline lag / rolling / calendar feature matrix.

    Parameters
    ----------
    df : DataFrame with a ``'value'`` column. The index must expose ``.month``
        either directly or after ``to_timestamp()`` (e.g. a PeriodIndex).
    n_lags : creates ``lag_1`` .. ``lag_{n_lags}``.
    roll_windows : window lengths; each yields ``roll_mean_w`` and
        ``roll_std_w`` computed on the series shifted by one step, so the
        current observation never enters its own features.

    Returns
    -------
    (X, y) : feature frame and target series on the same index, restricted to
        rows where the target and every feature are present.

    With the defaults and an index covering all twelve calendar months this
    gives 12 lags + 6 rolling stats + 11 month dummies = 29 columns.
    """
    target = df['value']
    frame = pd.DataFrame({'value': target})

    for lag in range(1, n_lags + 1):
        frame[f'lag_{lag}'] = target.shift(lag)

    # Rolling statistics look only at values strictly before the current row.
    previous = target.shift(1)
    for window in roll_windows:
        roller = previous.rolling(window)
        frame[f'roll_mean_{window}'] = roller.mean()
        frame[f'roll_std_{window}'] = roller.std()

    # Calendar month of each row; the first category is dropped as reference.
    index = target.index
    stamps = index.to_timestamp() if hasattr(index, 'to_timestamp') else index
    month_of_row = pd.Series(stamps.month, index=index, dtype=int)
    month_dummies = pd.get_dummies(
        month_of_row, prefix='month', drop_first=True
    ).astype(int)

    # Warm-up rows (incomplete lags / windows) are removed here.
    complete = pd.concat([frame, month_dummies], axis=1).dropna()
    return complete.drop(columns='value'), complete['value']


# --------------------------------------------------------------------------
# Extended feature function -- 36 features (as defined in Lecture 11 slides)
# 12 lags + 4 rolling stats x 3 windows (12) + EWM (1) + 11 dummies = 36
# --------------------------------------------------------------------------
def make_features_extended(df, lags=range(1, 13), roll_windows=(3, 6, 12)):
    """Leakage-free extended features: all rolling ops use shift(1) first.

    Parameters
    ----------
    df : DataFrame with a ``'value'`` column. The index must expose ``.month``
        either directly or after ``to_timestamp()`` (e.g. a PeriodIndex).
    lags : iterable of lag orders ``k``; each yields ``lag_k``.
    roll_windows : window lengths; each yields ``roll_mean_w``, ``roll_std_w``,
        ``roll_min_w`` and ``roll_max_w``.

    Returns
    -------
    (X_out, y_out) : feature frame and target series on the same index,
        restricted to rows where the target AND every feature are present.

    With the defaults and an index covering all twelve calendar months this
    gives 12 lags + 12 rolling stats + 1 EWM + 11 month dummies = 36 columns.
    """
    X = df[['value']].copy()
    # Lag features
    for k in lags:
        X[f'lag_{k}'] = X['value'].shift(k)
    # Rolling statistics (shift first to prevent look-ahead leakage)
    prev = X['value'].shift(1)
    for w in roll_windows:
        r = prev.rolling(w)
        X[f'roll_mean_{w}'] = r.mean()
        X[f'roll_std_{w}']  = r.std()
        X[f'roll_min_{w}']  = r.min()
        X[f'roll_max_{w}']  = r.max()
    # Exponentially-weighted moving average (alpha=0.3; shift first)
    X['ewm_alpha03'] = prev.ewm(alpha=0.3, adjust=False).mean()
    # Month dummies (drop_first=True: January is reference -> month_2 to month_12)
    if hasattr(X.index, 'to_timestamp'):
        month_vals = X.index.to_timestamp().month
    else:
        month_vals = X.index.month
    months = pd.Series(month_vals, index=X.index, dtype=int)
    dums = pd.get_dummies(months, prefix='month', drop_first=True).astype(int)
    X = pd.concat([X, dums], axis=1)
    # Drop incomplete rows while the target is still part of the frame, so a
    # missing target removes its row too (same rule as the baseline function).
    # Dropping on the features alone could hand a NaN target to the models.
    X = X.dropna()
    return X.drop(columns='value'), X['value']


# Build both feature sets
X_base, y_base = make_features_baseline(df_raw)
X_ext,  y_ext  = make_features_extended(df_raw)

# Align to common index (ensures fair comparison on identical observations)
common_idx = X_base.index.intersection(X_ext.index)
X_base, y_base = X_base.loc[common_idx], y_base.loc[common_idx]
X_ext,  y_ext  = X_ext.loc[common_idx],  y_ext.loc[common_idx]

# Summary of what was built. The per-group counts are derived from the column
# name prefixes of X_ext; only the EWM count and the window / lag descriptions
# are fixed text.
print(f'Baseline features  : {X_base.shape[1]}  (labelled "~26f" in slides)')
print(f'Extended features  : {X_ext.shape[1]}')
print(f'Observations       : {len(y_ext)}')
print(f'\nExtended feature breakdown:')
print(f'  Lags         ({len([c for c in X_ext.columns if c.startswith("lag_")])}): lag_1 .. lag_12')
print(f'  Roll mean    ({len([c for c in X_ext.columns if c.startswith("roll_mean")])}): windows 3, 6, 12')
print(f'  Roll std     ({len([c for c in X_ext.columns if c.startswith("roll_std")])}): windows 3, 6, 12')
print(f'  Roll min     ({len([c for c in X_ext.columns if c.startswith("roll_min")])}): windows 3, 6, 12')
print(f'  Roll max     ({len([c for c in X_ext.columns if c.startswith("roll_max")])}): windows 3, 6, 12')
print(f'  EWM          (1): alpha=0.3')
print(f'  Month dummies({len([c for c in X_ext.columns if c.startswith("month_")])}): month_2 .. month_12 (Jan=reference)')
print(f'  TOTAL        : {X_ext.shape[1]}')

# Three-way split
# Sizes for a chronological train / validation / test split. Validation and
# test each take int(15%) of the rows (truncated); training takes the rest,
# so any rounding remainder lands in the training block.
n = len(X_ext)
n_test  = int(0.15 * n)
n_val   = int(0.15 * n)
n_train = n - n_val - n_test

def split_data(X, y):
    """Cut ``X`` and ``y`` positionally into train / validation / test blocks.

    Block sizes come from the module-level ``n_train`` and ``n_val``: the
    first ``n_train`` rows are training, the next ``n_val`` rows validation,
    everything after that is test.

    Returns an 8-tuple
    ``(X_train, y_train, X_val, y_val, X_test, y_test, X_trainval, y_trainval)``
    where the last pair is training and validation combined.
    """
    val_end = n_train + n_val
    train_rows = slice(None, n_train)
    val_rows = slice(n_train, val_end)
    test_rows = slice(val_end, None)
    trainval_rows = slice(None, val_end)
    return (
        X.iloc[train_rows], y.iloc[train_rows],
        X.iloc[val_rows], y.iloc[val_rows],
        X.iloc[test_rows], y.iloc[test_rows],
        X.iloc[trainval_rows], y.iloc[trainval_rows],
    )

# Extended-feature splits (no prefix) and baseline-feature splits (Xb_/yb_
# prefix). Both use the same row counts, so the blocks cover the same rows.
Xtr, ytr, Xval, yval, Xte, yte, Xtv, ytv = split_data(X_ext, y_ext)
Xb_tr, yb_tr, Xb_val, yb_val, Xb_te, yb_te, Xb_tv, yb_tv = split_data(X_base, y_base)

print(f'\nSplit: Train {n_train} | Val {n_val} | Test {n_test}')

In [None]:
# =============================================================================
# Section 3a: Feature Visualization -- ACF / PACF
# =============================================================================
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Convert PeriodIndex to datetime for statsmodels
if hasattr(y_ext.index, 'to_timestamp'):
    y_plot = pd.Series(y_ext.values,
                       index=y_ext.index.to_timestamp(), name='value')
else:
    y_plot = y_ext.copy()

# Two stacked panels: ACF on top, PACF below.
fig, (ax_acf, ax_pacf) = plt.subplots(2, 1, figsize=(10, 6))

# Lags 1..24 (zero=False leaves out lag 0); alpha=0.05 sets the confidence band.
plot_acf(y_plot, lags=24, ax=ax_acf, color=UNO['blue'],
         title='ACF of RSXFS (Monthly Retail Sales)',
         zero=False, alpha=0.05)
ax_acf.set_xlabel('')
ax_acf.axhline(y=0, color=UNO['gray'], lw=0.8)

plot_pacf(y_plot, lags=24, ax=ax_pacf, method='ywm', color=UNO['blue'],
          title='PACF of RSXFS',
          zero=False, alpha=0.05)
ax_pacf.axhline(y=0, color=UNO['gray'], lw=0.8)

# Highlight key lags
for ax in [ax_acf, ax_pacf]:
    ax.axvline(1,  color=UNO['red'],   ls='--', lw=1.2, alpha=0.5, label='Lag 1 (AR)')
    ax.axvline(12, color=UNO['green'], ls='--', lw=1.2, alpha=0.5, label='Lag 12 (Seasonal)')
    ax.legend(fontsize=9, loc='upper right')

plt.tight_layout()
plt.savefig(f'{FIGURE_DIR}/lecture11_acf_pacf.png', dpi=150, bbox_inches='tight')
plt.show()
print('Saved lecture11_acf_pacf.png')
# NOTE: the interpretation below is fixed text, not derived from the computed
# ACF/PACF values -- check it against the figure actually produced.
print('\nInterpretation:')
print('  ACF: large spike at lag 12 -> strong seasonal pattern')
print('  PACF: spike at lag 1 (AR component) and lag 12 (seasonal AR)')
print('  -> Include lag_1, lag_2, lag_12 as minimum feature set')

In [None]:
# =============================================================================
# Section 3b: Feature Visualization -- Rolling Features Time Series
# =============================================================================
# String labels for the x-axis (the index is converted with astype(str)).
idx_str = X_ext.index.astype(str)

# Three stacked panels sharing one x-axis.
fig, axes = plt.subplots(3, 1, figsize=(11, 8), sharex=True)

# Panel 1: Actual series + lag_1
ax = axes[0]
ax.plot(idx_str, y_ext.values,           color='black',      lw=1.5, label='$y_t$ (actual)')
ax.plot(idx_str, X_ext['lag_1'].values,  color=UNO['lightblue'], lw=1, ls='--', label='$y_{t-1}$ (lag 1)')
ax.set_title('Lag Feature vs. Actual Series')
ax.legend(fontsize=9); ax.set_ylabel('USD millions')

# Panel 2: Rolling means at 3, 6, 12 months
ax = axes[1]
ax.plot(idx_str, X_ext['roll_mean_3'].values,  color=UNO['blue'],  lw=1.5, label='roll_mean_3')
ax.plot(idx_str, X_ext['roll_mean_6'].values,  color=UNO['gray'],  lw=1.5, ls='--', label='roll_mean_6')
ax.plot(idx_str, X_ext['roll_mean_12'].values, color=UNO['green'], lw=1.5, ls=':', label='roll_mean_12')
ax.set_title('Rolling Means (3, 6, 12-month windows) -- all shifted by 1 to prevent leakage')
ax.legend(fontsize=9); ax.set_ylabel('USD millions')

# Panel 3: EWM vs. 3-month rolling mean
ax = axes[2]
ax.plot(idx_str, X_ext['ewm_alpha03'].values,  color=UNO['red'],  lw=1.5, label='EWM (alpha=0.3)')
ax.plot(idx_str, X_ext['roll_mean_3'].values,  color=UNO['blue'], lw=1.5, ls='--', label='roll_mean_3')
ax.set_title('EWM (alpha=0.3) vs. 3-Month Rolling Mean -- EWM reacts faster to recent changes')
ax.legend(fontsize=9); ax.set_ylabel('USD millions')
ax.set_xlabel('Period')

# Cap the number of x ticks so the string labels stay readable.
for ax in axes:
    ax.xaxis.set_major_locator(mticker.MaxNLocator(8))
plt.xticks(rotation=30)
plt.tight_layout()
plt.savefig(f'{FIGURE_DIR}/lecture11_rolling_features.png', dpi=150, bbox_inches='tight')
plt.show()
print('Saved lecture11_rolling_features.png')

In [None]:
# =============================================================================
# Section 4: Permutation Importance Ranking
# Fit RF on training set; compute importance on validation set.
# Permutation importance = mean RMSE increase when feature is shuffled.
# =============================================================================

# Fit on the training split only; the validation split stays out of fitting so
# the importances below are measured on data the forest has not seen.
rf_perm = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)
rf_perm.fit(Xtr, ytr)
val_rmse = np.sqrt(mean_squared_error(yval, rf_perm.predict(Xval)))
print(f'RF fitted. Validation RMSE (extended 36f): {val_rmse:,.1f}')

# Permutation importance on validation set (unbiased -- val was not used for fitting)
perm = permutation_importance(
    rf_perm, Xval, yval,
    n_repeats=10, random_state=42,
    scoring='neg_root_mean_squared_error'
)
# scikit-learn defines importance as (baseline score - permuted score). With
# the 'neg_root_mean_squared_error' scorer that is
#   (-RMSE_baseline) - (-RMSE_permuted) = RMSE_permuted - RMSE_baseline,
# i.e. importances_mean is ALREADY the mean RMSE increase. It must not be
# negated: a sign flip would rank the least useful features as "top".
perm_df = pd.DataFrame({
    'feature': X_ext.columns.tolist(),
    'mean_increase': perm.importances_mean,
    'std':            perm.importances_std,
}).sort_values('mean_increase', ascending=True)

# Plot top 15 features
# Ascending sort + tail() keeps the largest increases; barh draws rows bottom
# to top, so the most important feature ends up at the top of the chart.
n_top = 15
perm_top = perm_df.tail(n_top)
bar_colors = [
    UNO['blue'] if v > 0 else UNO['lightgray']
    for v in perm_top['mean_increase']
]

fig, ax = plt.subplots(figsize=(9, 6))
ax.barh(perm_top['feature'], perm_top['mean_increase'],
        xerr=perm_top['std'],
        color=bar_colors, edgecolor='white',
        ecolor=UNO['gray'], capsize=3)
ax.axvline(0, color='black', lw=0.8)
ax.set_xlabel('Mean RMSE increase when feature shuffled (validation set)')
ax.set_title('Permutation Feature Importance -- RF, Top 15 Features')
plt.tight_layout()
plt.savefig(f'{FIGURE_DIR}/lecture11_permutation_importance.png',
            dpi=150, bbox_inches='tight')
plt.show()
print('Saved lecture11_permutation_importance.png')

print('\nTop 8 features by permutation importance:')
print(perm_df.tail(8)[['feature', 'mean_increase', 'std']].to_string(index=False))
print('\nBottom 5 features (close to zero = less useful):')
print(perm_df.head(5)[['feature', 'mean_increase', 'std']].to_string(index=False))

In [None]:
# =============================================================================
# Section 5a: Feature Selection -- LassoCV Coefficient Shrinkage
# StandardScaler is fit on the training split only (keeps validation/test out; scaling is required for LASSO)
# =============================================================================
# Time-ordered CV splitter; also reused by the RFECV cell below.
tscv = TimeSeriesSplit(n_splits=5, gap=1)

# Standardize features (LASSO penalizes coef magnitude, so scale matters)
scaler_lasso = StandardScaler()
Xtr_sc = scaler_lasso.fit_transform(Xtr)
# Note: we fit scaler on training only -- validation/test are NOT used here
# NOTE(review): the scaler is fit on all of Xtr before LassoCV makes its own
# folds, so each fold's held-out rows contributed to the scaling statistics.
# That is the "scaler fit before CV" pattern; wrapping the scaler and model in
# a Pipeline would avoid it.

lasso_cv = LassoCV(cv=tscv, max_iter=5000, random_state=42, n_jobs=-1)
lasso_cv.fit(Xtr_sc, ytr)

# One row per feature; a coefficient of exactly zero means LASSO dropped it.
coef_df = pd.DataFrame({
    'feature': X_ext.columns.tolist(),
    'coef':    lasso_cv.coef_
})
selected = coef_df[coef_df['coef'] != 0].sort_values('coef', key=abs, ascending=False)
zero_coef = coef_df[coef_df['coef'] == 0]

print(f'LassoCV selected lambda = {lasso_cv.alpha_:.4f}')
print(f'Features retained : {len(selected)} / {X_ext.shape[1]}')
print(f'Features zeroed   : {len(zero_coef)} / {X_ext.shape[1]}')
print(f'\nTop 10 retained features (by |coef|):')
print(selected.head(10).to_string(index=False))
if len(zero_coef) > 0:
    print(f'\nZeroed features: {zero_coef["feature"].tolist()}')

In [None]:
# =============================================================================
# Section 5b: Feature Selection -- RFECV (RF-based, CV curve)
# Uses TimeSeriesSplit to respect temporal ordering.
# RFECV: recursively eliminates least-important features, picks n by CV RMSE.
# Note: may take 1-3 minutes with n_estimators=50.
# =============================================================================
# Small, shallow forest (50 trees, depth 5) used as the ranking estimator.
rf_for_rfe = RandomForestRegressor(
    n_estimators=50, max_depth=5, random_state=42, n_jobs=-1
)

# Removes one feature per step, never going below 5 features; the splitter is
# the `tscv` object created in the LassoCV cell.
rfecv = RFECV(
    estimator=rf_for_rfe,
    step=1,
    cv=tscv,
    scoring='neg_root_mean_squared_error',
    min_features_to_select=5,
    n_jobs=-1
)
print('Fitting RFECV (this may take 1-2 minutes)...')
# Fitted on train + validation rows; the test block is not touched.
rfecv.fit(Xtv, ytv)

n_opt = rfecv.n_features_
selected_features = X_ext.columns[rfecv.support_].tolist()
print(f'Optimal number of features: {n_opt} / {X_ext.shape[1]}')
print(f'Selected features: {selected_features}')

# Plot CV RMSE vs. number of features selected
# Scores are negated RMSE, so the sign is flipped back to plain RMSE.
try:
    cv_scores = -rfecv.cv_results_['mean_test_score']  # sklearn >= 1.0
except AttributeError:
    cv_scores = -rfecv.grid_scores_  # sklearn < 1.0

# With step=1 the i-th score belongs to (min_features_to_select + i) features.
n_feat_range = np.arange(rfecv.min_features_to_select,
                          rfecv.min_features_to_select + len(cv_scores))

# Position of the chosen feature count inside cv_scores.
opt_idx = n_opt - rfecv.min_features_to_select
opt_rmse = cv_scores[opt_idx]

fig, ax = plt.subplots(figsize=(9, 4))
ax.plot(n_feat_range, cv_scores, 'o-', color=UNO['blue'], lw=2, ms=4)
ax.scatter([n_opt], [opt_rmse], color=UNO['red'], s=80, zorder=5)
ax.axvline(n_opt, color=UNO['red'], ls='--', lw=1.5,
           label=f'Optimal: {n_opt} features (CV RMSE = {opt_rmse:,.0f})')
ax.set_xlabel('Number of features selected')
ax.set_ylabel('CV RMSE (TimeSeriesSplit, gap=1)')
ax.set_title('RFECV: Cross-Validated RMSE vs. Number of Features (RF estimator)')
ax.legend(fontsize=10)
plt.tight_layout()
plt.savefig(f'{FIGURE_DIR}/lecture11_rfecv_curve.png', dpi=150, bbox_inches='tight')
plt.show()
print('Saved lecture11_rfecv_curve.png')

In [None]:
# =============================================================================
# Section 6: Pipeline Construction + CV Leakage Demonstration
#
# Key point: StandardScaler MUST be inside the Pipeline so it is fit only
# on the training fold of each CV split, not on validation data.
#
# Demonstration with ElasticNet (where scaling has a visible effect on CV RMSE).
# =============================================================================
# Same splitter for every comparison below, so the fold boundaries match.
tscv_pipe = TimeSeriesSplit(n_splits=5, gap=1)

# --- CORRECT: StandardScaler inside Pipeline ---
# cross_val_score refits the whole pipeline per fold, so the scaler only
# ever sees that fold's training rows.
pipe_correct = Pipeline([
    ('scaler', StandardScaler()),
    ('model',  ElasticNet(alpha=0.1, l1_ratio=0.5, max_iter=5000, random_state=42))
])
scores_correct = cross_val_score(
    pipe_correct, Xtv.values, ytv.values,
    cv=tscv_pipe, scoring='neg_root_mean_squared_error'
)
# Scores are negated RMSE; flip the sign to report plain RMSE.
rmse_correct = -scores_correct.mean()

# --- LEAKY: Scaler fit on full train+val data BEFORE CV loop ---
scaler_leaky = StandardScaler()
Xtv_leaky = scaler_leaky.fit_transform(Xtv)  # Leaks! Sees statistics from all folds

# Same model settings as above; only the placement of the scaler differs.
model_leaky = ElasticNet(alpha=0.1, l1_ratio=0.5, max_iter=5000, random_state=42)
scores_leaky = cross_val_score(
    model_leaky, Xtv_leaky, ytv.values,
    cv=tscv_pipe, scoring='neg_root_mean_squared_error'
)
rmse_leaky = -scores_leaky.mean()

print('Pipeline Leakage Demonstration (ElasticNet + StandardScaler)')
print('=' * 60)
print(f'Correct (scaler inside Pipeline) : CV RMSE = {rmse_correct:,.1f}')
print(f'Leaky   (scaler fit before CV)   : CV RMSE = {rmse_leaky:,.1f}')
# The message depends on which variant scored lower on this particular data.
if rmse_leaky < rmse_correct:
    print(f'\n*** Leaky approach reports {rmse_correct - rmse_leaky:,.1f} lower CV RMSE -- artificially optimistic!')
    print('    The reported RMSE would NOT generalize to new data.')
else:
    print('\nNote: With ElasticNet the leakage effect is mild on this dataset.')
    print('It is more dramatic with target encoding or imputation inside the CV loop.')

# --- Production pipeline: RF + StandardScaler (RF is scale-invariant but Pipeline is good practice) ---
pipe_rf = Pipeline([
    ('scaler', StandardScaler()),
    ('model',  RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1))
])
scores_pipe_rf = cross_val_score(
    pipe_rf, Xtv.values, ytv.values,
    cv=tscv_pipe, scoring='neg_root_mean_squared_error'
)
print(f'\nProduction Pipeline (RF + StandardScaler) : CV RMSE = {-scores_pipe_rf.mean():,.1f}')
print('(RF is scale-invariant so scaler has no effect on RF RMSE, but using Pipeline is good practice.)')

In [None]:
# =============================================================================
# Section 7: Model Comparison -- Baseline (29f) vs. Extended (36f)
# Fit RF and XGBoost on train+val; evaluate on held-out test set.
# =============================================================================

def rmse(a, p):
    """Root mean squared error of predictions ``p`` against actuals ``a``.

    Both inputs are converted to arrays; ``p`` is cut to its first ``len(a)``
    entries before scoring.
    """
    actual = np.asarray(a)
    predicted = np.asarray(p)[:len(a)]
    squared_error = mean_squared_error(actual, predicted)
    return np.sqrt(squared_error)

def mae(a, p):
    """Mean absolute error of predictions ``p`` against actuals ``a``.

    Both inputs are converted to arrays; ``p`` is cut to its first ``len(a)``
    entries before scoring.
    """
    actual = np.asarray(a)
    predicted = np.asarray(p)[:len(a)]
    return mean_absolute_error(actual, predicted)

# (model label, test RMSE, test MAE) tuples, collected for the table and chart.
results = []
# Pre-declared so later cells can test them against None.
pred_rf_ext  = None
pred_xgb_ext = None

# --- Random Forest: baseline (29f) ---
# Fit on train + validation, score on the held-out test block.
rf_base = RandomForestRegressor(n_estimators=500, random_state=42, n_jobs=-1)
rf_base.fit(Xb_tv, yb_tv)
pred_rf_base = rf_base.predict(Xb_te)
results.append(('RF (29f baseline)',
                rmse(yb_te, pred_rf_base), mae(yb_te, pred_rf_base)))

# --- Random Forest: extended (36f) ---
# Same forest settings and seed; only the feature set differs.
rf_ext = RandomForestRegressor(n_estimators=500, random_state=42, n_jobs=-1)
rf_ext.fit(Xtv, ytv)
pred_rf_ext = rf_ext.predict(Xte)
results.append(('RF (36f extended)',
                rmse(yte, pred_rf_ext), mae(yte, pred_rf_ext)))

# --- XGBoost: baseline and extended ---
# Two-stage fit per feature set:
#   1. train on the training split with early stopping on the validation split
#      to find the best number of boosting rounds;
#   2. refit on train + validation with that number of rounds, then predict
#      the held-out test block.
# `best_iteration` is a 0-based index, so the number of rounds to refit with
# is best_iteration + 1. Passing best_iteration itself would build one tree
# too few (and zero trees when the best iteration is 0).
if XGB_AVAILABLE:
    xgb_params = {
        'learning_rate': 0.05, 'max_depth': 4,
        'subsample': 0.8, 'colsample_bytree': 0.8,
        'reg_lambda': 1.0, 'objective': 'reg:squarederror',
        'eval_metric': 'rmse', 'seed': 42,
    }
    # Baseline
    xgb_b = xgb.train(xgb_params,
                       xgb.DMatrix(Xb_tr, label=yb_tr), 2000,
                       evals=[(xgb.DMatrix(Xb_val, label=yb_val), 'val')],
                       early_stopping_rounds=50, verbose_eval=False)
    xgb_b_final = xgb.train(xgb_params, xgb.DMatrix(Xb_tv, label=yb_tv),
                              xgb_b.best_iteration + 1)
    pred_xgb_base = xgb_b_final.predict(xgb.DMatrix(Xb_te))
    results.append(('XGB (29f baseline)',
                    rmse(yb_te, pred_xgb_base), mae(yb_te, pred_xgb_base)))
    # Extended
    xgb_e = xgb.train(xgb_params,
                       xgb.DMatrix(Xtr, label=ytr), 2000,
                       evals=[(xgb.DMatrix(Xval, label=yval), 'val')],
                       early_stopping_rounds=50, verbose_eval=False)
    xgb_e_final = xgb.train(xgb_params, xgb.DMatrix(Xtv, label=ytv),
                              xgb_e.best_iteration + 1)
    pred_xgb_ext = xgb_e_final.predict(xgb.DMatrix(Xte))
    results.append(('XGB (36f extended)',
                    rmse(yte, pred_xgb_ext), mae(yte, pred_xgb_ext)))

# Tabulate the collected (label, RMSE, MAE) tuples, rounded to one decimal.
res_df = pd.DataFrame(results, columns=['Model', 'RMSE', 'MAE'])
res_df['RMSE'] = res_df['RMSE'].round(1)
res_df['MAE']  = res_df['MAE'].round(1)
print('Feature engineering impact on test-set performance:')
print(res_df.to_string(index=False))

# --- Bar chart ---
# Light / dark colour pairs: blue for the RF rows, red for the XGBoost rows
# (present only when xgboost is installed). Trimmed to the number of rows.
n_models = len(res_df)
bar_colors = ([UNO['lightblue'], UNO['blue']] +
              ([UNO['lightred'], UNO['red']] if XGB_AVAILABLE else []))[:n_models]

fig, ax = plt.subplots(figsize=(9, 4))
bars = ax.bar(res_df['Model'], res_df['RMSE'],
              color=bar_colors, edgecolor='white', width=0.6)
# Value label above each bar; the +15 offset is in data (RMSE) units.
for bar, val in zip(bars, res_df['RMSE']):
    ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 15,
            f'{val:,.0f}', ha='center', va='bottom', fontsize=10)
ax.set_ylabel('Test RMSE (USD millions)')
ax.set_title('Feature Engineering Impact: Baseline (~29f) vs. Extended (36f)')
# 15% headroom so the value labels fit inside the axes.
ax.set_ylim(0, res_df['RMSE'].max() * 1.15)
plt.xticks(rotation=15)
plt.tight_layout()
plt.savefig(f'{FIGURE_DIR}/lecture11_feature_impact.png', dpi=150, bbox_inches='tight')
plt.show()
print('Saved lecture11_feature_impact.png')

In [None]:
# =============================================================================
# Section 8: Full Leaderboard -- Lectures 01-11
# Illustrative values consistent with slides (actual values vary by sample period).
# Rows tagged L11 (group 'ml_ext') use the extended 36-feature set from this lab; L08-L10 rows use the baseline set.
# =============================================================================

# Hard-coded leaderboard: (lecture, model label, RMSE, MAE, colour group).
# None of these numbers are computed in this notebook.
leaderboard = pd.DataFrame([
    ('L01', 'Seasonal Naive',                4210, 3120, 'classical'),
    ('L01', 'Seasonal Mean',                 3890, 2840, 'classical'),
    ('L03', 'ETS (Holt-Winters)',             3210, 2350, 'classical'),
    ('L04', 'SARIMA(1,1,1)(1,1,1)_12',       2840, 2100, 'classical'),
    ('L05', 'ARIMAX (retail index)',          2720, 2010, 'classical'),
    ('L08', 'Elastic Net (~29f)',             2540, 1890, 'ml_base'),
    ('L09', 'Random Forest (~29f)',           2380, 1760, 'ml_base'),
    ('L09', 'XGBoost (~29f)',                 2250, 1650, 'ml_base'),
    ('L10', 'LSTM (med, 5 seeds, ~29f)',      2180, 1600, 'ml_base'),
    ('L11', 'Elastic Net (36f)',              2410, 1820, 'ml_ext'),
    ('L11', 'Random Forest (36f)',            2210, 1640, 'ml_ext'),
    ('L11', 'XGBoost (36f)',                  2050, 1510, 'ml_ext'),
    ('L11', 'LSTM (med, 5 seeds, 36f)',       1920, 1430, 'ml_ext'),
], columns=['Lecture', 'Model', 'RMSE', 'MAE', 'group'])

# Best (lowest RMSE) first.
leaderboard_sorted = leaderboard.sort_values('RMSE', ascending=True).reset_index(drop=True)

# One bar colour per group.
group_colors = {
    'classical': UNO['gray'],
    'ml_base':   UNO['lightblue'],
    'ml_ext':    UNO['blue'],
}
bar_colors = [group_colors[g] for g in leaderboard_sorted['group']]

fig, ax = plt.subplots(figsize=(11, 5))
bars = ax.barh(leaderboard_sorted['Model'], leaderboard_sorted['RMSE'],
               color=bar_colors, edgecolor='white')
# Value label just right of each bar; the +15 offset is in RMSE units.
for bar, val in zip(bars, leaderboard_sorted['RMSE']):
    ax.text(val + 15, bar.get_y() + bar.get_height() / 2,
            f'{val:,}', va='center', fontsize=9)

ax.set_xlabel('Test RMSE (USD millions) -- illustrative values from slides')
ax.set_title('Model Leaderboard: Lectures 01-11')
# barh draws the first row at the bottom; inverting puts the best model on top.
ax.invert_yaxis()

# Manual legend: the bars are coloured per row, so there are no labelled
# artists for matplotlib to build a legend from.
from matplotlib.patches import Patch
legend_elements = [
    Patch(facecolor=UNO['gray'],      label='Classical methods (L01-L05)'),
    Patch(facecolor=UNO['lightblue'], label='ML with baseline features (~29f, L08-L10)'),
    Patch(facecolor=UNO['blue'],      label='ML with extended features (36f, L11)'),
]
ax.legend(handles=legend_elements, loc='lower right', fontsize=9)
plt.tight_layout()
plt.savefig(f'{FIGURE_DIR}/lecture11_leaderboard.png', dpi=150, bbox_inches='tight')
plt.show()
print('Saved lecture11_leaderboard.png')
print('\nFull leaderboard (sorted by RMSE):')
print(leaderboard_sorted[['Lecture', 'Model', 'RMSE', 'MAE']].to_string(index=False))

In [None]:
# =============================================================================
# Section 9: Forecast Comparison Plot
# Actuals + RF(36f) + XGBoost(36f) over the test period.
# LSTM(36f) shown as illustrative series (RMSE ~1,920 from Lecture 10 replication).
# =============================================================================

fig, ax = plt.subplots(figsize=(11, 4))

# History context: last 24 observations of train+val
context_y = ytv.iloc[-24:]
context_idx = context_y.index.astype(str)
ax.plot(context_idx, context_y.values,
        color=UNO['lightgray'], lw=1.5, label='History')

# Test actuals
test_idx = yte.index.astype(str)
ax.plot(test_idx, yte.values,
        color='black', lw=2, label='Actual', zorder=5)

# RF (36f)
# Drawn only if the model-comparison cell produced predictions.
if pred_rf_ext is not None:
    r_rf = rmse(yte, pred_rf_ext)
    ax.plot(test_idx, pred_rf_ext,
            color=UNO['blue'], lw=2, ls='-.', label=f'RF 36f (RMSE={r_rf:,.0f})')

# XGBoost (36f)
if XGB_AVAILABLE and pred_xgb_ext is not None:
    r_xgb = rmse(yte, pred_xgb_ext)
    ax.plot(test_idx, pred_xgb_ext,
            color=UNO['red'], lw=2, label=f'XGB 36f (RMSE={r_xgb:,.0f})')

# LSTM (36f): illustrative -- shown as smoothed XGBoost predictions
# Actual LSTM values require TensorFlow; see Lecture 10 notebook for full implementation.
# The smoothed series below approximates LSTM's tendency to dampen sharp fluctuations.
# NOTE: no LSTM is trained here. This line is an exponentially weighted mean
# (span=4) of the XGBoost predictions, and the RMSE in its legend label is a
# fixed string, not a value computed from this series.
if XGB_AVAILABLE and pred_xgb_ext is not None:
    lstm_approx = pd.Series(pred_xgb_ext).ewm(span=4).mean().values
    ax.plot(test_idx, lstm_approx,
            color=UNO['green'], lw=2, ls='--',
            label='LSTM 36f (RMSE ~1,920, illustrative -- see L10 for full implementation)')

ax.set_title('Forecast Comparison: Extended Feature Set (Test Set)')
ax.set_xlabel('Period')
ax.set_ylabel('Retail Sales (USD millions)')
ax.legend(loc='upper left', fontsize=9)
# Cap the number of x ticks so the string labels stay readable.
ax.xaxis.set_major_locator(mticker.MaxNLocator(8))
plt.xticks(rotation=30)
plt.tight_layout()
plt.savefig(f'{FIGURE_DIR}/lecture11_forecast_comparison.png',
            dpi=150, bbox_inches='tight')
plt.show()
print('Saved lecture11_forecast_comparison.png')

# Closing summary: fixed list of the figure files written by the cells above.
print('\n=== Lab 11 Complete ===')
print(f'Figures saved to: {FIGURE_DIR}/')
print('lecture11_acf_pacf.png')
print('lecture11_rolling_features.png')
print('lecture11_permutation_importance.png')
print('lecture11_rfecv_curve.png')
print('lecture11_feature_impact.png')
print('lecture11_leaderboard.png')
print('lecture11_forecast_comparison.png')