# Lab 10: Neural Networks for Time Series — LSTM

**BSAD 8310: Business Forecasting | University of Nebraska at Omaha**

## Objectives

1. Reshape the RSXFS lag-feature matrix into 3-D LSTM sequences
2. Build and train a stacked LSTM with early stopping
3. Diagnose seed variance (5 seeds, box plot)
4. Run a lookback ablation (T ∈ {12, 18, 24, 36})
5. Compare LSTM against SARIMA and XGBoost baselines

## Packages Required
```
numpy, pandas, matplotlib, scikit-learn, statsmodels, xgboost
tensorflow>=2.12  (or tensorflow-cpu)
pandas_datareader (optional — FRED data)
```

> **Note:** If TensorFlow is not installed, the LSTM sections are skipped and only the baselines run.

In [None]:
# =============================================================================
# Section 1: Setup
# =============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import mean_squared_error

try:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
    tf.random.set_seed(42)
    TF_AVAILABLE = True
    print(f'TensorFlow {tf.__version__} available.')
except ImportError:
    TF_AVAILABLE = False
    print('TensorFlow not installed — LSTM sections will be skipped.')

try:
    import xgboost as xgb
    XGB_AVAILABLE = True
except ImportError:
    XGB_AVAILABLE = False
    print('xgboost not installed — XGBoost baseline will be skipped.')

np.random.seed(42)

# UNO color palette — shared by every figure in this notebook.
UNO = dict(
    blue='#005CA9',
    red='#E41C38',
    gray='#525252',
    green='#15803d',
    lightblue='#cce0f5',
    lightgray='#e5e5e5',
)

# Matplotlib defaults: 150 dpi, no top/right spines, 11 pt text, 13 pt titles.
plt.rcParams['figure.dpi'] = 150
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.right'] = False
plt.rcParams['font.size'] = 11
plt.rcParams['axes.titlesize'] = 13

import os

# All figures are written below this directory; create it up front.
FIGURE_DIR = '../Figures'
os.makedirs(FIGURE_DIR, exist_ok=True)
print('Setup complete.')

In [None]:
# =============================================================================
# Section 2: Load Data and Build Feature Matrix
# =============================================================================
# Identical feature engineering to Labs 08 and 09 for fair comparison.

# --- Load RSXFS ---
# Preferred source: the FRED series 'RSXFS' via pandas_datareader, re-indexed
# to a monthly PeriodIndex.  Any failure (package missing, no network, FRED
# error) falls back to the statsmodels macrodata set so the notebook still runs.
try:
    import pandas_datareader.data as web
    raw = web.DataReader('RSXFS', 'fred',
                         start='1992-01-01', end='2024-12-01')
    y_all = raw['RSXFS'].dropna()
    y_all.index = pd.to_datetime(y_all.index).to_period('M')
    print(f'Loaded FRED RSXFS: {len(y_all)} monthly observations')
except Exception as exc:
    # Deliberate best-effort fallback, but say *why* it happened so a silent
    # switch of data set cannot go unnoticed.
    print(f'FRED RSXFS unavailable ({type(exc).__name__}: {exc}).')
    import statsmodels.api as sm
    macro = sm.datasets.macrodata.load_pandas().data
    macro.index = pd.period_range('1959Q1', periods=len(macro), freq='Q')
    y_all = macro['realgdp']
    print('Loaded statsmodels macrodata fallback.')
    print('NOTE: the fallback series is quarterly real GDP, not monthly '
          'retail sales; monthly labels and 12-period seasonality below '
          'will not match it.')

# --- Feature engineering (leakage-free, same as L08/L09) ---
def make_features(y, n_lags=12, roll_windows=(3, 6, 12), add_calendar=True):
    """Build a lag / rolling-statistic / month-dummy feature table for ``y``.

    Parameters
    ----------
    y : pandas Series
        Target series.
    n_lags : int
        Creates columns ``lag_1`` .. ``lag_{n_lags}`` (``y`` shifted by k).
    roll_windows : iterable of int
        For each window ``w`` adds ``roll_mean_w`` and ``roll_std_w``, both
        computed on ``y.shift(1)`` so the current value is never included.
    add_calendar : bool
        When True and the index exposes months (via ``to_timestamp()`` or a
        ``month`` attribute), adds 0/1 columns ``month_2`` .. ``month_12``.

    Returns
    -------
    (X, target) : (DataFrame, Series)
        Feature columns and the aligned target, with every row that contains
        a NaN removed.
    """
    columns = {'y': y}

    # Plain lags of the target.
    columns.update(
        {f'lag_{lag}': y.shift(lag) for lag in range(1, n_lags + 1)}
    )

    # Rolling statistics on the one-step-lagged series.
    previous = y.shift(1)
    for window in roll_windows:
        rolled = previous.rolling(window)
        columns[f'roll_mean_{window}'] = rolled.mean()
        columns[f'roll_std_{window}'] = rolled.std()

    # Month indicators; month 1 has no column of its own.
    if add_calendar:
        index = y.index
        if hasattr(index, 'to_timestamp'):
            month = index.to_timestamp().month
        else:
            month = getattr(index, 'month', None)
        if month is not None:
            columns.update(
                {f'month_{m}': (month == m).astype(int) for m in range(2, 13)}
            )

    table = pd.DataFrame(columns, index=y.index).dropna()
    return table.drop(columns=['y']), table['y']

X_df, y_series = make_features(y_all, n_lags=12, roll_windows=(3, 6, 12))
feat_names = list(X_df.columns)

# float32 NumPy copies: plain arrays are easier to slice into sequences.
X_arr = X_df.to_numpy(dtype=np.float32)
y_arr = y_series.to_numpy(dtype=np.float32)

# Rows (observations with complete features) and columns (features).
n_total = y_arr.shape[0]
n_features = X_arr.shape[1]

print(f'Feature matrix: {n_total} × {n_features}')

In [None]:
# =============================================================================
# Section 3: Sequence Construction
# =============================================================================
# LSTM requires 3-D input: (n_samples, T, n_features)
# make_sequences stacks T consecutive feature vectors → one sample.

T = 24  # lookback window: two full years (recommended for monthly data)

def make_sequences(X_arr, y_arr, T):
    """Stack sliding windows of length T.

    Sample ``k`` pairs the window ``X_arr[k : k + T]`` with the target
    ``y_arr[k + T]``, i.e. the observation immediately after the window.

    Parameters
    ----------
    X_arr : array-like of shape (n, p)
    y_arr : array-like of shape (n,)
    T     : int, lookback window (>= 0)

    Returns
    -------
    Xs : float32 ndarray (n-T, T, p)   — LSTM input
    ys : float32 ndarray (n-T,)        — corresponding targets

    When ``n <= T`` no window fits and correctly shaped empty arrays
    ``(0, T, p)`` / ``(0,)`` are returned, so downstream ``.shape`` checks and
    slicing keep working.

    Raises
    ------
    ValueError
        If ``T`` is negative or ``y_arr`` has fewer rows than ``X_arr``.

    NOTE(review): row ``i`` of ``X_arr`` is not part of the window for target
    ``y_arr[i]``.  With features from ``make_features`` (where ``lag_1`` of
    row ``i`` is ``y[i-1]``) the newest value the model sees is ``y[i-2]``,
    whereas a flat model fed ``X_arr[i]`` sees ``y[i-1]``.  Confirm this
    offset is intended before comparing the two as one-step forecasts.
    """
    if T < 0:
        raise ValueError(f'T must be non-negative, got {T}')

    X_arr = np.asarray(X_arr, dtype=np.float32)
    y_arr = np.asarray(y_arr, dtype=np.float32)
    n = len(X_arr)
    if len(y_arr) < n:
        raise ValueError(
            f'y_arr has {len(y_arr)} rows but X_arr has {n}; '
            'every feature row needs a target'
        )

    if n <= T:
        # No complete window: keep the documented rank instead of shape (0,).
        empty_X = np.empty((0, T) + X_arr.shape[1:], dtype=np.float32)
        return empty_X, np.empty((0,), dtype=np.float32)

    Xs = np.stack([X_arr[i - T:i] for i in range(T, n)])   # window [i-T, i)
    ys = y_arr[T:n].copy()                                  # target after each window
    return Xs, ys

# Turn the flat feature matrix into (samples, T, features) windows.
Xs, ys = make_sequences(X_arr, y_arr, T)
n_seq = len(ys)

# Chronological three-way split over the sequence indices: 15% validation,
# 15% test, and whatever remains at the start for training.
# The first T observations are consumed by the first window.
n_val = int(0.15 * n_seq)
n_test = int(0.15 * n_seq)
n_tr = n_seq - (n_val + n_test)

Xs_tr, Xs_va, Xs_te = np.split(Xs, [n_tr, n_tr + n_val])
ys_tr, ys_va, ys_te = np.split(ys, [n_tr, n_tr + n_val])

# Period labels of the test targets (sequence k targets observation T + k).
test_index = y_series.index[T + n_tr + n_val:][:len(ys_te)]

print(f'Sequences: {n_seq} | Train: {n_tr} | Val: {n_val} | Test: {len(ys_te)}')
print(f'Input shape: {Xs_tr.shape}  (n_train, T={T}, p={n_features})')

In [None]:
# =============================================================================
# Section 4: LSTM — Build, Train, and Diagnose
# =============================================================================

if TF_AVAILABLE:
    def build_lstm(T, n_features, units1=64, units2=32, dropout=0.2,
                   recurrent_dropout=0.1, lr=0.001):
        """Stacked two-layer LSTM for one-step-ahead regression.

        Parameters
        ----------
        T : int
            Number of time steps per input sequence.
        n_features : int
            Number of features per time step.
        units1, units2 : int
            Hidden units of the first and second LSTM layer.
        dropout, recurrent_dropout : float
            Input and recurrent dropout rates applied to both LSTM layers.
        lr : float
            Adam learning rate.

        Returns
        -------
        A compiled ``keras.Sequential`` model (MSE loss) whose single Dense
        unit outputs one value per input sequence.
        """
        model = keras.Sequential([
            # An explicit Input object replaces the `input_shape=` layer
            # argument, which newer Keras versions warn about.
            keras.Input(shape=(T, n_features)),
            layers.LSTM(
                units1,
                return_sequences=True,   # pass the full sequence to layer 2
                dropout=dropout,
                recurrent_dropout=recurrent_dropout,
            ),
            layers.LSTM(
                units2,
                dropout=dropout,
                recurrent_dropout=recurrent_dropout,
            ),
            layers.Dense(1),
        ])
        model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=lr),
            loss='mse'
        )
        return model

    # --- Train one LSTM (seed=42) ---
    tf.random.set_seed(42)
    np.random.seed(42)

    lstm_model = build_lstm(T, n_features)
    lstm_model.summary()

    # Stop after 20 epochs without improvement and roll back to the best weights.
    cb = keras.callbacks.EarlyStopping(
        patience=20, restore_best_weights=True
    )

    # NOTE(review): features and targets are passed in their original units;
    # no scaling step is visible in this notebook — confirm that is intended.
    history = lstm_model.fit(
        Xs_tr, ys_tr,
        validation_data=(Xs_va, ys_va),
        epochs=200,
        batch_size=16,
        callbacks=[cb],
        verbose=0
    )

    # --- Training / validation loss plot ---
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']
    # Best epoch = lowest validation loss.  Deriving it from
    # `stopped_epoch - patience` is wrong whenever early stopping never fires
    # (it would always report `epochs - patience`).
    best_ep = int(np.argmin(val_loss))

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(train_loss, color=UNO['blue'], lw=1.8,
            label='Train loss (MSE)')
    ax.plot(val_loss, color=UNO['red'], lw=1.8,
            ls='--', label='Val loss (MSE)')
    ax.axvline(best_ep, color=UNO['gray'], ls=':', lw=1.2,
               label=f'Best epoch ({best_ep})')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('MSE')
    ax.set_title('LSTM Training vs Validation Loss')
    ax.legend()
    plt.tight_layout()
    plt.savefig(f'{FIGURE_DIR}/lecture10_training_loss.png',
                dpi=150, bbox_inches='tight')
    plt.show()
    print('Saved lecture10_training_loss.png')

    # --- Test RMSE (seed=42) ---
    y_pred_lstm_s42 = lstm_model.predict(Xs_te, verbose=0).flatten()
    rmse_lstm_s42 = np.sqrt(mean_squared_error(ys_te, y_pred_lstm_s42))
    print(f'Test RMSE (LSTM, seed=42): {rmse_lstm_s42:.1f}')

else:
    # Define the names later cells read so they can run without TensorFlow.
    print('TensorFlow not available — skipping LSTM training.')
    y_pred_lstm_s42 = None
    rmse_lstm_s42   = float('nan')

In [None]:
# =============================================================================
# Section 5: Seed Variance — 5 Random Seeds
# =============================================================================
# Neural networks are sensitive to initialization. Always report median RMSE
# across multiple seeds, not just the best run.

if TF_AVAILABLE:
    SEEDS = [0, 7, 13, 42, 99]
    seed_rmses = []

    # Same architecture, data and training schedule for every run; only the
    # random seed changes, so the RMSE spread reflects the seed alone.
    for s in SEEDS:
        tf.random.set_seed(s)
        np.random.seed(s)
        m = build_lstm(T, n_features)
        cb_s = keras.callbacks.EarlyStopping(
            patience=20, restore_best_weights=True, verbose=0
        )
        m.fit(
            Xs_tr, ys_tr,
            validation_data=(Xs_va, ys_va),
            epochs=200, batch_size=16,
            callbacks=[cb_s], verbose=0
        )
        pred_s = m.predict(Xs_te, verbose=0).flatten()
        r = float(np.sqrt(mean_squared_error(ys_te, pred_s)))
        seed_rmses.append(r)
        print(f'  Seed {s:2d}: RMSE = {r:.1f}')

    median_rmse = np.median(seed_rmses)
    print(f'\nMedian RMSE ({len(SEEDS)} seeds): {median_rmse:.1f}')

    # --- Box plot ---
    fig, ax = plt.subplots(figsize=(5, 4))
    bp = ax.boxplot([seed_rmses], patch_artist=True,
                    medianprops=dict(color=UNO['red'], lw=2),
                    boxprops=dict(facecolor=UNO['lightblue']))
    # Overlay the individual runs on the box.
    ax.scatter([1] * len(seed_rmses), seed_rmses,
               color=UNO['blue'], zorder=3, s=50, label='Individual seeds')
    ax.set_xticks([1])
    ax.set_xticklabels([f'LSTM\n({len(SEEDS)} seeds)'])
    ax.set_ylabel('Test RMSE')
    # Report the actual sample size instead of a hard-coded "n=300".
    ax.set_title(f'LSTM Seed Variance\n(n={n_total} obs.)')
    ax.legend(fontsize=9)
    plt.tight_layout()
    plt.savefig(f'{FIGURE_DIR}/lecture10_seed_variance.png',
                dpi=150, bbox_inches='tight')
    plt.show()
    print('Saved lecture10_seed_variance.png')

else:
    # Define the names later cells read so they can run without TensorFlow.
    print('TensorFlow not available — skipping seed variance analysis.')
    seed_rmses   = []
    median_rmse  = float('nan')

In [None]:
# =============================================================================
# Section 6: Lookback Ablation — T ∈ {12, 18, 24, 36}
# =============================================================================
# How sensitive is LSTM performance to the choice of lookback window T?

if TF_AVAILABLE:
    T_values = [12, 18, 24, 36]
    ablation_rmse = {}

    # Hold the validation and test windows fixed across every T.  Sequences
    # for all T end at the same last observation, so using the same counts as
    # the main split (n_val, n_test) scores each model on identical targets.
    # Recomputing 15% of each T's sequence count would shift the test window
    # with T and make the RMSEs incomparable.
    n_te = n_test
    n_va = n_val

    for T_ab in T_values:
        Xs_ab, ys_ab = make_sequences(X_arr, y_arr, T_ab)
        n_ab    = len(ys_ab)
        n_tr_ab = n_ab - n_va - n_te
        if n_tr_ab <= 0:
            print(f'  T={T_ab:2d}: skipped (no training sequences left)')
            continue

        Xs_tr_ab = Xs_ab[:n_tr_ab]
        ys_tr_ab = ys_ab[:n_tr_ab]
        Xs_va_ab = Xs_ab[n_tr_ab:n_tr_ab+n_va]
        ys_va_ab = ys_ab[n_tr_ab:n_tr_ab+n_va]
        Xs_te_ab = Xs_ab[n_tr_ab+n_va:]
        ys_te_ab = ys_ab[n_tr_ab+n_va:]

        # Use seed=42 for each T so only the lookback length varies
        tf.random.set_seed(42)
        np.random.seed(42)
        m_ab = build_lstm(T_ab, n_features)
        cb_ab = keras.callbacks.EarlyStopping(
            patience=20, restore_best_weights=True, verbose=0
        )
        m_ab.fit(
            Xs_tr_ab, ys_tr_ab,
            validation_data=(Xs_va_ab, ys_va_ab),
            epochs=200, batch_size=16,
            callbacks=[cb_ab], verbose=0
        )
        pred_ab = m_ab.predict(Xs_te_ab, verbose=0).flatten()
        r_ab = float(np.sqrt(mean_squared_error(ys_te_ab, pred_ab)))
        ablation_rmse[T_ab] = r_ab
        print(f'  T={T_ab:2d}: Test RMSE = {r_ab:.1f}')

    # --- Lookback ablation curve ---
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.plot(list(ablation_rmse.keys()), list(ablation_rmse.values()),
            'o-', color=UNO['blue'], lw=2, ms=7)
    ax.axvline(24, color=UNO['gray'], ls='--', lw=1.2,
               label='T=24 (recommended)')
    ax.set_xlabel('Lookback window T (months)')
    ax.set_ylabel('Test RMSE')
    ax.set_title('LSTM Lookback Ablation (seed=42)')
    ax.set_xticks(T_values)
    ax.legend()
    plt.tight_layout()
    plt.savefig(f'{FIGURE_DIR}/lecture10_lookback_ablation.png',
                dpi=150, bbox_inches='tight')
    plt.show()
    print('Saved lecture10_lookback_ablation.png')

else:
    print('TensorFlow not available — skipping lookback ablation.')
    ablation_rmse = {}

In [None]:
# =============================================================================
# Section 7: Baselines — SARIMA and XGBoost
# =============================================================================
# The baselines are scored on exactly the same validation and test
# observations as the LSTM: the flat split reuses the sequence split's
# validation/test sizes, and the T observations the LSTM spends on its first
# window simply join the baselines' training data.
# XGBoost uses the flat feature matrix (not sequences).

from statsmodels.tsa.statespace.sarimax import SARIMAX

# --- Flat-feature split (aligned with the LSTM sequence split) ---
# Taking 15% of the flat length instead would give a longer test window than
# the LSTM's, so the RMSEs would be computed on different periods.
n_flat   = len(y_arr)
n_te_fl  = len(ys_te)
n_va_fl  = len(ys_va)
n_tr_fl  = n_flat - n_va_fl - n_te_fl

X_train_fl = X_arr[:n_tr_fl]
y_train_fl = y_arr[:n_tr_fl]
X_val_fl   = X_arr[n_tr_fl:n_tr_fl+n_va_fl]
y_val_fl   = y_arr[n_tr_fl:n_tr_fl+n_va_fl]
X_tv_fl    = X_arr[:n_tr_fl+n_va_fl]     # train + validation, for the final refit
y_tv_fl    = y_arr[:n_tr_fl+n_va_fl]
X_test_fl  = X_arr[n_tr_fl+n_va_fl:]
y_test_fl  = y_arr[n_tr_fl+n_va_fl:]

# Keep index labels for the test period
flat_test_index = y_series.index[n_tr_fl+n_va_fl : n_tr_fl+n_va_fl+len(y_test_fl)]

# --- SARIMA ---
# Fitted on train + validation, then forecast over the whole test horizon in
# one call.  NOTE(review): that is a multi-step forecast which never sees
# test-period actuals, unlike the feature-based models — keep this in mind
# when reading the comparison.
y_tv_series = y_series.iloc[:n_tr_fl+n_va_fl]
try:
    sarima_mod = SARIMAX(
        y_tv_series,
        order=(1, 1, 1),
        seasonal_order=(1, 1, 1, 12),
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    sarima_res  = sarima_mod.fit(disp=False)
    y_pred_sar  = sarima_res.forecast(len(y_test_fl))
    rmse_sarima = np.sqrt(mean_squared_error(y_test_fl, y_pred_sar))
    sarima_ok   = True
    print(f'SARIMA RMSE: {rmse_sarima:.1f}')
except Exception as e:
    # Keep the notebook running: fall back to a constant mean forecast and
    # flag the failure through `sarima_ok`.
    print(f'SARIMA failed: {e}')
    sarima_ok   = False
    y_pred_sar  = pd.Series([y_tv_series.mean()] * len(y_test_fl))
    rmse_sarima = float('nan')

# --- XGBoost ---
if XGB_AVAILABLE:
    dtrain_fl    = xgb.DMatrix(X_train_fl, label=y_train_fl)
    dval_fl      = xgb.DMatrix(X_val_fl,   label=y_val_fl)
    dtrainval_fl = xgb.DMatrix(X_tv_fl,    label=y_tv_fl)
    dtest_fl     = xgb.DMatrix(X_test_fl,  label=y_test_fl)

    xgb_params = {
        'learning_rate':    0.05,
        'max_depth':        4,
        'subsample':        0.8,
        'colsample_bytree': 0.8,
        'reg_lambda':       1.0,
        'objective':        'reg:squarederror',
        'eval_metric':      'rmse',
        'verbosity':        0,   # silences XGBoost's internal logging only
        'seed':             42,
    }
    # Step 1: find the number of boosting rounds on the validation set.
    xgb_tmp = xgb.train(
        xgb_params, dtrain_fl,
        num_boost_round=2000,
        evals=[(dval_fl, 'val')],
        early_stopping_rounds=50,
        verbose_eval=False   # per-round metric printing is controlled here
    )
    # `best_iteration` is a 0-based index, so the matching number of rounds
    # is one more (otherwise a best index of 0 would train zero trees).
    best_rounds = xgb_tmp.best_iteration + 1
    # Step 2: refit on train + validation with that many rounds.
    xgb_final = xgb.train(
        xgb_params, dtrainval_fl,
        num_boost_round=best_rounds
    )
    y_pred_xgb = xgb_final.predict(dtest_fl)
    rmse_xgb   = np.sqrt(mean_squared_error(y_test_fl, y_pred_xgb))
    print(f'XGBoost RMSE: {rmse_xgb:.1f}')
else:
    y_pred_xgb = None
    rmse_xgb   = float('nan')
    print('XGBoost not available.')

In [None]:
# =============================================================================
# Section 8: Model Comparison Table and Bar Chart
# =============================================================================

def rmse_fn(actual, predicted):
    """Root-mean-squared error over the overlapping prefix of two sequences.

    Both inputs are converted to arrays and cut to the shorter length before
    the error is computed; the result is a plain Python float.
    """
    actual_arr = np.asarray(actual)
    pred_arr = np.asarray(predicted)
    n_common = min(len(actual_arr), len(pred_arr))
    mse = mean_squared_error(actual_arr[:n_common], pred_arr[:n_common])
    return float(np.sqrt(mse))

def mae_fn(actual, predicted):
    """Mean absolute error over the overlapping prefix of two sequences.

    Both inputs are converted to arrays and cut to the shorter length before
    the error is computed; the result is a plain Python float.
    """
    actual_arr = np.asarray(actual)
    pred_arr = np.asarray(predicted)
    n_common = min(len(actual_arr), len(pred_arr))
    abs_err = np.abs(actual_arr[:n_common] - pred_arr[:n_common])
    return float(abs_err.mean())

# Each entry: (model label, test RMSE, test MAE).  `model_colors` records the
# bar colour per label so colours stay attached to the right model even when
# an optional model is missing.
#
# NOTE(review): baseline metrics are computed against y_test_fl and LSTM
# metrics against ys_te — confirm both cover the same observations before
# reading the table as a like-for-like comparison.

# If SARIMA failed, y_pred_sar is only a constant mean forecast; label it as
# such instead of presenting it as a SARIMA result.
sarima_label = ('SARIMA(1,1,1)(1,1,1)_12' if sarima_ok
                else 'Mean forecast (SARIMA failed)')
rows = [
    (sarima_label,
     rmse_fn(y_test_fl, y_pred_sar),
     mae_fn(y_test_fl, y_pred_sar)),
]
model_colors = {sarima_label: UNO['gray']}

if XGB_AVAILABLE and y_pred_xgb is not None:
    rows.append(('XGBoost (early stop)',
                 rmse_fn(y_test_fl, y_pred_xgb),
                 mae_fn(y_test_fl, y_pred_xgb)))
    model_colors['XGBoost (early stop)'] = UNO['red']
if TF_AVAILABLE and y_pred_lstm_s42 is not None:
    rows.append(('LSTM (seed=42)',
                 rmse_fn(ys_te, y_pred_lstm_s42),
                 mae_fn(ys_te, y_pred_lstm_s42)))
    model_colors['LSTM (seed=42)'] = UNO['blue']
if seed_rmses:
    # Only per-seed RMSEs were stored, so no MAE is available for this row.
    rows.append(('LSTM (median, 5 seeds)', float(np.median(seed_rmses)), float('nan')))
    model_colors['LSTM (median, 5 seeds)'] = UNO['blue']

results = pd.DataFrame(rows, columns=['Model', 'RMSE', 'MAE'])
results['RMSE'] = results['RMSE'].round(1)
results['MAE']  = results['MAE'].round(1)

print('\n=== Test-Set Comparison ===')
print(results.to_string(index=False))

# --- Bar chart ---
plot_rows = results[results['RMSE'].notna() & (results['RMSE'] > 0)]
# Look colours up by model name rather than by position.
colors = [model_colors[name] for name in plot_rows['Model']]

fig, ax = plt.subplots(figsize=(8, 4))
# Reverse so the first table row ends up at the top of the horizontal chart.
bars = ax.barh(
    plot_rows['Model'][::-1],
    plot_rows['RMSE'][::-1],
    color=colors[::-1],
    edgecolor='white'
)
for bar, rmse_v in zip(bars, plot_rows['RMSE'][::-1]):
    ax.text(bar.get_width() + 5, bar.get_y() + bar.get_height() / 2,
            f'{rmse_v:,.0f}', va='center', fontsize=9, color=UNO['gray'])
ax.set_xlabel('Test RMSE (lower is better)')
ax.set_title('Model Comparison: L01–L10 (Direct 1-step forecasts)')
plt.tight_layout()
plt.savefig(f'{FIGURE_DIR}/lecture10_model_comparison.png',
            dpi=150, bbox_inches='tight')
plt.show()
print('Saved lecture10_model_comparison.png')

In [None]:
# =============================================================================
# Section 9: Forecast Comparison Plot
# =============================================================================

fig, ax = plt.subplots(figsize=(11, 4))

# History context (last 24 periods of train + validation).  The start is
# clamped at 0 so a short series cannot turn it into a negative index.
ctx_stop  = n_tr_fl + n_va_fl
ctx_start = max(0, ctx_stop - 24)
context = y_series.iloc[ctx_start:ctx_stop]
ctx_idx = context.index.astype(str)
ax.plot(ctx_idx, context.values, color=UNO['lightgray'], lw=1.5,
        label='History')

# Actuals (period labels as strings; every series shares this categorical axis)
te_idx = flat_test_index.astype(str)
ax.plot(te_idx, y_test_fl, color='black', lw=2, label='Actual', zorder=5)

# SARIMA
if sarima_ok:
    sar_vals = np.asarray(y_pred_sar)[:len(y_test_fl)]
    ax.plot(te_idx[:len(sar_vals)], sar_vals,
            color=UNO['gray'], lw=1.5, ls='--', label='SARIMA')

# XGBoost
if XGB_AVAILABLE and y_pred_xgb is not None:
    ax.plot(te_idx, y_pred_xgb,
            color=UNO['red'], lw=1.8, ls='-.', label='XGBoost')

# LSTM (seed=42)
if TF_AVAILABLE and y_pred_lstm_s42 is not None:
    # Plot against the LSTM's own test periods.  Truncating to the first
    # entries of the flat test index would shift the line in time whenever
    # the two test windows differ in length.
    lstm_idx = test_index.astype(str)
    n_lstm = min(len(lstm_idx), len(y_pred_lstm_s42))
    ax.plot(lstm_idx[:n_lstm], y_pred_lstm_s42[:n_lstm],
            color=UNO['blue'], lw=2, label='LSTM (seed=42)')

ax.set_title('Forecast Comparison: SARIMA vs XGBoost vs LSTM (Test Set)')
ax.set_xlabel('Period')
ax.set_ylabel('Retail Sales (Millions USD)')
ax.legend(loc='upper left', fontsize=9)
ax.xaxis.set_major_locator(mticker.MaxNLocator(8))   # keep tick labels readable
plt.xticks(rotation=30)

# RMSE annotation: one line per available model, stacked in the top-right corner
annots = []
if sarima_ok:
    annots.append(('SARIMA', UNO['gray'],
                   rmse_fn(y_test_fl, y_pred_sar)))
if XGB_AVAILABLE and y_pred_xgb is not None:
    annots.append(('XGBoost', UNO['red'],
                   rmse_fn(y_test_fl, y_pred_xgb)))
if TF_AVAILABLE and y_pred_lstm_s42 is not None:
    annots.append(('LSTM', UNO['blue'],
                   rmse_fn(ys_te, y_pred_lstm_s42)))

for i, (label, color, r_val) in enumerate(annots):
    ax.annotate(f'{label}: RMSE={r_val:,.0f}',
                xy=(0.99, 0.97 - i * 0.09),
                xycoords='axes fraction',
                ha='right', va='top', fontsize=9, color=color)

plt.tight_layout()
plt.savefig(f'{FIGURE_DIR}/lecture10_forecast_comparison.png',
            dpi=150, bbox_inches='tight')
plt.show()
print('Saved lecture10_forecast_comparison.png')

print('\n=== Final Results ===')
print(results.to_string(index=False))