# TSA Chapter 6: Case Study - Out-of-Sample Comparison

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch6/TSA_ch6_case_oos_comparison/TSA_ch6_case_oos_comparison.ipynb)

Out-of-sample RMSE and MAE comparison: AR vs VAR vs Random Walk.

In [None]:
!pip install numpy pandas matplotlib statsmodels pandas-datareader -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VAR
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.stattools import adfuller, grangercausalitytests
import pandas_datareader.data as web
import warnings
warnings.filterwarnings('ignore')

In [None]:
import os
# Shared colour palette (hex codes), also exposed as one constant per colour.
COLORS = {
    'blue': '#1A3A6E',
    'red': '#DC3545',
    'green': '#2E7D32',
    'orange': '#E67E22',
    'gray': '#666666',
    'purple': '#8E44AD',
}
BLUE, RED, GREEN, ORANGE, GRAY, PURPLE = (
    COLORS[key] for key in ('blue', 'red', 'green', 'orange', 'gray', 'purple')
)

# Global matplotlib defaults applied to every figure created afterwards.
plt.rcParams.update({
    # Transparent backgrounds, on screen and in saved files.
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # Axes: no top/right spines, no grid.
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    # Font sizes.
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    # Resolution and line weights.
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
    'axes.linewidth': 0.6,
    # Legend: no fill, no frame.
    'legend.facecolor': 'none',
    'legend.framealpha': 0,
    'legend.edgecolor': 'none',
})
def save_chart(fig, name):
    """Save *fig* as ``<name>.pdf`` and ``<name>.png``, then print a confirmation.

    Args:
        fig: Object exposing a matplotlib-style ``savefig(path, **kwargs)`` method.
        name: Output path without extension; both files share this stem.
    """
    export_options = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 150}
    # PDF first, then PNG, both written with identical options.
    for extension in ('pdf', 'png'):
        fig.savefig(f'{name}.{extension}', **export_options)
    print(f'Saved: {name}')

In [None]:
# Load the FRED series GS10 and GS2 (1990-01-01 to 2024-01-01) into the '10Y'
# and '2Y' columns of `rates`, keeping the last value per month-start bucket.
# If the download fails for any reason, fall back to simulated data so the
# rest of the notebook still runs.
try:
    gs10 = web.DataReader('GS10', 'fred', '1990-01-01', '2024-01-01').resample('MS').last().dropna()
    gs2 = web.DataReader('GS2', 'fred', '1990-01-01', '2024-01-01').resample('MS').last().dropna()
    rates = pd.DataFrame({'10Y': gs10.iloc[:, 0], '2Y': gs2.iloc[:, 0]}).dropna()
except Exception as exc:
    # `except Exception` rather than a bare `except` so that KeyboardInterrupt
    # and SystemExit still propagate; say so explicitly when real data is
    # replaced by simulated data.
    print(f'FRED download failed ({type(exc).__name__}); using simulated rates instead')
    np.random.seed(42)  # fixed seed keeps the fallback reproducible
    n = 408
    dates = pd.date_range('1990-01-01', periods=n, freq='MS')
    # Both simulated series share one random-walk component plus their own noise.
    rw = np.cumsum(np.random.normal(0, 0.1, n)) + 5
    rates = pd.DataFrame(
        {
            '10Y': rw + np.random.normal(0, 0.2, n),
            '2Y': rw - 0.5 + np.random.normal(0, 0.15, n),
        },
        index=dates,
    )
print(f'Rates data: {len(rates)} obs')

# Work with first differences of the rate series; the first row (NaN after
# differencing) is dropped.
diff_rates = rates.diff().dropna()
# Number of observations in the out-of-sample window used by the rolling loop.
n_test = 120

# One empty forecast list per (model, series) pair, filled by the rolling loop.
models = {
    label: {col: [] for col in diff_rates.columns}
    for label in ('VAR(2)', 'VAR(4)', 'AR(1)', 'Random\nWalk')
}

# Rolling one-step-ahead forecasts over the last `n_test` observations.
# At step i every model is re-fitted on all differenced observations that
# precede the one being forecast (expanding window).
first_test_pos = len(diff_rates) - n_test  # loop-invariant start of the test window
n_failed = 0  # fits that raised and were replaced by a zero forecast

for i in range(n_test):
    train = diff_rates.iloc[:first_test_pos + i]

    for lag, name in [(2, 'VAR(2)'), (4, 'VAR(4)')]:
        # Only the fit/forecast sits inside `try`, so an error can never leave
        # the per-series lists with different lengths.
        try:
            res = VAR(train).fit(lag)
            step = list(res.forecast(train.values[-lag:], steps=1)[0])
        except Exception:
            # Best-effort fallback (Ctrl-C / SystemExit still propagate):
            # forecast "no change", but count it so it can be reported.
            n_failed += 1
            step = [0] * len(diff_rates.columns)
        for c, value in zip(diff_rates.columns, step):
            models[name][c].append(value)

    for c in diff_rates.columns:
        try:
            ar_res = AutoReg(train[c], lags=1).fit()
            ar_step = ar_res.forecast(1).values[0]
        except Exception:
            n_failed += 1
            ar_step = 0
        models['AR(1)'][c].append(ar_step)
        models['Random\nWalk'][c].append(0)  # random walk forecast for diff = 0

# A zero fallback is identical to the random-walk forecast, so make it visible
# when any model's error metrics are affected by it.
if n_failed:
    print(f'Warning: {n_failed} model fit(s) failed; a zero forecast was substituted')

# Out-of-sample accuracy per model and series, stored as
# metrics[<'RMSE' | 'MAE'>][<model label>][<series name>].
metrics = {'RMSE': {}, 'MAE': {}}
for label, forecasts_by_col in models.items():
    rmse_by_col = {}
    mae_by_col = {}
    for col in diff_rates.columns:
        # Forecast errors over the last n_test differenced observations.
        errors = diff_rates[col].values[-n_test:] - np.array(forecasts_by_col[col])
        rmse_by_col[col] = np.sqrt(np.mean(errors**2))
        mae_by_col[col] = np.mean(np.abs(errors))
    metrics['RMSE'][label] = rmse_by_col
    metrics['MAE'][label] = mae_by_col

# Grouped bar chart: one panel per metric, one bar group per model,
# one bar per series within each group.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
model_names = list(models.keys())
x = np.arange(len(model_names))
width = 0.3

# (series name, bar colour, opacity, horizontal shift from the group centre)
bar_styles = [
    ('10Y', BLUE, 0.7, -width / 2),
    ('2Y', RED, 0.5, width / 2),
]

for ax, metric in zip(axes, ['RMSE', 'MAE']):
    for col, colour, opacity, shift in bar_styles:
        heights = [metrics[metric][name][col] for name in model_names]
        ax.bar(x + shift, heights, width, color=colour, alpha=opacity, label=col)
    ax.set_title(f'Out-of-Sample {metric} Comparison', fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(model_names)
    ax.set_ylabel(metric)
    # Legend sits below the axes, centred, both entries on one row.
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2, frameon=False)

plt.tight_layout()
save_chart(fig, 'ch6_case_oos_comparison')
plt.show()