# TSA Chapter 0 Quiz: Time Series Cross-Validation

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch0/TSA_ch0_quiz3_timeseries_cv/TSA_ch0_quiz3_timeseries_cv.ipynb)

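This notebook demonstrates why standard k-fold cross-validation is inappropriate for time series and contrasts it with rolling-origin cross-validation, which always trains on the past and tests on the future.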

In [None]:
!pip install matplotlib numpy -q

In [None]:
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Hex colour palette for the figure. In the code shown here BLUE fills the
# training cells, RED fills the test cells and colours the "WRONG" panel
# title, and GREEN colours the "CORRECT" panel title. ORANGE and PURPLE are
# not referenced in this section.
BLUE = '#1A3A6E'
RED = '#DC3545'
GREEN = '#2E7D32'
ORANGE = '#E67E22'
PURPLE = '#7B2D8E'

def setup_style():
    """Apply the shared look to matplotlib's global ``rcParams``.

    Sets a sans-serif 11 pt font, hides the top and right spines, disables
    the grid, and sets every figure/axes/savefig face colour to ``'none'``
    with transparent saving enabled.
    """
    typography = {
        'font.family': 'sans-serif',
        'font.size': 11,
    }
    axes_chrome = {
        'axes.spines.top': False,
        'axes.spines.right': False,
        'axes.grid': False,
    }
    # Face colours of 'none' plus transparent saving keep backgrounds empty.
    backgrounds = {
        'figure.facecolor': 'none',
        'axes.facecolor': 'none',
        'savefig.facecolor': 'none',
        'savefig.transparent': True,
    }
    plt.rcParams.update({**typography, **axes_chrome, **backgrounds})

def save(fig, name):
    """Write *fig* to ``<name>.pdf`` and ``<name>.png``, then close it.

    Both files are saved with a tight bounding box, 150 dpi and a
    transparent background. A confirmation line is printed afterwards.

    Args:
        fig: The matplotlib figure to export and close.
        name: Output path without extension.
    """
    export_options = dict(bbox_inches='tight', dpi=150, transparent=True)
    for extension in ('pdf', 'png'):
        fig.savefig(f'{name}.{extension}', **export_options)
    # Close the figure once both files have been written.
    plt.close(fig)
    print(f'Saved {name}')


def _draw_fold_row(ax, row, train_idx, test_idx):
    """Draw one CV fold on *ax* as a horizontal strip of time cells.

    Each index becomes a bar of width 0.8 starting at that time position on
    row *row*; training cells are BLUE, test cells are RED.

    Args:
        ax: Target matplotlib axes.
        row: y position of the strip.
        train_idx: Iterable of integer time indices used for training.
        test_idx: Iterable of integer time indices used for testing.
    """
    for cells, color, alpha in ((train_idx, BLUE, 0.6), (test_idx, RED, 0.8)):
        cells = np.asarray(cells)
        if cells.size == 0:
            # Nothing to draw for this group (e.g. an empty test window).
            continue
        # One vectorised call per group instead of one barh call per cell.
        ax.barh(np.full(cells.size, row), 0.8, left=cells, color=color,
                alpha=alpha, edgecolor='white', linewidth=0.3)


def _label_fold_axis(ax, n_folds, title, title_color):
    """Set the title, axis labels and fold tick labels of one panel."""
    ax.set_title(title, fontsize=10, fontweight='bold', color=title_color)
    ax.set_ylabel('Fold', fontsize=10)
    ax.set_xlabel('Time', fontsize=10)
    ax.set_yticks(range(n_folds))
    # Fold 1 is drawn on the top row, so tick labels run n_folds..1 upwards.
    ax.set_yticklabels([f'{n_folds - i}' for i in range(n_folds)])


def quiz3_timeseries_cv():
    """Q3: Why not k-fold for time series.

    Builds a two-panel figure and saves it via ``save`` under the name
    ``ch0_quiz3_timeseries_cv``:

    * left panel: shuffled k-fold CV, where test cells are scattered across
      the timeline so training cells lie on both sides of them;
    * right panel: rolling-origin CV, where each fold trains on an initial
      segment of the timeline and tests on the cells immediately after it.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3.5))

    n_total = 20        # number of time points on the x axis
    n_folds = 4         # number of CV folds (rows) in each panel
    initial_train = 8   # rolling origin: training length of the first fold
    horizon = 3         # rolling origin: test length and per-fold step

    # WRONG: k-fold shuffles temporal order.
    # A local RandomState(42) produces the same permutation as seeding the
    # global generator with 42, without reseeding NumPy's global RNG as a
    # side effect of drawing this figure.
    rng = np.random.RandomState(42)
    indices = rng.permutation(n_total)
    fold_size = n_total // n_folds

    for fold in range(n_folds):
        test_idx = indices[fold * fold_size:(fold + 1) * fold_size]
        train_idx = np.setdiff1d(indices, test_idx)
        _draw_fold_row(ax1, n_folds - 1 - fold, train_idx, test_idx)

    _label_fold_axis(ax1, n_folds,
                     'k-fold CV (WRONG)\nFuture leaks into training!', RED)

    # CORRECT: Rolling origin - the training window grows by `horizon` per
    # fold and the test window is the `horizon` cells right after it.
    for fold in range(n_folds):
        train_end = initial_train + fold * horizon
        # Clip the test window so it never runs past the end of the series.
        test_end = min(train_end + horizon, n_total)
        _draw_fold_row(ax2, n_folds - 1 - fold,
                       range(train_end), range(train_end, test_end))

    _label_fold_axis(ax2, n_folds,
                     'Rolling Origin CV (CORRECT)\nAlways train on past, test on future',
                     GREEN)

    # Shared legend below both panels, built from proxy rectangles.
    fig.legend([plt.Rectangle((0, 0), 1, 1, fc=BLUE, alpha=0.6),
                plt.Rectangle((0, 0), 1, 1, fc=RED, alpha=0.8)],
               ['Training', 'Test'],
               loc='lower center', bbox_to_anchor=(0.5, -0.08), ncol=2,
               frameon=False, fontsize=10)
    fig.tight_layout()
    # Re-adjust the bottom margin after tight_layout.
    fig.subplots_adjust(bottom=0.12)
    save(fig, 'ch0_quiz3_timeseries_cv')

# Apply the shared plot styling, then render and save the quiz figure.
setup_style()
quiz3_timeseries_cv()