# TSA_ch8_quiz4_model_complexity

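Illustrative comparison of forecast accuracy versus dataset size for ARIMA, Random Forest, and LSTM models (the curves are synthetic, generated from closed-form expressions rather than from fitted models).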

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch8/TSA_ch8_quiz4_model_complexity/TSA_ch8_quiz4_model_complexity.ipynb)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Hex colour constants shared by the plotting code in this notebook.
BLUE, RED, GREEN = '#1A3A6E', '#DC3545', '#2E7D32'
ORANGE, PURPLE = '#E67E22', '#7B2D8E'

# Global matplotlib defaults, set group by group: sans-serif 11pt text,
# no top/right spines, no grid, and 'none' (transparent) backgrounds for
# the figure, the axes and any saved output.
plt.rc('font', family='sans-serif', size=11)
plt.rc('axes.spines', top=False, right=False)
plt.rc('axes', grid=False, facecolor='none')
plt.rc('figure', facecolor='none')
plt.rc('savefig', facecolor='none', transparent=True)

In [None]:
def quiz4_model_complexity():
    """Draw the Q4 figure: forecast accuracy against dataset size.

    Three stylised accuracy curves (ARIMA, Random Forest, LSTM) are
    evaluated from closed-form saturating exponentials at a fixed set of
    dataset sizes and plotted on a logarithmic x-axis. Three shaded,
    labelled bands mark the data-size regimes the quiz refers to.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    n_obs = np.array([50, 100, 200, 500, 1000, 5000, 10000])

    # One row per model: line/marker format, colour, legend label, and the
    # synthetic accuracy values (slower decay and a larger penalty for the
    # more complex models).
    model_curves = (
        ('o-', BLUE, 'ARIMA (simple, linear)',
         1 - 0.8 * np.exp(-n_obs / 100)),
        ('s-', ORANGE, 'Random Forest (moderate)',
         1 - 1.2 * np.exp(-n_obs / 300) - 0.05),
        ('^-', RED, 'LSTM (complex, nonlinear)',
         1 - 2.0 * np.exp(-n_obs / 1000) - 0.1),
    )

    # One row per shaded regime: x-range, colour, caption x-position, caption.
    regimes = (
        (50, 200, BLUE, 100, 'ARIMA\nwins'),
        (200, 2000, ORANGE, 700, 'RF\ncompetitive'),
        (2000, 10000, RED, 5000, 'LSTM\nshines'),
    )

    fig, ax = plt.subplots(figsize=(8, 4.5))

    for fmt, colour, legend_label, accuracy in model_curves:
        ax.plot(n_obs, accuracy, fmt, color=colour, linewidth=2,
                markersize=6, label=legend_label)

    # Shade every band first, then caption them, all at a common height.
    for x_lo, x_hi, colour, _, _ in regimes:
        ax.axvspan(x_lo, x_hi, alpha=0.08, color=colour)
    for _, _, colour, x_text, caption in regimes:
        ax.text(x_text, 0.35, caption, fontsize=9, color=colour,
                ha='center', fontweight='bold')

    ax.set_xscale('log')
    ax.set_xlabel('Dataset Size (observations)', fontsize=11)
    ax.set_ylabel('Forecast Accuracy', fontsize=11)
    ax.set_title('Model Performance vs Data Size', fontsize=12,
                 fontweight='bold', color=BLUE)

    # The legend sits below the axes, so extra bottom margin is reserved
    # after tight_layout has run.
    ax.legend(loc='lower center', bbox_to_anchor=(0.5, -0.2), ncol=3,
              frameon=False, fontsize=9)
    fig.tight_layout()
    fig.subplots_adjust(bottom=0.18)

    plt.show()

# Render the Q4 figure when this cell is executed.
quiz4_model_complexity()