In [None]:
# === Environment Setup ===
import os, sys, math, time, random, json, textwrap, warnings
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.api import VAR
from statsmodels.tsa.arima.model import ARIMA
from IPython.display import display, Markdown
try:
    from arch import arch_model
    ARCH_AVAILABLE = True
except ImportError:
    ARCH_AVAILABLE = False
try:
    import pandas_datareader.data as web
    PD_READER_AVAILABLE = True
except ImportError:
    PD_READER_AVAILABLE = False

# --- Configuration ---
# Plot appearance: whitegrid theme, larger fonts, big high-resolution figures.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['font.size'] = 14
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['figure.dpi'] = 150
# NumPy printing: fixed-point notation, 4 decimals, wide lines.
np.set_printoptions(precision=4, linewidth=120, suppress=True)
# Hide warnings of statsmodels' ValueWarning category for the whole notebook.
warnings.filterwarnings(action='ignore', category=sm.tools.sm_exceptions.ValueWarning)

# --- Utility Functions ---
def note(msg):
    """Display `msg` as an info-styled alert box in the notebook output.

    The text is re-wrapped to 100 columns, prefixed with a memo emoji and
    embedded in a `<div class='alert alert-info'>` element that is handed to
    IPython as Markdown.
    """
    wrapped = textwrap.fill(msg, width=100)
    html = f"<div class='alert alert-info'>📝 {wrapped}</div>"
    display(Markdown(html))
def sec(title):
    """Print `title` in upper case as a banner framed by two 80-character rules."""
    rule = '=' * 80
    # A leading empty element reproduces the blank line before the banner.
    banner = '\n'.join(['', rule, f"| {title.upper()} |", rule])
    print(banner)

# Report which optional packages (arch, pandas_datareader) imported successfully above.
note(f"Environment initialized. ARCH available: {ARCH_AVAILABLE}, pandas_datareader available: {PD_READER_AVAILABLE}")

# Part 6: Econometrics
## Chapter 6.7: Classical Time Series Analysis

### Introduction: The Challenge of Dependent Data
A vast amount of economic data is **sequential**, where the order of observations is the primary source of information. This chapter provides a comprehensive, PhD-level introduction to the classical econometric modeling of **time series data**. A deep understanding of this framework is essential, as it provides the theoretical language for concepts like stationarity, autocorrelation, and identification, and serves as a crucial benchmark for more advanced techniques.

This chapter covers:
1.  **Theoretical Foundations:** Stationarity, the Wold Decomposition Theorem, and the lag operator.
2.  **Univariate Models:** The Box-Jenkins methodology for ARIMA models.
3.  **Multivariate Models:** Vector Autoregressions (VARs), cointegration, and state-space models.
4.  **Volatility Modeling:** Capturing time-varying volatility with ARCH and GARCH models.
5.  **Structural Analysis:** Using economic theory to identify fundamental shocks with Structural VARs (SVARs), including advanced long-run identification schemes.

### 1. Theoretical Foundations of Time Series
#### 1.1 Stationarity
A time series is **(covariance) stationary** if its first and second moments are time-invariant: a constant mean, a constant variance, and an autocovariance that depends only on the lag, not on time. Stationarity is crucial because it implies that the past is statistically similar to the future, making forecasting possible. Non-stationary series (e.g., those with trends) must be transformed, usually by **differencing**, to achieve stationarity before modeling.

We use the **Augmented Dickey-Fuller (ADF)** test (null hypothesis: unit root present) and the **KPSS** test (null hypothesis: series is stationary) to formally check for stationarity.

In [None]:
sec("Testing for Stationarity")
# Load the statsmodels macro dataset; `data` is reused by the later cells.
data = sm.datasets.macrodata.load_pandas().data
# Consumer price index column, used as the running example series.
cpi = data.loc[:, 'cpi']

def check_stationarity(series, name=''):
    """Run the ADF and KPSS tests on `series` and display a joint conclusion.

    The two tests have opposite null hypotheses (ADF: a unit root is present;
    KPSS: the series is stationary). A firm verdict is therefore only given
    when both tests point the same way; if they disagree, or neither rejects
    its null, the result is reported as inconclusive instead of being labelled
    non-stationary.

    Parameters
    ----------
    series : pandas.Series
        Observations to test. Missing values are dropped before testing.
    name : str, optional
        Label used in the printed header and in the displayed conclusion.

    Returns
    -------
    None
        Results are printed and the conclusion is shown via `note`.
    """
    # Drop missing values once so both tests see exactly the same sample.
    clean = series.dropna()
    adf_stat, adf_pvalue = adfuller(clean)[:2]
    kpss_stat, kpss_pvalue = kpss(clean)[:2]

    print(f'--- Stationarity Check for {name} ---\n')
    print(f'ADF Test: Statistic={adf_stat:.2f}, p-value={adf_pvalue:.3f}')
    print(f'KPSS Test: Statistic={kpss_stat:.2f}, p-value={kpss_pvalue:.3f}')

    adf_rejects_unit_root = adf_pvalue < 0.05
    kpss_keeps_stationarity = kpss_pvalue > 0.05

    if adf_rejects_unit_root and kpss_keeps_stationarity:
        note(f"Conclusion: The series **{name}** is likely stationary.")
    elif not adf_rejects_unit_root and not kpss_keeps_stationarity:
        note(f"Conclusion: The series **{name}** is likely non-stationary.")
    elif adf_rejects_unit_root:
        # Both nulls rejected: the tests contradict each other.
        note(f"Conclusion: The tests disagree for **{name}** (ADF rejects a unit root, KPSS rejects stationarity). The evidence is inconclusive.")
    else:
        # Neither null rejected: the sample is not informative enough.
        note(f"Conclusion: Neither test rejects its null for **{name}**. The evidence is inconclusive.")

# Test the CPI in levels, then its first difference.
check_stationarity(series=cpi, name='CPI Level')
check_stationarity(series=cpi.diff().dropna(), name='CPI First Difference (Inflation)')

#### 1.2 The Wold Decomposition and ARMA Models
The **Wold Decomposition Theorem** is the theoretical foundation for time series analysis. It states that any covariance-stationary time series can be represented as the sum of a deterministic component and a stochastic component that is an infinite-order moving average of current and past white-noise error terms: $y_t = \mu + \sum_{j=0}^\infty \psi_j \epsilon_{t-j}$, where $\psi_0 = 1$ and $\sum_{j=0}^\infty \psi_j^2 < \infty$ (here the deterministic component is taken to be the constant $\mu$).

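This is a profound result. It tells us that we can approximate any stationary process with a moving average model. Collect the moving-average weights in a polynomial in the lag operator, $\psi(L) = \sum_{j=0}^\infty \psi_j L^j$ (where $L y_t = y_{t-1}$). If $\psi(L)$ can be represented as a ratio of two finite-order polynomials, $\psi(L) = \theta(L)/\phi(L)$, we arrive at the parsimonious **ARMA(p,q)** model: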
$$ \underbrace{y_t - \sum_{i=1}^p \phi_i y_{t-i}}_{AR(p) \text{ part}} = \underbrace{\epsilon_t + \sum_{j=1}^q \theta_j \epsilon_{t-j}}_{MA(q) \text{ part}} $$

### 2. Univariate Models: The Box-Jenkins Methodology
The **Box-Jenkins methodology** is a systematic process for applying ARIMA(p,d,q) models:
1.  **Identification:** Determine the order of differencing, $d$, to achieve stationarity. Then, use the **Autocorrelation Function (ACF)** and **Partial Autocorrelation Function (PACF)** plots to identify the potential orders ($p, q$).
    - ACF: Measures the correlation between $y_t$ and $y_{t-k}$. For an AR(p) process, it decays gradually. For an MA(q) process, it cuts off sharply after lag q.
    - PACF: Measures the correlation between $y_t$ and $y_{t-k}$ after controlling for the intermediate lags. For an AR(p) process, it cuts off sharply after lag p. For an MA(q) process, it decays gradually.
2.  **Estimation:** Estimate candidate models using Maximum Likelihood.
3.  **Diagnostic Checking:** Analyze the residuals of the estimated model. They should be white noise. Use information criteria (AIC, BIC) to select the best model.

In [None]:
sec("Box-Jenkins Example: Modeling CPI Inflation")
cpi_diff = data['cpi'].diff().dropna()

# Step 1 (identification): inspect the ACF and PACF of the differenced series.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
plot_acf(cpi_diff, ax=ax1, title='ACF of CPI Inflation')
plot_pacf(cpi_diff, ax=ax2, title='PACF of CPI Inflation')
plt.show()
note("The ACF plot shows a slowly decaying pattern, while the PACF plot appears to cut off after lag 1 or 2. This suggests an AR(p) model, likely with p=1 or p=2.")

# Step 2 (estimation): fit the AR candidates suggested by the plots, plus an
# ARMA(1,1) alternative, instead of committing to a single specification.
candidate_orders = [(1, 0, 0), (2, 0, 0), (1, 0, 1)]
candidate_fits = {order: ARIMA(cpi_diff, order=order).fit() for order in candidate_orders}

# Step 3 (selection): compare the candidates by information criteria.
ic_table = pd.DataFrame(
    {
        'AIC': [fit.aic for fit in candidate_fits.values()],
        'BIC': [fit.bic for fit in candidate_fits.values()],
    },
    index=[f"ARIMA{order}" for order in candidate_fits],
)
print(ic_table.round(2))

best_order = min(candidate_fits, key=lambda order: candidate_fits[order].aic)
model = candidate_fits[best_order]
note(f"Among the candidate models, ARIMA{best_order} has the lowest AIC; its full estimation results are reported below.")
print(model.summary())

### 3. Multivariate Models: VAR, SVAR, and Cointegration
#### 3.1 Structural VAR (SVAR)
A **Vector Autoregression (VAR)** models each variable in a system as a linear function of its own past values and the past values of all other variables. A **Structural VAR (SVAR)** aims to recover the underlying, uncorrelated **structural shocks** from the estimated reduced-form residuals. To do this, we need to impose identifying restrictions. The most common approach is the **Cholesky decomposition**, which imposes a recursive ordering. For a system with variables $(y_1, y_2, y_3)$, this assumes that a shock to $y_1$ can affect all variables contemporaneously, a shock to $y_2$ can only affect $y_2$ and $y_3$ contemporaneously, and a shock to $y_3$ can only affect itself contemporaneously.

In [None]:
sec("SVAR Example: A Simple Monetary Policy Model")
# The statsmodels macrodata set has no 'fedfunds' column; its short-term rate
# is the 3-month T-bill rate ('tbilrate'), used here as the policy-rate proxy.
# GDP and CPI enter as log-differences (growth rates). The interest rate is
# already in percent, so it is differenced in levels rather than logged.
df_svar = pd.DataFrame({
    'gdp': np.log(data['realgdp']).diff(),
    'inflation': np.log(data['cpi']).diff(),
    'interest_rate': data['tbilrate'].diff(),
}).dropna()

# Column order defines the Cholesky ordering: the interest rate is last, so a
# policy shock affects only the interest rate contemporaneously.
model_var = VAR(df_svar).fit(2)
irf = model_var.irf(20)

# `response` must be a single variable name or index, not a list. Omitting it
# plots the response of every variable to the chosen impulse.
fig = irf.plot(orth=True, impulse='interest_rate', signif=0.05, plot_stderr=True)
fig.suptitle('Structural IRFs to a Monetary Policy Shock (Cholesky ID)', fontsize=16, y=1.03)
fig.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()
note("The IRFs show the dynamic response of GDP and inflation to an identified, one-standard-deviation contractionary monetary policy shock (an unexpected rise in the T-bill rate). Standard theory predicts a temporary decline in GDP and inflation; compare the plotted responses and their confidence bands with that prediction.")

#### 3.2 Cointegration and Vector Error Correction Models (VECM)
If two or more time series are non-stationary and integrated of the same order (typically I(1)), it is possible that a linear combination of them is stationary. In this case, the series are **cointegrated**, implying a long-run equilibrium relationship. The **Vector Error Correction Model (VECM)** is a restricted VAR designed for cointegrated series. It includes an error correction term that captures how the variables adjust back to their long-run equilibrium.

In [None]:
sec("Cointegration Example: GDP and Consumption")
from statsmodels.tsa.vector_ar.vecm import VECM, coint_johansen

df_coint = np.log(data[['realgdp', 'realcons']]).dropna()
result = coint_johansen(df_coint, det_order=0, k_ar_diff=1)

# Trace test of H0: no cointegrating relation (rank 0).
# Column 1 of `cvt` holds the 95% critical value.
trace_stat = result.lr1[0]
crit_95 = result.cvt[0, 1]
if trace_stat > crit_95:
    note(f"The Johansen test trace statistic is {trace_stat:.2f}, which is greater than the 95% critical value of {crit_95:.2f}. We reject the null of no cointegration and conclude that a long-run relationship exists.")
else:
    note(f"The Johansen test trace statistic is {trace_stat:.2f}, which does not exceed the 95% critical value of {crit_95:.2f}. We cannot reject the null of no cointegration at the 5% level, so the VECM below is illustrative only.")

# VECM's documented deterministic codes are 'n', 'co', 'ci', 'lo' and 'li';
# a bare 'c' is not one of them. 'co' requests a constant outside the
# cointegration relation, matching the constant allowed by det_order=0 above.
model = VECM(df_coint, k_ar_diff=1, coint_rank=1, deterministic='co')
vecm_res = model.fit()

# Build the summary once and print its first two tables.
vecm_summary = vecm_res.summary()
print("\nVECM Results (selected):")
print(vecm_summary.tables[0])
print(vecm_summary.tables[1])

### 4. State-Space Models and the Kalman Filter
**State-Space Models** provide a highly flexible framework for representing time series models, especially those with unobserved components. A linear Gaussian state-space model has two equations:
1.  **Measurement Equation:** Relates the observed data $y_t$ to a potentially unobserved state vector $\alpha_t$: $y_t = Z_t \alpha_t + d_t + \epsilon_t$.
2.  **Transition Equation:** Describes the evolution of the unobserved state vector: $\alpha_{t+1} = T_t \alpha_t + c_t + R_t \eta_t$.

The **Kalman Filter** is a recursive algorithm that provides the optimal estimate of the unobserved state $\alpha_t$ given the observed data. It operates in a two-step predict-update cycle, making it the workhorse for estimating state-space models.

In [None]:
sec("State-Space Example: Decomposing GDP into Trend and Cycle")
gdp = np.log(data.loc[:, 'realgdp'])

# Unobserved-components specification: a local linear trend for the level
# plus a cycle component whose shocks are stochastic.
model_ss = sm.tsa.UnobservedComponents(
    gdp,
    level='local linear trend',
    cycle=True,
    stochastic_cycle=True,
)
results_ss = model_ss.fit(disp=False)

print(results_ss.summary())

# Plot the estimated components in one tall figure.
fig = results_ss.plot_components(figsize=(12, 10))
fig.tight_layout()
plt.show()
note("The Kalman filter decomposes the observed GDP series into a smooth, unobserved trend component and a stationary cyclical component, providing a model-based alternative to ad-hoc filters like the HP-filter.")

### 5. Advanced Topics: Volatility and Structural Analysis

#### 5.1 Modeling Volatility: ARCH and GARCH Models

Many time series, particularly in finance, exhibit **volatility clustering**. This means that the magnitude of the series' fluctuations is not constant. Large changes tend to be followed by more large changes (in either direction), and small changes tend to be followed by small changes. This behavior, which violates the homoskedasticity assumption, is clearly visible when plotting the daily returns of a stock index like the S&P 500. The returns themselves fluctuate around zero, but the *size* of these fluctuations varies dramatically over time. A good proxy for this time-varying volatility is the squared return, which accentuates large movements.

In [None]:
sec("Visualizing Volatility Clustering in S&P 500 Returns")

# Attempt to download S&P 500 data. DATA_LOADED stays False unless real data
# was obtained, in which case `returns` holds daily percentage returns.
DATA_LOADED = False
if PD_READER_AVAILABLE:
    try:
        sp500 = web.DataReader('^GSPC', 'yahoo', '2000-01-01', '2023-12-31')
        returns = 100 * sp500['Adj Close'].pct_change().dropna()
        if returns.empty:
            raise ValueError("download returned no observations")
    except Exception as e:
        # Deliberately broad: any download or parsing failure triggers the fallback.
        note(f"Could not load S&P 500 data. Using simulated GARCH(1,1) data instead. Error: {e}")
    else:
        note("S&P 500 data loaded successfully.")
        DATA_LOADED = True
else:
    note("pandas_datareader is not installed. Using simulated GARCH(1,1) data instead.")

if not DATA_LOADED:
    # Simulate a GARCH(1,1) process so the rest of the section has data to use.
    rng = np.random.default_rng(123)
    n_sim = 5000
    sigma2 = np.zeros(n_sim)
    sim_returns = np.zeros(n_sim)
    omega, alpha, beta = 0.1, 0.1, 0.85
    # Start the recursion at the unconditional variance omega / (1 - alpha - beta).
    sigma2[0] = omega / (1 - alpha - beta)
    sim_returns[0] = rng.normal(0, np.sqrt(sigma2[0]))
    for t in range(1, n_sim):
        sigma2[t] = omega + alpha * sim_returns[t-1]**2 + beta * sigma2[t-1]
        sim_returns[t] = rng.normal(0, np.sqrt(sigma2[t]))
    returns = pd.Series(sim_returns, index=pd.date_range('2000-01-01', periods=n_sim))

# Label the figure with the actual data source so simulated data is never
# presented as S&P 500 returns.
series_label = 'S&P 500' if DATA_LOADED else 'Simulated GARCH(1,1)'

fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=True)
returns.plot(ax=axes[0], lw=0.5, color='blue')
axes[0].set_title(f'{series_label} Daily Returns')
axes[0].set_ylabel('Returns (%)')
(returns**2).plot(ax=axes[1], lw=0.5, color='red')
axes[1].set_title(f'{series_label} Squared Returns (Proxy for Variance)')
axes[1].set_ylabel('Squared Returns')
axes[1].set_xlabel('Date')
fig.suptitle('Figure 1: Volatility Clustering in Financial Data', fontsize=16, y=1.02)
plt.tight_layout()
plt.show()
if DATA_LOADED:
    note("The bottom panel shows clear clustering. High-volatility periods, such as the 2008 financial crisis and the 2020 COVID-19 shock, are characterized by large, persistent spikes in squared returns.")
else:
    note("The bottom panel shows clear clustering in the simulated series: large squared returns arrive in persistent bursts. In real index data, such bursts correspond to episodes like the 2008 financial crisis and the 2020 COVID-19 shock.")

The **Autoregressive Conditional Heteroskedasticity (ARCH)** model, introduced by Robert Engle, models today's variance as a weighted average of past squared residuals. An ARCH(q) model is:
$$ \sigma_t^2 = \omega + \sum_{i=1}^q \alpha_i u_{t-i}^2 $$ 
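In practice, capturing persistent volatility with an ARCH model often requires a large number of lags $q$. The **Generalized ARCH (GARCH)** model provides a more parsimonious solution by adding lagged conditional variances to the equation. The GARCH(1,1) model is: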
$$ \sigma_t^2 = \omega + \alpha_1 u_{t-1}^2 + \beta_1 \sigma_{t-1}^2 $$

In [None]:
sec("Fitting a GARCH(1,1) Model")
# `returns` always exists at this point (downloaded or simulated), so the
# model is fitted whenever `arch` is installed instead of being skipped when
# the download failed.
if ARCH_AVAILABLE:
    if not DATA_LOADED:
        note("Real S&P 500 data is unavailable; fitting the model to the simulated GARCH(1,1) returns.")
    garch_model = arch_model(returns, mean='Constant', vol='Garch', p=1, q=1)
    garch_results = garch_model.fit(disp='off')
    print("--- GARCH(1,1) Model Results ---")
    print(garch_results.summary())
    persistence = garch_results.params['alpha[1]'] + garch_results.params['beta[1]']
    # Interpret the estimate instead of asserting a fixed conclusion.
    if persistence >= 1:
        note(f"The persistence parameter (alpha[1] + beta[1]) is {persistence:.4f}. Since this is not below 1, the estimated variance process is not covariance stationary and volatility shocks do not die out.")
    elif persistence >= 0.9:
        note(f"The persistence parameter (alpha[1] + beta[1]) is {persistence:.4f}. Since this is very close to 1, it indicates that volatility shocks are highly persistent.")
    else:
        note(f"The persistence parameter (alpha[1] + beta[1]) is {persistence:.4f}. Volatility shocks are only moderately persistent and decay comparatively quickly.")
    fig = garch_results.plot(annualize='D')
    fig.suptitle('Conditional Volatility from GARCH(1,1) Model', fontsize=16, y=1.02)
    plt.tight_layout()
    plt.show()
else:
    note("Skipping GARCH model fitting as `arch` library is not available.")

#### 5.2 Asymmetric GARCH: The Leverage Effect
The **GJR-GARCH** model extends GARCH to account for the leverage effect, where negative news increases volatility more than positive news. It introduces an additional term for negative shocks:
$$ \sigma_t^2 = \omega + \alpha_1 u_{t-1}^2 + \gamma_1 I_{t-1} u_{t-1}^2 + \beta_1 \sigma_{t-1}^2 $$ 
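Here $I_{t-1}$ is an indicator equal to 1 if $u_{t-1} < 0$ (bad news) and 0 otherwise, so negative shocks have an impact of $\alpha_1 + \gamma_1$ while positive shocks have an impact of $\alpha_1$. A positive and significant $\gamma_1$ coefficient is evidence of the leverage effect.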

In [None]:
sec("Fitting a GJR-GARCH Model to Test for Leverage Effects")
# `returns` always exists at this point (downloaded or simulated), so the
# model is fitted whenever `arch` is installed.
if ARCH_AVAILABLE:
    gjr_garch_model = arch_model(returns, mean='Constant', vol='Garch', p=1, o=1, q=1)
    gjr_results = gjr_garch_model.fit(disp='off')
    print("--- GJR-GARCH(1,1,1) Model Results ---")
    print(gjr_results.summary())
    gamma_coeff = gjr_results.params['gamma[1]']
    gamma_pvalue = gjr_results.pvalues['gamma[1]']
    # Only claim a leverage effect when the estimate actually supports it.
    if gamma_coeff > 0 and gamma_pvalue < 0.05:
        note(f"The asymmetry coefficient, gamma[1], is {gamma_coeff:.4f} and is statistically significant (p-value {gamma_pvalue:.4f}). This provides strong evidence for the presence of a leverage effect.")
    elif DATA_LOADED:
        note(f"The asymmetry coefficient, gamma[1], is {gamma_coeff:.4f} (p-value {gamma_pvalue:.4f}). It is not positive and significant at the 5% level, so this sample provides no evidence of a leverage effect.")
    else:
        note(f"The asymmetry coefficient, gamma[1], is {gamma_coeff:.4f} (p-value {gamma_pvalue:.4f}). It is not positive and significant at the 5% level, as expected: the simulated returns come from a symmetric GARCH(1,1) process with no leverage effect.")
else:
    note("Skipping GJR-GARCH model fitting.")

#### 5.3 Non-Linear Models: Markov-Switching
A different approach to capturing changes in time series behavior is the **Markov-Switching Model**. Instead of assuming parameters are constant, this model allows them to switch between a discrete number of 'regimes' or 'states'. The switching process is governed by an unobserved Markov chain.
For example, a model of GDP growth might have a 'low-growth' regime and a 'high-growth' regime, each with its own mean and variance. The model estimates the parameters for each regime and, crucially, the probabilities of transitioning between them.

In [None]:
sec("Markov-Switching Model of GDP Growth")
dta = sm.datasets.macrodata.load_pandas().data
# macrodata stores year and quarter as floats (e.g. 1959.0, 1.0). Cast them to
# int first so the labels read '1959Q1' rather than the unparseable '1959.0Q1.0'.
quarter_labels = dta['year'].astype(int).astype(str) + 'Q' + dta['quarter'].astype(int).astype(str)
# Parse as quarterly periods, then convert to start-of-quarter timestamps for plotting.
dta.index = pd.PeriodIndex(quarter_labels, freq='Q').to_timestamp()
gdp_growth = dta['realgdp'].pct_change().dropna() * 100

# Two regimes (k_regimes=2), each with its own mean (trend='c') and its own
# variance (switching_variance=True).
ms_model = sm.tsa.MarkovRegression(gdp_growth, k_regimes=2, trend='c', switching_variance=True).fit()
print(ms_model.summary())

# Regime numbering is arbitrary, so find the low-growth regime from the
# estimated means instead of assuming it is regime 0.
regime_means = [ms_model.params[f'const[{k}]'] for k in range(2)]
regime_vars = [ms_model.params[f'sigma2[{k}]'] for k in range(2)]
low_regime = int(np.argmin(regime_means))
high_regime = 1 - low_regime
variance_label = 'High' if regime_vars[low_regime] > regime_vars[high_regime] else 'Low'

fig, axes = plt.subplots(2, figsize=(12, 8), sharex=True)
axes[0].plot(gdp_growth)
axes[0].set_title('US Real GDP Growth')
axes[1].plot(ms_model.smoothed_marginal_probabilities[low_regime])
axes[1].set_title(f'Smoothed Probability of Being in the Low-Growth, {variance_label}-Variance Regime')
plt.tight_layout()
plt.show()
note(f"The model identifies two distinct regimes with estimated mean quarterly growth of {regime_means[low_regime]:.2f}% and {regime_means[high_regime]:.2f}%. The smoothed probabilities show when the model places the economy in the low-growth regime; compare these episodes with known NBER recession periods.")