In [None]:
"""
Notebook 02: Stationarity Analysis
===================================
This notebook performs comprehensive stationarity testing on Brent oil prices
and log returns to inform modeling choices.
"""

import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller, kpss

# Add src to path
sys.path.append(str(Path().resolve().parent / "src"))
from data_loader import load_brent_data, calculate_returns

# Load the Brent price frame and attach log returns as a new 'log_return' column.
df = load_brent_data()
df['log_return'] = calculate_returns(df, method='log')

# Section banner, emitted as one call (one item per line).
print("=" * 60, "STATIONARITY ANALYSIS", "=" * 60, sep="\n")

# Sample size and the first/last index values (reported as the date range).
print(f"\nData loaded: {len(df)} observations")
print(f"Date range: {df.index.min()} to {df.index.max()}")


ModuleNotFoundError: No module named 'statsmodels'

In [None]:
# Augmented Dickey-Fuller (ADF) Test for Prices and Log Returns
def report_adf(series, heading, alpha=0.05):
    """Run the ADF test on ``series`` and print a formatted report.

    The ADF null hypothesis (H0) is that the series has a unit root, so a
    p-value above ``alpha`` is reported as non-stationary.

    Parameters
    ----------
    series : pandas.Series
        Series to test; missing values are dropped before testing.
    heading : str
        Line printed (after a blank line) above the report.
    alpha : float, default 0.05
        Significance level used for the printed verdict.

    Returns
    -------
    tuple
        The raw ``adfuller`` result. This function reads the statistic at
        index 0, the p-value at index 1 and the critical-value mapping at
        index 4.
    """
    # Lag length is chosen automatically by AIC.
    result = adfuller(series.dropna(), autolag='AIC')
    statistic, p_value, critical_values = result[0], result[1], result[4]

    print(f"\n{heading}")
    print(f"   ADF Statistic: {statistic:.6f}")
    print(f"   p-value: {p_value:.6f}")
    print("   Critical Values:")
    for level, threshold in critical_values.items():
        print(f"      {level}: {threshold:.3f}")

    if p_value > alpha:
        verdict = 'Non-stationary (fail to reject H0)'
    else:
        verdict = 'Stationary (reject H0)'
    print(f"   Result: {verdict}")
    return result


print("\n" + "=" * 60)
print("AUGMENTED DICKEY-FULLER (ADF) TEST")
print("=" * 60)

# Levels first, then log returns; results are kept under their original names.
adf_result_prices = report_adf(df['Price'], "1. ADF Test on PRICE LEVELS:")
adf_result_returns = report_adf(df['log_return'], "2. ADF Test on LOG RETURNS:")


In [None]:
# KPSS Test (Kwiatkowski-Phillips-Schmidt-Shin)
def report_kpss(series, heading, alpha=0.05):
    """Run the KPSS test on ``series`` and print a formatted report.

    The KPSS null hypothesis (H0) is that the series is stationary, so a
    p-value below ``alpha`` is reported as non-stationary.

    The heading is printed before the test runs so that a failure message is
    always attributed to the right series. The test stays best-effort: any
    exception is printed rather than raised, and ``None`` is returned so the
    caller never keeps a stale result from an earlier run.

    Parameters
    ----------
    series : pandas.Series
        Series to test; missing values are dropped before testing.
    heading : str
        Line printed (after a blank line) above the report.
    alpha : float, default 0.05
        Significance level used for the printed verdict.

    Returns
    -------
    tuple or None
        The raw ``kpss`` result (statistic at index 0, p-value at index 1,
        critical-value mapping at index 3), or ``None`` if the test failed.
    """
    print(f"\n{heading}")
    try:
        # regression='c' tests stationarity around a constant level.
        result = kpss(series.dropna(), regression='c', nlags='auto')
        statistic, p_value, critical_values = result[0], result[1], result[3]

        # NOTE(review): per the statsmodels documentation, KPSS p-values are
        # interpolated from a table, so values at the extremes should be read
        # as bounds rather than exact figures - confirm against the docs.
        print(f"   KPSS Statistic: {statistic:.6f}")
        print(f"   p-value: {p_value:.6f}")
        print("   Critical Values:")
        for level, threshold in critical_values.items():
            print(f"      {level}: {threshold:.3f}")

        if p_value < alpha:
            verdict = 'Non-stationary (reject H0)'
        else:
            verdict = 'Stationary (fail to reject H0)'
        print(f"   Result: {verdict}")
    except Exception as exc:
        print(f"   Error: {exc}")
        return None
    return result


print("\n" + "=" * 60)
print("KPSS TEST")
print("=" * 60)
print("(H0: Series is stationary, H1: Series has unit root)")

# Levels first, then log returns; results are kept under their original names.
kpss_result_prices = report_kpss(df['Price'], "1. KPSS Test on PRICE LEVELS:")
kpss_result_returns = report_kpss(df['log_return'], "2. KPSS Test on LOG RETURNS:")


In [None]:
# Visual Analysis: Autocorrelation Function (ACF)
def plot_correlograms(panels, lags=40):
    """Draw one ACF/PACF row per series.

    Parameters
    ----------
    panels : list of (pandas.Series, str)
        Each entry is a series (already free of missing values) and the label
        used in that row's titles.
    lags : int, default 40
        Number of lags shown in every panel.

    Returns
    -------
    (matplotlib.figure.Figure, numpy.ndarray)
        The figure and its axes grid with one row per entry in ``panels`` and
        two columns (ACF on the left, PACF on the right).
    """
    # squeeze=False keeps the axes grid two-dimensional for any row count.
    fig, axes = plt.subplots(
        len(panels), 2, figsize=(16, 5 * len(panels)), squeeze=False
    )
    for (acf_ax, pacf_ax), (series, label) in zip(axes, panels):
        plot_acf(series, lags=lags, ax=acf_ax, title=f'ACF: {label}')
        plot_pacf(series, lags=lags, ax=pacf_ax, title=f'PACF: {label}', method='ywm')
    return fig, axes


# Row 1: price levels, row 2: log returns.
fig, axes = plot_correlograms([
    (df['Price'].dropna(), 'Price Levels'),
    (df['log_return'].dropna(), 'Log Returns'),
])

plt.tight_layout()
plt.show()

print("\nInterpretation:")
print("- Slow decay in ACF for prices indicates non-stationarity")
print("- Fast decay in ACF for returns suggests stationarity")


## Summary and Modeling Implications

### Key Findings:

1. **Price Levels (I(1) Process)**:
   - ADF test: Non-stationary (p-value > 0.05)
   - KPSS test: Non-stationary (p-value < 0.05)
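   - **Conclusion**: Both tests agree that prices contain a unit root; because the differenced series (log returns, finding 2 below) is stationary, prices are integrated of order 1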

2. **Log Returns (I(0) Process)**:
   - ADF test: Stationary (p-value < 0.05)
   - KPSS test: Stationary (p-value > 0.05)
   - **Conclusion**: Log returns are stationary and suitable for change point analysis

### Modeling Choice:
Since log returns are stationary, we will apply the Bayesian change point model to **log returns** rather than price levels. This ensures:
- Valid statistical inference
- Detection of changes in mean return and volatility regimes
- Avoidance of spurious regression issues
