# Task 1: Statistical Foundations & Change Point Analysis
Author: Hermona Addisu
Objective: Establish statistical stationarity, analyze volatility clustering, and prepare for Bayesian structural break modeling.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller

# Brent price history: parse the mixed-format date strings, then order the
# rows chronologically and renumber the index from zero.
prices = (
    pd.read_csv('../data/BrentOilPrices.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='mixed'))
    .sort_values('Date')
    .reset_index(drop=True)
)

# Role of events: the notebook treats this table as the reference ("ground
# truth") that anchors the Bayesian attribution pipeline.
events = pd.read_csv('../data/external_events.csv')
print("Ground Truth Events Loaded:")
display(events.head())  # `display` is supplied by the notebook (IPython) session

## 2. Trend and Stationarity Analysis
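We use the Augmented Dickey-Fuller (ADF) test to check for non-stationarity. The null hypothesis of the test is that the series has a unit root (i.e. is non-stationary), so if p > 0.05 we fail to reject that null and treat the price level as non-stationary. This motivates modeling the shifting mean with structural breaks.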

In [None]:
# Line chart of the raw price level over time.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(prices['Date'], prices['Price'])
ax.set_title('Brent Oil Price Trend (1987-2022)')
plt.show()

# Augmented Dickey-Fuller test on the price level; the second element of the
# returned tuple is the p-value.
adf_p_value = adfuller(prices['Price'])[1]
print(f'ADF p-value: {adf_p_value:.4f}')

## 3. Volatility Profiling
We compute the rolling 21-day standard deviation of daily log returns and annualize it by multiplying by $\sqrt{252}$. Peaks in this series identify risk clusters that can be compared against geopolitical shocks.

In [None]:
# Daily log return ln(P_t / P_{t-1}); the first row has no predecessor, so it
# is NaN.
brent = prices['Price']
prices['Log_Ret'] = np.log(brent / brent.shift(1))

# Standard deviation over a 21-row rolling window, scaled by sqrt(252) to
# annualise it (the notebook text calls this "annualized" volatility).
annualisation = 252**0.5
prices['Vol'] = prices['Log_Ret'].rolling(window=21).std() * annualisation

fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(prices['Date'], prices['Vol'], color='orange')
ax.set_title('Market Risk: 21-Day Rolling Volatility')
plt.show()

## 4. Bayesian Change Point Model Theory
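We model the structural break using a switch point ($\tau$).
1. Prior: a discrete uniform distribution for $\tau$ across all time indices.
2. Switch function: `pm.math.switch(index < tau, mu1, mu2)` selects $\mu_1$ for observations before $\tau$ and $\mu_2$ from $\tau$ onward.
3. Likelihood: the data are assumed Normal with a mean that shifts at time $\tau$, i.e. $y_t \sim \mathcal{N}(\mu_t, \sigma^2)$ with $\mu_t = \mu_1$ if $t < \tau$ and $\mu_t = \mu_2$ otherwise.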