In [None]:
# ==========================================================
# Task 2: Bayesian Change Point Analysis for Brent Oil Prices
# Optimized + Production-Ready Notebook
# ==========================================================

# -----------------------------
# Imports
# -----------------------------
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pymc as pm
import arviz as az
import importlib

# Make the project's `src` package importable.
# The root is taken to be the parent of the current working directory
# (assumes the notebook is launched from a direct subdirectory of the
# project root -- TODO confirm).
project_root = Path().resolve().parent
# Notebook cells are re-run often; only register the path once so sys.path
# does not accumulate duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Bind the project modules to short aliases, then re-execute each one with
# importlib.reload so the names imported from them below come from the
# freshly reloaded module objects.
import src.data_processing.load_data as ld
import src.data_processing.preprocess as pp
import src.modelling.analysis_utils as au
import src.modelling.bayesian_cp_model as bcm

for _module in (ld, pp, au, bcm):
    importlib.reload(_module)

from src.data_processing.load_data import load_brent_prices, load_detected_events
from src.data_processing.preprocess import compute_log_returns
from src.modelling.analysis_utils import (
    get_change_point,
    plot_price_with_change_point,
    associate_events,
    quantify_impact,
    summarize_change_points
)
from src.modelling.bayesian_cp_model import (
    bayesian_change_point_model,
    plot_trace,
    plot_change_point_distribution
)

# -----------------------------
# Configuration
# -----------------------------
# Maximum number of most-recent observations kept for modelling.
MODEL_WINDOW = 2_000
# Sampler settings, forwarded to bayesian_change_point_model as
# draws / tune / target_accept respectively.
DRAWS = 1_000
TUNE = 1_000
TARGET_ACCEPT = 0.95

# -----------------------------
# Step 1: Load & Validate Data
# -----------------------------
# Load the price series and the detected-events table, add log returns, and
# fail fast if the resulting frame cannot be used for modelling.
# Defines: prices_df, events_df.
# Raises: RuntimeError wrapping any loading/validation error.
try:
    prices_df = load_brent_prices(project_root / "data/raw/BrentOilPrices.csv")
    events_df = load_detected_events(project_root / "data/processed/detected_events.csv")
    prices_df = compute_log_returns(prices_df)

    if prices_df.empty:
        raise ValueError("Price dataframe is empty")

    if "log_return" not in prices_df.columns:
        raise ValueError("log_return column missing")

    print("‚úÖ Data loaded successfully")

except Exception as e:
    # Chain explicitly so the traceback reports the original error as the
    # direct cause instead of as a second failure inside this handler.
    raise RuntimeError(f"Data preparation failed: {e}") from e

# -----------------------------
# Step 2: Prepare Modeling Window
# -----------------------------
# Drop incomplete rows and keep at most MODEL_WINDOW of the most recent
# observations for the model.
# Defines: model_df (0-based index), log_returns, dates.
# Raises: RuntimeError wrapping any preparation error.
try:
    prices_df = prices_df.dropna().reset_index(drop=True)

    # dropna() can remove every row; stop here with a clear message rather
    # than handing an empty array to the sampler.
    if prices_df.empty:
        raise ValueError("No observations left after dropping missing values")

    # A negative-start slice already returns the whole frame when it has
    # fewer than MODEL_WINDOW rows, so no length check is needed.
    # The index is reset so that a change-point position inside the window is
    # also the row label; without it the labels would still be those of the
    # full price frame.
    model_df = prices_df.iloc[-MODEL_WINDOW:].reset_index(drop=True)

    log_returns = model_df["log_return"].values
    dates = model_df["Date"].values

    print(f"‚úÖ Using {len(model_df)} observations for modeling")

except Exception as e:
    # Chain explicitly so the original error is reported as the direct cause.
    raise RuntimeError(f"Model window preparation failed: {e}") from e

# -----------------------------
# Step 3: Bayesian Model
# -----------------------------
# Fit the change-point model on the windowed log returns using the sampler
# settings from the configuration section.
# Defines: trace, model.
# Raises: RuntimeError wrapping any error raised while building or sampling.
try:
    trace, model = bayesian_change_point_model(
        log_returns,
        draws=DRAWS,
        tune=TUNE,
        target_accept=TARGET_ACCEPT
    )

    print("‚úÖ Bayesian model finished")

except Exception as e:
    # Chain explicitly so the original sampler/model error is reported as the
    # direct cause instead of as a failure inside this handler.
    raise RuntimeError(f"Model execution failed: {e}") from e

# -----------------------------
# Step 4: Diagnostics
# -----------------------------
# Best-effort diagnostics: call the two project plotting helpers on the trace,
# then show the ArviZ summary table rounded to 4 decimals.
# Any exception is reported as a printed warning and does not stop the
# notebook. Because all calls share one try block, a failure in an earlier
# call skips the remaining diagnostics.
try:
    plot_trace(trace)
    plot_change_point_distribution(trace, dates)

    summary_stats = az.summary(trace, round_to=4)
    # `display` is not imported in this cell; it is expected to be supplied by
    # the IPython/Jupyter environment.
    display(summary_stats)

except Exception as e:
    print(f"Diagnostics warning: {e}")

# -----------------------------
# Step 5: Extract Change Point
# -----------------------------
# Extract the change-point estimate from the trace, report its date, and plot
# it against the price series.
# Defines: tau, tau_samples, cp_date.
# Raises: RuntimeError wrapping any extraction, lookup or plotting error.
try:
    tau, tau_samples = get_change_point(trace)

    # tau is used as a row position inside the modelling window (model_df),
    # not inside the full price history.
    cp_date = model_df["Date"].iloc[tau]
    print(f"‚úÖ Change point detected at index {tau}")
    print(f"üìÖ Date: {cp_date}")

    plot_price_with_change_point(model_df, tau)

except Exception as e:
    # Chain explicitly so the original error is reported as the direct cause.
    raise RuntimeError(f"Change point extraction failed: {e}") from e

# -----------------------------
# Step 6: Event Association
# -----------------------------
# Best-effort lookup of events related to the detected change point.
# Passes window_days=5 to associate_events (presumably the matching tolerance
# in days around the change-point date -- confirm in analysis_utils).
# Failures are printed as a warning; the notebook continues.
try:
    matched_events = associate_events(model_df, events_df, tau, window_days=5)
    print("‚úÖ Related events:")
    # `display` is expected to be supplied by the IPython/Jupyter environment.
    display(matched_events)

except Exception as e:
    print(f"Event matching warning: {e}")

# -----------------------------
# Step 7: Impact Quantification
# -----------------------------
# Best-effort impact report for the detected change point.
# `impact` is indexed by the keys 'before_mean', 'after_mean', 'change' and
# 'percent_change', each formatted to two decimals below.
# Failures (including a missing key) are printed as a warning; lines printed
# before the failure remain in the output.
try:
    impact = quantify_impact(model_df, tau)

    print("üìä Impact Summary")
    print(f"Before mean price: ${impact['before_mean']:.2f}")
    print(f"After mean price: ${impact['after_mean']:.2f}")
    print(f"Change: ${impact['change']:.2f}")
    print(f"Percent change: {impact['percent_change']:.2f}%")

except Exception as e:
    print(f"Impact calculation warning: {e}")

# -----------------------------
# Step 8: Final Summary Table
# -----------------------------
# Build the summary table for the detected change point, show it, and save it
# as CSV under data/processed.
# Defines: tau_list, summary_table, output_path.
# Raises: RuntimeError wrapping any error from summarising or writing.
try:
    # summarize_change_points is given a list, so the single tau is wrapped.
    tau_list = [tau]
    summary_table = summarize_change_points(model_df, events_df, tau_list)

    print("‚úÖ Final Change Point Summary")
    display(summary_table)

    output_path = project_root / "data/processed/change_point_summary.csv"
    # Ensure the target directory exists so the write cannot fail on a
    # missing folder; a no-op when it is already there.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    summary_table.to_csv(output_path, index=False)

    print(f"üìÅ Saved to: {output_path}")

except Exception as e:
    # Chain explicitly so the original error is reported as the direct cause.
    raise RuntimeError(f"Summary generation failed: {e}") from e


  df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
  df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)


‚úÖ Data loaded successfully
‚úÖ Using 2000 observations for modeling


Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Metropolis: [tau]
>NUTS: [mu1, mu2, sigma]


Output()