### Task 2: Change Point Modeling and Insight Generation

In [2]:
import pandas as pd
import numpy as np
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
import os
import sys 
# Add ../Model (resolved to an absolute path against the current working directory)
# to the import search path so modules stored there can be imported below.
sys.path.append(os.path.abspath("../Model"))
import warnings
# Ignore all warnings from this point on.
warnings.filterwarnings('ignore')

In [3]:
# Step up to the parent directory so the relative 'Data/...' paths used by the
# loading cells resolve. The guard keeps this cell idempotent: once 'Data' is
# visible from the working directory, re-running the cell no longer climbs
# another level (which would break every relative path below).
if not os.path.isdir('Data'):
    os.chdir('..')

In [4]:
# Wildcard import. The cells below call load_and_preprocess, load_event, build_model,
# extract_change_point and plot_results, none of which are defined in this notebook,
# so they presumably come from model_data — verify against that module.
from model_data import *

In [5]:
# Load the Brent oil price data; the resulting frame (displayed below) has
# Date, Price and LogReturn columns.
df_price = load_and_preprocess('Data/BrentOilPrices.csv')
df_price

Unnamed: 0,Date,Price,LogReturn
1,1987-05-21,18.45,-0.009709
2,1987-05-22,18.55,0.005405
3,1987-05-25,18.60,0.002692
4,1987-05-26,18.63,0.001612
5,1987-05-27,18.60,-0.001612
...,...,...,...
9006,2022-11-08,96.85,-0.030706
9007,2022-11-09,93.05,-0.040026
9008,2022-11-10,94.25,0.012814
9009,2022-11-11,96.37,0.022244


In [6]:
# Load key events from Data/key_events.csv; the resulting frame (displayed below)
# has Date, Event and Description columns.
df_events = load_event("Data/key_events.csv")
df_events

Unnamed: 0,Date,Event,Description
0,2019-12-06,OPEC+ Production Cut,OPEC and allies agreed to cut oil production t...
1,2020-03-08,Oil Price Crash,Price war between Russia and Saudi Arabia trig...
2,2020-04-20,Historic Negative Prices,WTI crude futures went negative for the first ...
3,2020-11-09,COVID-19 Vaccine Announcement,Positive news on vaccine development lifted oi...
4,2021-07-01,Global Demand Recovery,Signs of economic recovery increased oil deman...
5,2022-02-24,Russia-Ukraine Conflict,Geopolitical tensions disrupted supply chains ...


In [9]:
# Placeholder change points: these positions are hard-coded stand-ins for what
# Bayesian inference would produce; they are not read from a fitted model here.
change_point_indices = [1234, 2350, 2950]  # row positions in df_price (used with .iloc)

# Select those rows by position, keep only their dates, and renumber from 0.
change_dates = (
    df_price.iloc[change_point_indices]["Date"]
    .reset_index(drop=True)
)
change_dates

0   1992-03-17
1   1996-08-12
2   1999-01-04
Name: Date, dtype: datetime64[ns]

In [11]:
from datetime import timedelta

# How close should we consider a match? e.g., ±7 days
tolerance_days = 7
# Half-width of the matching window, built once rather than twice per iteration.
tolerance = timedelta(days=tolerance_days)

# One record per (change point, matching event) pair; a change point with no
# event inside its window still gets exactly one placeholder record.
associated_events = []

for cp_date in change_dates:
    # Closed window [cp_date - tolerance, cp_date + tolerance]; `between` is
    # inclusive on both ends by default, same as the >= / <= pair it replaces.
    in_window = df_events['Date'].between(cp_date - tolerance, cp_date + tolerance)
    matched = df_events[in_window]

    if matched.empty:
        associated_events.append({
            "Change_Point_Date": cp_date.date(),
            "Event_Date": None,
            # Derive the text from tolerance_days so the message cannot drift
            # from the window actually applied above.
            "Event_Description": f"No matching event within ±{tolerance_days} days",
        })
        continue

    # Only two columns are needed, so iterate over them directly instead of
    # materialising a Series per row with iterrows().
    for event_date, event_name in zip(matched["Date"], matched["Event"]):
        associated_events.append({
            "Change_Point_Date": cp_date.date(),
            "Event_Date": event_date.date(),
            "Event_Description": event_name,
        })

# Convert to DataFrame; explicit columns keep the schema stable even when
# change_dates is empty and no records were collected.
df_association = pd.DataFrame(
    associated_events,
    columns=["Change_Point_Date", "Event_Date", "Event_Description"],
)

print("Change Points and Associated Events:")
print(df_association)


Change Points and Associated Events:
  Change_Point_Date Event_Date                 Event_Description
0        1992-03-17       None  No matching event within ±7 days
1        1996-08-12       None  No matching event within ±7 days
2        1999-01-04       None  No matching event within ±7 days


In [14]:
# Pull the two columns the modelling cells need out of the price frame as
# plain NumPy arrays: the dates for labelling, the log returns as model input.
dates = df_price["Date"].values
returns = df_price["LogReturn"].values
returns

array([-0.00970881,  0.00540542,  0.00269179, ...,  0.01281384,
        0.02224412, -0.02927141], shape=(9010,))

In [None]:
# Fit the change-point model to the log-return array. The sampler log recorded
# below shows 4 chains, with tau sampled by Metropolis and mu1, mu2, sigma1,
# sigma2 sampled by NUTS.
# NOTE(review): no random seed is passed here — confirm whether build_model seeds
# the sampler internally; otherwise results may differ between runs.
model, trace = build_model(returns)

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Metropolis: [tau]
>NUTS: [mu1, mu2, sigma1, sigma2]


In [None]:
# Reduce the sampled trace to a single change point and report it.
# NOTE(review): assumes extract_change_point returns (index, date) in that order,
# with the date looked up in `dates` — confirm against model_data.
most_probable_tau, change_date = extract_change_point(trace, dates)
print(f"Most probable change point at index {most_probable_tau}, date: {change_date}")

In [None]:
# Plot the inferred change point against the price data. The price frame in this
# notebook is `df_price`; no variable named `df` is ever defined here, so the
# previous call would raise NameError on a fresh kernel.
plot_results(df_price, change_date, trace)