In [None]:
import pandas as pd
import numpy as np
from datetime import timedelta


In [None]:
# Load the processed event table and parse its `date` column into datetimes
# so it can be compared against change-point timestamps further down.
events = (
    pd.read_csv("../data/processed/event_data.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
events


In [None]:
# Change-point dates (ISO strings) that events will be matched against.
# NOTE(review): these are hard-coded here; presumably they come from the
# change-point model run elsewhere — confirm they are kept in sync.
change_date_strings = [
    "2008-09-15",
    "2014-11-27",
    "2020-03-15",
    "2022-02-24",
]

# Single-column frame of parsed timestamps, one row per change point.
change_points = pd.DataFrame({"change_date": pd.to_datetime(change_date_strings)})

change_points


In [None]:
# Half-width, in days, of the matching window around each change point.
WINDOW_DAYS = 30


def match_events(change_date, events, window=WINDOW_DAYS):
    """Return the events dated within ``window`` days of ``change_date``.

    Parameters
    ----------
    change_date : pandas.Timestamp
        The change-point date the window is centred on.
    events : pandas.DataFrame
        Event table; must contain a datetime-typed ``date`` column.
    window : int, default ``WINDOW_DAYS``
        Number of days before and after ``change_date`` to include. Both
        window edges are inclusive.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows of ``events`` (the input is not
        modified) with an extra ``days_from_change`` column holding the
        signed whole-day offset ``date - change_date`` (negative means the
        event precedes the change point). The column is present even when
        no event matches, so callers always get the same schema.
    """
    start = change_date - timedelta(days=window)
    end = change_date + timedelta(days=window)

    # `between` is inclusive on both ends, matching `>= start` and `<= end`.
    matched = events[events['date'].between(start, end)].copy()

    # Added unconditionally: on an empty selection this simply yields an
    # empty column instead of leaving the column out altogether.
    matched['days_from_change'] = (matched['date'] - change_date).dt.days

    return matched


In [None]:
# Build one record per (change point, nearby event) pair. A change point with
# no event inside the window still gets a single placeholder record so that it
# remains visible in `association_df`.
associations = []

for cp in change_points['change_date']:
    # Pass the configured window explicitly so that editing WINDOW_DAYS
    # actually changes the matching.
    matched = match_events(cp, events, window=WINDOW_DAYS)

    if matched.empty:
        associations.append({
            "change_point_date": cp,
            "event_date": None,
            "event": "No major recorded event",
            "category": None,
            "days_from_change": None
        })
        continue

    # Walk the needed columns in lockstep rather than using iterrows(),
    # which builds a full Series for every row.
    associations.extend(
        {
            "change_point_date": cp,
            "event_date": event_date,
            "event": event_name,
            "category": category,
            "days_from_change": offset_days
        }
        for event_date, event_name, category, offset_days in zip(
            matched['date'],
            matched['event'],
            matched['category'],
            matched['days_from_change']
        )
    )

association_df = pd.DataFrame(associations)
association_df


In [None]:
# Keep only rows that describe a real event; the placeholder rows added for
# change points without any event in the window have a missing
# `days_from_change`.
matched_associations = association_df.dropna(subset=['days_from_change'])

# For each change point, find the row label of the event with the smallest
# absolute day offset (ties resolve to the first occurrence). The grouping is
# done on the filtered frame, not on `association_df`, so every label returned
# exists in `matched_associations` and no group consists solely of missing
# values.
nearest_idx = (
    pd.to_numeric(matched_associations['days_from_change'])
    .abs()
    .groupby(matched_associations['change_point_date'])
    .idxmin()
)

nearest_events = matched_associations.loc[nearest_idx]

nearest_events


In [None]:
# Restrict the nearest-event rows to the columns that go into the exported
# table, in presentation order.
final_table = nearest_events.loc[:, [
    "change_point_date",
    "event_date",
    "event",
    "category",
    "days_from_change",
]]

final_table


In [None]:
# ## Hypothesis-Based Interpretation (Non-Causal)

# The detected Bayesian change points align temporally with major geopolitical
# and economic events affecting oil markets.

# For example:

# - A change point detected around **March 2020** aligns closely with the
#   **COVID-19 global pandemic declaration**, suggesting a strong temporal
#   association with unprecedented demand and supply disruptions.

# - A structural break near **February 2022** coincides with the
#   **Russia–Ukraine conflict**, a major geopolitical event impacting global
#   energy supply expectations.

# These associations indicate **correlation in time**, not direct causation.
# Oil prices are influenced by overlapping market expectations, anticipatory
# behavior, and concurrent macroeconomic factors.


In [None]:
# Write the change-point/event association table to disk, leaving out the
# DataFrame index.
final_table.to_csv(
    path_or_buf="../data/processed/change_point_event_association.csv",
    index=False,
)
