In [1]:
import datetime
import json
import sys
from pathlib import Path

sys.path.append('../src')

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from data_preparation import OilDataProcessor
from model import BayesianChangePointModel



In [2]:
# 1. Load and prepare data
# NOTE(review): the output recorded for this cell is
# "ValueError: Missing column provided to 'parse_dates': 'date'". Presumably a
# CSV read inside OilDataProcessor names a date column that one of these files
# does not have -- confirm against data_preparation.py and the CSV headers.
data_paths = {
    'price_path': '../data/BrentOilPrices.csv',
    'events_path': '../data/events.csv',
}
processor = OilDataProcessor(**data_paths)
price_df, events_df = processor.load_and_prepare_data()

# 2. Prepare data for modeling (log returns are requested for stationarity)
model_data = processor.prepare_for_modeling(price_df, use_log_returns=True)
print(f"Model data shape: {model_data.shape}")

# 3. Build and fit Bayesian change point model ('mean_shift': changes in mean)
model = BayesianChangePointModel(data=model_data, model_type='mean_shift')

print("Fitting Bayesian change point model...")
sampler_settings = {'draws': 3000, 'tune': 1500, 'chains': 2}
model.fit(**sampler_settings)

# 4. Check convergence
print("\n=== Convergence Diagnostics ===")
diagnostics = model.diagnose_convergence()
# Each flag falls back to 'N/A' when the diagnostics mapping lacks the key.
for label, flag in (('R-hat convergence', 'rhat_convergence'),
                    ('ESS adequate', 'ess_adequate')):
    print(f"{label}: {diagnostics.get(flag, 'N/A')}")

# 5. Display summary statistics
print("\n=== Model Summary ===")
print(model.summary)

ValueError: Missing column provided to 'parse_dates': 'date'

In [None]:
# 6. Plot posterior distributions
# The first price date has no log return, so it is dropped to line the dates
# up with the model's observations.
# NOTE(review): assumes the model data is exactly one row shorter than
# price_df -- confirm against prepare_for_modeling.
dates_for_model = price_df.index[1:]
model.plot_posterior_distributions(dates=dates_for_model)

# 7. Quantify impact
impact = model.quantify_impact()
print("\n=== Impact Quantification ===")
for metric_name, metric_value in impact.items():
    print(f"{metric_name}: {metric_value:.4f}")

# 8. Identify most probable change point
# NOTE(review): the point estimate below is the posterior *median* of tau,
# truncated to an integer index; it is reported as "most probable" although it
# is not the posterior mode.
tau_samples = model.get_change_point_posterior()
tau_median = np.median(tau_samples)
most_probable_tau = int(tau_median)
change_date = dates_for_model[most_probable_tau]

print(f"\nMost probable change point index: {most_probable_tau}")
print(f"Most probable change date: {change_date.date()}")

# 9. Compare with events
print("\n=== Nearby Historical Events ===")
# Keep the events dated no more than 30 days before or after the change date
# (both window edges inclusive).
time_window = pd.Timedelta(days=30)
window_start = change_date - time_window
window_end = change_date + time_window
on_or_after_start = events_df.index >= window_start
on_or_before_end = events_df.index <= window_end
nearby_events = events_df[on_or_after_start & on_or_before_end]

if len(nearby_events) == 0:
    print("No events found within ±30 days")
else:
    print(f"Found {len(nearby_events)} event(s) within ±30 days:")
    for event_date, event_row in nearby_events.iterrows():
        print(f"  {event_date.date()}: {event_row['event_type']} - {event_row['event_description']}")

# 10. Visualize change point on price series
plt.figure(figsize=(15, 8))

# Plot price series
plt.plot(price_df.index, price_df['Price'], linewidth=1, alpha=0.7, label='Price')

# Highlight change point region
change_price = price_df.loc[change_date, 'Price'] if change_date in price_df.index else None
# Compare against None explicitly: a plain truthiness test would also treat a
# price of 0.0 as "missing" and silently skip the change-point marker.
if change_price is not None:
    plt.axvline(x=change_date, color='red', linestyle='--', alpha=0.7,
                label=f'Change Point: {change_date.date()}')
    plt.scatter(change_date, change_price, color='red', s=100, zorder=5)

# Add events: green triangle when impact_direction is 'positive', orange for
# any other value. Events whose date has no price observation are skipped.
for date, event in events_df.iterrows():
    if date in price_df.index:
        plt.scatter(date, price_df.loc[date, 'Price'],
                   color='green' if event['impact_direction'] == 'positive' else 'orange',
                   s=50, alpha=0.7, marker='^', zorder=4)

plt.title('Brent Oil Prices with Change Point and Events', fontsize=14)
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# 11. Advanced: Multiple change points (future work section)
# Printed as a numbered list; the blank lines before and after the list are
# part of the emitted text.
future_work_items = (
    "Multiple change point models to detect several regime shifts",
    "Incorporating external variables (GDP, inflation, exchange rates)",
    "Using VAR models for dynamic relationships",
    "Markov-switching models for regime classification",
    "Machine learning approaches for event impact prediction",
)

print("\n=== Future Work: Multiple Change Points ===")
print("\nFor more comprehensive analysis, consider:")
for item_number, item_text in enumerate(future_work_items, start=1):
    print(f"{item_number}. {item_text}")
print()

# 12. Save results
# Build a JSON-friendly payload:
# - the tau samples are converted to a plain (nested) list so they can be
#   written even when the model returns them as an array;
# - to_dict('records') drops the DataFrame index, which holds the event date
#   here, so the date is copied into an 'event_date' column first. An empty
#   selection yields an empty list.
results = {
    'change_date': change_date,
    'most_probable_tau': most_probable_tau,
    'tau_posterior': np.asarray(tau_samples).tolist(),
    'impact_metrics': impact,
    'nearby_events': nearby_events.assign(event_date=nearby_events.index).to_dict('records'),
}
# Convert objects the json module cannot encode natively
def json_serializer(obj):
    """Fallback encoder passed as ``default=`` to ``json.dump``.

    Args:
        obj: A value the standard JSON encoder could not serialize.

    Returns:
        An ISO-8601 string for ``datetime``/``date`` instances (including
        subclasses), a nested Python list for NumPy arrays, or the equivalent
        Python scalar for NumPy scalar types.

    Raises:
        TypeError: If ``obj`` is none of the supported types.
    """
    if isinstance(obj, (datetime.datetime, datetime.date)):
        return obj.isoformat()
    # Posterior samples and metrics commonly arrive as NumPy containers or
    # scalars, which the json module rejects unless converted.
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        return obj.item()
    raise TypeError(f"Type {type(obj)} not serializable")

# Create the output directory on demand so that a checkout without a
# '../results' folder does not fail with FileNotFoundError, then write the
# results as indented JSON.
results_path = Path('../results/change_point_analysis.json')
results_path.parent.mkdir(parents=True, exist_ok=True)
with results_path.open('w') as f:
    json.dump(results, f, default=json_serializer, indent=2)

print("\nAnalysis complete! Results saved to '../results/change_point_analysis.json'")