## Part 2.2: Core Analysis
# Modeling Change Points using PyMC3

In [None]:
import pandas as pd
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
import arviz as az

# Read the Brent oil price table and index it chronologically by date.
df = pd.read_csv('BrentOilPrices.csv')
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
# Rows whose date failed to parse were coerced to NaT above; discard them
# before the column becomes the (sorted) index.
df = df.dropna(subset=['Date']).set_index('Date').sort_index()

# Log returns: difference between each log price and the previous one.
# The first row has no predecessor, so its return is NaN and is dropped below.
log_price = np.log(df['Price'])
df['LogReturn'] = log_price - log_price.shift(1)
returns = df['LogReturn'].dropna().values
T = len(returns)

# Change point model
#
# A single switch point `tau` splits the return series into two regimes that
# share one noise scale `sigma` but have separate means `mu_1` and `mu_2`.
# Observation i belongs to the first regime when i < tau, otherwise to the
# second, so `tau` is the position of the first observation in regime two.
if T < 2:
    raise ValueError(
        "At least two log returns are required to place a change point between them."
    )

time_index = np.arange(T)

with pm.Model() as model:
    # Restrict tau to 1..T-1 so that both regimes always contain at least one
    # observation. With tau = 0 (or tau = T) one of the means would not enter
    # the likelihood at all and would simply be drawn from its prior.
    tau = pm.DiscreteUniform('tau', lower=1, upper=T - 1)

    # Regime means and the shared standard deviation of the returns.
    mu_1 = pm.Normal('mu_1', mu=0, sigma=1)
    mu_2 = pm.Normal('mu_2', mu=0, sigma=1)
    sigma = pm.HalfNormal('sigma', sigma=1)

    # Pick mu_1 before the switch point and mu_2 from the switch point onward.
    mu = pm.math.switch(tau > time_index, mu_1, mu_2)

    obs = pm.Normal('obs', mu=mu, sigma=sigma, observed=returns)

    trace = pm.sample(2000, tune=1000, target_accept=0.95, return_inferencedata=True)


# Interpreting the Output

In [None]:
# Plot trace and posterior
az.plot_trace(trace)
plt.show()

# Summary of the model
summary = az.summary(trace)
print(summary)

# Most probable change point.
# tau is a discrete index, so the most probable value is the posterior mode
# (the most frequently sampled index), not the posterior mean: the mean can
# fall on an index that carries little posterior mass.
tau_draws = trace.posterior['tau'].values.ravel().astype(int)
tau_values, tau_counts = np.unique(tau_draws, return_counts=True)
most_likely_tau = int(tau_values[tau_counts.argmax()])

# `returns` was built from the non-missing LogReturn rows, so position i in the
# model corresponds to the i-th non-missing LogReturn date, not to df.index[i].
return_dates = df['LogReturn'].dropna().index
if 0 < most_likely_tau < len(return_dates):
    change_date = return_dates[most_likely_tau]
    print(f"Most likely change point occurred on: {change_date.date()}")
else:
    # tau at either boundary puts every observation in a single regime.
    change_date = None
    print("No change point detected within the sample (tau is at a boundary).")

# Take the regime means from the raw posterior draws. The summary table is
# rounded for display, which can collapse small mean returns to exactly 0.
mu_1 = float(trace.posterior['mu_1'].mean())
mu_2 = float(trace.posterior['mu_2'].mean())

# A relative shift is undefined when the baseline mean is exactly zero.
if mu_1 == 0:
    impact_pct = float('nan')
else:
    impact_pct = ((mu_2 - mu_1) / abs(mu_1)) * 100

print(f"Relative shift in mean return: {impact_pct:.2f}%")
