In [None]:
# =============================
# 1. Import Libraries
# =============================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.stattools import adfuller
import ruptures as rpt

# Show floats with two decimal places whenever pandas renders a frame/series.
pd.set_option('display.float_format', '{:.2f}'.format)

# =============================
# 2. Load and Inspect Data
# =============================
# The path is relative to the working directory the script/notebook runs from.
file_path = "../data/BrentOilPrices.csv"  # adjust path if needed
df = pd.read_csv(file_path)

print("Initial Data Overview:")
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the report.
df.info()
print(df.describe())

# =============================
# 3. Data Cleaning
# =============================
# Convert Date to datetime. The explicit format expects values shaped like
# "20-May-87"; pandas raises ValueError on any row that does not match.
# NOTE(review): if the CSV mixes date styles this line will fail -- confirm
# against the data file.
df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%y')

# Sort data by Date
df = df.sort_values('Date').reset_index(drop=True)

# Ensure Price is numeric *before* counting and dropping missing values:
# errors='coerce' turns unparseable entries into NaN, and those must be
# included in the report and removed, otherwise they leak into the
# rolling statistics, the ADF test and the change point detection.
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')

# Check for missing values (now includes prices that failed numeric coercion)
print("\nMissing values:\n", df.isnull().sum())

# Drop rows without a usable price and rebuild a gap-free positional index so
# row positions used later (rolling windows, change point indices) line up
# with the DataFrame index.
df = df.dropna(subset=['Price']).reset_index(drop=True)

# =============================
# 4. Exploratory Data Analysis (EDA)
# =============================
# Line chart of the whole price history, built through the Axes interface.
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(df['Date'], df['Price'], color='blue')
ax.set_title('Brent Oil Prices (1987 - 2022)')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD per barrel)')
ax.grid(True)
plt.show()

# =============================
# 5. Time Series Properties (Trend & Stationarity)
# =============================
# Rolling statistics over a fixed window of 365 observations. With a fixed
# integer window the first 364 entries are NaN (a full window is required).
# NOTE(review): the window counts rows, not calendar days -- if the CSV only
# contains trading days, 365 rows span more than one calendar year. Confirm
# the intended span before relying on the "365 days" labels below.
rolling_mean = df['Price'].rolling(window=365).mean()
rolling_std = df['Price'].rolling(window=365).std()

# Plot against Date (like the price chart in section 4) rather than the
# integer row index, so the x-axis is readable and comparable across figures.
plt.figure(figsize=(15,6))
plt.plot(df['Date'], df['Price'], label='Original')
plt.plot(df['Date'], rolling_mean, color='red', label='Rolling Mean (365 days)')
plt.plot(df['Date'], rolling_std, color='black', label='Rolling Std (365 days)')
plt.legend(loc='best')
plt.title('Rolling Mean & Standard Deviation')
plt.xlabel('Date')
plt.ylabel('Price (USD per barrel)')
plt.show()

# ADF Test
# adfuller returns a tuple; the first two entries are the test statistic and
# the p-value. NOTE: `result` is rebound by the change point step in
# section 6, so read the ADF output here.
result = adfuller(df['Price'])
print('ADF Statistic:', result[0])
print('p-value:', result[1])

# =============================
# 6. Change Point Detection
# =============================
# ruptures works on a plain array, so pull the prices out of the DataFrame.
price_series = df['Price'].to_numpy()

# PELT search with the "rbf" cost.
# NOTE: per the ruptures documentation "rbf" is the kernel-based (Gaussian)
# cost, which targets general changes in distribution; the plain mean-shift
# cost is "l2".
model = "rbf"
algo = rpt.Pelt(model=model)
algo.fit(price_series)
# A larger penalty yields fewer change points; tune as needed.
result = algo.predict(pen=10)

# Draw the series with the detected segment boundaries.
rpt.display(price_series, result)
plt.show()

# =============================
# 7. Placeholder: Overlay Major Events (Future Step)
# =============================
# Example structure for event dataset (to be created manually/researched)
# events = pd.DataFrame({
#     'Event': [
#         'Gulf War', '2008 Financial Crisis', 'COVID-19 Pandemic', 'OPEC Production Cut'
#     ],
#     'Date': [
#         '1990-08-02', '2008-09-15', '2020-03-11', '2016-11-30'
#     ]
# })
# events['Date'] = pd.to_datetime(events['Date'])

# Plot events on price chart
# plt.figure(figsize=(15,6))
# plt.plot(df['Date'], df['Price'], label='Brent Oil Price')
# for idx, row in events.iterrows():
#     plt.axvline(row['Date'], color='red', linestyle='--', alpha=0.7)
#     plt.text(row['Date'], df['Price'].max()*0.9, row['Event'], rotation=90, fontsize=9)
# plt.title('Brent Oil Prices with Major Events')
# plt.xlabel('Date')
# plt.ylabel('Price (USD per barrel)')
# plt.legend()
# plt.show()

# =============================
# End of Analysis Setup
# =============================
