In [None]:
# =============================
# 1. Import Libraries
# =============================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.stattools import adfuller
import ruptures as rpt

# Render every float in pandas output with exactly two decimal places.
pd.set_option('display.float_format', '{:.2f}'.format)

# =============================
# 2. Load and Inspect Data
# =============================
file_path = "../data/BrentOilPrices.csv"  # adjust path if needed
df = pd.read_csv(file_path)

print("Initial Data Overview:")
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
df.info()
print(df.describe())

# =============================
# 3. Data Cleaning
# =============================
# Convert Date to datetime.
# NOTE(review): a strict format raises ValueError on any row that deviates
# from it -- confirm the file uses this single date format throughout.
df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%y')

# Coerce Price to numeric *before* looking for missing values: errors='coerce'
# turns every unparseable entry into NaN, and those rows must be counted and
# dropped together with the values that were missing from the start.
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')

# Report missing values (including the ones produced by the coercion above)
print("\nMissing values:\n", df.isnull().sum())

# Drop rows without a usable price
df = df.dropna(subset=['Price'])

# Sort chronologically and rebuild a gap-free 0..n-1 index so that index
# labels and row positions agree in the later steps.
df = df.sort_values('Date').reset_index(drop=True)

# =============================
# 4. Exploratory Data Analysis (EDA)
# =============================
# Line chart of the full price history, drawn through the Axes object
# instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(df['Date'], df['Price'], color='blue')
ax.set_title('Brent Oil Prices (1987 - 2022)')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD per barrel)')
ax.grid(True)
plt.show()

# =============================
# 5. Time Series Properties (Trend & Stationarity)
# =============================
# An integer rolling window counts rows (observations), not calendar days, so
# the legend reports "observations"; the span in days depends on how the rows
# are spaced in the input file.
window = 365
rolling_mean = df['Price'].rolling(window=window).mean()
rolling_std = df['Price'].rolling(window=window).std()

# Plot against Date (not the integer row index) so this chart shares its
# x-axis with the price chart drawn earlier.
plt.figure(figsize=(15,6))
plt.plot(df['Date'], df['Price'], label='Original')
plt.plot(df['Date'], rolling_mean, color='red',
         label=f'Rolling Mean ({window} observations)')
plt.plot(df['Date'], rolling_std, color='black',
         label=f'Rolling Std ({window} observations)')
plt.legend(loc='best')
plt.title('Rolling Mean & Standard Deviation')
plt.xlabel('Date')
plt.show()

# ADF Test
# Drop any remaining NaNs so the test only sees observed prices.
result = adfuller(df['Price'].dropna())
print('ADF Statistic:', result[0])
print('p-value:', result[1])

# =============================
# 6. Change Point Detection
# =============================
# Work on a plain float array without NaNs. Positions in this array match
# df row positions only when Price has no missing values.
# NOTE(review): NaNs are assumed to break the ruptures cost computation --
# confirm against the ruptures documentation.
price_series = df['Price'].dropna().to_numpy(dtype=float)

# Apply change point detection (Pelt method).
# "rbf" is the kernel-based cost, sensitive to changes in the distribution of
# the signal; use "l2" instead when only shifts in the mean are of interest.
model = "rbf"
algo = rpt.Pelt(model=model).fit(price_series)
# Stored under its own name so the ADF output held in `result` is not
# overwritten. The penalty parameter can be tuned.
change_points = algo.predict(pen=10)

# Visualize detected change points
rpt.display(price_series, change_points)
plt.show()

# =============================
# 7. Placeholder: Overlay Major Events (Future Step)
# =============================
# Example structure for event dataset (to be created manually/researched)
# events = pd.DataFrame({
#     'Event': [
#         'Gulf War', '2008 Financial Crisis', 'COVID-19 Pandemic', 'OPEC Production Cut'
#     ],
#     'Date': [
#         '1990-08-02', '2008-09-15', '2020-03-11', '2016-11-30'
#     ]
# })
# events['Date'] = pd.to_datetime(events['Date'])

# Plot events on price chart
# plt.figure(figsize=(15,6))
# plt.plot(df['Date'], df['Price'], label='Brent Oil Price')
# for idx, row in events.iterrows():
#     plt.axvline(row['Date'], color='red', linestyle='--', alpha=0.7)
#     plt.text(row['Date'], df['Price'].max()*0.9, row['Event'], rotation=90, fontsize=9)
# plt.title('Brent Oil Prices with Major Events')
# plt.xlabel('Date')
# plt.ylabel('Price (USD per barrel)')
# plt.legend()
# plt.show()

# =============================
# End of Analysis Setup
# =============================


: 

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ruptures as rpt
from statsmodels.tsa.stattools import adfuller

# Plot appearance: seaborn "whitegrid" style and a (14, 6) default figure size
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (14, 6)})

# Step 1: Load the data
# Replace 'brent_oil_prices.csv' with your actual file name
df = pd.read_csv("brent_oil_prices.csv")

# Preview the first rows under a heading
print("Initial data preview:", df.head(), sep="\n")

# Step 2: Data Cleaning
# Ensure datetime format, sort by date, and rebuild a 0..n-1 index so index
# labels and row positions agree after the sort.
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date').reset_index(drop=True)

# Make sure Price is numeric; unparseable entries become NaN and are then
# handled together with the values that were missing from the start.
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')

# Check for missing values
print("\nMissing values:")
print(df.isnull().sum())

# Forward fill gaps. Series.ffill() replaces fillna(method='ffill'), whose
# `method` argument is deprecated in recent pandas releases.
df['Price'] = df['Price'].ffill()
# A forward fill cannot fill NaNs that precede the first valid price, so any
# such leading rows are dropped.
df = df.dropna(subset=['Price']).reset_index(drop=True)

# Step 3: Exploratory Data Analysis (EDA)
# Brent price history as a single labelled line, built on an explicit Axes
fig, ax = plt.subplots()
ax.plot(df['Date'], df['Price'], label='Brent Price', color='blue')
ax.set(
    title='Brent Oil Prices Over Time',
    xlabel='Date',
    ylabel='Price (USD)',
)
ax.legend()
plt.show()

# Check for stationarity using ADF Test
# Drop any NaNs first (a forward fill leaves leading NaNs in place) so the
# test only sees observed prices.
adf_result = adfuller(df['Price'].dropna())
print("\nADF Statistic:", adf_result[0])
print("p-value:", adf_result[1])
if adf_result[1] < 0.05:
    print("=> Series is stationary.")
else:
    print("=> Series is non-stationary.")

# Optionally apply differencing if needed.
# The first row of Price_diff is NaN by construction. Chaining .dropna() here
# has no effect, because assigning to a column re-aligns on the index and
# re-inserts the NaN; call .dropna() where the differenced series is consumed.
df['Price_diff'] = df['Price'].diff()

# Step 4: Change Point Detection
# Runs on the raw 'Price' column; swap in df['Price_diff'].dropna().values
# to analyse the differenced series instead.
signal = df['Price'].values

# Detector settings (tune as needed)
model = "l2"  # other cost models: 'rbf', 'linear', etc.
penalty = 10

# Build the PELT detector, fit it to the signal, then extract breakpoints
algo = rpt.Pelt(model=model)
algo.fit(signal)
result = algo.predict(pen=penalty)

# Plot the signal with the detected change points
rpt.display(signal, result)
plt.title("Change Point Detection in Brent Oil Prices")
plt.show()

# Step 5: Output change point dates
print("\nDetected Change Points (Indices and Dates):")
# Look values up column-first by position; the final breakpoint is skipped
# because it marks the end of the data rather than a change.
date_column, price_column = df['Date'], df['Price']
for idx in result[:-1]:
    date, price = date_column.iloc[idx], price_column.iloc[idx]
    print(f"Change at index {idx}, Date: {date}, Price: {price}")

