In [None]:
# task-2.ipynb

# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model

# Step 1: Load and Preprocess Data
# The path is relative to the directory the notebook is run from.
data_path = "../data/BrentOilPrices.csv"
df = pd.read_csv(data_path)

# Convert Date to datetime and set as index.
# `infer_datetime_format` is deprecated in pandas 2.x (format inference is
# now the default behaviour), so it is no longer passed.
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
# Differencing and the ARIMA/GARCH fits below treat consecutive rows as
# consecutive observations, so make sure the rows are in chronological order.
# A stable sort keeps the file order of any rows sharing the same date.
df.sort_index(kind='mergesort', inplace=True)

# Visualize the data: one line chart of the whole price history.
price_fig, price_ax = plt.subplots(figsize=(14, 7))
# df is indexed by 'Date' at this point, so x='Date' refers to the index.
sns.lineplot(data=df, x='Date', y='Price', ax=price_ax)
price_ax.set_title('Brent Oil Prices Over Time')
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Price (USD per Barrel)')
plt.show()

# Step 2: ARIMA Model Tuning
# (p, d, q): five autoregressive lags, first-order differencing, no MA terms.
arima_order = (5, 1, 0)
price_series = df['Price']

# Build the model on the price levels, estimate it, and report the fit.
arima_model = ARIMA(price_series, order=arima_order)
arima_fit = arima_model.fit()
print(arima_fit.summary())

# Step 3: GARCH Model Implementation
# First differences of the price; the leading NaN produced by diff() is dropped.
raw_diff = df['Price'].diff()
price_diff = raw_diff.dropna()

# GARCH(1, 1) on the differenced series, fitted without optimizer output.
# NOTE(review): the model is fitted to raw price changes, not returns --
# confirm this is the intended input.
garch_spec = dict(vol='Garch', p=1, q=1)
garch_model = arch_model(price_diff, **garch_spec)
garch_fit = garch_model.fit(disp='off')
print(garch_fit.summary())

# Step 4: Model Comparison
# Plot residuals for ARIMA
arima_resid = arima_fit.resid

# With d-th order differencing the first d residuals are start-up artifacts:
# the model has no earlier observation to difference against, so the initial
# residual is roughly the price level itself rather than a forecast error.
# Leave them out of the histogram so they do not distort the distribution.
n_diff = arima_fit.model.order[1]
plt.figure(figsize=(10, 6))
sns.histplot(arima_resid.iloc[n_diff:], kde=True)
plt.title('Residuals of ARIMA Model')
plt.xlabel('Residuals')
plt.show()

# Plot GARCH model volatility: the fitted conditional volatility series.
garch_vol = garch_fit.conditional_volatility
vol_fig, vol_ax = plt.subplots(figsize=(14, 7))
vol_ax.plot(garch_vol)
vol_ax.set(
    title='Conditional Volatility from GARCH Model',
    xlabel='Date',
    ylabel='Volatility',
)
plt.show()

# Summary
# NOTE: the triple-quoted text below is a bare string expression, not a
# comment. It has no effect when this file is run as a script; as the last
# expression of a notebook cell it may be echoed as the cell's output.
"""
Summary:
- The ARIMA model captures the overall trend but might miss volatility patterns.
- The GARCH model effectively captures volatility clustering.
Next Steps:
- Incorporate diagnostics to evaluate the models comprehensively.
- Explore additional econometric models if needed.
"""
