# In [None]:
# task-1.ipynb

# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA

# Step 1: Load and Preprocess Data

# Read the raw Brent price CSV (relative path, resolved against the
# current working directory)
data_path = "../data/BrentOilPrices.csv"
df = pd.read_csv(filepath_or_buffer=data_path)

# Preview the leading rows as a quick sanity check on the parsed columns
print("First few rows of data:", df.head(), sep="\n")

# Step 2: Data Preprocessing
# Convert the Date column to datetime. pandas infers the format on its own;
# the `infer_datetime_format` flag is deprecated since pandas 2.0 (it has no
# effect there beyond emitting a warning), so it is intentionally not passed.
df['Date'] = pd.to_datetime(df['Date'])

# Drop rows that contain missing values, if any
df.dropna(inplace=True)

# Display data info after preprocessing (dtypes and non-null counts)
print("\nData Info After Preprocessing:")
df.info()

# Step 3: Exploratory Data Analysis (EDA)

# Draw the full price history as a line chart on an explicit Axes object
_, price_ax = plt.subplots(figsize=(14, 7))
sns.lineplot(data=df, x='Date', y='Price', ax=price_ax)
price_ax.set_title('Brent Oil Prices Over Time')
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Price (USD per Barrel)')
plt.show()

# Step 4: Define Analysis Workflow and Assumptions

"""
Data Analysis Workflow
1. Load and Preprocess Data: Load the Brent oil prices data and preprocess it (date parsing, missing values handling).
2. Exploratory Data Analysis (EDA): Explore general trends and statistical properties of the dataset.
3. Model Selection: Evaluate suitable time series models, such as ARIMA and GARCH.
4. Change Point Analysis: Detect major changes or shifts in Brent oil prices, potentially using models like Bayesian change point detection.
5. Interpretation and Insights: Generate insights on price fluctuations and prepare reports for stakeholders.

Assumptions and Limitations
- The data is assumed to be a reliable representation of historical Brent oil prices.
- Only historical prices are used; other economic factors are not directly included in this dataset.
- Forecasting may have limitations due to unforeseen political or economic events.
"""

# Step 5: ARIMA Model Explanation and Fitting

# Fit an ARIMA model
def fit_arima_model(df, order=(5, 1, 0)):
    """Fit an ARIMA model to the Brent oil price data and print its summary.

    Args:
        df: DataFrame with a 'Price' column holding the series to model.
        order: The (p, d, q) ARIMA order. Defaults to (5, 1, 0), the value
            that was previously hard-coded here; it is only a starting
            point and still needs tuning.

    Returns:
        The fitted results object returned by ``ARIMA(...).fit()``.
    """
    model = ARIMA(df['Price'], order=order)
    model_fit = model.fit()
    # Print the fit summary so it appears directly in the notebook output.
    print(model_fit.summary())
    return model_fit

# Fit the model and summarize results
arima_model = fit_arima_model(df)

# Step 6: Residuals Analysis

# Plot residuals of the model
def plot_residuals(model_fit):
    """Show a histogram (with KDE overlay) of the fitted model's residuals.

    Args:
        model_fit: A fitted model result exposing a ``resid`` attribute.
    """
    _, resid_ax = plt.subplots(figsize=(10, 6))
    sns.histplot(model_fit.resid, kde=True, ax=resid_ax)
    resid_ax.set_title('Residuals of ARIMA Model')
    resid_ax.set_xlabel("Residuals")
    plt.show()

plot_residuals(arima_model)

# Step 7: Summary and Next Steps

"""
Summary and Next Steps
- We've outlined a data analysis workflow and performed basic EDA.
- An ARIMA(5,1,0) model was fitted as a baseline for trend and autocorrelation; it has no seasonal terms, so seasonality is not modelled yet.
- Next steps include exploring additional models (e.g., GARCH) and implementing change point analysis to detect significant shifts.
- Document assumptions, limitations, and reporting formats for stakeholders.
"""

