# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.varmax import VARMAX
from arch import arch_model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler

# Set plot style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("viridis")


#  Data Loading and Preprocessing

In [6]:
import pandas as pd
import numpy as np
from datetime import datetime

# Load the dataset
df = pd.read_csv(r'C:\Users\MMM\Documents\10 Academy File\KAIM-Week-10\data\Copy of BrentOilPrices.csv', parse_dates=['Date'], index_col='Date')

# Handle missing values and outliers
df['Price'].fillna(method='ffill', inplace=True)
df = df[df['Price'] > 0]  # Filter out any zero or negative prices if present

# Feature Engineering: Calculate rolling means or differences if needed
df['Price_diff'] = df['Price'].diff().dropna()


  df = pd.read_csv(r'C:\Users\MMM\Documents\10 Academy File\KAIM-Week-10\data\Copy of BrentOilPrices.csv', parse_dates=['Date'], index_col='Date')
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Price'].fillna(method='ffill', inplace=True)
  df['Price'].fillna(method='ffill', inplace=True)


# Exploratory Data Analysis (EDA)

In [None]:
# Plot Brent oil prices
plt.figure(figsize=(12, 6))
plt.plot(df['Price'], color='purple')
plt.title('Brent Oil Prices Over Time')
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()

# GDP vs. Brent oil prices correlation
plt.figure(figsize=(12, 6))
sns.scatterplot(x=df['GDP'], y=df['Price'], color='blue')
plt.title('GDP vs Brent Oil Prices')
plt.xlabel('GDP')
plt.ylabel('Brent Oil Price')
plt.show()


# Model Building

A. ARIMA and GARCH

In [None]:
# ARIMA Model
arima_model = ARIMA(df['Price'], order=(1, 1, 1))
arima_fit = arima_model.fit()
print(arima_fit.summary())

# GARCH Model
garch_model = arch_model(df['Price'], vol='Garch', p=1, q=1)
garch_fit = garch_model.fit(disp="off")
print(garch_fit.summary())


B. VAR (Vector Autoregression)

In [None]:
# VAR Model with Oil and GDP
var_model = VARMAX(df[['Price', 'GDP']], order=(1, 1))
var_fit = var_model.fit(disp=False)
print(var_fit.summary())


C. Markov-Switching Model

In [None]:
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression

# Fit Markov Switching Model
ms_model = MarkovRegression(df['Price'], k_regimes=2, trend='c', switching_variance=True)
ms_fit = ms_model.fit()
print(ms_fit.summary())


D. LSTM for Sequence Prediction

In [None]:
# Prepare data for LSTM
train_size = int(len(df_scaled) * 0.8)
train, test = df_scaled[:train_size], df_scaled[train_size:]

# Reshape for LSTM
def create_sequences(data, seq_length=10):
    x, y = [], []
    for i in range(len(data) - seq_length):
        x.append(data[i:i+seq_length])
        y.append(data[i+seq_length, 0])  # predicting the 'Price'
    return np.array(x), np.array(y)

seq_length = 10
X_train, y_train = create_sequences(train, seq_length)
X_test, y_test = create_sequences(test, seq_length)

# LSTM Model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))


# Model Evaluation

In [None]:
from sklearn.metrics import mean_squared_error

# ARIMA Predictions
arima_pred = arima_fit.forecast(steps=len(test))
rmse_arima = np.sqrt(mean_squared_error(df['Price'].iloc[-len(test):], arima_pred))
print(f'ARIMA RMSE: {rmse_arima}')

# LSTM Predictions
lstm_pred = model.predict(X_test)
lstm_pred = scaler.inverse_transform(np.concatenate([lstm_pred, np.zeros_like(lstm_pred)], axis=1))[:, 0]
rmse_lstm = np.sqrt(mean_squared_error(df['Price'].iloc[-len(lstm_pred):], lstm_pred))
print(f'LSTM RMSE: {rmse_lstm}')


# Insight Generation

This notebook provides a thorough framework for analyzing oil price data using a variety of time series and econometric models, as well as considering external factors such as economic and political influences. Let me know if you’d like additional help with any specific section!