## **Data Preprocessing**

In [None]:
import pandas as pd
import numpy as np

# Load the dataset, parsing the 'date' column and using it as the index
df = pd.read_csv('MarketPricePrediction.csv', index_col = 'date', parse_dates = True)

# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly rather than wrapped in print().
df.info()

# Count missing index labels; as the last expression this is the cell's output
df.index.isna().sum()


In [None]:
# Preview the first rows of the loaded frame
df.head()

## **Exploratory Data Analysis (EDA)**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot the priceMod series against the frame's index
fig, ax = plt.subplots(figsize = (12, 6))
ax.plot(df['priceMod'])
ax.set_title('Time Series Data')
ax.set_xlabel('Date')
ax.set_ylabel('priceMod')
plt.show()

# Same series through the pandas plotting API, with the top and right
# frame lines hidden
price_ax = df['priceMod'].plot(kind='line', figsize=(8, 4), title='Price Mode')
price_ax.spines[['top', 'right']].set_visible(False)

## **Feature Engineering**

In [None]:
price_mod = df['priceMod']

# lag_k holds the priceMod value from k rows earlier, for k = 1..12
for lag in range(1, 13):
    df[f'lag_{lag}'] = price_mod.shift(lag)

# Mean and standard deviation of priceMod over a trailing window of 12 rows
trailing_window = price_mod.rolling(window = 12)
df['rolling_mean'] = trailing_window.mean()
df['rolling_std'] = trailing_window.std()

# Shifting and rolling leave NaNs in the leading rows; drop every row that
# has a missing value in any column (modifies df in place)
df.dropna(inplace = True)

print(df.head())

## **Model Selection and Training**

**ARIMA**

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Number of final observations that are held out of training and then forecast
steps = 12

# The evaluation cell scores arima_forecast against df['priceMod'][-steps:].
# A model fitted on the full series would forecast the periods *after* those
# rows, so the last `steps` observations are excluded from training and the
# forecast below covers exactly that held-out window.
arima_train = df['priceMod'].iloc[:-steps]

# Fit ARIMA Model with order (p, d, q) = (5, 1, 0)
arima_model = ARIMA(arima_train, order = (5, 1, 0))
arima_result = arima_model.fit()
print(arima_result.summary())

print("****************************************************")
# Forecast the held-out window
arima_forecast = arima_result.forecast(steps = steps)
print(arima_forecast)

**Prophet**

In [None]:

from prophet import Prophet

# Prepare data for Prophet: the date index becomes column 'ds' and priceMod becomes 'y'
df_prophet = df.reset_index()[['date', 'priceMod']].rename(columns = {'date': 'ds', 'priceMod': 'y'})

# The evaluation cell compares the last 12 predictions with the last 12
# observed prices, so those rows are held out of training. Fitting on the
# full frame and extending it by 12 future periods would instead score
# predictions for dates that have no observed value.
prophet_holdout = 12
prophet_train = df_prophet.iloc[:-prophet_holdout]

# Fit Prophet model on the training rows only
prophet_model = Prophet()
prophet_model.fit(prophet_train)

# Forecast at every observed timestamp (training rows followed by the
# held-out rows) so the tail of the forecast lines up with the tail of df.
# NOTE(review): assumes the rows of df are in chronological order — confirm.
future = df_prophet[['ds']]
prophet_forecast = prophet_model.predict(future)
print(prophet_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())



**LSTM**

In [None]:
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

# Scaling data: priceMod is passed as a single-column 2-D array, the layout
# the scaler's fit_transform is given here.
# NOTE(review): the scaler is fitted on the whole series, before the
# train/test split further down — confirm this is intended.
price_matrix = df['priceMod'].to_numpy().reshape(-1, 1)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(price_matrix)

# Prepare the data for LSTM
def create_dataset(data, time_step=1):
    """Slice a series into sliding windows and their next-step targets.

    Args:
        data: 2-D array of shape (n, k); only column 0 is read.
        time_step: number of consecutive rows in each input window.

    Returns:
        A tuple (X, Y) of numpy arrays. X[i] is data[i:i + time_step, 0] and
        Y[i] is data[i + time_step, 0]. There are len(data) - time_step
        samples; both arrays are empty when the series is too short to form
        a single window/target pair.
    """
    # The last valid window starts at len(data) - time_step - 1, because its
    # target sits at index len(data) - 1. range() must therefore stop at
    # len(data) - time_step, otherwise the final sample is silently dropped.
    n_samples = len(data) - time_step
    X = [data[i:i + time_step, 0] for i in range(n_samples)]
    Y = [data[i + time_step, 0] for i in range(n_samples)]
    return np.array(X), np.array(Y)

# Each sample is a window of 12 consecutive scaled values
time_step = 12
X, Y = create_dataset(scaled_data, time_step)
# Add a trailing axis of size 1 so X is (samples, time_step, 1), matching
# the input_shape given to the first LSTM layer
X = X.reshape((X.shape[0], X.shape[1], 1))

# Split data into train and test without shuffling: the leading ~80% of the
# windows train the model, the remainder is kept for testing
train_size = int(0.8 * len(X))
test_size = len(X) - train_size
X_train, X_test = X[:train_size], X[train_size:]
Y_train, Y_test = Y[:train_size], Y[train_size:]

# LSTM model: two stacked 50-unit LSTM layers, then Dense(25) and Dense(1)
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(50, return_sequences=True, input_shape=(time_step, 1)),
    tf.keras.layers.LSTM(50, return_sequences=False),
    tf.keras.layers.Dense(25),
    tf.keras.layers.Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model: a single epoch with one sample per batch
model.fit(X_train, Y_train, batch_size=1, epochs=1)

# Predict on both splits and map the outputs back through the scaler so
# they are expressed in the original units of priceMod
train_predict = scaler.inverse_transform(model.predict(X_train))
test_predict = scaler.inverse_transform(model.predict(X_test))


## **Model Evaluation**

**ARIMA Evaluation**

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# ARIMA evaluation: score the forecast against the final `steps` rows of priceMod
actual_values = df['priceMod'].iloc[-steps:]

print("Length of actual values: ", len(actual_values))
print("Length of forecast values: ", len(arima_forecast))

# Mean absolute error, mean squared error and its square root
arima_mae = mean_absolute_error(y_true=actual_values, y_pred=arima_forecast)
arima_mse = mean_squared_error(y_true=actual_values, y_pred=arima_forecast)
arima_rmse = np.sqrt(arima_mse)

print("ARIMA MAE: ", arima_mae, "\nARIMA MSE: ", arima_mse, "\nARIMA RMSE: ", arima_rmse)


**Prophet Evaluation**

In [None]:
# Prophet Evaluation: last 12 rows of priceMod against the last 12 rows of
# the Prophet forecast's 'yhat' column
prophet_actual = df['priceMod'].iloc[-12:]
prophet_predicted = prophet_forecast['yhat'].iloc[-12:]

prophet_mae = mean_absolute_error(prophet_actual, prophet_predicted)
prophet_mse = mean_squared_error(prophet_actual, prophet_predicted)
prophet_rmse = np.sqrt(prophet_mse)

print("Prophet MAE: ", prophet_mae, "\nProphet MSE: ", prophet_mse, "\nProphet RMSE: ", prophet_rmse)

**LSTM Evaluation**

In [None]:
# LSTM Evaluation
# test_predict has already been passed through scaler.inverse_transform,
# while Y_test still holds the scaled targets produced from scaled_data.
# Map the targets back through the same scaler so both sides of each metric
# are in the original units of priceMod.
Y_test_actual = scaler.inverse_transform(Y_test.reshape(-1, 1))

lstm_mae = mean_absolute_error(Y_test_actual, test_predict)
lstm_mse = mean_squared_error(Y_test_actual, test_predict)
lstm_rmse = np.sqrt(lstm_mse)

print("LSTM MAE: ", lstm_mae, "\nLSTM MSE: ", lstm_mse, "\nLSTM RMSE: ", lstm_rmse)

## **Tuning and Validation**

In [None]:
# Searching the order grid on the whole series is computationally expensive,
# so the search runs on a subset of 1000 observations with
# p in 0..3, d = 1, q in 0..3, plus a seasonal component (m = 12, D = 1).
from pmdarima import auto_arima

# Hold out the final 12 observations: the tuned model is fitted without them
# and its 12-step forecast is then scored against them. Fitting on the full
# series would produce a forecast for periods after the data ends, which
# cannot be compared with the last 12 observed values.
tuning_holdout = 12
tuning_train = df['priceMod'].iloc[:-tuning_holdout]
tuning_test = df['priceMod'].iloc[-tuning_holdout:]

subset_df = tuning_train.iloc[:1000]
auto_arima_model = auto_arima(subset_df, start_p = 0, start_q = 0,
                              max_p = 3, max_q = 3, seasonal = True, m = 12,
                              d = 1, D = 1, trace = True,
                              suppress_warnings = True, stepwise = True)
print(auto_arima_model.summary())

# Refit on the training series with the selected orders. The search above is
# seasonal, so (p, d, q) was chosen jointly with a seasonal (P, D, Q, m) order;
# both are carried over so the refit model is the one that was selected.
best_params = auto_arima_model.order
best_seasonal_params = auto_arima_model.seasonal_order

best_arima_model = ARIMA(tuning_train, order = best_params,
                         seasonal_order = best_seasonal_params)
best_arima_result = best_arima_model.fit()
print(best_arima_result.summary())

best_arima_forecast = best_arima_result.forecast(steps = tuning_holdout)
print(best_arima_forecast)

# Evaluating the best ARIMA model on the held-out observations
best_arima_mae = mean_absolute_error(tuning_test, best_arima_forecast)
best_arima_mse = mean_squared_error(tuning_test, best_arima_forecast)
best_arima_rmse = np.sqrt(best_arima_mse)

print("***********************************")
print("Best ARIMA MAE: ", best_arima_mae, "\nBest ARIMA MSE: ", best_arima_mse, "\nBest ARIMA RMSE: ", best_arima_rmse)