In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import itertools

In [None]:
# Step 1: Load the dataset
data = pd.read_excel('../datasets/master_dataset/master_dataset.xlsx')  # Path is relative to the notebook's working directory

# Step 2: Define target and exogenous columns
target_column = "Quantity"  # The main column for forecasting
exogenous_columns = ["Clicks", "Impressions"]  # Exogenous variables

# Step 3: Prepare the data
# Drop incomplete rows from the target and the regressors *together*.
# Dropping NaNs from each one separately and then truncating both to a common
# length pairs up values that came from different rows whenever the gaps in
# the columns are not identical.
model_columns = [target_column] + exogenous_columns
complete_rows = data.dropna(subset=model_columns)

time_series = complete_rows[target_column]
exogenous_data = complete_rows[exogenous_columns]

# Both objects are cut from the same filtered frame, so they must share an index.
assert time_series.index.equals(exogenous_data.index), "target and exogenous rows are misaligned"

# Display first few rows
print(f"First 5 rows of the target column '{target_column}':\n", time_series.head())
print(f"First 5 rows of exogenous variables:\n", exogenous_data.head())

In [None]:
# Step 4: Check stationarity of the target column
adf_test = adfuller(time_series)
print(f"Dickey-Fuller Test Statistic: {adf_test[0]}")
print(f"p-value: {adf_test[1]}")

if adf_test[1] > 0.05:
    print(f"The target column '{target_column}' is non-stationary. Applying differencing...")
    # First-order differencing; the leading NaN it produces is dropped.
    time_series_diff = time_series.diff().dropna()
else:
    print(f"The target column '{target_column}' is stationary.")
    time_series_diff = time_series

# Give the target a fresh 0-based index. statsmodels refuses pandas inputs
# whose endog and exog indices differ, so both sides are renumbered the same way.
time_series_diff = time_series_diff.reset_index(drop=True)

# Keep only the trailing rows of the regressors that still have a matching
# target value. Slicing by the length difference (instead of always dropping
# one row) keeps this cell idempotent: re-running it does not trim again.
# NOTE(review): the regressors are left in levels even when the target is
# differenced - confirm that this is the intended specification.
rows_to_skip = len(exogenous_data) - len(time_series_diff)
exogenous_data = exogenous_data.iloc[rows_to_skip:].reset_index(drop=True)

In [None]:
# Step 5: Implement and evaluate each model

# 5.1 Autoregression (AR)
def run_autoregression(series):
    """Fit a lag-1 autoregression to ``series``, print its summary and plot the fit.

    The in-sample predictions from position 1 up to the last observation are
    drawn as a dashed line over the corresponding actual values. Nothing is
    returned.
    """
    fitted = AutoReg(series, lags=1).fit()

    print("\nAutoregression (AR) Model Summary:")
    print(fitted.summary())

    # Predictions are requested from position 1 onwards, matching the
    # ``series[1:]`` slice that is plotted as the actual values.
    last_position = len(series) - 1
    in_sample = fitted.predict(start=1, end=last_position)

    curves = (
        (series[1:], {"label": "Actual"}),
        (in_sample, {"label": "Predicted", "linestyle": "--"}),
    )

    plt.figure(figsize=(10, 6))
    for values, line_kwargs in curves:
        plt.plot(values, **line_kwargs)
    plt.title("Autoregression (AR) Model")
    plt.legend()
    plt.show()

In [None]:
# 5.2 Moving Average (rolling-mean smoothing of the series; no MA(q) model is fitted)
def run_moving_average(series, window_size=3):
    """Overlay a rolling mean of ``series`` on the original values.

    This is a smoothing pass (``series.rolling(...).mean()``); no MA(q) model
    is estimated. Nothing is returned.

    Parameters
    ----------
    series : pandas.Series
        Values to smooth and plot.
    window_size : int, default 3
        Number of consecutive observations averaged for each point.
    """
    smoothed = series.rolling(window=window_size).mean()

    curves = [
        (series, dict(label="Original")),
        (smoothed, dict(label="Moving Average", linestyle="--")),
    ]

    plt.figure(figsize=(10, 6))
    for values, line_kwargs in curves:
        plt.plot(values, **line_kwargs)
    plt.title("Moving Average (MA) Model")
    plt.legend()
    plt.show()

In [None]:
# 5.3 ARIMA
def run_arima(series, p=1, d=1, q=1):
    """Fit an ARIMA(p, d, q) model via ``SARIMAX``, print its summary and plot the fit.

    Parameters
    ----------
    series : pandas.Series
        Observations to model.
    p, d, q : int, default 1
        Autoregressive order, differencing order and moving-average order,
        passed to ``SARIMAX`` as ``order=(p, d, q)``.

    The in-sample predictions from position 1 to the last observation are
    plotted as a dashed line over ``series[1:]``. Nothing is returned.
    """
    spec = SARIMAX(series, order=(p, d, q))
    fitted = spec.fit(disp=False)  # disp=False silences the optimiser output

    print("\nARIMA Model Summary:")
    print(fitted.summary())

    final_position = len(series) - 1
    fitted_values = fitted.predict(start=1, end=final_position)

    plt.figure(figsize=(10, 6))
    plt.plot(series[1:], label="Actual")
    plt.plot(fitted_values, label="Predicted", linestyle="--")
    plt.title("ARIMA Model")
    plt.legend()
    plt.show()

In [None]:
# 5.4 SARIMA
def run_sarima(series, p=1, d=1, q=1, P=1, D=1, Q=1, s=12):
    """Fit a seasonal ARIMA model via ``SARIMAX``, print its summary and plot the fit.

    Parameters
    ----------
    series : pandas.Series
        Observations to model.
    p, d, q : int, default 1
        Non-seasonal orders, passed as ``order=(p, d, q)``.
    P, D, Q : int, default 1
        Seasonal orders.
    s : int, default 12
        Season length in observations; together with ``P``, ``D`` and ``Q``
        it forms ``seasonal_order=(P, D, Q, s)``.

    The in-sample predictions from position 1 to the last observation are
    plotted as a dashed line over ``series[1:]``. Nothing is returned.
    """
    non_seasonal = (p, d, q)
    seasonal = (P, D, Q, s)
    fitted = SARIMAX(series, order=non_seasonal, seasonal_order=seasonal).fit(disp=False)

    print("\nSARIMA Model Summary:")
    print(fitted.summary())

    final_position = len(series) - 1
    fitted_values = fitted.predict(start=1, end=final_position)

    plt.figure(figsize=(10, 6))
    plt.plot(series[1:], label="Actual")
    plt.plot(fitted_values, label="Predicted", linestyle="--")
    plt.title("SARIMA Model")
    plt.legend()
    plt.show()

In [None]:
# 5.5 ARIMAX
def run_arimax(series, exog, p=1, d=1, q=1):
    """Fit an ARIMA(p, d, q) model with exogenous regressors, print its summary and plot the fit.

    Parameters
    ----------
    series : pandas.Series
        Target observations.
    exog : pandas.DataFrame
        Regressors, one row per observation in ``series``. Rows are matched
        to ``series`` by position: if the two indices differ, a copy of
        ``exog`` is relabelled with ``series.index`` before fitting. The
        caller's frame is never modified.
    p, d, q : int, default 1
        Orders passed to ``SARIMAX`` as ``order=(p, d, q)``.

    Raises
    ------
    ValueError
        If ``exog`` and ``series`` do not have the same number of rows.
    """
    if len(exog) != len(series):
        raise ValueError(
            f"exog has {len(exog)} rows but series has {len(series)} observations"
        )

    # statsmodels rejects pandas endog/exog whose indices are not equal, which
    # happens as soon as one side was differenced or re-indexed and the other
    # was not. The lengths already match, so relabel a copy positionally.
    if isinstance(series, (pd.Series, pd.DataFrame)) and not exog.index.equals(series.index):
        exog = exog.copy()
        exog.index = series.index

    arimax_model = SARIMAX(series, exog=exog, order=(p, d, q)).fit(disp=False)
    print("\nARIMAX Model Summary:")
    print(arimax_model.summary())

    # In-sample predictions from position 1 to the last observation, with the
    # matching regressor rows passed along as in the original call.
    predictions = arimax_model.predict(start=1, end=len(series) - 1, exog=exog.iloc[1:])
    plt.figure(figsize=(10, 6))
    plt.plot(series[1:], label="Actual")
    plt.plot(predictions, label="Predicted", linestyle="--")
    plt.title("ARIMAX Model")
    plt.legend()
    plt.show()

In [None]:
# 5.6 SARIMAX
def run_sarimax(series, exog, p=1, d=1, q=1, P=1, D=1, Q=1, s=12):
    """Fit a seasonal ARIMA model with exogenous regressors, print its summary and plot the fit.

    Parameters
    ----------
    series : pandas.Series
        Target observations.
    exog : pandas.DataFrame
        Regressors, one row per observation in ``series``. Rows are matched
        to ``series`` by position: if the two indices differ, a copy of
        ``exog`` is relabelled with ``series.index`` before fitting. The
        caller's frame is never modified.
    p, d, q : int, default 1
        Non-seasonal orders, passed as ``order=(p, d, q)``.
    P, D, Q : int, default 1
        Seasonal orders.
    s : int, default 12
        Season length in observations; forms ``seasonal_order=(P, D, Q, s)``.

    Raises
    ------
    ValueError
        If ``exog`` and ``series`` do not have the same number of rows.
    """
    if len(exog) != len(series):
        raise ValueError(
            f"exog has {len(exog)} rows but series has {len(series)} observations"
        )

    # statsmodels rejects pandas endog/exog whose indices are not equal, which
    # happens as soon as one side was differenced or re-indexed and the other
    # was not. The lengths already match, so relabel a copy positionally.
    if isinstance(series, (pd.Series, pd.DataFrame)) and not exog.index.equals(series.index):
        exog = exog.copy()
        exog.index = series.index

    sarimax_model = SARIMAX(
        series, exog=exog, order=(p, d, q), seasonal_order=(P, D, Q, s)
    ).fit(disp=False)
    print("\nSARIMAX Model Summary:")
    print(sarimax_model.summary())

    # In-sample predictions from position 1 to the last observation, with the
    # matching regressor rows passed along as in the original call.
    predictions = sarimax_model.predict(start=1, end=len(series) - 1, exog=exog.iloc[1:])
    plt.figure(figsize=(10, 6))
    plt.plot(series[1:], label="Actual")
    plt.plot(predictions, label="Predicted", linestyle="--")
    plt.title("SARIMAX Model")
    plt.legend()
    plt.show()

In [None]:
# Step 6: Run all models
print("\nRunning Autoregression (AR):")
run_autoregression(time_series_diff)

print("\nRunning Moving Average (MA):")
run_moving_average(time_series_diff)

print("\nRunning ARIMA:")
run_arima(time_series_diff)

print("\nRunning SARIMA:")
run_sarima(time_series_diff)

print("\nRunning ARIMAX:")
run_arimax(time_series_diff, exog=exogenous_data)

print("\nRunning SARIMAX:")
run_sarimax(time_series_diff, exog=exogenous_data)