# In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
import itertools

# Step 1: Load the dataset and use the parsed 'Day Index' dates as the row index
data = pd.read_excel('../datasets/master_dataset/master_dataset.xlsx')
data['Day Index'] = pd.to_datetime(data['Day Index'])
data = data.set_index('Day Index')

# Column to model, and the columns passed as exogenous regressors
target_column = 'Quantity'
exogenous_columns = ['Clicks', 'Impressions']

# Step 2: Preprocessing - each selection drops its own missing rows
time_series = data.loc[:, target_column].dropna()
exogenous_data = data.loc[:, exogenous_columns].dropna()

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
import itertools

# Step 1: Load the dataset
dataset_path = '../datasets/master_dataset/master_dataset.xlsx'
data = pd.read_excel(dataset_path)
# Parse 'Day Index' as datetimes, then promote it to the row index
data['Day Index'] = pd.to_datetime(data['Day Index'])
data = data.set_index('Day Index')

# Define target and exogenous variables
target_column = 'Quantity'
exogenous_columns = ['Clicks', 'Impressions']

# Step 2: Preprocessing
# Drop incomplete rows jointly so the target and the exogenous regressors keep
# exactly the same dates. Dropping NaNs from each selection separately and then
# truncating both to a common length pairs values from different dates whenever
# the missing entries sit in different rows.
complete_rows = data[[target_column] + exogenous_columns].dropna()
time_series = complete_rows[target_column]
exogenous_data = complete_rows[exogenous_columns]

# Both objects now share this length; the name is kept because it was defined here before.
min_length = len(time_series)

# Differencing if non-stationary: an ADF p-value above 0.05 means the unit-root
# null hypothesis is not rejected.
if adfuller(time_series)[1] > 0.05:
    time_series_diff = time_series.diff().dropna()
    # diff() loses the first observation, so drop the matching exogenous row too
    exogenous_data = exogenous_data.iloc[1:]
else:
    time_series_diff = time_series
# Step 3: Function for hyperparameter tuning
def sarimax_hyperparameter_tuning(series, exog=None, p_values=(0, 1), d_values=(0, 1), q_values=(0, 1),
                                   P_values=(0, 1), D_values=(0, 1), Q_values=(0, 1), s_values=(12,)):
    """Grid-search SARIMAX orders and return the combination with the lowest AIC.

    Every combination from the Cartesian product of the candidate values is
    fitted; combinations whose fit raises are skipped.

    Args:
        series: Endogenous series passed to ``SARIMAX``.
        exog: Optional exogenous regressors passed to ``SARIMAX``.
        p_values, d_values, q_values: Candidate non-seasonal (p, d, q) orders.
        P_values, D_values, Q_values: Candidate seasonal (P, D, Q) orders.
        s_values: Candidate seasonal periods.

    Returns:
        The best ``(p, d, q, P, D, Q, s)`` tuple, or ``None`` when the grid is
        empty or no combination could be fitted.
    """
    best_score = float("inf")
    best_params = None
    grid = itertools.product(p_values, d_values, q_values, P_values, D_values, Q_values, s_values)
    for candidate in grid:
        p, d, q, P, D, Q, s = candidate
        try:
            fitted = SARIMAX(series, exog=exog, order=(p, d, q), seasonal_order=(P, D, Q, s)).fit(disp=False)
            aic = fitted.aic
        except Exception:
            # Best-effort search: a combination that cannot be fitted is skipped.
            # Unlike a bare ``except:``, this still lets KeyboardInterrupt and
            # SystemExit stop a long-running search.
            continue
        if aic < best_score:
            best_score = aic
            best_params = candidate
    print(f"Best SARIMAX Parameters: {best_params} with AIC: {best_score}")
    return best_params

# Step 4: Perform hyperparameter tuning
print("\n*** Hyperparameter Tuning for SARIMAX Model ***")

# Candidate non-seasonal orders
p_values, d_values, q_values = [0, 1, 2], [0, 1], [0, 1, 2]
# Candidate seasonal orders; 12 is the only seasonal period tried
P_values, D_values, Q_values = [0, 1], [0, 1], [0, 1]
s_values = [12]

search_space = {
    "p_values": p_values,
    "d_values": d_values,
    "q_values": q_values,
    "P_values": P_values,
    "D_values": D_values,
    "Q_values": Q_values,
    "s_values": s_values,
}
best_params = sarimax_hyperparameter_tuning(time_series_diff, exog=exogenous_data, **search_space)

# Step 5: Fit the optimized SARIMAX Model
print("\n*** Optimized SARIMAX Model ***")
# The tuning step yields None when no candidate could be fitted; fail with a
# clear message instead of an opaque "cannot unpack NoneType" TypeError.
if best_params is None:
    raise RuntimeError(
        "SARIMAX hyperparameter tuning found no parameter combination that could be fitted."
    )
(p, d, q, P, D, Q, s) = best_params
optimized_model = SARIMAX(time_series_diff, exog=exogenous_data, order=(p, d, q), seasonal_order=(P, D, Q, s)).fit()
print(optimized_model.summary())

# Step 6: Make predictions
# In-sample predictions from position 1 through the last observation
last_position = len(time_series_diff) - 1
predictions = optimized_model.predict(start=1, end=last_position, exog=exogenous_data.iloc[1:])

plt.figure(figsize=(10, 6))
# Draw the observed values and the model output on the same axes
for curve, style in (
    (time_series_diff[1:], dict(label="Actual", color="blue")),
    (predictions, dict(label="Predicted", color="red", linestyle="--")),
):
    plt.plot(curve, **style)
plt.title(f"Optimized SARIMAX Model Predictions ({target_column})")
plt.xlabel("Time")
plt.ylabel(target_column)
plt.legend()
plt.grid()
plt.show()

# Step 7: Forecast future values
forecast_steps = 5
# NOTE(review): the last `forecast_steps` observed exogenous rows are reused as
# the regressors for the forecast horizon - presumably a stand-in for real
# future values; confirm.
future_exog = exogenous_data.tail(forecast_steps)
forecast = optimized_model.forecast(steps=forecast_steps, exog=future_exog)
print("\nForecasted Values:\n", forecast)

# Step 8: Visualize Forecasted Values
# The history is indexed by the 'Day Index' dates, so drawing the forecast
# against bare integer positions would put the two curves on unrelated x-axes.
# Use the forecast's own dates when the model produced them; otherwise fall
# back to observation positions for BOTH curves so they share one axis.
forecast_index = getattr(forecast, "index", None)
if isinstance(forecast_index, pd.DatetimeIndex):
    history_x = time_series_diff.index
    future_index = forecast_index
else:
    history_x = range(len(time_series_diff))
    future_index = range(len(time_series_diff), len(time_series_diff) + forecast_steps)

plt.figure(figsize=(10, 6))
plt.plot(history_x, time_series_diff.values, label="Original", color="blue")
plt.plot(future_index, np.asarray(forecast), label="Forecast", color="green", linestyle="--")
plt.title("Forecasted Values - Optimized SARIMAX Model")
plt.xlabel("Time")
plt.ylabel(target_column)
plt.legend()
plt.grid()
plt.show()


# Ensure target and exogenous variables align
# Align on shared index labels instead of by position: when one object has
# already lost leading rows (e.g. the exogenous frame after an earlier
# differencing step), truncating both to a common length from the front pairs
# each target value with a regressor row from a different date.
target_in_exog = time_series.index.isin(exogenous_data.index)
exog_in_target = exogenous_data.index.isin(time_series.index)
time_series = time_series[target_in_exog]
exogenous_data = exogenous_data[exog_in_target]
min_length = len(time_series)

# Step 3: Define a function for stationarity check
def check_stationarity(series):
    """Run the Augmented Dickey-Fuller test on *series* and report the result.

    Prints the test statistic and the p-value.

    Returns:
        ``True`` when the p-value exceeds 0.05. Despite the function's name,
        ``True`` therefore means the unit-root hypothesis was NOT rejected,
        i.e. the series is treated as non-stationary.
    """
    statistic, p_value = adfuller(series)[:2]
    print(f"ADF Test Statistic: {statistic}")
    print(f"p-value: {p_value}")
    return p_value > 0.05

# Differencing if non-stationary
# check_stationarity returns True when the ADF p-value is above 0.05
needs_differencing = check_stationarity(time_series)
if needs_differencing:
    # diff() loses the first observation, so the first exogenous row is dropped as well
    exogenous_data = exogenous_data.iloc[1:]
    time_series_diff = time_series.diff().dropna()
else:
    time_series_diff = time_series

# Step 4: Define a function for plotting actual vs predicted values
def plot_results(actual, predicted, title):
    """Show *actual* and *predicted* on one 10x6 figure.

    Args:
        actual: Observed values, drawn as a solid blue line.
        predicted: Model output, drawn as a dashed red line.
        title: Figure title.

    The y-axis label is taken from the module-level ``target_column``.
    """
    plt.figure(figsize=(10, 6))
    curves = (
        (actual, {'label': 'Actual', 'color': 'blue'}),
        (predicted, {'label': 'Predicted', 'color': 'red', 'linestyle': '--'}),
    )
    for values, style in curves:
        plt.plot(values, **style)
    plt.title(title)
    plt.xlabel('Time')
    plt.ylabel(target_column)
    plt.legend()
    plt.grid()
    plt.show()

# Step 5: Autoregression Model
print("\n*** Autoregression Model ***")
# Lag-1 autoregression; in-sample predictions start at position 1
ar_model = AutoReg(time_series_diff, lags=1).fit()
ar_last_position = len(time_series_diff) - 1
ar_predictions = ar_model.predict(start=1, end=ar_last_position)
plot_results(actual=time_series_diff[1:], predicted=ar_predictions, title="Autoregression Model")

# Step 6: Moving Average Model
print("\n*** Moving Average Model ***")
window_size = 3
# Rolling mean over `window_size` observations; the leading incomplete windows
# are NaN and are dropped.
rolling_mean = time_series_diff.rolling(window=window_size).mean()
ma_predictions = rolling_mean.dropna()
# Skip the first window_size - 1 actual values so both curves start together
plot_results(actual=time_series_diff[window_size-1:], predicted=ma_predictions, title="Moving Average Model")

# Step 7: ARIMA Model
print("\n*** ARIMA Model ***")
# NOTE(review): order d=1 is applied to time_series_diff, which may already be
# differenced above - confirm the second differencing is intended.
arima_order = (1, 1, 1)
arima_model = ARIMA(time_series_diff, order=arima_order).fit()
arima_last_position = len(time_series_diff) - 1
arima_predictions = arima_model.predict(start=1, end=arima_last_position)
plot_results(actual=time_series_diff[1:], predicted=arima_predictions, title="ARIMA Model")

# Step 8: SARIMA Model
print("\n*** SARIMA Model ***")
# Seasonal ARIMA without exogenous regressors; seasonal period of 12
sarima_spec = {"order": (1, 1, 1), "seasonal_order": (1, 1, 1, 12)}
sarima_model = SARIMAX(time_series_diff, **sarima_spec).fit()
sarima_last_position = len(time_series_diff) - 1
sarima_predictions = sarima_model.predict(start=1, end=sarima_last_position)
plot_results(actual=time_series_diff[1:], predicted=sarima_predictions, title="SARIMA Model")

# Step 9: ARIMAX Model
print("\n*** ARIMAX Model ***")
# Non-seasonal model with the exogenous regressors
arimax_model = SARIMAX(time_series_diff, exog=exogenous_data, order=(1, 1, 1)).fit()
arimax_last_position = len(time_series_diff) - 1
arimax_predictions = arimax_model.predict(
    start=1,
    end=arimax_last_position,
    exog=exogenous_data.iloc[1:],
)
plot_results(actual=time_series_diff[1:], predicted=arimax_predictions, title="ARIMAX Model")

# Step 10: SARIMAX Model
print("\n*** SARIMAX Model ***")
# Seasonal model (period 12) with the exogenous regressors
sarimax_spec = {"order": (1, 1, 1), "seasonal_order": (1, 1, 1, 12)}
sarimax_model = SARIMAX(time_series_diff, exog=exogenous_data, **sarimax_spec).fit()
sarimax_last_position = len(time_series_diff) - 1
sarimax_predictions = sarimax_model.predict(
    start=1,
    end=sarimax_last_position,
    exog=exogenous_data.iloc[1:],
)
plot_results(actual=time_series_diff[1:], predicted=sarimax_predictions, title="SARIMAX Model")

# Step 11: Forecasting Future Values
forecast_steps = 5
# NOTE(review): the last `forecast_steps` observed exogenous rows are reused as
# the regressors for the forecast horizon - presumably a stand-in for real
# future values; confirm.
future_exog = exogenous_data.tail(forecast_steps)
sarimax_forecast = sarimax_model.forecast(steps=forecast_steps, exog=future_exog)
print("\nForecasted Values (SARIMAX):\n", sarimax_forecast)


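# Notebook output: FileNotFoundError: [Errno 2] No such file or directory: '../datasets/master_dataset.xlsx'
# (This path differs from the one read above, so the output is presumably from an earlier run.)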