**Forecast for Store 1:**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw Walmart data: weekly sales per store (45 stores, 143 weeks each).
# The path is relative, so Walmart.csv must be in the notebook's working directory.
df = pd.read_csv("Walmart.csv")
df.head(5)

In [None]:
# Parse the "Date" strings (day-month-year, per the format string) into datetime values.
df["Date"] = pd.to_datetime(df["Date"], format="%d-%m-%Y")

In [None]:
# Use Date as the index so the plots and time-series models below get a time axis.
# Not re-runnable: once "Date" is the index it is no longer a column.
df = df.set_index("Date")

**Data is already clean, no further EDA required**

In [None]:
# Keep only Store 1's rows and only the sales column; Date remains the index.
# Not re-runnable: afterwards df has no "Store" column, so a second run raises KeyError.
df = df.loc[df["Store"] == 1, ["Weekly_Sales"]]
df.head(5)

In [None]:
# Check the row count, the index/column dtypes and the non-null count.
df.info()

In [None]:
# Quick look at the Store 1 weekly sales series before modelling.
df.plot()

In [None]:
from statsmodels.tsa.stattools import adfuller
# Global setting: applies to every figure drawn after this cell, not only those in helper().
plt.style.use('dark_background')

def helper(data, window=4, alpha=0.05):
  """Plot rolling statistics for a series and report an ADF stationarity verdict.

  Parameters
  ----------
  data : pandas Series or single-column DataFrame
      The time series to inspect.
  window : int, default 4
      Number of consecutive rows in each rolling window. For weekly data the
      default covers 4 weeks (not a year).
  alpha : float, default 0.05
      Significance level the ADF p-value is compared against.

  Returns
  -------
  None
      The function only draws a figure and prints the verdict.
  """
  rolling_mean = data.rolling(window = window).mean()
  rolling_std = data.rolling(window = window).std()

  # Overlay the raw series with its rolling mean and rolling standard deviation.
  plt.plot(data, color = "red", label = "Original Data")
  plt.plot(rolling_mean, color = "yellow", label = "Rolling Mean")
  plt.plot(rolling_std, color = "blue", label = "Rolling Std")
  plt.legend()
  plt.show()

  # Element 1 of adfuller's result is the p-value; a p-value above alpha means
  # the unit-root (non-stationary) hypothesis cannot be rejected.
  p_value = adfuller(data)[1]
  if p_value > alpha:
    print(f"Non Stationary, P-value: {p_value}")
  else:
    print(f"Stationary, P-value: {p_value}")

In [None]:
# Rolling-statistics plot plus ADF stationarity test for the Store 1 weekly sales.
helper(df)

In [None]:
# Data is stationary. Find ACF, PACF
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Autocorrelation plot of the weekly sales series.
plot_acf(df)
plt.show()
plt.close()

In [None]:
# Partial autocorrelation plot of the weekly sales series.
plot_pacf(df)
plt.show()
plt.close()

In [None]:
# Initial guess for (p, d, q): (1,1,1) or (1,0,1). NOTE: the next cell actually fits order (4,0,4).
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
# ARIMA(4,0,4) on the Store 1 series; freq="W-FRI" is the frequency deduced from the dates.
model1 = ARIMA(df, order=(4,0,4), freq="W-FRI")
result1 = model1.fit()
# In-sample sum of squared errors (SSE) between the fitted and the observed sales.
error1 = sum((result1.fittedvalues - df["Weekly_Sales"])**2)
# SSE is in squared sales units, so it is very large when Weekly_Sales values are large;
# other models and an RMSE are tried below.
error1

In [None]:
def rmse(actual, pred):
    """Return the root-mean-squared error between two equal-length sequences.

    Values are compared position by position, so any pandas index on the
    inputs is ignored.

    Parameters
    ----------
    actual : iterable of numbers
        Observed values.
    pred : iterable of numbers
        Predicted values, in the same order as ``actual``.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((pred - actual) ** 2))``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same length/shape. Previously the
        longer input was silently truncated, which could hide alignment bugs.
    """
    # list() first so generators, lists and pandas Series are all handled positionally.
    actual_values = np.asarray(list(actual), dtype=float)
    pred_values = np.asarray(list(pred), dtype=float)

    if actual_values.shape != pred_values.shape:
        raise ValueError(
            f"actual and pred must have the same shape, got "
            f"{actual_values.shape} and {pred_values.shape}"
        )

    return np.sqrt(np.mean((pred_values - actual_values) ** 2))

In [None]:
# RMSE is in the same units as Weekly_Sales (unlike the SSE above), but it is still a large number.
rmse(df["Weekly_Sales"], result1.fittedvalues)

**Auto ARIMA:**

In [None]:
!pip install pmdarima

In [None]:
import pmdarima as pm
from pmdarima.arima import auto_arima
from sklearn.model_selection import train_test_split

In [None]:
# Stepwise search over ARIMA orders on the Store 1 series.
arima_model = auto_arima(
    df, # data
    test = "adf", # unit-root test used to choose the differencing order d
    start_p = 0,
    start_q = 0,
    # NOTE(review): no seasonal period `m` is passed here. With pmdarima's default
    # (m=1, per its docs) the search is effectively non-seasonal despite
    # seasonal=True — confirm, and set `m` if a seasonal model is intended.
    seasonal = True,
    max_p = 13,
    max_q = 13,
    stepwise = True,
    trace = True, # print each candidate model as it is fitted
    suppress_warnings = True,
    error_action = "ignore" # skip orders that fail to fit instead of raising
)

In [None]:
# ARIMA(1,0,0) — presumably the order auto_arima selected above; verify against its trace output.
# freq="W-FRI" is the frequency deduced from the dates.
model2 = ARIMA(df, order=(1,0,0), freq="W-FRI")
result2 = model2.fit()
# In-sample SSE, computed the same way as error1 so the two models can be compared.
error2 = sum((result2.fittedvalues - df["Weekly_Sales"])**2)
error2

In [None]:
# Observed sales against the in-sample fit of model1 (ARIMA(4,0,4)).
plt.plot(df, label = "Original Data", color = "red")
# The fitted line needs its own label, otherwise plt.legend() omits it.
plt.plot(result1.fittedvalues, label = "Fitted Values", color = "blue")
# error1 is model1's sum of squared errors.
plt.title(f"Errors: {error1}")
plt.legend()
plt.show()

In [None]:
forecast_weeks = 12   # out-of-sample horizon, in weekly steps

plt.figure(figsize=(12,8))
# predict() takes zero-based, inclusive positions. The last observation sits at
# position len(df) - 1, so ending at len(df) + forecast_weeks - 1 yields exactly
# `forecast_weeks` forecasts (ending at len(df) + forecast_weeks would give one extra).
# Starting at 1 leaves out the prediction for the very first observation.
pred = result1.predict(1, len(df) + forecast_weeks - 1)
plt.plot(df, label = "Original Data")
plt.plot(pred, color = "yellow", label = "Fitted + Forecast")
plt.legend()
plt.show()

**SARIMA:**

In [None]:
import itertools

# Each non-seasonal term (p, d, q) is tried at the values 0 and 1.
p = d = q = range(0, 2)

# All 2 x 2 x 2 = 8 (p, d, q) combinations.
pdq = [(ar, diff, ma) for ar, diff, ma in itertools.product(p, d, q)]

# Seasonal counterparts (P, D, Q, s) with the seasonal period fixed at 12.
# NOTE(review): the data is weekly, so a period of 12 is 12 weeks — confirm this
# is the intended seasonality.
seasonal_pdq = [order + (12,) for order in pdq]

In [None]:
# Grid search: fit every (order, seasonal_order) pair and keep the one with the lowest AIC.
best_aic = float('inf')   # any finite AIC beats this starting value
best_order = None
best_seasonal_order = None
for x in pdq:
  for y in seasonal_pdq:
    try:
      model = SARIMAX(df,                  # data
                      order = x,           # non-seasonal order (p, d, q)
                      seasonal_order = y)  # seasonal order (P, D, Q, period)
      results = model.fit()
    except Exception as err:
      # Catch Exception rather than using a bare except, so Ctrl-C / kernel
      # interrupts still stop the loop, and report which combination was skipped.
      print(f"SARIMA{x}x{y} - skipped: {err}")
      continue

    # Keep this model only if it improves on the best AIC seen so far.
    if results.aic < best_aic:
      best_aic = results.aic
      best_order = x
      best_seasonal_order = y
      print(f"SARIMA{x}x{y} - AIC: {results.aic}")


print(f"Best Model: SARIMA{best_order}x{best_seasonal_order}, AIC: {best_aic}")

In [None]:
#Best Model: SARIMA(1, 1, 1)x(0, 1, 1, 12), AIC: 3552.3525380447536

In [None]:
# Refit the winning configuration: the grid search kept only the best orders and AIC,
# not the fitted model itself.
final_model1 = SARIMAX(df, order = best_order, seasonal_order = best_seasonal_order)
final_results1 = final_model1.fit()
forecast = final_results1.forecast(steps = 12)  # out-of-sample forecast for the next 12 weeks

In [None]:
# Observed series followed by the 12-step SARIMA forecast.
# Legend labels "OD" / "PV" presumably stand for original data / predicted values.
plt.figure(figsize=(10,7))
plt.plot(df, color = "red", label = "OD")
plt.plot(forecast, color = "yellow", label = "PV")
plt.legend()
plt.show()

In [None]:
# Display the 12 forecast values as the cell output.
forecast