# Sales Forecasting with SARIMA

This notebook focuses on building a SARIMA model to forecast future sales.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Switch matplotlib to its 'ggplot' style sheet for the plots in this notebook.
plt.style.use('ggplot')

In [None]:
# Load the aggregated sales data.
# Prefer the CSV one directory above the working directory; if it is not
# there, fall back to the copy two directories up.
primary_csv = "../data/monthly_sales.csv"
fallback_csv = "../../data/monthly_sales.csv"
data_path = primary_csv if os.path.exists(primary_csv) else fallback_csv

# Parse the date column and promote it to the index so the frame is
# addressable by order date.
raw_sales = pd.read_csv(data_path)
raw_sales['order_date'] = pd.to_datetime(raw_sales['order_date'])
df = raw_sales.set_index('order_date')

print(df.head())

## 1. Stationarity Check (ADF Test)

In [None]:
def adf_test(series: pd.Series, alpha: float = 0.05) -> None:
    """Run an Augmented Dickey-Fuller test and print the stationarity verdict.

    Missing values are dropped from ``series`` before it is passed to
    ``adfuller``. The test statistic and p-value are printed, followed by a
    one-line verdict.

    Args:
        series: Observations to test.
        alpha: Significance level for the verdict. The series is reported as
            stationary when the p-value is less than or equal to ``alpha``.
            Defaults to 0.05, the threshold that was previously hard-coded.

    Returns:
        None. All output goes to stdout.
    """
    result = adfuller(series.dropna())
    adf_statistic = result[0]
    p_value = result[1]

    print(f'ADF Statistic: {adf_statistic}')
    print(f'p-value: {p_value}')

    # The ADF null hypothesis is a unit root (non-stationarity), so a small
    # p-value is evidence in favour of stationarity.
    if p_value <= alpha:
        print("Data is Stationary")
    else:
        print("Data is Non-Stationary")


adf_test(df['sales'])

## 2. Train/Test Split

In [None]:
# Split by row position without shuffling: the first 80% of rows (rounded
# down) become the training set and the remaining rows the test set.
n_rows = len(df)
train_size = int(n_rows * 0.8)
train = df.iloc[:train_size]
test = df.iloc[train_size:]
print(f"Train size: {len(train)}, Test size: {len(test)}")

## 3. SARIMA Model Training
We use `order=(1, 1, 1)` and `seasonal_order=(1, 1, 1, 12)` (a 12-month seasonal period) as a starting point.

In [None]:
# Starting-point specification: (p, d, q) and (P, D, Q, s) with s = 12.
sarima_order = (1, 1, 1)
sarima_seasonal_order = (1, 1, 1, 12)

# Fit on the training portion of the sales column only.
model = SARIMAX(
    train['sales'],
    order=sarima_order,
    seasonal_order=sarima_seasonal_order,
)
results = model.fit()
print(results.summary())

## 4. Evaluation

In [None]:
# Predict the integer positions that directly follow the training sample,
# one step for every row of the test set.
first_step = len(train)
last_step = first_step + len(test) - 1
# NOTE(review): `typ` comes from the legacy ARIMA API; confirm that the
# SARIMAX results object actually uses it rather than ignoring it.
predictions = results.predict(start=first_step, end=last_step, typ='levels')
# Label the predictions with the test index so they line up with the actuals.
predictions.index = test.index

# Error metrics on the held-out rows.
actual_sales = test['sales']
mae = mean_absolute_error(actual_sales, predictions)
rmse = np.sqrt(mean_squared_error(actual_sales, predictions))

print(f"MAE: {mae}")
print(f"RMSE: {rmse}")

# Overlay training data, held-out actuals and predictions on one figure.
plt.figure(figsize=(12, 6))
plotted_series = (
    ('Train', train['sales']),
    ('Test', actual_sales),
    ('Predictions', predictions),
)
for series_label, series_values in plotted_series:
    plt.plot(series_values, label=series_label)
plt.legend()
plt.show()

## 5. Future Forecast


In [None]:
# Refit the same SARIMA specification, this time on the complete sales series.
final_model = SARIMAX(
    df['sales'],
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
)
final_results = final_model.fit()

# Forecast 12 steps past the end of the data.
forecast = final_results.get_forecast(steps=12)
forecast_mean = forecast.predicted_mean
# The interval bounds are computed here but are not merged into final_df below.
conf_int = forecast.conf_int()

# Tag the historical rows so they can be told apart after stacking.
historical_df = df.assign(Type='Historical')

# Put the forecast into the same layout: a 'sales' column, a 'Type' tag and
# an index named 'order_date'.
forecast_df = pd.DataFrame({'sales': forecast_mean, 'Type': 'Forecast'})
forecast_df.index.name = 'order_date'

# Stack history above forecast, then move the index out into a regular column.
final_df = pd.concat([historical_df, forecast_df]).reset_index()

# Save functionality commented out as Power BI is not used
# export_path = "../data/forecast_results.csv"
# final_df.to_csv(export_path, index=False)
# print(f"Exported forecast results to {export_path}")