In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.sarimax import SARIMAX, SARIMAXResults
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score
)
from pmdarima import auto_arima
# Suppress warnings (optional).
# NOTE: with no category/module argument this silences *every* Python warning
# for the rest of the session, including any emitted while the models below
# are being fitted. Comment it out when debugging a fit.
warnings.filterwarnings("ignore")

In [None]:
# ─── Config ────────────────────────────────────────────────────────────────────
# Both locations can be overridden through environment variables so the notebook
# also runs where the defaults are missing or not writable (the default model
# directory lives at the filesystem root). The defaults are the original values.
DATA_PATH   = os.environ.get('SARIMAX_DATA_PATH', '../data/dm/train.csv')
MODEL_DIR   = os.environ.get('SARIMAX_MODEL_DIR', '/models')
MODEL_PATH  = os.path.join(MODEL_DIR, 'sarimax_AUTOMOTIVE.pkl')

# Create the model directory up front so saving a fitted model later cannot
# fail because the folder is missing.
os.makedirs(MODEL_DIR, exist_ok=True)

In [None]:
# ─── 1. Load & Preprocess ───────────────────────────────────────────────────────
# Read the raw training file, discard the row identifier and keep only the
# AUTOMOTIVE product family.
df = (
    pd.read_csv(DATA_PATH, parse_dates=['date'])
    .drop(columns=['id'])
    .loc[lambda frame: frame['family'] == 'AUTOMOTIVE']
)

# One row per calendar day: sales and promotion counts summed over all stores.
autodf = df.groupby('date')[['sales', 'onpromotion']].sum()
# Daily PeriodIndex instead of timestamps.
autodf.index = pd.DatetimeIndex(autodf.index).to_period('D')
# Model target: log(1 + sales).
autodf['y'] = np.log1p(autodf['sales'])

# Subset from 2017-01-01 onwards, used only for plotting.
is_recent = autodf.index.to_timestamp() >= '2017-01-01'
autoplot = autodf.loc[is_recent]

In [None]:
# ─── 3. Auto-ARIMA order selection (once) ───────────────────────────────────────
# Search settings for the seasonal (period 7) order search on the log-sales
# series. d and D are left as None so auto_arima chooses the differencing orders.
search_settings = dict(
    seasonal=True,
    m=7,
    start_p=0, max_p=5,
    start_q=0, max_q=5,
    start_P=0, max_P=2,
    start_Q=0, max_Q=2,
    d=None,
    D=None,
    trace=True,
    suppress_warnings=True,
    error_action='ignore',
)
stepwise = auto_arima(autodf['y'], **search_settings)

# Selected (p, d, q) and (P, D, Q, m), reused by the SARIMAX fit below.
order, seasonal_order = stepwise.order, stepwise.seasonal_order

In [None]:
# ─── 4. Load or Fit & Save SARIMAX ──────────────────────────────────────────────
# A fitted model is cached on disk. It is estimated only when no cache file
# exists; otherwise the cached results object is reused as-is.
model_is_cached = os.path.exists(MODEL_PATH)

if not model_is_cached:
    print("Fitting new model…")
    # Orders come from the auto-ARIMA search; the promotion count is the only
    # exogenous regressor.
    sarimax_spec = dict(
        order=order,
        seasonal_order=seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    model = SARIMAX(autodf['y'], exog=autodf[['onpromotion']], **sarimax_spec)
    fit = model.fit(disp=False)
    fit.save(MODEL_PATH)
    print(f"Model saved to {MODEL_PATH!r}")
else:
    print("Loading saved model…")
    # NOTE: loading unpickles the file, so only load model files you trust.
    fit = SARIMAXResults.load(MODEL_PATH)

In [None]:
# ─── 5. Forecast last 14 days ───────────────────────────────────────────────────
# Hold out the final `n_test` days so the forecast can be scored against
# observations the forecasting model has not seen.
n_test    = 14
train     = autodf.iloc[:-n_test]
test      = autodf.iloc[-n_test:]
exog_test = test[['onpromotion']]

# `fit` is estimated on the whole series when it is fitted in this notebook
# (a cached model is presumably the same — verify if the cache came from
# elsewhere). Forecasting from it directly predicts the 14 days *after* the data
# ends, which cannot be compared with `test`. Instead, re-apply the same model
# specification to the training window only and re-estimate its parameters
# there; the forecast then covers exactly the hold-out window.
holdout_fit = fit.apply(
    train['y'],
    exog=train[['onpromotion']],
    refit=True,
    fit_kwargs={'disp': False},
)
holdout_forecast = holdout_fit.get_forecast(steps=n_test, exog=exog_test)

# Label the forecast with the hold-out dates explicitly so later Series
# arithmetic against `test` aligns row for row, even if the date index has gaps.
pred_log  = pd.Series(
    np.asarray(holdout_forecast.predicted_mean),
    index=test.index,
    name='predicted_mean',
)
# Back-transform from log1p space to sales units.
pred      = np.expm1(pred_log)

In [None]:
# ─── 6. Compute Metrics ─────────────────────────────────────────────────────────
# Compare actuals and forecast by position, not by index label. Arithmetic
# between two pandas Series aligns on the index, so whenever the forecast's
# labels differ from the hold-out dates the difference is all-NaN and MAPE
# silently becomes NaN, while the sklearn metrics still compare by position.
y_true = np.asarray(test['sales'], dtype=float)
y_pred = np.asarray(pred, dtype=float)

# RMSE
mse   = mean_squared_error(y_true, y_pred)
rmse  = np.sqrt(mse)

# MAE
mae   = mean_absolute_error(y_true, y_pred)

# R²
r2    = r2_score(y_true, y_pred)

# Mean daily sales over the full series (denominator for relative RMSE)
mean_sales     = autodf['sales'].mean()
rel_rmse_pct   = 100 * rmse / mean_sales

# MAPE (percent). Days with zero actual sales are skipped because the
# percentage error is undefined there; NaN if every hold-out day is zero.
nonzero = y_true != 0
if nonzero.any():
    mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
else:
    mape = np.nan

In [None]:
# ─── 7. Print Results ───────────────────────────────────────────────────────────
# Assemble the report first, then emit it in one call (one metric per line).
report_lines = (
    f"\nTest RMSE:           {rmse:.2f} units",
    f"Mean daily sales:    {mean_sales:.2f} units",
    f"Relative RMSE:       {rel_rmse_pct:.2f}% of mean sales",
    f"MAPE:                {mape:.2f}%",
    f"MAE:                 {mae:.2f} units",
    f"R² (coefficient of determination): {r2:.3f}",
)
print(*report_lines, sep="\n")

In [None]:
# ─── 8. Plot ────────────────────────────────────────────────────────────────────
# Recent history and the hold-out forecast on one set of axes, in sales units.
fig, ax = plt.subplots(figsize=(10, 4))

history_dates  = autoplot.index.to_timestamp()
history_sales  = np.expm1(autoplot['y'])      # undo the log1p transform
forecast_dates = test.index.to_timestamp()

ax.plot(history_dates, history_sales, label='History (2017+)')
ax.plot(forecast_dates, pred, label='Forecast')
ax.legend(loc='upper left')
ax.set_title("AUTOMOTIVE Sales – SARIMAX Forecast")
ax.grid(True)
fig.tight_layout()
plt.show()