### Baseline Model

In [None]:
# Statsmodels
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [None]:
def ts_split(series, test_size=0.2):
    """Split a sequence into leading (train) and trailing (test) parts.

    The cut position is ``int(len(series) * (1 - test_size))``, so the
    trailing part holds roughly the last ``test_size`` fraction of the
    items. No shuffling is done; the original order is preserved.

    Returns a ``(train, test)`` tuple of slices of ``series``.
    """
    cutoff = int(len(series) * (1 - test_size))
    head, tail = series[:cutoff], series[cutoff:]
    return head, tail

In [None]:
# Columns that together identify one demand series; the modelling loops
# group the data frame by these.
group_cols = [
    'Category',
    'Region',
    'Store ID',
    'Product ID',
]

# Shared accumulator: each modelling loop appends one metrics dict per
# successfully scored series.
all_results = list()

In [None]:
# Baseline ARIMA: fit a fixed ARIMA(1, 1, 1) on the raw demand of every
# (category, region, store, product) series and score its forecast on the
# held-out tail of that series. One metrics dict per scored series is
# appended to all_results.
import warnings

for (cat, reg, store, prod), grp in df.groupby(group_cols):

    # Order by date so the positional split below is chronological.
    ts = grp.sort_values('Date').set_index('Date')['Demand']

    # Series with fewer than 40 observations are not modelled.
    if len(ts) < 40:
        continue

    train, test = ts_split(ts)

    # ----------------------------
    # ARIMA (no lag features)
    # ----------------------------
    try:
        arima_model = ARIMA(train, order=(1, 1, 1))
        arima_fit = arima_model.fit()

        # Forecast exactly as many steps as there are held-out points.
        arima_forecast = arima_fit.forecast(steps=len(test))

        all_results.append({
            'Model': 'ARIMA',
            'Category': cat,
            'Region': reg,
            'Store ID': store,
            'Product ID': prod,
            'MAPE': mean_absolute_percentage_error(test, arima_forecast),
            'R2': r2_score(test, arima_forecast)
        })
    except Exception as exc:
        # Still best effort (one bad series must not stop the run), but
        # report the failure instead of dropping it silently: a systematic
        # problem such as a missing import would otherwise leave
        # all_results empty with no indication why.
        warnings.warn(
            f"Baseline ARIMA skipped for {(cat, reg, store, prod)}: {exc!r}"
        )

### Fine-tuned Model

In [None]:
# Model demand on the log(1 + x) scale; forecasts are mapped back with
# np.expm1 when they are scored.
df['Demand_log'] = df['Demand'].pipe(np.log1p)

def cap_outliers(series, q=0.99):
    """Clamp values above the ``q``-quantile of ``series`` to that quantile.

    Values at or below the quantile (and NaNs, which never compare
    greater) are passed through unchanged. The result is a NumPy array
    with one entry per element of ``series``.
    """
    upper = series.quantile(q)
    exceeds = series > upper
    return np.where(exceeds, upper, series)

# Cap log-demand separately within each (category, region, store, product)
# series, using cap_outliers with its default quantile.
df['Demand_log'] = df.groupby(
    ['Category', 'Region', 'Store ID', 'Product ID']
)['Demand_log'].transform(cap_outliers)

In [None]:
# Lagged log-demand features, shifted within each series so values never
# leak from one series into another.
# NOTE(review): the shift is by row position, so this presumably relies on
# df already being ordered by Date within each series - confirm.
grouped_log_demand = df.groupby(
    ['Category', 'Region', 'Store ID', 'Product ID']
)['Demand_log']

for lag in (1, 7, 14):
    df[f'lag_{lag}'] = grouped_log_demand.shift(lag)

In [None]:
# Calendar features read from the Date column through its .dt accessor;
# each new column is named after the accessor attribute it copies.
for part in ('day', 'month', 'dayofweek'):
    df[part] = getattr(df['Date'].dt, part)

In [None]:
def train_test_split_ts(df, test_ratio=0.2):
    """Split a frame by row position into leading and trailing parts.

    The first ``int(len(df) * (1 - test_ratio))`` rows form the training
    part and the remaining rows form the test part. Rows are not
    reordered.

    Returns a ``(train, test)`` tuple.
    """
    n_train = int(len(df) * (1 - test_ratio))
    train_part = df.iloc[:n_train]
    test_part = df.iloc[n_train:]
    return train_part, test_part

In [None]:
import itertools
#arima
def get_d(series):
    """Choose the ARIMA differencing order from an ADF test on ``series``.

    Returns 0 when the p-value reported by ``adfuller`` is at most 0.05
    (the series is treated as stationary), otherwise 1.
    """
    p_value = adfuller(series)[1]
    if p_value <= 0.05:
        return 0
    return 1

# Fine-tuned ARIMA: for every (category, region, store, product) series,
# pick d with get_d, grid-search p and q over 0..2 by AIC on the log-demand
# training split, then score the best model's forecast on the original
# demand scale. Results go into the shared all_results list.
import warnings

for (cat, reg, store, prod), grp in df.groupby(
    ['Category', 'Region', 'Store ID', 'Product ID']
):

    # Sort by date first, as the baseline loop does, so the positional
    # train/test split is chronological regardless of row order in df.
    ts = grp.sort_values('Date').set_index('Date')['Demand_log'].dropna()

    # Series with fewer than 40 usable observations are not modelled.
    if len(ts) < 40:
        continue

    train, test = ts_split(ts)

    try:
        d = get_d(train)
    except ValueError as exc:
        # The stationarity test can reject degenerate input (for example a
        # constant series); skip that series rather than abort the run.
        warnings.warn(
            f"Fine-tuned ARIMA skipped for {(cat, reg, store, prod)}: "
            f"could not determine d ({exc!r})"
        )
        continue

    best_order = None
    best_fit = None
    best_aic = np.inf

    for p, q in itertools.product(range(3), range(3)):
        try:
            candidate = ARIMA(train, order=(p, d, q)).fit()
            if candidate.aic < best_aic:
                best_aic = candidate.aic
                best_order = (p, d, q)
                best_fit = candidate
        except Exception:
            # Some orders cannot be fitted on some series; try the next
            # one. Unlike a bare except, this does not trap
            # KeyboardInterrupt, so the search can still be interrupted.
            continue

    # No candidate order produced a usable fit for this series.
    if best_fit is None:
        continue

    # Reuse the winning fit from the search instead of fitting it again.
    model = best_fit

    # Undo the log1p transform so metrics are on the demand scale.
    pred = np.expm1(model.forecast(len(test)))
    actual = np.expm1(test)

    all_results.append({
        'Model': 'ARIMA (Fine-Tuned)',
        'Category': cat,
        'Region': reg,
        'Store ID': store,
        'Product ID': prod,
        'Order': best_order,
        'MAPE': mean_absolute_percentage_error(actual, pred),
        'R2': r2_score(actual, pred)
    })