### Baseline Model

In [None]:
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.metrics import mean_absolute_percentage_error, r2_score

In [None]:
def ts_split(series, test_size=0.2):
    """Cut an ordered sequence into a leading train part and a trailing test part.

    Parameters
    ----------
    series : sliceable sequence (e.g. a pandas Series or a list)
        Observations, already in the order they should be split in.
    test_size : float, default 0.2
        Fraction of the observations placed in the trailing (test) part.

    Returns
    -------
    tuple
        ``(train, test)`` where ``train`` holds the first
        ``int(len(series) * (1 - test_size))`` items and ``test`` the rest.
    """
    n_train = int(len(series) * (1 - test_size))
    head = series[:n_train]
    tail = series[n_train:]
    return head, tail

In [None]:
# Shared accumulator: each model appends one metrics row per series to this list
all_results = list()

# Columns that together identify a single demand series
group_cols = [
    'Category',
    'Region',
    'Store ID',
    'Product ID',
]

In [None]:
# Baseline: per-series linear trend (time step -> demand), no lag features.
# One metrics row per series is appended to all_results.
for keys, grp in df.groupby(group_cols):
    cat, reg, store, prod = keys

    # Demand for this series in chronological order, indexed by date
    ts = grp.sort_values('Date').set_index('Date')['Demand']

    # Series with fewer than 40 observations are skipped
    if len(ts) < 40:
        continue

    train, test = ts_split(ts)

    # The only regressor is the integer time step; test steps continue
    # counting from where the training steps end
    n_train = len(train)
    X_train = np.arange(n_train)[:, np.newaxis]
    X_test = np.arange(n_train, n_train + len(test))[:, np.newaxis]

    lr = LinearRegression().fit(X_train, train.values)
    lr_forecast = lr.predict(X_test)

    mape = mean_absolute_percentage_error(test, lr_forecast)
    r2 = r2_score(test, lr_forecast)

    all_results.append({
        'Model': 'Linear Regression',
        'Category': cat,
        'Region': reg,
        'Store ID': store,
        'Product ID': prod,
        'MAPE': mape,
        'R2': r2,
    })

### Fine-tuned Model

In [None]:
# Model demand on the log(1 + x) scale; np.expm1 inverts this transform
df['Demand_log'] = df['Demand'].pipe(np.log1p)

def cap_outliers(series, q=0.99):
    """Clamp the upper tail of a series at its ``q``-th quantile.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to cap.
    q : float, default 0.99
        Quantile used as the upper cap.

    Returns
    -------
    numpy.ndarray
        Values of ``series`` with every entry above the cap replaced by the
        cap; entries at or below the cap are returned unchanged. Only the
        upper tail is touched.
    """
    threshold = series.quantile(q)
    above_threshold = series > threshold
    return np.where(above_threshold, threshold, series)

# Cap the upper tail of log-demand separately within each series.
# NOTE(review): the cap is computed over each series' full history, so it also
# uses (and alters) rows that later end up in the test split.
demand_log_groups = df.groupby(['Category','Region','Store ID','Product ID'])['Demand_log']
df['Demand_log'] = demand_log_groups.transform(cap_outliers)

In [None]:
# Lagged log-demand features (1, 7 and 14 rows back within each series).
#
# shift() is positional inside each group, so the rows must be in date order
# for "lag_k" to mean "k observations earlier". Sort by Date first instead of
# relying on df's current row order; mergesort is stable, so a frame that is
# already in date order keeps exactly the same row order.
# The shifted values keep their original row labels, so the assignment aligns
# them back onto df by index.
# NOTE(review): that alignment needs df's index to be unique if df is not
# already date-sorted -- TODO confirm.
demand_log_by_series = (
    df.sort_values('Date', kind='mergesort')
      .groupby(['Category','Region','Store ID','Product ID'])['Demand_log']
)

for lag in [1, 7, 14]:
    df[f'lag_{lag}'] = demand_log_by_series.shift(lag)

In [None]:
# Calendar features read from the Date column's .dt accessor: day of month,
# month number, and day of week
for date_part in ('day', 'month', 'dayofweek'):
    df[date_part] = getattr(df['Date'].dt, date_part)

In [None]:
def train_test_split_ts(df, test_ratio=0.2):
    """Split a frame by row position into a leading train part and a trailing test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows in the order they should be split in; no sorting or shuffling
        is done here.
    test_ratio : float, default 0.2
        Fraction of rows placed in the trailing (test) part.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)`` where ``train`` is the first
        ``int(len(df) * (1 - test_ratio))`` rows and ``test`` the remainder.
    """
    n_train = int(len(df) * (1 - test_ratio))
    earlier_rows = df.iloc[:n_train]
    later_rows = df.iloc[n_train:]
    return earlier_rows, later_rows

In [None]:
# Ridge regression on price/promotion, calendar and lagged log-demand features.
# One metrics row per series is appended to the shared all_results list, next
# to the baseline rows, so both models can be compared from a single table.

features = [
    'Price','Discount','Promotion','Competitor Pricing',
    'day','month','dayofweek',
    'lag_1','lag_7','lag_14'
]

for (cat, reg, store, prod), grp in df.groupby(group_cols):

    # Drop rows with a missing feature or target (the lag columns are NaN where
    # there is no earlier row to shift from), then put the rows in date order so
    # the positional split below holds out the most recent observations -- the
    # same chronological split the baseline uses.
    grp = (
        grp.dropna(subset=features + ['Demand_log'])
           .sort_values('Date', kind='mergesort')
    )

    # Series with fewer than 40 usable rows are skipped
    if len(grp) < 40:
        continue

    train, test = train_test_split_ts(grp)

    X_train = train[features]
    y_train = train['Demand_log']

    X_test = test[features]
    y_test_log = test['Demand_log']  # capped log target, kept for inspection

    # Regularisation strength is chosen by RidgeCV from a log-spaced grid
    ridge = RidgeCV(alphas=np.logspace(-3, 3, 20))
    ridge.fit(X_train, y_train)

    # Predictions are made on the log1p scale; expm1 maps them back to demand units
    pred_log = ridge.predict(X_test)
    pred = np.expm1(pred_log)

    # Score against the raw demand, as the baseline does. Demand_log has had its
    # upper tail capped, so expm1(y_test_log) would hide exactly the large
    # values the model under-predicts and make the two models incomparable.
    actual = test['Demand']

    all_results.append({
        'Model': 'Ridge Regression (Lagged)',
        'Category': cat,
        'Region': reg,
        'Store ID': store,
        'Product ID': prod,
        'MAPE': mean_absolute_percentage_error(actual, pred),
        'R2': r2_score(actual, pred)
    })