# 01 — Baseline Model (Demand or Churn)
Fit a simple baseline and one ML model, then report business-facing metrics for both.

In [None]:

import pandas as pd, numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor

# Train a mean-value baseline and a gradient-boosting regressor on weekly
# demand, print MAE/RMSE for both, and save the test-set predictions.

# Locate the sample data relative to this file. Notebook kernels do not define
# __file__, so fall back to the parent of the working directory.
# NOTE(review): the fallback assumes the notebook is run from its own folder,
# one level below the project root -- confirm against the repo layout.
try:
    project_root = Path(__file__).resolve().parents[1]
except NameError:
    project_root = Path.cwd().resolve().parent
data_path = project_root / "data" / "sample_transactions.csv"
df = pd.read_csv(data_path, parse_dates=["date"])

# Aggregate to weekly demand per product-region.
# `.dt.start_time` is the vectorized form of mapping each Period to its start.
df['week'] = df['date'].dt.to_period('W').dt.start_time
agg = df.groupby(['week', 'product', 'region'], as_index=False).agg(
    qty=('qty', 'sum'),
    price=('price', 'mean'),
    promo=('promo', 'mean'),
)

# Simple features
# NOTE(review): 'week' is the start of a 'W' (week-ending-Sunday) period, so
# 'dow' is the same value (Monday) on every row and carries no signal. It is
# kept so the feature set stays unchanged; consider week-of-year instead.
agg['dow'] = agg['week'].dt.dayofweek
agg['month'] = agg['week'].dt.month
X = agg[['price', 'promo', 'dow', 'month']]
y = agg['qty']

# NOTE(review): a shuffled split mixes past and future weeks between train and
# test. For a forecasting use case a chronological split would be a more
# honest evaluation -- left unchanged here to keep results comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

# Baseline: predict the training-set mean for every test row
baseline_pred = np.full(len(y_test), y_train.mean(), dtype=float)

# Model
model = GradientBoostingRegressor(random_state=42)
model.fit(X_train, y_train)
pred = model.predict(X_test)

mae_base = mean_absolute_error(y_test, baseline_pred)
mae_model = mean_absolute_error(y_test, pred)
# `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
# taking the square root of the MSE works on every version.
rmse_base = float(np.sqrt(mean_squared_error(y_test, baseline_pred)))
rmse_model = float(np.sqrt(mean_squared_error(y_test, pred)))

print({"MAE_baseline": mae_base, "MAE_model": mae_model,
       "RMSE_baseline": rmse_base, "RMSE_model": rmse_model})

# Save quick artifacts (written relative to the current working directory)
Path("figures").mkdir(exist_ok=True)
pd.DataFrame({"y": y_test.values, "yhat": pred}).to_csv("figures/test_preds.csv", index=False)
print("Artifacts saved to figures/")
