# AI-Driven Demand Forecasting for FMCG (Capstone)

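This notebook:
- Loads the provided FMCG dataset
- Builds a daily demand time series for a selected segment (Product Category × Store Location)
- Fits Naive, ARIMA (SARIMAX) and Random Forest forecasters and evaluates them on a hold-out window of the most recent days
- Reports MAE / RMSE for each model and saves forecasts for the Streamlit app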


In [None]:
import pandas as pd
import numpy as np

from src.data_prep import load_data, make_daily_segment, make_supervised_features

# Relative path to the FMCG demand CSV consumed by this notebook.
DATA_PATH = 'data/extended_fmcg_demand_forecasting.csv'
# Load the dataset through the project helper imported from src.data_prep.
df = load_data(DATA_PATH)
# Bare trailing expression: the notebook renders its value as the cell output.
df.head()


In [None]:
# Pick the segment to forecast: the most frequent product category and the
# most frequent store location (value_counts orders by descending frequency,
# so the first index label is the most common value).
category_counts = df['Product_Category'].value_counts()
location_counts = df['Store_Location'].value_counts()

cat = category_counts.index[0]
loc = location_counts.index[0]

# Display the chosen (category, location) pair as the cell output.
cat, loc


In [None]:
# Build the demand series for the chosen (category, location) segment.
daily = make_daily_segment(df, cat, loc)

# Show the first rows of the date/target columns together with the frame shape.
daily_preview = daily[['ds', 'y']].head()
daily_preview, daily.shape


In [None]:
# Train-test split: hold out the most recent rows of the series for testing.
# The holdout length is one fifth of the series, clamped to the range
# [30, 60] rows (presumably one row per day — confirm in make_daily_segment).
test_days = min(60, max(30, len(daily)//5))
if len(daily) <= test_days:
    # Without this guard `train` would be empty and `test` would be shorter
    # than the requested forecast horizon, which only shows up later as an
    # obscure failure inside the model or metric helpers.
    raise ValueError(
        f"Segment has only {len(daily)} rows; more than {test_days} are "
        "required to leave training data after the hold-out split."
    )
train = daily.iloc[:-test_days].copy()
test = daily.iloc[-test_days:].copy()

# Dates of the hold-out window. Not used in this cell; presumably consumed
# later when the forecasts are saved — verify.
future_dates = test['ds']

# NOTE(review): baseline_naive, fit_predict_arima and compute_metrics are not
# imported in any visible cell — confirm which module defines them and make
# sure it is imported before this cell runs.
# Forecast `test_days` steps ahead from the training target with each model.
pred_naive = baseline_naive(train['y'], steps=test_days)
pred_arima = fit_predict_arima(train['y'], steps=test_days)

# Score both forecasts against the held-out actuals.
m_naive = compute_metrics(test['y'].to_numpy(), pred_naive)
m_arima = compute_metrics(test['y'].to_numpy(), pred_arima)



In [None]:
# Random Forest (feature-based): trained on the supervised feature table
# derived from the daily series rather than on the raw target alone.
sup = make_supervised_features(daily)

# Hold out at most a third of the feature rows, and never more than the
# window used for the other models.
# NOTE(review): when this is smaller than test_days the Random Forest is
# scored on a different window than the naive/ARIMA forecasts — confirm that
# the comparison is intended.
test_size = min(len(sup) // 3, test_days)

rf_result = fit_predict_rf(sup, test_size=test_size)
y_true, y_pred, rf_model, rf_features = rf_result

# Score the predictions and display the metrics alongside the feature list.
m_rf = compute_metrics(y_true, y_pred)
m_rf, rf_features


In [None]:
# Summary table: one row per model with its MAE and RMSE, best (lowest RMSE)
# first.
model_metrics = [
    ('Naive', m_naive),
    ('ARIMA (SARIMAX)', m_arima),
    ('Random Forest', m_rf),
]
summary_rows = [
    {'Model': label, 'MAE': metrics.mae, 'RMSE': metrics.rmse}
    for label, metrics in model_metrics
]
summary = pd.DataFrame(summary_rows).sort_values('RMSE')
summary
