In [1]:
# Section 1: Imports and settings
import os
from pathlib import Path
import logging
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, r2_score
import math

# Plotting style
sns.set(style='whitegrid')
plt.rcParams['figure.dpi'] = 150
COLOR_PALETTE = sns.color_palette('tab10')

# Logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')
logger = logging.getLogger(__name__)


In [3]:
# Section 2: Load dataset and models (robust path resolution)
from pathlib import Path

def find_file(name):
    p = Path(name)
    if p.exists():
        return p
    cwd = Path.cwd()
    # search up to 4 directory levels
    for parent in [cwd] + list(cwd.parents)[:4]:
        candidate = parent / name
        if candidate.exists():
            return candidate
    # fallback to rglob search in workspace root
    for parent in [cwd] + list(cwd.parents)[:4]:
        found = list(parent.rglob(name))
        if found:
            return found[0]
    raise FileNotFoundError(name)

DATA_PATH = find_file('data/ml_dataset_sample.csv')
MODEL_PERCAT = find_file('models/future_14d_sum_percat_models.joblib')
MODEL_GLOBAL = find_file('models/future_14d_sum_global_model.joblib')
OUT_DIR = Path('reports/forecast_diagnostics')
OUT_DIR.mkdir(parents=True, exist_ok=True)

logger.info('Loading dataset and models')
df = pd.read_csv(DATA_PATH, parse_dates=['date'])
models_percat = joblib.load(MODEL_PERCAT)
model_global = joblib.load(MODEL_GLOBAL)

# Validate target
if 'future_14d_sum' not in df.columns:
    raise AssertionError('Target future_14d_sum missing from dataset')

logger.info('Data shape: %s (loaded from %s)', df.shape, DATA_PATH)


2025-12-21 15:07:16,748 INFO: Loading dataset and models
2025-12-21 15:07:24,469 INFO: Data shape: (300441, 36) (loaded from c:\Users\gp123\Desktop\Dummy\Retail Store\data\ml_dataset_sample.csv)


In [4]:
# Section 3: Prepare test set (last 90 days, cutoff = 2024-10-02)
CUTOFF = pd.to_datetime('2024-10-02')
TEST_START = CUTOFF - pd.Timedelta(days=89)

df_test = df[(df['date'] >= TEST_START) & (df['date'] <= CUTOFF)].copy()
df_test = df_test.sort_values(['category','date']).reset_index(drop=True)

# unify promo column if present
promo_candidates = [c for c in df_test.columns if 'promo' in c.lower() or 'is_promo' in c.lower()]
if 'promo_flag' in df_test.columns:
    df_test['is_promo'] = df_test['promo_flag'].astype(int)
elif promo_candidates:
    df_test['is_promo'] = df_test[promo_candidates[0]].astype(int)
else:
    df_test['is_promo'] = 0

logger.info('Test rows: %d, date range %s - %s', len(df_test), TEST_START.date(), CUTOFF.date())


2025-12-21 15:07:40,624 INFO: Test rows: 36990, date range 2024-07-05 - 2024-10-02


In [5]:
# Section 4: Utility functions
from sklearn.metrics import mean_squared_error

def mae(y, yhat):
    return mean_absolute_error(y, yhat)

def rmse(y, yhat):
    return math.sqrt(mean_squared_error(y, yhat))

def r2(y, yhat):
    return r2_score(y, yhat)


def rolling_metrics_by_date(df_cat, col_actual='future_14d_sum', col_pred='pred'):
    # compute per-date MAE and RMSE across series in the category
    daily = df_cat.groupby('date').apply(lambda d: pd.Series({
        'mae': mae(d[col_actual], d[col_pred]),
        'rmse': rmse(d[col_actual], d[col_pred])
    })).reset_index()
    daily = daily.sort_values('date')
    daily['mae_14d'] = daily['mae'].rolling(14, min_periods=1).mean()
    daily['mae_30d'] = daily['mae'].rolling(30, min_periods=1).mean()
    daily['rmse_14d'] = daily['rmse'].rolling(14, min_periods=1).mean()
    daily['rmse_30d'] = daily['rmse'].rolling(30, min_periods=1).mean()
    return daily


def ensure_dir(path):
    Path(path).mkdir(parents=True, exist_ok=True)


def save_fig(fig, path):
    ensure_dir(Path(path).parent)
    fig.savefig(path, bbox_inches='tight')
    plt.close(fig)


In [6]:
# Section 5: Generate predictions per-category and global
logger.info('Generating predictions')

# Prepare container
df_test['pred_percat'] = np.nan

# models_percat: dict with key=category -> {'model':model,'features':features}
for cat, info in models_percat.items():
    try:
        model = info['model'] if isinstance(info, dict) and 'model' in info else info
        feat = info['features'] if isinstance(info, dict) and 'features' in info else None
        mask = df_test['category'] == cat
        if mask.sum() == 0:
            continue
        X = df_test.loc[mask, feat] if feat is not None else df_test.loc[mask].drop(columns=['future_14d_sum'], errors='ignore')
        X = X.fillna(-1)
        yhat_log = model.predict(X)
        # model may be a raw LightGBM booster or sklearn wrapper; assume log1p training
        yhat = np.expm1(yhat_log)
        yhat = np.clip(yhat, 0, None)
        df_test.loc[mask, 'pred_percat'] = yhat
    except Exception as e:
        logger.warning('Failed predict for %s: %s', cat, e)

# Global predictions
try:
    Xg = df_test[models_percat[next(iter(models_percat))]['features']].fillna(-1)
    # Rather than rely on features from one entry, compute features intersection
    feature_set = list(set.intersection(*[set(v['features']) for v in models_percat.values()]))
    if len(feature_set) > 0:
        Xg = df_test[feature_set].fillna(-1)
    else:
        Xg = df_test.drop(columns=['future_14d_sum']).select_dtypes(include=[np.number]).fillna(-1)
    yhatg_log = model_global.predict(Xg)
    df_test['pred_global'] = np.expm1(yhatg_log)
    df_test['pred_global'] = np.clip(df_test['pred_global'], 0, None)
except Exception as e:
    logger.error('Global prediction failed: %s', e)

# Residuals
df_test['resid_percat'] = df_test['future_14d_sum'] - df_test['pred_percat']
df_test['resid_global'] = df_test['future_14d_sum'] - df_test['pred_global']

logger.info('Predictions generated. Sample:')
print(df_test[['date','category','future_14d_sum','pred_percat','pred_global']].head())


2025-12-21 15:08:00,528 INFO: Generating predictions
2025-12-21 15:08:01,467 INFO: Predictions generated. Sample:


        date   category  future_14d_sum  pred_percat  pred_global
0 2024-07-05  Beverages            35.0    34.372059     5.504692
1 2024-07-05  Beverages            50.0    45.348040    11.248193
2 2024-07-05  Beverages            19.0    19.092280     7.548225
3 2024-07-05  Beverages            30.0    31.890552     9.371780
4 2024-07-05  Beverages            50.0    52.386430     5.286014


In [7]:
# Section 6: Plot functions
import matplotlib.dates as mdates
from scipy.stats import skew, kurtosis


def plot_actual_vs_pred(df_cat, category, outdir):
    fig, ax = plt.subplots(figsize=(10,4))
    ax.plot(df_cat['date'], df_cat['future_14d_sum'], label='Actual', color=COLOR_PALETTE[0])
    ax.plot(df_cat['date'], df_cat['pred_percat'], label='Pred (per-cat)', color=COLOR_PALETTE[1])
    ax.plot(df_cat['date'], df_cat['pred_global'], label='Pred (global)', color=COLOR_PALETTE[2], alpha=0.7)
    # highlight promo days
    promos = df_cat[df_cat['is_promo']==1]
    if not promos.empty:
        ax.scatter(promos['date'], promos['future_14d_sum'], marker='o', s=20, color='red', label='Promo')

    ax.set_title(f'{category} — Actual vs Predicted (14d sum)')
    ax.set_ylabel('Units (14d sum)')
    ax.legend()
    ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=2))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))
    fname = outdir / 'actual_vs_pred.png'
    save_fig(fig, fname)


def plot_residuals(df_cat, category, outdir):
    fig, axes = plt.subplots(1,2, figsize=(10,4))
    sns.histplot(df_cat['resid_percat'].dropna(), kde=True, ax=axes[0], color=COLOR_PALETTE[3])
    axes[0].axvline(0, color='k', linestyle='--')
    axes[0].set_title('Residuals (per-cat): histogram + KDE')

    axes[1].scatter(df_cat['pred_percat'], df_cat['resid_percat'], alpha=0.6)
    axes[1].axhline(0, color='k', linestyle='--')
    sns.regplot(x='pred_percat', y='resid_percat', data=df_cat, scatter=False, lowess=True, ax=axes[1], color='orange')
    axes[1].set_xlabel('Predicted')
    axes[1].set_ylabel('Residual (Actual - Pred)')
    axes[1].set_title('Residual vs Predicted')

    # annotations
    sk = skew(df_cat['resid_percat'].dropna())
    kt = kurtosis(df_cat['resid_percat'].dropna())
    axes[0].text(0.95, 0.95, f'skew={sk:.2f}\nkurt={kt:.2f}', transform=axes[0].transAxes, ha='right', va='top')

    fname = outdir / 'residuals_hist_kde.png'
    save_fig(fig, fname)


def plot_rolling_errors(df_cat, category, outdir):
    rolling = rolling_metrics_by_date(df_cat, col_actual='future_14d_sum', col_pred='pred_percat')
    fig, ax = plt.subplots(figsize=(10,4))
    ax.plot(rolling['date'], rolling['mae_14d'], label='MAE (14d)')
    ax.plot(rolling['date'], rolling['mae_30d'], label='MAE (30d)')
    ax.plot(rolling['date'], rolling['rmse_14d'], label='RMSE (14d)', linestyle='--')
    ax.plot(rolling['date'], rolling['rmse_30d'], label='RMSE (30d)', linestyle='--')
    ax.set_title(f'{category} — Rolling Errors')
    ax.legend()
    fname = outdir / 'rolling_errors.png'
    save_fig(fig, fname)


def plot_scatter_actual_vs_pred(df_cat, category, outdir):
    fig, ax = plt.subplots(figsize=(6,6))
    ax.scatter(df_cat['future_14d_sum'], df_cat['pred_percat'], alpha=0.4)
    maxv = max(df_cat['future_14d_sum'].max(), df_cat['pred_percat'].max())
    ax.plot([0,maxv],[0,maxv], color='k', linestyle='--')
    mae_v = mae(df_cat['future_14d_sum'], df_cat['pred_percat'])
    rmse_v = rmse(df_cat['future_14d_sum'], df_cat['pred_percat'])
    r2_v = r2(df_cat['future_14d_sum'], df_cat['pred_percat'])
    ax.set_title(f'{category} — Actual vs Pred (per-cat)\nMAE={mae_v:.2f}, RMSE={rmse_v:.2f}, R2={r2_v:.3f}')
    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')
    fname = outdir / 'scatter_actual_vs_pred.png'
    save_fig(fig, fname)


In [8]:
# Section 7: Driver - iterate categories and save plots & metrics
categories = sorted(df_test['category'].unique())
summary_rows = []

for cat in categories:
    logger.info('Processing category: %s', cat)
    outdir = OUT_DIR / cat.replace(' ','_')
    ensure_dir(outdir)
    df_cat = df_test[df_test['category']==cat].copy()

    # ensure predictions exist
    if df_cat['pred_percat'].isna().all():
        logger.warning('No per-category predictions for %s, skipping plots', cat)
        continue

    # plots
    plot_actual_vs_pred(df_cat, cat, outdir)
    plot_residuals(df_cat, cat, outdir)
    plot_rolling_errors(df_cat, cat, outdir)
    plot_scatter_actual_vs_pred(df_cat, cat, outdir)

    # metrics for summary
    mae_v = mae(df_cat['future_14d_sum'], df_cat['pred_percat'])
    rmse_v = rmse(df_cat['future_14d_sum'], df_cat['pred_percat'])
    r2_v = r2(df_cat['future_14d_sum'], df_cat['pred_percat'])
    mean_bias = (df_cat['future_14d_sum'] - df_cat['pred_percat']).mean()
    median_bias = (df_cat['future_14d_sum'] - df_cat['pred_percat']).median()
    pct_under = (df_cat['resid_percat'] < 0).mean()

    summary_rows.append({
        'category':cat,
        'model':'percat',
        'MAE':mae_v,
        'RMSE':rmse_v,
        'R2':r2_v,
        'mean_bias':mean_bias,
        'median_bias':median_bias,
        'pct_under_pred':pct_under
    })

# Global metrics (apply to categories as group)
for cat in categories:
    df_cat = df_test[df_test['category']==cat].copy()
    mae_v = mae(df_cat['future_14d_sum'], df_cat['pred_global'])
    rmse_v = rmse(df_cat['future_14d_sum'], df_cat['pred_global'])
    r2_v = r2(df_cat['future_14d_sum'], df_cat['pred_global'])
    mean_bias = (df_cat['future_14d_sum'] - df_cat['pred_global']).mean()
    median_bias = (df_cat['future_14d_sum'] - df_cat['pred_global']).median()
    pct_under = (df_cat['resid_global'] < 0).mean()
    summary_rows.append({
        'category':cat,
        'model':'global',
        'MAE':mae_v,
        'RMSE':rmse_v,
        'R2':r2_v,
        'mean_bias':mean_bias,
        'median_bias':median_bias,
        'pct_under_pred':pct_under
    })

# Save summary
df_summary = pd.DataFrame(summary_rows)
summary_path = OUT_DIR / 'error_summary.csv'
df_summary.to_csv(summary_path, index=False)
logger.info('Saved summary to %s', summary_path)


2025-12-21 15:08:20,901 INFO: Processing category: Beverages
  daily = df_cat.groupby('date').apply(lambda d: pd.Series({
2025-12-21 15:08:29,841 INFO: Processing category: Dairy
  daily = df_cat.groupby('date').apply(lambda d: pd.Series({
2025-12-21 15:08:39,507 INFO: Processing category: Frozen
  daily = df_cat.groupby('date').apply(lambda d: pd.Series({
2025-12-21 15:08:47,589 INFO: Processing category: Household
  daily = df_cat.groupby('date').apply(lambda d: pd.Series({
2025-12-21 15:08:55,050 INFO: Processing category: Produce
  daily = df_cat.groupby('date').apply(lambda d: pd.Series({
2025-12-21 15:09:04,788 INFO: Processing category: Snacks
  daily = df_cat.groupby('date').apply(lambda d: pd.Series({
2025-12-21 15:09:11,730 INFO: Processing category: Stationery
  daily = df_cat.groupby('date').apply(lambda d: pd.Series({
2025-12-21 15:09:17,931 INFO: Saved summary to reports\forecast_diagnostics\error_summary.csv


In [9]:
# Section 8: Flagging categories with persistent bias and append annotations
flagged = []
for _, row in df_summary[df_summary['model']=='percat'].iterrows():
    mean_bias = row['mean_bias']
    mean_actual = df_test[df_test['category']==row['category']]['future_14d_sum'].mean()
    if abs(mean_bias) > 0.10 * mean_actual:
        flagged.append({'category':row['category'],'mean_bias':mean_bias,'mean_actual':mean_actual})

if flagged:
    logger.warning('Flagged categories with persistent bias (>10%% of mean actual): %s', [f['category'] for f in flagged])
    # append a small text file
    with open(OUT_DIR / 'flagged_categories.txt','w') as f:
        for fcat in flagged:
            f.write(f"{fcat['category']}: mean_bias={fcat['mean_bias']:.2f}, mean_actual={fcat['mean_actual']:.2f}\n")
else:
    logger.info('No categories flagged for persistent bias')


2025-12-21 15:09:27,472 INFO: No categories flagged for persistent bias


In [None]:
# Section 9: Run-all verification and quick inline sample
# Verify expected files
expected = [OUT_DIR / cat.replace(' ','_') / 'actual_vs_pred.png' for cat in categories]
existing = [p for p in expected if p.exists()]
logger.info('Produced %d/%d expected category plots', len(existing), len(expected))
print('Summary CSV sample:')
print(pd.read_csv(summary_path).head())

print('\nFlagged categories (if any):')
if (OUT_DIR / 'flagged_categories.txt').exists():
    print(open(OUT_DIR / 'flagged_categories.txt').read())
else:
    print('None')

logger.info('Diagnostics finished')
