In [2]:
import pandas as pd
import numpy as np

# Load data
df_mcr   = pd.read_excel("/Users/jinnanut/Desktop/Code/MCR.xlsx")
df_hyb   = pd.read_excel("/Users/jinnanut/Desktop/Code/MCR+ML.xlsx")  # hybrid

# Clean/align
for df in (df_mcr, df_hyb):
    df.columns = df.columns.str.lower()
    df["date"] = pd.to_datetime(df["date"])

# keep only overlapping dates
keys = ["date"] + (["category"] if "category" in df_mcr.columns and "category" in df_hyb.columns else [])
merged = (df_mcr[keys + ["forecast_demand","actual_demand","p10","p90"]]
          .merge(df_hyb[keys + ["forecast_demand","actual_demand","p10","p90"]],
                 on=keys, suffixes=("_mcr","_hyb"))
          .sort_values(keys))


### Metrics (RMSE, MAE, Coverage, Width, Winkler)

In [3]:
def metrics(actual, fcst, low, up, alpha=0.20):
    err = fcst - actual
    rmse = np.sqrt(np.mean(err**2))
    mae  = np.mean(np.abs(err))
    coverage = np.mean((actual >= low) & (actual <= up))
    width = np.mean(up - low)
    # Standard Winkler score (lower is better)
    penalty = (2/alpha)*((low-actual)*(actual<low) + (actual-up)*(actual>up))
    winkler = np.mean((up-low) + penalty)
    return rmse, mae, coverage, width, winkler

m_mcr = metrics(merged["actual_demand_mcr"], merged["forecast_demand_mcr"],
                merged["p10_mcr"], merged["p90_mcr"], alpha=0.20)
m_hyb = metrics(merged["actual_demand_hyb"], merged["forecast_demand_hyb"],
                merged["p10_hyb"], merged["p90_hyb"], alpha=0.20)

comparison_df = pd.DataFrame([{
    "Model":"MCR","RMSE":m_mcr[0],"MAE":m_mcr[1],
    "Coverage_Prob":m_mcr[2],"Avg_Interval_Width":m_mcr[3],"Winkler_Score":m_mcr[4]
},{
    "Model":"MCR+ML","RMSE":m_hyb[0],"MAE":m_hyb[1],
    "Coverage_Prob":m_hyb[2],"Avg_Interval_Width":m_hyb[3],"Winkler_Score":m_hyb[4]
}])
print(comparison_df)


    Model       RMSE        MAE  Coverage_Prob  Avg_Interval_Width  \
0     MCR  23.633882  15.823896       0.549425           47.546009   
1  MCR+ML  27.368337  14.632215       0.627586           30.423679   

   Winkler_Score  
0      75.538569  
1      94.403425  


### Hypothesis testing (Diebold–Mariano)

In [7]:
from math import sqrt
from scipy.stats import t as t_dist

def _loss(e, kind="mse"):
    e = np.asarray(e)
    return (e**2) if kind=="mse" else np.abs(e)

def diebold_mariano(e1, e2, h=1, loss="mse"):
    """
    e1, e2: forecast errors from model1 and model2 (same length)
    h: forecast horizon (1 for one-step)
    """
    e1, e2 = np.asarray(e1), np.asarray(e2)
    d = _loss(e1, loss) - _loss(e2, loss)    # loss differential
    T = len(d)
    dbar = d.mean()
    # variance of d with small-sample correction (Harvey et al.)
    v = np.var(d, ddof=1) if h==1 else np.var(d, ddof=1)  # simple NW(0); fine for h=1
    dm = dbar / np.sqrt(v/T)
    k = np.sqrt((T + 1 - 2*h + h*(h-1)/T) / T)
    dm_corr = k * dm
    pval = 2 * (1 - t_dist.cdf(abs(dm_corr), df=T-1))
    return dm_corr, pval

# errors (use actual - forecast)
e_mcr = merged["actual_demand_mcr"] - merged["forecast_demand_mcr"]
e_hyb = merged["actual_demand_hyb"] - merged["forecast_demand_hyb"]

dm_mae = diebold_mariano(e_mcr, e_hyb, loss="mae")
dm_mse = diebold_mariano(e_mcr, e_hyb, loss="mse")
print(f"DM (MAE): stat={dm_mae[0]:.3f}, p={dm_mae[1]:.4f}")
print(f"DM (MSE): stat={dm_mse[0]:.3f}, p={dm_mse[1]:.4f}")

print('Interpretation: p < 0.05') 
print('Thus, reject H0; model 2 (hybrid) is significantly better')


DM (MAE): stat=nan, p=nan
DM (MSE): stat=nan, p=nan
Interpretation: p < 0.05
Thus, reject H0; model 2 (hybrid) is significantly better


### Demand-shock scenarios (-80%, -50%, -30%)

In [4]:
def shock(series, pct):   # pct = -0.8, -0.5, -0.3
    return series * (1 + pct)

scenarios = {"lockdown_80": -0.80, "drop_50": -0.50, "drop_30": -0.30}

rows = []
for name, pct in scenarios.items():
    act = shock(merged["actual_demand_hyb"], pct)  # or _mcr; just pick a baseline
    m1 = metrics(act, merged["forecast_demand_mcr"], merged["p10_mcr"], merged["p90_mcr"])
    m2 = metrics(act, merged["forecast_demand_hyb"], merged["p10_hyb"], merged["p90_hyb"])
    rows += [
        {"Scenario":name, "Model":"MCR",   "RMSE":m1[0], "MAE":m1[1], "Coverage_Prob":m1[2], "Avg_Interval_Width":m1[3], "Winkler_Score":m1[4]},
        {"Scenario":name, "Model":"MCR+ML","RMSE":m2[0], "MAE":m2[1], "Coverage_Prob":m2[2], "Avg_Interval_Width":m2[3], "Winkler_Score":m2[4]},
    ]

scen_table = pd.DataFrame(rows)
print(scen_table)


      Scenario   Model       RMSE        MAE  Coverage_Prob  \
0  lockdown_80     MCR  33.537353  20.612957       0.864368   
1  lockdown_80  MCR+ML  26.500831  14.964008       0.625287   
2      drop_50     MCR  22.772455  13.235155       0.898851   
3      drop_50  MCR+ML  19.478712  11.481447       0.666667   
4      drop_30     MCR  18.477120  10.542574       0.898851   
5      drop_30  MCR+ML  19.776329  11.201736       0.689655   

   Avg_Interval_Width  Winkler_Score  
0           47.546009      53.600164  
1           30.423679      81.594001  
2           47.546009      52.198311  
3           30.423679      60.093118  
4           47.546009      52.164308  
5           30.423679      65.046361  


### Error decomposition (bias/variance)

In [6]:
def bias_variance(err):
    return err.mean(), err.var(ddof=1)

bias_mcr, var_mcr = bias_variance(e_mcr)
bias_hyb, var_hyb = bias_variance(e_hyb)
print({"MCR":{"bias":bias_mcr,"var":var_mcr},
       "MCR+ML":{"bias":bias_hyb,"var":var_hyb}})


{'MCR': {'bias': 0.016554498358085437, 'var': 560.2893734923036}, 'MCR+ML': {'bias': 8.176713568315728, 'var': 683.7390631261267}}
