# Hybrid‑CP: Day‑Hour Fusion (cp_hybrid_ieso.ipynb)

In [1]:
import os, pandas as pd, numpy as np
from hybrid_cp.core import preprocess as prep
from hybrid_cp.core.day_models import build_models, fit_and_predict, confusion_static_0p5, confusion_dynamic_top4
from hybrid_cp.core.hour_ensemble import horizon_probs, inverse_brier_weights, weighted_fusion
from hybrid_cp.core.metrics import confusion_from_probs

# Paths (1..6 day ahead)
paths = {i: f"IESO-Ontario Demand 2022-2025 {i} day ahead.csv" for i in range(1, 7)}

# 0) Daily aggregation + labels
daily = prep.load_daily_peaks(paths)
daily = prep.label_monthly_top4_daily(daily)
model_df, feature_cols = prep.build_feature_matrix(daily, selected_feature='TESLA')
train_df, test_df = prep.split_train_test(model_df, separation_year=2024)

# Use top 10% by actual_peak for training (as in cp_evaluation_2)
X_train, y_train = prep.select_top_percent_training(model_df, train_df, feature_cols,
                                                    top_pct=0.10, top_by='actual_peak', scope='overall')
X_test = test_df[feature_cols]
y_test = test_df['is_CP'].astype(int).values

print("Shapes:", X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print("Feature columns:", feature_cols[:9], "... total", len(feature_cols))


Shapes: (123, 6) (123,) (497, 6) (497,)
Feature columns: ['TESLA_pred_peak_1d', 'TESLA_pred_peak_2d', 'TESLA_pred_peak_3d', 'TESLA_pred_peak_4d', 'TESLA_pred_peak_5d', 'TESLA_pred_peak_6d'] ... total 6


In [2]:
# 1) Day-level models (RF 0.5; ET dynamic Top-4 per month)
models = build_models(random_state=42)
fitted, prob_test = fit_and_predict(models, X_train, y_train, X_test)

# Confusions
rf_conf = confusion_static_0p5(test_df, prob_test['Random Forest'], y_col='is_CP')
et_conf = confusion_dynamic_top4(test_df, prob_test['Extra Trees'], y_col='is_CP')

print("Random Forest @0.5 (TP,FP,TN,FN) =", rf_conf)
print("Extra Trees @dynamic Top-4      =", et_conf)


Random Forest @0.5 (TP,FP,TN,FN) = (14, 2, 104, 2)
Extra Trees @dynamic Top-4      = (15, 1, 105, 1)


In [3]:
# 2) Hour-level Monte Carlo per-horizon, per-source (ECA/RTO/TESLA) -> weights -> fusion
sources = ['ECA', 'RTO', 'TESLA']
results = {}
for src in sources:
    prob_by_h = horizon_probs(paths, source=src, n_sims=2000)

    w_hourly = inverse_brier_weights(prob_by_h, labels_df=daily[['timestamp', 'is_CP']],
                                     kind='p_4cp_hourly_mlh')
    fused_hourly = weighted_fusion(prob_by_h, w_hourly, kind='p_4cp_hourly_mlh')

    w_daily = inverse_brier_weights(prob_by_h, labels_df=daily[['timestamp', 'is_CP']],
                                    kind='p_4cp_daily_monthlyTop4')
    fused_daily = weighted_fusion(prob_by_h, w_daily, kind='p_4cp_daily_monthlyTop4')

    fused = pd.concat([fused_hourly, fused_daily], axis=1)
    results[src] = dict(weights_hourly=w_hourly, weights_daily=w_daily, fused=fused)

# Show learned weights
for src, obj in results.items():
    w_h = obj.get('weights_hourly', obj.get('weights'))
    w_d = obj.get('weights_daily')
    print(f"{src}  weights_hourly(MLH):", w_h)
    print(f"{src}  weights_daily(Top-4 day):", w_d)

  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)
  x = asanyarray(arr - arrmean)


ECA  weights_hourly(MLH): {1: 0.17338001832674832, 2: 0.16583235792883833, 3: 0.16537212539297017, 4: 0.16537212539297017, 5: 0.16537212539297017, 6: 0.16467124756550283}
ECA  weights_daily(Top-4 day): {1: 0.18067569944039322, 2: 0.16567754620944536, 3: 0.16392605298080046, 4: 0.16392605298080046, 5: 0.16392605298080046, 6: 0.16186859540776008}
RTO  weights_hourly(MLH): {1: 0.17480475715730143, 2: 0.17324581150965104, 3: 0.1678720528918855, 4: 0.1678720528918855, 5: 0.1678720528918855, 6: 0.14833327265739105}
RTO  weights_daily(Top-4 day): {1: 0.1779223633080305, 2: 0.1753095117352931, 3: 0.16889708080796148, 4: 0.16889708080796148, 5: 0.16889708080796148, 6: 0.14007688253279207}
TESLA  weights_hourly(MLH): {1: 0.17480475715730143, 2: 0.17324581150965104, 3: 0.1678720528918855, 4: 0.1678720528918855, 5: 0.1678720528918855, 6: 0.14833327265739105}
TESLA  weights_daily(Top-4 day): {1: 0.1779223633080305, 2: 0.1753095117352931, 3: 0.16889708080796148, 4: 0.16889708080796148, 5: 0.16889708

In [4]:
# 3) Fusion with day-level gate and multiplication (p_day* × p_mc)

p_day_star = pd.Series(np.maximum(prob_test['Random Forest'], prob_test['Extra Trees']),
                       index=test_df.index)

FUSION_METHOD = "soft_or"


def align_to_dates(fused_df):
    s = fused_df.copy()
    s = s.reset_index().rename(columns={'date': 'timestamp'})
    s['timestamp'] = pd.to_datetime(s['timestamp'])
    return s.set_index('timestamp').sort_index()


aligned = {src: align_to_dates(obj['fused']) for src, obj in results.items()}

final = {}
idx_dates = pd.to_datetime(test_df['timestamp']).dt.floor('D')

for kind in ['p_4cp_hourly_mlh', 'p_4cp_daily_monthlyTop4']:
    src_stack = []
    for src, df in aligned.items():
        src_stack.append(df[kind].rename(src))
    P_mc = pd.concat(src_stack, axis=1).mean(axis=1)

    P_mc_aligned = P_mc.reindex(idx_dates.values)
    P_day = np.maximum(prob_test['Random Forest'], prob_test['Extra Trees'])

    if FUSION_METHOD == "soft_or":
        p_final = 1.0 - (1.0 - P_day) * (1.0 - P_mc_aligned.values)
    elif FUSION_METHOD == "max":
        p_final = np.maximum(P_day, P_mc_aligned.values)
    else:  # 'product'
        p_final = P_day * P_mc_aligned.values

    final[kind] = pd.Series(p_final, index=idx_dates)

In [5]:
# 4) Evaluate (if needed) + Save artifacts
import os
import pandas as pd
from hybrid_cp.core.metrics import confusion_from_probs

if 'final' not in globals():
    raise RuntimeError(
        "`final` does not exist. Please run the fusion unit first (the section where final[...] is generated).")
if 'daily' not in globals():
    raise RuntimeError("`daily` does not exist. Please run the data aggregation and marking unit first.")
if 'results' not in globals():
    raise RuntimeError("`results` does not exist. Please run the hourly probability and weighted fusion unit first.")
if 'prob_test' not in globals() or 'rf_conf' not in globals() or 'et_conf' not in globals():
    raise RuntimeError(
        "The output of the daily model is missing. Please run the daily model training/inference and evaluation unit first.")

if 'hourly_cm' not in globals():
    hourly_cm = confusion_from_probs(final['p_4cp_hourly_mlh'], daily[['timestamp', 'is_CP']])
if 'daily_cm' not in globals():
    daily_cm = confusion_from_probs(final['p_4cp_daily_monthlyTop4'], daily[['timestamp', 'is_CP']])

# Output directory
out_dir = "hybrid_cp/reports"
os.makedirs(out_dir, exist_ok=True)

# 1) Save the test period probability of the daily model
pd.DataFrame({
    'timestamp': pd.to_datetime(test_df['timestamp']).values,
    'p_rf': prob_test['Random Forest'],
    'p_et': prob_test['Extra Trees']
}).to_csv(os.path.join(out_dir, "day_probs_test.csv"), index=False)

# 2) Save the hour/day probability after each source is fused
for src, obj in results.items():
    obj['fused'].to_csv(os.path.join(out_dir, f"mc_fused_{src}.csv"))

# 3) Save confusion matrix summary
pd.DataFrame({
    'metric': [
        'RF@0.5_TP', 'RF@0.5_FP', 'RF@0.5_TN', 'RF@0.5_FN',
        'ET@dyn_TP', 'ET@dyn_FP', 'ET@dyn_TN', 'ET@dyn_FN',
        'HYB_hourly_TP', 'HYB_hourly_FP', 'HYB_hourly_TN', 'HYB_hourly_FN',
        'HYB_daily_TP', 'HYB_daily_FP', 'HYB_daily_TN', 'HYB_daily_FN'
    ],
    'value': [
        rf_conf[0], rf_conf[1], rf_conf[2], rf_conf[3],
        et_conf[0], et_conf[1], et_conf[2], et_conf[3],
        hourly_cm[0], hourly_cm[1], hourly_cm[2], hourly_cm[3],
        daily_cm[0], daily_cm[1], daily_cm[2], daily_cm[3]
    ]
}).to_csv(os.path.join(out_dir, "confusions.csv"), index=False)

print("Saved reports to", out_dir)
print("hourly_cm:", hourly_cm)
print("daily_cm :", daily_cm)

Saved reports to hybrid_cp/reports
hourly_cm: (16, 18, 88, 0)
daily_cm : (16, 23, 83, 0)


In [6]:
def confusion_from_probs_with_thr(prob_series, labels_df, thr=0.4):
    df = prob_series.rename('p').rename_axis('date').reset_index()
    lab = labels_df[['timestamp', 'is_CP']].copy()
    lab['date'] = pd.to_datetime(lab['timestamp']).dt.floor('D')

    d = pd.merge(df, lab[['date', 'is_CP']], on='date', how='left').dropna(subset=['p', 'is_CP'])
    d = d[d['date'].dt.month.isin([6, 7, 8, 9])].copy()
    y = d['is_CP'].astype(int).values
    pred = (d['p'].values >= float(thr)).astype(int)
    TP = int(((y == 1) & (pred == 1)).sum());
    FP = int(((y == 0) & (pred == 1)).sum())
    TN = int(((y == 0) & (pred == 0)).sum());
    FN = int(((y == 1) & (pred == 0)).sum())
    return TP, FP, TN, FN


hourly_cm = confusion_from_probs_with_thr(final['p_4cp_hourly_mlh'], daily[['timestamp', 'is_CP']], thr=0.4)
daily_cm = confusion_from_probs_with_thr(final['p_4cp_daily_monthlyTop4'], daily[['timestamp', 'is_CP']], thr=0.4)
print("Hybrid hourly @0.4:", hourly_cm)
print("Hybrid daily  @0.4:", daily_cm)


def sweep(prob_series, labels_df, grid=np.arange(0.30, 0.70, 0.02)):
    out = []
    df = prob_series.rename('p').rename_axis('date').reset_index()
    lab = labels_df[['timestamp', 'is_CP']].copy()
    lab['date'] = pd.to_datetime(lab['timestamp']).dt.floor('D')
    d = pd.merge(df, lab[['date', 'is_CP']], on='date', how='left').dropna(subset=['p', 'is_CP'])
    d = d[d['date'].dt.month.isin([6, 7, 8, 9])].copy()
    y = d['is_CP'].astype(int).values
    for th in grid:
        pred = (d['p'].values >= th).astype(int)
        TP = ((y == 1) & (pred == 1)).sum();
        FP = ((y == 0) & (pred == 1)).sum()
        TN = ((y == 0) & (pred == 0)).sum();
        FN = ((y == 1) & (pred == 0)).sum()
        P = TP / (TP + FP) if (TP + FP) else 0;
        R = TP / (TP + FN) if (TP + FN) else 0
        F1 = 2 * P * R / (P + R) if (P + R) else 0
        out.append((th, TP, FP, TN, FN, P, R, F1))
    return pd.DataFrame(out, columns=['thr', 'TP', 'FP', 'TN', 'FN', 'P', 'R', 'F1'])


s_hourly = sweep(final['p_4cp_hourly_mlh'], daily[['timestamp', 'is_CP']])
s_daily = sweep(final['p_4cp_daily_monthlyTop4'], daily[['timestamp', 'is_CP']])
display(s_hourly.sort_values('F1', ascending=False).head(10))
display(s_daily.sort_values('F1', ascending=False).head(10))


Hybrid hourly @0.4: (16, 24, 82, 0)
Hybrid daily  @0.4: (16, 31, 75, 0)


Unnamed: 0,thr,TP,FP,TN,FN,P,R,F1
19,0.68,14,10,96,2,0.583333,0.875,0.7
18,0.66,14,12,94,2,0.538462,0.875,0.666667
12,0.54,16,16,90,0,0.5,1.0,0.666667
11,0.52,16,17,89,0,0.484848,1.0,0.653061
10,0.5,16,18,88,0,0.470588,1.0,0.64
13,0.56,15,16,90,1,0.483871,0.9375,0.638298
14,0.58,15,16,90,1,0.483871,0.9375,0.638298
15,0.6,15,16,90,1,0.483871,0.9375,0.638298
17,0.64,14,14,92,2,0.5,0.875,0.636364
16,0.62,14,15,91,2,0.482759,0.875,0.622222


Unnamed: 0,thr,TP,FP,TN,FN,P,R,F1
18,0.66,15,15,91,1,0.5,0.9375,0.652174
19,0.68,15,15,91,1,0.5,0.9375,0.652174
17,0.64,16,18,88,0,0.470588,1.0,0.64
15,0.6,16,20,86,0,0.444444,1.0,0.615385
16,0.62,16,20,86,0,0.444444,1.0,0.615385
14,0.58,16,20,86,0,0.444444,1.0,0.615385
13,0.56,16,21,85,0,0.432432,1.0,0.603774
11,0.52,16,22,84,0,0.421053,1.0,0.592593
12,0.54,16,22,84,0,0.421053,1.0,0.592593
10,0.5,16,23,83,0,0.410256,1.0,0.581818


In [7]:
COLOR_BANDS = [
    (0.00, 'green'),
    (0.35, 'yellow'),
    (0.55, 'orange'),
    (0.75, 'red'),
]
TRIGGER_COLORS = {'orange', 'red'}


def color_grade(p: float) -> str:
    for thr, name in reversed(COLOR_BANDS):
        if p >= thr:
            return name
    return 'green'


p_daily = final['p_4cp_daily_monthlyTop4']
p_hourly = final['p_4cp_hourly_mlh']

score_df = pd.DataFrame({
    'p_daily': p_daily,
    'p_hourly': p_hourly,
}).dropna(how='all').sort_index()

score_df['p_risk'] = score_df[['p_daily', 'p_hourly']].max(axis=1)

score_df['color'] = score_df['p_risk'].apply(color_grade)

score_df['pre_trigger'] = score_df['color'].isin(TRIGGER_COLORS).astype(int)

# score_df = score_df[score_df.index.month.isin([6,7,8,9])]
score_df

Unnamed: 0_level_0,p_daily,p_hourly,p_risk,color,pre_trigger
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01,0.130000,0.130000,0.130000,green,0
2024-01-02,0.316667,0.316667,0.316667,green,0
2024-01-03,0.123333,0.123333,0.123333,green,0
2024-01-04,0.133750,0.133750,0.133750,green,0
2024-01-05,0.120000,0.120000,0.120000,green,0
...,...,...,...,...,...
2025-05-11,0.130000,0.130000,0.130000,green,0
2025-05-12,0.130000,0.130000,0.130000,green,0
2025-05-13,0.130000,0.130000,0.130000,green,0
2025-05-14,0.130000,0.130000,0.130000,green,0


In [8]:
BUDGET_PER_MONTH = 4
ALLOW_TOPUP = True
COOLDOWN_DAYS = 0


def apply_monthly_budget(df: pd.DataFrame,
                         budget_per_month: int = 6,
                         allow_topup: bool = True,
                         cooldown_days: int = 0) -> pd.DataFrame:
    d = df.copy()

    if np.issubdtype(d.index.dtype, np.datetime64):
        d = d.rename_axis('date').reset_index()
    elif 'date' in d.columns:
        pass
    elif 'timestamp' in d.columns:
        d['date'] = pd.to_datetime(d['timestamp']).dt.floor('D')
    else:
        tmp = d.reset_index()
        for cand in ['date', 'index', 'level_0', 'level_1']:
            if cand in tmp.columns:
                try:
                    tmp['date'] = pd.to_datetime(tmp[cand]).dt.floor('D')
                    d = tmp
                    break
                except Exception:
                    continue
        if 'date' not in d.columns:
            raise ValueError(
                "apply_monthly_budget: The date cannot be inferred from the index or column. Please set the index to DatetimeIndex first.")

    d['date'] = pd.to_datetime(d['date']).dt.floor('D')
    d['year'] = d['date'].dt.year
    d['month'] = d['date'].dt.month

    out = []
    for (y, m), g in d.groupby(['year', 'month'], sort=False):
        g = g.sort_values(['pre_trigger', 'p_risk', 'p_daily', 'p_hourly'],
                          ascending=[False, False, False, False]).reset_index(drop=True)
        selected = np.zeros(len(g), dtype=bool)
        used = 0

        for i, row in g.iterrows():
            if row['pre_trigger'] == 0:
                break
            if cooldown_days > 0 and selected.any():
                prev_dates = g.loc[selected, 'date']
                if any(abs((row['date'] - pd.to_datetime(d0)).days) <= cooldown_days for d0 in prev_dates):
                    continue
            selected[i] = True
            used += 1
            if used >= budget_per_month:
                break

        if allow_topup and used < budget_per_month:
            for i, row in g.iterrows():
                if selected[i] or row['color'] != 'yellow':
                    continue
                if cooldown_days > 0 and selected.any():
                    prev_dates = g.loc[selected, 'date']
                    if any(abs((row['date'] - pd.to_datetime(d0)).days) <= cooldown_days for d0 in prev_dates):
                        continue
                selected[i] = True
                used += 1
                if used >= budget_per_month:
                    break

        g['rank_in_month'] = np.arange(1, len(g) + 1)
        g['trigger'] = selected.astype(int)
        g['month_budget_used'] = used
        g['month_budget'] = budget_per_month
        out.append(g)

    res = (pd.concat(out, ignore_index=True)
           .set_index('date')
           .sort_index())
    return res


plan_df = apply_monthly_budget(score_df)

summer_plan = plan_df[plan_df.index.month.isin([6, 7, 8, 9])]

summer_plan[['p_risk', 'p_daily', 'p_hourly', 'color', 'pre_trigger', 'trigger', 'rank_in_month', 'month_budget_used',
             'month_budget']]


Unnamed: 0_level_0,p_risk,p_daily,p_hourly,color,pre_trigger,trigger,rank_in_month,month_budget_used,month_budget
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-06-01,0.283333,0.283333,0.283333,green,0,0,16,6,6
2024-06-02,0.283333,0.283333,0.283333,green,0,0,17,6,6
2024-06-03,0.129598,0.129598,0.124965,green,0,0,27,6,6
2024-06-04,0.042954,0.042954,0.030507,green,0,0,30,6,6
2024-06-05,0.994300,0.994300,0.963769,red,1,1,5,6,6
...,...,...,...,...,...,...,...,...,...
2024-09-26,0.142494,0.142494,0.133455,green,0,0,21,6,6
2024-09-27,0.137619,0.137619,0.131445,green,0,0,27,6,6
2024-09-28,0.133029,0.133029,0.130969,green,0,0,29,6,6
2024-09-29,0.133924,0.133924,0.131533,green,0,0,28,6,6


In [9]:
out_dir = "hybrid_cp/reports"
os.makedirs(out_dir, exist_ok=True)

plan_df.to_csv(os.path.join(out_dir, "plan_with_colors_and_budget_full.csv"))
summer_plan.to_csv(os.path.join(out_dir, "plan_with_colors_and_budget_summer.csv"))

print("Saved:")
print(" -", os.path.join(out_dir, "plan_with_colors_and_budget_full.csv"))
print(" -", os.path.join(out_dir, "plan_with_colors_and_budget_summer.csv"))

Saved:
 - hybrid_cp/reports\plan_with_colors_and_budget_full.csv
 - hybrid_cp/reports\plan_with_colors_and_budget_summer.csv
