In [1]:

import time

import numpy as np
import pandas as pd
from prophet import Prophet                      # package is `prophet`; `prophe` does not exist
from sklearn.preprocessing import MinMaxScaler   # used only for lag features


In [35]:
# Load the raw readings, keep only the columns used below, and rename the
# CSV's `min_t` timestamp column to `date_time`.
df = (pd.read_csv("data.csv")
        [['min_t', 'temp', 'year', 'total_power']]
        .rename(columns={'min_t': 'date_time'}))

# Index the frame by the parsed timestamp; it comes back as the `ds` column
# that Prophet expects when the index is reset further down.
df['ds'] = pd.to_datetime(df['date_time'])
df = df.set_index('ds')

# Lag features are built by shifting ROWS, not timestamps.
# NOTE(review): this presumably relies on the CSV being in chronological order
# with 96 evenly spaced rows per day (15-minute data) — confirm.
LAG_START = 96   # smallest shift, in rows; keeps same-day values out of the features
N_LAGS    = 24   # number of consecutive lags taken for each variable

for k in range(1, N_LAGS + 1):
    shift = LAG_START + k - 1                    # 96 … 119 rows back
    # Naming is kept as in the original: temperature lags carry the real
    # shift (temp_T_96 … temp_T_119) while power lags are numbered 1 … 24
    # (total_power_T_1 is the value 96 rows back, not 1 row back).
    df[f'temp_T_{shift}']    = df['temp'].shift(shift)
    df[f'total_power_T_{k}'] = df['total_power'].shift(shift)

train = df[df['year'] == 2021].drop(columns='year').copy()
test  = df[df['year'] == 2022].drop(columns='year').copy()

# Every remaining column except the raw timestamp and the target is a regressor.
exog = [c for c in train.columns if c not in ('date_time', 'total_power')]

# shift() leaves NaN in the first rows of the file (and after any NaN in the
# source columns). Prophet raises "Found NaN in column ..." for NaN regressors,
# so drop such rows up front. This is a no-op when the regressors are complete.
n_train, n_test = len(train), len(test)
train = train.dropna(subset=exog)
test  = test.dropna(subset=exog)
if len(train) != n_train or len(test) != n_test:
    print(f"Dropped {n_train - len(train)} train / {n_test - len(test)} test "
          f"rows with missing regressors")

train_p = train.reset_index().rename(columns={'total_power': 'y'})
test_p  = test .reset_index()                 # keep total_power for metrics

In [36]:
def winkler_score(lower, upper, y_true, ci_pct):
    """Return the mean Winkler interval score of a set of prediction intervals.

    For each observation the score is the interval width, plus a penalty of
    ``2 / alpha`` times the distance by which the observation falls outside
    the interval, where ``alpha = 1 - ci_pct / 100``. Lower is better.

    Parameters
    ----------
    lower, upper : array-like
        Lower and upper interval bounds. Any array-like (pandas Series, NumPy
        array, list) is accepted; values are matched by POSITION, never by
        index label.
    y_true : array-like
        Observed values, same shape as ``lower`` and ``upper``.
    ci_pct : float
        Nominal coverage of the interval in percent, strictly between 0 and 100.

    Returns
    -------
    float
        Mean score over all observations (NaN for empty input).

    Raises
    ------
    ValueError
        If ``ci_pct`` is not inside (0, 100) or the inputs differ in shape.
    """
    if not 0 < ci_pct < 100:
        raise ValueError(f"ci_pct must be strictly between 0 and 100, got {ci_pct!r}")
    alpha = 1 - ci_pct / 100

    # Work on plain float arrays so that no pandas index alignment can occur.
    lower = np.asarray(lower, dtype=float)
    upper = np.asarray(upper, dtype=float)
    y     = np.asarray(y_true, dtype=float)
    if not (lower.shape == upper.shape == y.shape):
        raise ValueError(
            f"lower, upper and y_true must have the same shape, got "
            f"{lower.shape}, {upper.shape}, {y.shape}"
        )

    # Distance outside the interval on each side; zero when inside. A NaN
    # observation fails both comparisons and is therefore scored by width
    # only, exactly as before.
    below = np.where(y < lower, lower - y, 0.0)
    above = np.where(y > upper, y - upper, 0.0)

    return (upper - lower + (2 / alpha) * (below + above)).mean()

In [10]:
# Nominal interval widths (in percent) to evaluate, plus accumulators for the
# per-level metrics and the summed fitting time.
CIS   = [85, 90, 95, 99]
rows  = []
total_time = 0.0

for ci in CIS:
    # A fresh model per level, with every exogenous column registered as an
    # additional regressor before fitting.
    m = Prophet(interval_width=ci / 100)
    for regressor in exog:
        m.add_regressor(regressor)

    # Time the fit only; prediction is excluded from train_time_s.
    fit_start = time.perf_counter()
    m.fit(train_p[['ds', 'y'] + exog])
    t_train = time.perf_counter() - fit_start
    total_time += t_train

    # Re-index the forecast by timestamp so it lines up with `test`, which is
    # indexed by `ds` as well.
    fcst = m.predict(test_p[['ds'] + exog])
    fcst.index = fcst['ds']

    actual = test['total_power']

    # Empirical coverage: share of observations inside the interval, in percent.
    inside = (fcst['yhat_lower'] <= actual) & (actual <= fcst['yhat_upper'])
    cover  = inside.mean() * 100

    wink = winkler_score(fcst['yhat_lower'], fcst['yhat_upper'], actual, ci)

    rows.append({
        'CI': ci,
        'coverage': cover,
        'winkler': wink,
        'train_time_s': t_train,
    })

22:43:35 - cmdstanpy - INFO - Chain [1] start processing
22:44:00 - cmdstanpy - INFO - Chain [1] done processing
22:44:04 - cmdstanpy - INFO - Chain [1] start processing
22:44:29 - cmdstanpy - INFO - Chain [1] done processing
22:44:33 - cmdstanpy - INFO - Chain [1] start processing
22:44:58 - cmdstanpy - INFO - Chain [1] done processing
22:45:01 - cmdstanpy - INFO - Chain [1] start processing
22:45:27 - cmdstanpy - INFO - Chain [1] done processing


In [34]:
# One row per confidence level, printed as plain text without the index,
# followed by a blank line and the summed fitting time.
res = pd.DataFrame(rows)
print(
    res.to_string(index=False),
    f"\nTotal Prophet training time: {total_time:.1f} seconds",
    sep="\n",
)

  CI  coverage   winkler  train_time_s
85.0 57.318203 18.016809     27.498494
90.0 74.687355 18.969706     27.860862
95.0 93.075498 22.541703     27.685296
99.0 98.564150 37.942000     27.520018

Total Prophet training time: 110.6 seconds
