In [None]:
import pandas as pd
import numpy as np
from plprob.utils import (split_actuals_hist_future, split_forecasts_hist_future)
from plprob.predictor import PlPredictor
import matplotlib.pyplot as plt
from math import ceil
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay

### Load data

In [None]:
# Actual load: the 'Time' column is parsed as datetimes and used as the index.
rto_actual_df = pd.read_csv(
    '../data/NYISO/rto_act.csv',
    index_col='Time',
    parse_dates=['Time'],
)

# Load forecasts: both timestamp columns are parsed as datetimes; the default
# integer index is kept.
rto_forecast_df = pd.read_csv(
    '../data/NYISO/rto_fcst.csv',
    parse_dates=['Issue_time', 'Forecast_time'],
)

### Remove weekends and holidays

In [None]:
# Business-day offset that skips weekends and US federal holidays.
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
business_days = pd.bdate_range(
    start='2011-01-01',
    end='2023-12-31',
    freq=us_bd,
    tz='US/Eastern',
)

# A row is kept when its timestamp, floored to the day, is one of the business days.
actual_on_business_day = rto_actual_df.index.floor('D').isin(business_days)
forecast_on_business_day = rto_forecast_df['Forecast_time'].dt.floor('D').isin(business_days)

rto_actual_df = rto_actual_df.loc[actual_on_business_day]
rto_forecast_df = rto_forecast_df[forecast_on_business_day]

### Run simulations from 2015 to 2022

In [None]:
# Simulation settings.
thres = 0.8      # a day is recorded as an alert only if its max forecast exceeds thres * hist_peak
num_of_cps = 1   # passed to PlPredictor; constant for every simulated year

# Highest actual value in the first column of rto_actual_df up to 2015-01-01.
# `.loc` takes the boolean mask and `.iloc[0]` selects the first column's maximum
# by position explicitly; the previous `.max()[0]` relied on pandas treating an
# integer key as a position on a label-indexed Series, which is deprecated.
hist_peak = rto_actual_df.loc[rto_actual_df.index <= '2015-01-01'].max().iloc[0]

# Maps 'YYYY-MM-DD' -> predictor.cp_prob for every day that passed the threshold test.
alerts = dict()

for y in range(2015, 2023):

    # CP values accumulated so far in this year; handed to each day's predictor.
    this_year_cps = []

    start = str(y) + '-07-01'
    end = str(y) + '-08-31'

    for day in pd.bdate_range(start=start, end=end, freq=us_bd, tz='US/Eastern'):

        start_date = day.strftime('%Y-%m-%d')

        print(start_date)

        # 24 hourly timestamps starting at local midnight of the simulated day.
        start_time = pd.to_datetime(start_date).tz_localize('US/Eastern')
        timesteps = pd.date_range(start=start_time, periods=24, freq='h')

        # Separate historical and future data
        load_actual_hists, load_actual_futures = split_actuals_hist_future(
            rto_actual_df, timesteps)
        load_forecast_hists, load_forecast_futures = split_forecasts_hist_future(
            rto_forecast_df, timesteps)

        # Fit model and compute probability
        predictor = PlPredictor(load_actual_hists, load_forecast_hists, start_time,
                                num_of_cps, this_year_cps, forecast_lead_time_in_hour=12)
        predictor.fit(0.05, 0.05)

        predictor.create_scenario(1000, load_forecast_futures)
        predictor.compute_cp_probs()

        # Update historical CPs with the realised peak of the simulated day.
        today_peak = load_actual_futures.loc[timesteps, 'NYISO'].max()
        predictor.update_cp(today_peak)
        # Always carry the predictor's CP list forward. The former `!=` guard was
        # equivalent to a plain assignment and is ambiguous for array-like values.
        this_year_cps = predictor.hist_cps

        # Record the day's CP probability only when its peak forecast is high enough.
        day_forecasts = load_forecast_futures[load_forecast_futures['Forecast_time'].isin(timesteps)]
        if day_forecasts['NYISO'].max() > thres * hist_peak:
            alerts[start_date] = predictor.cp_prob

    # Raise the historical peak to this year's largest CP value, if any.
    # `max(hist_peak, this_year_cps)` compared a scalar with the whole container,
    # which raises TypeError for a list; reduce the container to its maximum first.
    # NOTE(review): assumes predictor.hist_cps holds numeric load values — confirm.
    if np.size(this_year_cps) > 0:
        hist_peak = max(hist_peak, float(np.max(this_year_cps)))


### Select alerts whose probability > 0.4

In [None]:
# Dates whose first cp_prob entry is above the 0.4 cut-off.
alert_days = {date for date, cp_prob in alerts.items() if cp_prob[0] > 0.4}

In [None]:
# Date ('YYYY-MM-DD') of the highest 'NYISO' value in rto_actual_df for each year
# from 2015 to 2022. `idxmax` finds the peak timestamp directly instead of sorting
# the whole year, and returns the first occurrence when values tie.
true_cps = {
    rto_actual_df.loc[rto_actual_df.index.year == y, 'NYISO'].idxmax().strftime('%Y-%m-%d')
    for y in range(2015, 2023)
}

### Are all true CPs captured by the alerts?

In [None]:
# True when every true CP date is among the alert dates.
true_cps <= alert_days

In [None]:
# Alert days that are not a true CP day. A set difference is used because
# `len(alert_days) - len(true_cps)` under-counts false alerts (and can go
# negative) whenever a true CP day was not alerted.
false_alert_days = alert_days - true_cps

print(f"number of alerts is {len(alert_days)}")
print(f"number of false alerts is {len(false_alert_days)}")