In [None]:
import pandas as pd
import numpy as np
from plprob.utils import (split_actuals_hist_future, split_forecasts_hist_future)
from plprob.predictor import ConPlPredictor
import matplotlib.pyplot as plt
from math import ceil

### Load data

In [None]:
# Seed handed to the predictor so that scenario generation is repeatable.
seed = 1234

# Both "actual" files share the same layout: a 'Time' column that is parsed as
# datetimes and used as the index.
actual_csv_kwargs = dict(parse_dates=['Time'], index_col='Time')
midatl_actual_df = pd.read_csv('../data/PJM/MIDATL_ACT.csv', **actual_csv_kwargs)
ps_actual_df = pd.read_csv('../data/PJM/PS_ACT.csv', **actual_csv_kwargs)

# Forecasts keep a default integer index; both timestamp columns are parsed.
midatl_forecast_df = pd.read_csv(
    '../data/PJM/MIDATL_FCST.csv',
    parse_dates=['Issue_time', 'Forecast_time'],
)

In [None]:
# Per-day results, all keyed by the simulated day's 'YYYY-MM-DD' string.
cp_probs = dict()        # day -> cond_predictor.cp_prob
intraday_probs = dict()  # day -> cond_predictor.peak_hour_prob
daily_peaks = dict()     # day -> realised maximum of the 'PS' column that day
new_cp_days = []         # days after which the predictor's hist_cps differed

num_of_cps = 1
hist_cps = []

# Walk forward one day at a time: 122 consecutive days from 2023-06-01.
for day in pd.date_range(start='2023-06-01', periods=122, freq='D'):

    start_date = day.strftime('%Y-%m-%d')
    print(start_date)

    # The 24 hourly steps of the target day, localized to US/Eastern.
    start_time = pd.to_datetime(start_date).tz_localize('US/Eastern')
    timesteps = pd.date_range(start=start_time, periods=24, freq='h')

    # Separate historical and future data
    midatl_load_actual_hists, midatl_load_actual_futures = split_actuals_hist_future(
        midatl_actual_df, timesteps)
    midatl_load_forecast_hists, midatl_load_forecast_futures = split_forecasts_hist_future(
        midatl_forecast_df, timesteps)
    ps_load_actual_hists, ps_load_actual_futures = split_actuals_hist_future(
        ps_actual_df, timesteps)

    # Fit model and compute probability
    # NOTE(review): 0.05 and 1000 are passed straight through to plprob; their
    # meaning (presumably a fit parameter and a scenario count) should be
    # confirmed against the ConPlPredictor documentation.
    cond_predictor = ConPlPredictor(
        ps_load_actual_hists,
        midatl_load_actual_hists,
        midatl_load_forecast_hists,
        start_time,
        num_of_cps,
        hist_cps,
        forecast_lead_time_in_hour=1,
        seed=seed,
    )
    cond_predictor.fit_model(0.05)
    cond_predictor.create_scenario(1000, midatl_load_forecast_futures)
    cond_predictor.fit_generate_cond_scenario(0.05, 1000)

    cond_predictor.compute_cp_probs()

    # Update historical CPs
    today_peak = ps_load_actual_futures.loc[timesteps, 'PS'].max()
    cond_predictor.update_cp(today_peak)
    # NOTE(review): if update_cp mutates the list passed to the constructor in
    # place, both sides of this comparison are the same object and the branch
    # never fires -- confirm that the predictor keeps its own copy.
    if hist_cps != cond_predictor.hist_cps:
        new_cp_days.append(start_date)
        hist_cps = cond_predictor.hist_cps

    # Save CP and hour probability
    day_cp_prob = cond_predictor.cp_prob
    cp_probs[start_date] = day_cp_prob
    # Back-fill any missing key in 0..num_of_cps with the value stored under
    # the previous key (this writes into the predictor's own cp_prob object).
    # NOTE(review): a missing key 0 would look up key -1 -- confirm key 0 is
    # always present.
    for n in range(num_of_cps + 1):
        if n not in day_cp_prob:
            day_cp_prob[n] = day_cp_prob[n - 1]

    daily_peaks[start_date] = today_peak
    intraday_probs[start_date] = cond_predictor.peak_hour_prob
    

### Collect results

In [None]:
# Hourly step series of the realised daily peak: one row per simulated day
# (timestamped at that day's midnight), forward-filled to hourly resolution.
daily_peak_df = (
    pd.DataFrame({
        'Time': pd.to_datetime(list(daily_peaks.keys())),
        # Materialise the dict view so the column is built from a plain list.
        'PS': list(daily_peaks.values()),
    })
    .set_index('Time')
    # Lowercase 'h' matches the alias used for `timesteps`; the uppercase 'H'
    # alias is deprecated in recent pandas releases.
    .resample('h')
    .ffill()
)

In [None]:
# One column per CP index n in 0..num_of_cps-1, one row per simulated day,
# forward-filled to hourly resolution.
cp_prob_dict = {'Time': pd.to_datetime(list(cp_probs.keys()))}

for n in range(num_of_cps):
    # cp_probs preserves insertion order, so the values line up with 'Time'.
    cp_prob_dict[n] = [probs[n] for probs in cp_probs.values()]

# Lowercase 'h' matches the alias used for `timesteps`; the uppercase 'H'
# alias is deprecated in recent pandas releases.
cp_prob_df = pd.DataFrame(cp_prob_dict).set_index('Time').resample('h').ffill()

In [None]:
# Overview figure: realised daily peak (left axis), markers on the days where
# the historical CP list changed, and the column-0 CP probability (right axis).
fig = plt.figure(figsize=(15, 5))

ax = fig.add_subplot(111)
ax.plot(daily_peak_df, label='Daily peak')
ax.set_ylabel('Daily peak (PS)')

# Place each marker at noon of its day so it sits in the middle of that day's
# step. 'h' replaces the deprecated uppercase 'H' unit alias.
new_cp_hours = pd.to_datetime(new_cp_days) + pd.Timedelta(12, unit='h')
# Read the peaks straight from daily_peaks instead of the resampled frame: the
# hourly index stops at 00:00 of the last simulated day, so a noon lookup
# would raise KeyError if that last day were a new-CP day.
new_cp_peaks = [daily_peaks[cp_day] for cp_day in new_cp_days]
ax.scatter(new_cp_hours, new_cp_peaks, color='green', label='New CP')

ax2 = ax.twinx()
ax2.plot(cp_prob_df[0], color='red', linestyle='dashed', label='Prob of being new CP')
ax2.set_ylabel('Probability')

# Merge the handles of both axes into a single legend.
h1, l1 = ax.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
ax.legend(h1+h2, l1+l2, loc=1, prop={'size': 10});

In [None]:
# One panel per new-CP day, laid out on a grid two panels wide: bars show the
# predicted peak-hour probabilities, the red line shows the realised load.
n_rows = ceil(len(new_cp_days) / 2)
# figsize is (width, height): the width is fixed for the two columns and the
# height grows with the number of rows. max(..., 1) keeps the figure valid
# when there are no new-CP days.
fig = plt.figure(figsize=(12, 4 * max(n_rows, 1)))

for i, day in enumerate(new_cp_days):

    # The 24 hourly steps of this day in US/Eastern ('h' replaces the
    # deprecated uppercase 'H' alias).
    timesteps = pd.date_range(start=day, periods=24, freq='h', tz='US/Eastern')

    a_df = ps_actual_df.loc[timesteps]

    ax = fig.add_subplot(n_rows, 2, i + 1)

    probs = intraday_probs[day]
    # Hours absent from the predictor's output are drawn with probability 0.
    # NOTE(review): assumes peak_hour_prob is keyed by the same tz-aware
    # hourly timestamps -- confirm.
    p = [probs[t] if t in probs else 0 for t in timesteps]

    ax.bar(range(24), p, width=0.5, alpha=0.5,
           color='blue', edgecolor='black', label='Daily peak prob')
    ax.set_xlabel('Hour of day')
    ax.set_ylabel('Probability')

    ax2 = ax.twinx()
    ax2.plot(range(24), a_df,
             '-r', label='Load')
    # Mark the hour at which the realised load reached its maximum that day.
    ax2.scatter(a_df.idxmax().dt.hour, a_df.max(), c='green', marker='o', s=200, label='CP')
    ax2.set_ylabel('Load')

    # Merge the handles of both axes into a single legend.
    h1, l1 = ax.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax.legend(h1+h2, l1+l2, loc=2)

    ax.set_title(day, fontsize=10)

if new_cp_days:
    # Keep the twin-axis labels of neighbouring panels from overlapping.
    fig.tight_layout()
