## NYISO Load Prediction
- Objective: Utilize the NBEATS model to predict NYISO load data for 2023-12-31 using historical data from 2013-01-01 to 2023-12-30.
- Zones: `N.Y.C.`, `NORTH`, `CENTRL`
- Scaling methods: [definition](https://nixtlaverse.nixtla.io/neuralforecast/common.scalers.html)
     - [`identity`](https://nixtlaverse.nixtla.io/neuralforecast/common.scalers.html#std-statistics)
     - [`standard`](https://nixtlaverse.nixtla.io/neuralforecast/common.scalers.html#std-statistics)
     - [`minmax`](https://nixtlaverse.nixtla.io/neuralforecast/common.scalers.html#minmax-statistics)
     - [`robust`](https://nixtlaverse.nixtla.io/neuralforecast/common.scalers.html#robust-statistics)
     - `revin`: reversible instance normalization; learnable normalization parameters are added on top of the usual normalization technique.

In [1]:
import warnings
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from IPython.display import display, clear_output

from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, Select, CustomJS, DatetimeTickFormatter
from bokeh.layouts import column
from bokeh.palettes import Category20

from ipywidgets import widgets, HBox, VBox
from ipywidgets.embed import embed_minimal_html


# Silence all warnings for the rest of the notebook session
warnings.filterwarnings("ignore")


def _load_pickle(path):
    """Return the object stored in the pickle file at *path*.

    NOTE(review): unpickling can execute arbitrary code, so only load files
    that were produced by this project's own pipeline.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load the pre-computed artifacts from pickle files
train_dfs = _load_pickle('train_dfs.pkl')
test_dfs = _load_pickle('test_dfs.pkl')
all_prediction_dfs = _load_pickle('all_prediction_dfs.pkl')
zones = _load_pickle('zones.pkl')

# For every zone present in train_dfs, append its test frame to its train frame
filtered_data_reconstructed = {
    zone: pd.concat([train_dfs[zone], test_dfs[zone]])
    for zone in train_dfs
}

### PCA + NBEATS Method
1. Build Eigenspace: Use the first 10 principal components (PCs) from the training data to capture the main patterns.
2. Reconstruct Training Data: Rebuild the training data using these principal components.
3. Calculate Residuals: Measure the difference between the original and reconstructed training data to obtain residuals.
4. Apply to Test Data: Use the eigenspace from the training data to reconstruct and analyze the test data.

In [2]:
def plot_reconstructed_ts(df, zone):
    """Plot a zone's original series, its PCA reconstruction, and the residuals.

    Draws three stacked panels on one 14x10 figure, using the DataFrame index
    as the x-axis, and renders the figure with ``plt.show()``.

    Args:
        df: DataFrame with the columns ``'y'``, ``'reconstructed'`` and
            ``'residuals'``. It is sorted by index before plotting; the
            caller's frame is not modified.
        zone: Zone name, used only in the legend label of the first panel.

    Returns:
        None. The figure is shown as a side effect.
    """
    df = df.sort_index()
    plt.figure(figsize=(14, 10))

    # (column, line kwargs, panel title, y-axis label) for each stacked panel.
    # The reconstruction legend now names the first 10 PCs so that it agrees
    # with the panel title (it previously said "PC1").
    panels = (
        ('y',
         {'label': f'Original {zone} Data'},
         'Original Data',
         'Value'),
        ('reconstructed',
         {'label': 'Reconstructed Data using First 10 PCs', 'color': 'green'},
         'Reconstructed Data by First 10 PCs',
         'Value'),
        ('residuals',
         {'label': 'Residuals', 'color': 'blue'},
         'Residuals (Original - Reconstructed Data)',
         'Residual Value'),
    )

    for position, (column_name, line_kwargs, title, y_label) in enumerate(panels, start=1):
        plt.subplot(len(panels), 1, position)
        plt.plot(df.index, df[column_name], **line_kwargs)
        plt.title(title)
        plt.xlabel('Timestamp')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()


# Initial zone
initial_zone = zones[0]

# Output widget that hosts the matplotlib figure of the selected zone
output_reconstructed = widgets.Output()

# Render the initial zone inside the output widget.
# plot_reconstructed_ts() shows its figure itself and returns None, so the
# result is only passed to display() when there is something to display;
# display(None) would print a stray "None" next to the plot.
with output_reconstructed:
    clear_output(wait=True)
    fig_reconstructed = plot_reconstructed_ts(filtered_data_reconstructed[initial_zone], initial_zone)
    if fig_reconstructed is not None:
        display(fig_reconstructed)

# Define update function for dropdown
def update_plot(change):
    """Redraw the reconstruction plot for the newly selected zone.

    Args:
        change: Change notification passed by the widget's ``observe``
            mechanism; only ``change['new']`` (the selected zone) is read.
    """
    selected_zone = change['new']

    # Replace the previous figure inside the output widget
    with output_reconstructed:
        clear_output(wait=True)
        fig_reconstructed = plot_reconstructed_ts(filtered_data_reconstructed[selected_zone], selected_zone)
        # The plotting function shows the figure itself and returns None;
        # skip display() in that case so no stray "None" is printed.
        if fig_reconstructed is not None:
            display(fig_reconstructed)


# Zone selector, pre-set to the initial zone
dropdown = widgets.Dropdown(
    options=zones,
    value=initial_zone,
    style={'description_width': 'initial'},
    description='Select Zone:',
)

# Redraw the plot whenever the dropdown's value changes
dropdown.observe(update_plot, names='value')

# Stack the selector above a row that holds the single plot output, then show it
layout = VBox([dropdown, HBox([output_reconstructed])])
display(layout)

# Additionally write the selector and the plot output to a standalone HTML file
embed_minimal_html(
    'export.html',
    views=[dropdown, output_reconstructed],
    title='Widgets Export',
)

VBox(children=(Dropdown(description='Select Zone:', options=('CAPITL', 'CENTRL', 'DUNWOD', 'GENESE', 'HUD VL',…

In [3]:
# Send Bokeh output to the notebook
output_notebook()

# Load the prediction frames and the zone names from pickle files.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced by this project's own pipeline.
with open('all_prediction_dfs.pkl', 'rb') as f:
    all_prediction_dfs = pickle.load(f)

with open('zones.pkl', 'rb') as f:
    zones = pickle.load(f)

# Bokeh's Select widget is later given `zones` as its options, so make it a list
zones = list(zones)

# One ColumnDataSource per zone, built from the last 168 rows of that zone's
# prediction frame with the 'unique_id' column removed.
# NOTE(review): 168 presumably means one week of hourly data (7 * 24) - confirm.
sources = {
    zone: ColumnDataSource(
        all_prediction_dfs[zone].iloc[-168:].drop(columns="unique_id")
    )
    for zone in zones
}

# Function to create the plot
def plot_prediction(source, zone):
    """Build a Bokeh line chart with one line per non-``'ds'`` column of *source*.

    Args:
        source: ColumnDataSource whose ``'ds'`` column supplies the x values
            (the x-axis is a datetime axis). Every other column is drawn as
            its own line and listed in the legend under its column name.
        zone: Zone name, used in the plot title.

    Returns:
        The configured Bokeh figure.
    """
    p = figure(title=f"NYISO - {zone}", x_axis_type='datetime', x_axis_label='Date', y_axis_label='Load', width=1400, height=850)

    # Every column except the timestamp column is a series to draw.
    # NOTE(review): if the source was built from a DataFrame, its index is
    # also a column here and gets plotted unless the index is 'ds' - confirm.
    columns_to_plot = [col for col in source.data if col != 'ds']

    # Category20 is a dict keyed only by palette sizes 3..20, so indexing it
    # with the series count raises KeyError for fewer than 3 or more than 20
    # series. Take the full 20-colour palette and wrap around instead; the
    # smaller palettes are prefixes of it, so colours for 3..20 series are
    # unchanged.
    palette = Category20[20]

    for i, col in enumerate(columns_to_plot):
        p.line(x='ds', y=col, source=source, line_width=2, color=palette[i % len(palette)], legend_label=col)

    # Customize the legend
    p.legend.title = ''
    p.legend.title_text_font_size = '12pt'
    p.legend.label_text_font_size = '10pt'
    p.legend.location = 'bottom_left'

    # Use full dates for tick labels at day, month and year resolution
    p.xaxis.formatter = DatetimeTickFormatter(
        days="%Y-%m-%d",
        months="%Y-%m-%d",
        years="%Y-%m-%d"
    )

    return p
# Initial zone for plotting
initial_zone = zones[0]

# Give the plot its own display source holding a copy of the initial zone's
# columns. Previously the renderers were bound to sources[initial_zone]
# itself, so the first zone switch overwrote that zone's stored data and the
# initial zone could never be shown again.
source = ColumnDataSource(data=dict(sources[initial_zone].data))

# Create the initial plot
p = plot_prediction(source, initial_zone)

# Dropdown menu for selecting zone
select = Select(title="Select Zone:", value=initial_zone, options=zones)

# JavaScript callback to update the plot when a new zone is selected.
# All lines share the one display source, so a single assignment updates
# every renderer; the per-zone sources are only read, never written.
# This relies on every zone's source having the same column names.
callback = CustomJS(args=dict(sources=sources, plot_source=source, p=p), code="""
    const zone_name = cb_obj.value;

    // Assigning .data notifies the renderers, so no manual change.emit() is needed.
    plot_source.data = sources[zone_name].data;
    p.title.text = 'NYISO - ' + zone_name;
""")

select.js_on_change('value', callback)

# Arrange the dropdown and plot in a layout
layout = column(select, p)

# Show the plot in the Jupyter Notebook
show(layout)