## NYISO Load Prediction - Smoothing Kernel
- Objective: Utilize the NBEATS model to predict NYISO load data for 2023-12-31 using historical data from 2013-01-01 to 2023-12-30.
- Zones: `N.Y.C.`, `NORTH`, `CENTRL`
- Scaling methods: [definition](https://nixtlaverse.nixtla.io/neuralforecast/common.scalers.html)
     - [`identity`](https://nixtlaverse.nixtla.io/neuralforecast/common.scalers.html#std-statistics): no scaling is applied; the series is passed to the model unchanged.
     - `revin`: reversible instance normalization, in which learnable normalization parameters are added on top of the usual normalization technique.
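     - `smoothing`: subtract a Gaussian-kernel-smoothed copy of the series (kernel width `sigma`) and divide by the rolling standard deviation computed over a `window_size`-point window, as implemented by the `Normalizer` class below.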

In [50]:
import os
import io
import pickle
import logging
import warnings
from datetime import timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
from scipy.ndimage import gaussian_filter1d
from IPython.display import display, clear_output

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, Select, CustomJS, DatetimeTickFormatter
from bokeh.layouts import column
from bokeh.palettes import Category10

import ipywidgets as widgets
from ipywidgets import HBox, VBox
from ipywidgets.embed import embed_minimal_html

# Suppress warnings
# NOTE: with no category or module argument this silences *every* warning for
# the rest of the session, including deprecation notices raised by the
# libraries imported above.
warnings.filterwarnings("ignore")

In [42]:
class Normalizer:
    """Normalize a 1-D series against a smoothed trend and a rolling spread.

    ``fit`` stores two arrays computed from the training series: a
    Gaussian-smoothed copy (``smoothed_y``) and a rolling standard deviation
    (``std_y``). ``transform`` maps ``y`` to ``(y - smoothed) / std`` and
    ``inverse_transform`` maps it back. Both align their input with the *last*
    ``len(input)`` fitted values, so the input is expected to correspond to
    the tail of the series passed to ``fit``.
    """

    # Scale substituted wherever the rolling std is NaN or exactly zero, so the
    # division in ``transform`` stays finite. ``inverse_transform`` uses the
    # same substitute, which keeps the two operations exact inverses.
    _MIN_STD = 1e-6

    def __init__(self, window_size=7, sigma=2):
        """
        Initialize the normalizer with kernel smoothing parameters.

        Parameters:
        - window_size: The size of the window for rolling standard deviation.
        - sigma: The standard deviation for Gaussian kernel smoothing.
        """
        self.window_size = window_size
        self.sigma = sigma
        self.smoothed_y = None
        self.std_y = None

    @staticmethod
    def _clean(y):
        """Return ``y`` as a float array with NaNs forward- then back-filled."""
        # Casting to float matters: gaussian_filter1d returns an array of the
        # input dtype, so an integer series would get a truncated trend.
        return pd.Series(y, dtype=float).ffill().bfill().to_numpy()

    def _fitted_tail(self, n):
        """Return the last ``n`` fitted ``(smoothed, guarded_std)`` values."""
        fitted_len = len(self.smoothed_y)
        if n > fitted_len:
            raise ValueError(
                f"Got {n} values but the normalizer was fitted on only {fitted_len}."
            )
        # Explicit start index: ``arr[-n:]`` would return the whole array when n == 0.
        start = fitted_len - n
        std_tail = self.std_y[start:]
        guarded_std = np.where(
            np.isnan(std_tail) | (std_tail == 0), self._MIN_STD, std_tail
        )
        return self.smoothed_y[start:], guarded_std

    def fit(self, y):
        """Compute and store the smoothed trend and rolling std of ``y``.

        Returns ``self`` so calls can be chained.
        """
        y = self._clean(y)

        # Apply Gaussian kernel smoothing
        self.smoothed_y = gaussian_filter1d(y, sigma=self.sigma)

        # Rolling standard deviation; the first value is NaN (a single
        # observation has no sample std) and is back-filled from the next one.
        self.std_y = (
            pd.Series(y)
            .rolling(window=self.window_size, min_periods=1)
            .std()
            .bfill()
            .to_numpy()
        )

        return self

    def transform(self, y):
        """Normalize ``y`` against the tail of the fitted trend and spread.

        Raises:
        - RuntimeError: if ``fit`` has not been called.
        - ValueError: if ``y`` is longer than the fitted series.
        """
        if self.smoothed_y is None or self.std_y is None:
            raise RuntimeError("The normalizer must be fitted before calling transform.")

        y = self._clean(y)
        smoothed_tail, std_tail = self._fitted_tail(len(y))

        normalized_y = (y - smoothed_tail) / std_tail

        # Forward-fill any remaining NaN values in normalized_y
        return pd.Series(normalized_y).ffill().to_numpy()

    def fit_transform(self, y):
        """Fit on ``y`` and return its normalized values."""
        self.fit(y)
        return self.transform(y)

    def inverse_transform(self, normalized_y):
        """Map normalized values back to the original scale.

        Uses the same guarded standard deviation as ``transform``, so
        ``inverse_transform(transform(y))`` recovers ``y`` even where the
        rolling std is zero or NaN.

        Raises:
        - RuntimeError: if ``fit`` has not been called.
        - ValueError: if ``normalized_y`` is longer than the fitted series.
        """
        if self.smoothed_y is None or self.std_y is None:
            raise RuntimeError("The normalizer must be fitted before calling inverse_transform.")

        smoothed_tail, std_tail = self._fitted_tail(len(normalized_y))

        denormalized_y = normalized_y * std_tail + smoothed_tail
        return denormalized_y


def calculate_overall_mse(zone_dfs, true_col='y', prediction_cols=['NBEATS - Identity', 'NBEATS - Reinv', 'NBEATS - Smoothing'], horizon=24):
    """Average the per-sample MSE of each prediction column over a list of frames.

    For every DataFrame in ``zone_dfs`` the column ``'NBEATS1'`` (if present)
    is renamed to ``'NBEATS - Reinv'``, rows with NaN in ``true_col`` or any
    prediction column are dropped, and the MSE between ``true_col`` and each
    prediction column is computed on the last ``horizon`` remaining rows.
    The input frames are not modified.

    Parameters:
    - zone_dfs: iterable of DataFrames, one per sample.
    - true_col: name of the column holding the observed values.
    - prediction_cols: names of the prediction columns to score. The default
      list is only read, never mutated.
    - horizon: number of trailing rows to score in each frame (default 24).
      If fewer rows survive the NaN filter, all surviving rows are used.

    Returns:
    - dict mapping each prediction column to its mean MSE across the frames,
      rounded to 3 decimals.

    Raises:
    - ValueError: if ``horizon`` is smaller than 1.
    """
    if horizon < 1:
        # iloc[-0:] would silently select the whole frame.
        raise ValueError("horizon must be a positive number of rows.")

    prediction_cols = list(prediction_cols)
    required_cols = [true_col] + prediction_cols
    per_sample_mse = {col: [] for col in prediction_cols}

    for df in zone_dfs:
        scored = (
            df.rename(columns={'NBEATS1': 'NBEATS - Reinv'})
            .dropna(subset=required_cols)
            .iloc[-horizon:]
        )
        for col in prediction_cols:
            per_sample_mse[col].append(mean_squared_error(scored[true_col], scored[col]))

    return {col: round(np.mean(per_sample_mse[col]), 3) for col in prediction_cols}


In [43]:
# Read the previously saved predictions back from disk.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open('all_prediction_dfs.pkl', 'rb') as pickle_file:
    all_prediction_dfs = pickle.load(pickle_file)

print("Data loaded from pickle files successfully.")

Data loaded from pickle files successfully.


### Overall MSE

In [44]:
# all_prediction_dfs is assumed to map each zone name to a list of DataFrames.
# Building the frame from a dict of dicts gives one column per zone and one row
# per prediction column.
results = pd.DataFrame(
    {
        zone_name: calculate_overall_mse(sample_dfs)
        for zone_name, sample_dfs in all_prediction_dfs.items()
    }
)

results

Unnamed: 0,CAPITL,CENTRL,DUNWOD,GENESE,HUD VL,LONGIL,MHK VL,MILLWD,N.Y.C.,NORTH,WEST
NBEATS - Identity,8201.244,13962.623,3854.546,6475.61,10805.418,58922.204,3340.392,1332.667,120901.085,555.585,9837.963
NBEATS - Reinv,8060.273,16798.849,4247.082,7154.422,11444.606,50559.936,3763.228,1720.273,144643.707,694.176,9051.257
NBEATS - Smoothing,11067.333,23723.661,6526.208,8508.908,10661.199,64729.461,5617.927,1598.675,284407.097,720.592,12948.159


### Visualization

In [51]:
output_notebook()

# Derive the zone list from the loaded predictions so this cell does not rely
# on a `zones` variable left behind by another cell or kernel session.
zones = list(all_prediction_dfs)
initial_zone = zones[0]
initial_sample_index = 0

# Number of trailing rows of each sample shown in the plot.
PLOT_TAIL_ROWS = 168

output_prediction = widgets.Output()

def update_plot(change=None):
    """Redraw the prediction plot for the currently selected zone and sample.

    Used both as a widget observer (``change`` is the traitlets change dict,
    which is ignored) and as a direct call to draw the initial plot.
    """
    selected_zone = dropdown_zone.value
    selected_sample_index = dropdown_sample_index.value

    # Update prediction plot
    with output_prediction:
        clear_output(wait=True)
        plot_df = (
            all_prediction_dfs[selected_zone][selected_sample_index]
            .drop(columns=["ds", "unique_id"])
            .iloc[-PLOT_TAIL_ROWS:]
        )
        # NOTE(review): `plot_prediction` is not defined in the visible cells of
        # this notebook; it must be defined or imported before this cell runs.
        fig_prediction = plot_prediction(plot_df, selected_zone)
        show(fig_prediction, notebook_handle=True)

# Dropdown for selecting zone
dropdown_zone = widgets.Dropdown(
    options=zones,
    value=initial_zone,
    description='Select Zone:',
    style={'description_width': 'initial'}
)
dropdown_zone.observe(update_plot, names='value')

# Dropdown for selecting sample index
# NOTE(review): the choices are sized from the initial zone only and are not
# refreshed when the zone changes; this presumes every zone holds the same
# number of samples - confirm against the data.
sample_indices = list(range(len(all_prediction_dfs[initial_zone])))
dropdown_sample_index = widgets.Dropdown(
    options=sample_indices,
    value=initial_sample_index,
    description='Select Sample Index:',
    style={'description_width': 'initial'}
)
dropdown_sample_index.observe(update_plot, names='value')

layout = widgets.VBox([dropdown_zone, dropdown_sample_index, output_prediction])

display(layout)
update_plot()

VBox(children=(Dropdown(description='Select Zone:', options=('CAPITL', 'CENTRL', 'DUNWOD', 'GENESE', 'HUD VL',…