## NYISO Load Analysis
- $y_t$: hourly integrated load at time $t$
- $\bar{y}_t = \frac{1}{168} \sum_{i=t-168}^{t-1} y_i$: 7-day moving average
- $\bar{\sigma}_t = \sqrt{\frac{1}{167} \sum_{i=t-168}^{t-1} (y_i - \bar{y}_t)^2}$: 7-day moving standard deviation
- Note: 168 hours is the number of hours in a week


### 7-Day Moving Average

In [None]:
import logging
import warnings

import numpy as np
import pandas as pd
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Select, CustomJS
from bokeh.plotting import figure, show, output_notebook
from scipy.ndimage import gaussian_filter1d

from ts_scaler.data.data_handler import DataHandler
from ts_scaler.scaler.moving_average import MovingAverageCalculator
from ts_scaler.utils.logger import setup_logger

# Silence DeprecationWarning messages so they do not clutter the notebook output
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Send Bokeh output to the notebook
output_notebook()

logger = setup_logger(level=logging.WARNING)

# Load the NYISO data through the project data handler
# NOTE(review): start_date has 7 digits while end_date has 8 -- possibly a typo; confirm against DataHandler
data_handler = DataHandler(logger=logger)
df = data_handler.fetch_nyiso_data(local_dir="../data", start_date="2016101", end_date="20231231")

# Clean up: one row per (time_stamp, zone_name), no missing values,
# and only strictly positive loads (zero is dropped as well as negatives)
df = df.drop_duplicates(subset=['time_stamp', 'zone_name']).dropna()
has_positive_load = df['integrated_load'] > 0
df = df[has_positive_load]

# Parse time_stamp into pandas datetimes
df['time_stamp'] = pd.to_datetime(df['time_stamp'])

In [None]:
# Compute the moving average and moving standard deviation per zone
# with a 168-hour (7-day) window
calculator = MovingAverageCalculator(window_size=168)
grouped_df = df.groupby('zone_name').apply(
    lambda x: calculator.calculate(x, time_column='time_stamp', value_column='integrated_load')).reset_index(drop=True)

unique_zone_names = grouped_df['zone_name'].unique()

# One ColumnDataSource per zone; the callback only reads from these
sources = {zone_name: ColumnDataSource(grouped_df[grouped_df['zone_name'] == zone_name]) for zone_name in
           unique_zone_names}

# Initial data
initial_zone = unique_zone_names[0]
# The plots draw from a dedicated display source rather than from
# sources[initial_zone] itself. The callback overwrites the displayed data, so
# sharing the object would destroy the initial zone's data on the first
# selection change and make it impossible to switch back to that zone.
source = ColumnDataSource(data=dict(sources[initial_zone].data))

# Create figures; all three lines read different columns of the same display source
p1 = figure(width=800, height=250, x_axis_type="datetime", title="Raw Load")
p1.line('time_stamp', 'integrated_load', source=source, line_width=2, color='#1f77b4')

p2 = figure(width=800, height=250, x_axis_type="datetime", title="7-Day Moving Average")
p2.line('time_stamp', 'moving_average', source=source, line_width=2, color='#ff7f0e')

p3 = figure(width=800, height=250, x_axis_type="datetime", title="7-Day Moving Standard Deviation")
p3.line('time_stamp', 'moving_standard_deviation', source=source, line_width=2, color='#2ca02c')

# Dropdown menu for selecting zone
select = Select(title="Select Zone:", value=initial_zone, options=list(unique_zone_names))

# JavaScript callback: load the selected zone's columns into the display source.
# Updating the one shared source refreshes all three plots.
callback = CustomJS(args=dict(sources=sources, source=source), code="""
    source.data = sources[cb_obj.value].data;
    source.change.emit();
""")

select.js_on_change('value', callback)

layout = column(select, p1, p2, p3)

# Show the plot in a Jupyter Notebook
show(layout)

### Gaussian Kernel $\sigma=168$

In [None]:
# Define the SmoothingNormalizer class
class SmoothingNormalizer:
    """Gaussian-kernel estimate of a series' local level and local spread.

    ``fit`` stores two arrays of the same length as the input:

    * ``smoothed_y`` -- the input smoothed with a 1-D Gaussian filter.
    * ``std_y`` -- the square root of the Gaussian-smoothed squared
      deviations of the input from ``smoothed_y``.

    Args:
        sigma: Standard deviation of the Gaussian kernel, in samples.
    """

    def __init__(self, sigma=448):
        self.sigma = sigma
        self.smoothed_y = None
        self.std_y = None

    def fit(self, y):
        """Compute the smoothed level and spread of ``y``.

        Missing values are forward-filled, then back-filled, before filtering.

        Args:
            y: One-dimensional sequence of numeric values.

        Returns:
            This instance, with ``smoothed_y`` and ``std_y`` populated.
        """
        # Cast to float: gaussian_filter1d returns an array of the input's
        # dtype, so integer input would otherwise yield truncated results.
        values = pd.Series(y).ffill().bfill().to_numpy(dtype=float)
        self.smoothed_y = gaussian_filter1d(values, sigma=self.sigma)
        residuals = values - self.smoothed_y
        smoothed_squared_residuals = gaussian_filter1d(residuals ** 2, sigma=self.sigma)
        self.std_y = np.sqrt(smoothed_squared_residuals)
        return self

    def fit_transform(self, y):
        """Fit on ``y`` and return the tuple ``(smoothed_y, std_y)``."""
        self.fit(y)
        return self.smoothed_y, self.std_y


# Silence DeprecationWarning messages so they do not clutter the notebook output
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Send Bokeh output to the notebook
output_notebook()

logger = setup_logger(level=logging.WARNING)

# Load the NYISO data through the project data handler
# NOTE(review): start_date has 7 digits while end_date has 8 -- possibly a typo; confirm against DataHandler
data_handler = DataHandler(logger=logger)
df = data_handler.fetch_nyiso_data(local_dir="../data", start_date="2016101", end_date="20231231")

# Clean up: one row per (time_stamp, zone_name), no missing values,
# and only strictly positive loads (zero is dropped as well as negatives)
df = df.drop_duplicates(subset=['time_stamp', 'zone_name']).dropna()
has_positive_load = df['integrated_load'] > 0
df = df[has_positive_load]

# Parse time_stamp into pandas datetimes
df['time_stamp'] = pd.to_datetime(df['time_stamp'])


def normalize_group(group, sigma=448):
    """Return a copy of ``group`` with smoothed load and spread columns added.

    Args:
        group: DataFrame with an ``integrated_load`` column.
        sigma: Gaussian kernel width passed to ``SmoothingNormalizer``.

    Returns:
        A new DataFrame with ``smoothed_load`` and ``smoothed_std`` columns;
        the input frame is left unmodified.
    """
    level, spread = SmoothingNormalizer(sigma=sigma).fit_transform(group['integrated_load'])
    # assign() works on a copy, so the caller's (possibly sliced) frame is untouched
    return group.assign(smoothed_load=level, smoothed_std=spread)


def create_sources(df, unique_zone_names, sigma=448):
    """Build one ColumnDataSource of smoothed data per zone.

    Args:
        df: DataFrame with ``zone_name`` and ``integrated_load`` columns.
        unique_zone_names: Iterable of zone names to build sources for.
        sigma: Gaussian kernel width forwarded to ``normalize_group``.

    Returns:
        Dict mapping each zone name to its ColumnDataSource.
    """
    sources = {}
    for zone_name in unique_zone_names:
        zone_rows = df[df['zone_name'] == zone_name]
        sources[zone_name] = ColumnDataSource(normalize_group(zone_rows, sigma))
    return sources


unique_zone_names = df['zone_name'].unique()
# One ColumnDataSource per zone; the callback only reads from these
sources = create_sources(df, unique_zone_names, sigma=168)

# Initial data
initial_zone = unique_zone_names[0]
# The plots draw from a dedicated display source rather than from
# sources[initial_zone] itself. The callback overwrites the displayed data, so
# sharing the object would destroy the initial zone's data on the first
# selection change and make it impossible to switch back to that zone.
source = ColumnDataSource(data=dict(sources[initial_zone].data))

# Create figures; titles state the sigma actually used to build the sources
p1 = figure(width=800, height=250, x_axis_type="datetime", title="Raw Load")
p1.line('time_stamp', 'integrated_load', source=source, line_width=2, color='#1f77b4')

p2 = figure(width=800, height=250, x_axis_type="datetime", title="Smoothed Load (Sigma=168)")
p2.line('time_stamp', 'smoothed_load', source=source, line_width=2, color='#ff7f0e')

p3 = figure(width=800, height=250, x_axis_type="datetime", title="Smoothed Std (Sigma=168)")
p3.line('time_stamp', 'smoothed_std', source=source, line_width=2, color='#2ca02c')

# Dropdown menu for selecting zone
select_zone = Select(title="Select Zone:", value=initial_zone, options=list(unique_zone_names))

# JavaScript callback: load the selected zone's columns into the display source.
# Updating the one shared source refreshes all three plots.
callback_zone = CustomJS(args=dict(sources=sources, source=source), code="""
    source.data = sources[cb_obj.value].data;
    source.change.emit();
""")

select_zone.js_on_change('value', callback_zone)

layout = column(select_zone, p1, p2, p3)

# Show the plot in a Jupyter Notebook
show(layout)


### Gaussian Kernel $\sigma=448$

In [None]:
def normalize_group(group, sigma=448):
    """Return a copy of ``group`` with smoothed load and spread columns added.

    Args:
        group: DataFrame with an ``integrated_load`` column.
        sigma: Gaussian kernel width passed to ``SmoothingNormalizer``.

    Returns:
        A new DataFrame with ``smoothed_load`` and ``smoothed_std`` columns;
        the input frame is left unmodified.
    """
    level, spread = SmoothingNormalizer(sigma=sigma).fit_transform(group['integrated_load'])
    # assign() works on a copy, so the caller's (possibly sliced) frame is untouched
    return group.assign(smoothed_load=level, smoothed_std=spread)


def create_sources(df, unique_zone_names, sigma=448):
    """Build one ColumnDataSource of smoothed data per zone.

    Args:
        df: DataFrame with ``zone_name`` and ``integrated_load`` columns.
        unique_zone_names: Iterable of zone names to build sources for.
        sigma: Gaussian kernel width forwarded to ``normalize_group``.

    Returns:
        Dict mapping each zone name to its ColumnDataSource.
    """
    sources = {}
    for zone_name in unique_zone_names:
        zone_rows = df[df['zone_name'] == zone_name]
        sources[zone_name] = ColumnDataSource(normalize_group(zone_rows, sigma))
    return sources


unique_zone_names = df['zone_name'].unique()
# One ColumnDataSource per zone; the callback only reads from these
sources = create_sources(df, unique_zone_names, sigma=448)

# Initial data
initial_zone = unique_zone_names[0]
# The plots draw from a dedicated display source rather than from
# sources[initial_zone] itself. The callback overwrites the displayed data, so
# sharing the object would destroy the initial zone's data on the first
# selection change and make it impossible to switch back to that zone.
source = ColumnDataSource(data=dict(sources[initial_zone].data))

# Create figures; all three lines read different columns of the same display source
p1 = figure(width=800, height=250, x_axis_type="datetime", title="Raw Load")
p1.line('time_stamp', 'integrated_load', source=source, line_width=2, color='#1f77b4')

p2 = figure(width=800, height=250, x_axis_type="datetime", title="Smoothed Load (Sigma=448)")
p2.line('time_stamp', 'smoothed_load', source=source, line_width=2, color='#ff7f0e')

p3 = figure(width=800, height=250, x_axis_type="datetime", title="Smoothed Std (Sigma=448)")
p3.line('time_stamp', 'smoothed_std', source=source, line_width=2, color='#2ca02c')

# Dropdown menu for selecting zone
select_zone = Select(title="Select Zone:", value=initial_zone, options=list(unique_zone_names))

# JavaScript callback: load the selected zone's columns into the display source.
# Updating the one shared source refreshes all three plots.
callback_zone = CustomJS(args=dict(sources=sources, source=source), code="""
    source.data = sources[cb_obj.value].data;
    source.change.emit();
""")

select_zone.js_on_change('value', callback_zone)

layout = column(select_zone, p1, p2, p3)

# Show the plot in a Jupyter Notebook
show(layout)


### Gaussian Kernel $\sigma=896$

In [None]:
def normalize_group(group, sigma=448):
    """Return a copy of ``group`` with smoothed load and spread columns added.

    Args:
        group: DataFrame with an ``integrated_load`` column.
        sigma: Gaussian kernel width passed to ``SmoothingNormalizer``.

    Returns:
        A new DataFrame with ``smoothed_load`` and ``smoothed_std`` columns;
        the input frame is left unmodified.
    """
    level, spread = SmoothingNormalizer(sigma=sigma).fit_transform(group['integrated_load'])
    # assign() works on a copy, so the caller's (possibly sliced) frame is untouched
    return group.assign(smoothed_load=level, smoothed_std=spread)


def create_sources(df, unique_zone_names, sigma=448):
    """Build one ColumnDataSource of smoothed data per zone.

    Args:
        df: DataFrame with ``zone_name`` and ``integrated_load`` columns.
        unique_zone_names: Iterable of zone names to build sources for.
        sigma: Gaussian kernel width forwarded to ``normalize_group``.

    Returns:
        Dict mapping each zone name to its ColumnDataSource.
    """
    sources = {}
    for zone_name in unique_zone_names:
        zone_rows = df[df['zone_name'] == zone_name]
        sources[zone_name] = ColumnDataSource(normalize_group(zone_rows, sigma))
    return sources


unique_zone_names = df['zone_name'].unique()
# One ColumnDataSource per zone; the callback only reads from these
sources = create_sources(df, unique_zone_names, sigma=896)

# Initial data
initial_zone = unique_zone_names[0]
# The plots draw from a dedicated display source rather than from
# sources[initial_zone] itself. The callback overwrites the displayed data, so
# sharing the object would destroy the initial zone's data on the first
# selection change and make it impossible to switch back to that zone.
source = ColumnDataSource(data=dict(sources[initial_zone].data))

# Create figures; titles state the sigma actually used to build the sources
p1 = figure(width=800, height=250, x_axis_type="datetime", title="Raw Load")
p1.line('time_stamp', 'integrated_load', source=source, line_width=2, color='#1f77b4')

p2 = figure(width=800, height=250, x_axis_type="datetime", title="Smoothed Load (Sigma=896)")
p2.line('time_stamp', 'smoothed_load', source=source, line_width=2, color='#ff7f0e')

p3 = figure(width=800, height=250, x_axis_type="datetime", title="Smoothed Std (Sigma=896)")
p3.line('time_stamp', 'smoothed_std', source=source, line_width=2, color='#2ca02c')

# Dropdown menu for selecting zone
select_zone = Select(title="Select Zone:", value=initial_zone, options=list(unique_zone_names))

# JavaScript callback: load the selected zone's columns into the display source.
# Updating the one shared source refreshes all three plots.
callback_zone = CustomJS(args=dict(sources=sources, source=source), code="""
    source.data = sources[cb_obj.value].data;
    source.change.emit();
""")

select_zone.js_on_change('value', callback_zone)

layout = column(select_zone, p1, p2, p3)

# Show the plot in a Jupyter Notebook
show(layout)
