# Process and Plot the Peak Historical Loads by Interconnection

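This notebook analyzes the historical time series of extreme loads by interconnection. For each year it finds the hour of maximum load overall, in the winter months (January–March and October–December of that calendar year), and in the summer months (April–September), and it saves both the hourly time series and the annual peak-load statistics to .csv files.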

In [1]:
# Start by importing the packages we need:
import os

import pandas as pd
import matplotlib.pyplot as plt

from glob import glob


## Set the Directory Structure

In [60]:
# Identify the data input and output directories:
# NOTE(review): these are absolute paths on one user's machine; point them at your
# local copy of the data before running the notebook on another system.
# NOTE(review): keep the trailing '/' on each directory when editing these values --
# downstream code may append file names to them directly.
load_data_input_dir =  '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/outputs/postprocessed/interconnection_load_time_series/'
# The processed .csv files are written back into the same directory the inputs are read from:
data_output_dir =  '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/outputs/postprocessed/interconnection_load_time_series/'
# Not referenced by the processing cells shown here; presumably used by later plotting cells.
image_output_dir =  '/Users/burl878/Documents/IMMM/Images/TELL/Analysis/Interconnection_Peak_Loads/'


## Process the Historical Peak Loads

In [64]:
# Define a helper that pulls the peak-load hour out of a (possibly empty) subset of the time series:
def _peak_load_row(season_df):
    """
    Return ``(Time_UTC, Load_MWh, Normalized_Load)`` for the row with the largest ``Load_MWh``.

    If the subset has no rows (or no non-missing loads) the tuple ``(None, nan, nan)`` is returned
    instead of letting ``idxmax()`` raise on an empty sequence.
    """
    loads = season_df['Load_MWh'].dropna()
    if loads.empty:
        return None, float('nan'), float('nan')

    # idxmax() returns the label of the first occurrence of the maximum:
    peak_index = loads.idxmax()
    return (season_df.at[peak_index, 'Time_UTC'],
            season_df.at[peak_index, 'Load_MWh'],
            season_df.at[peak_index, 'Normalized_Load'])


# Define a function to process the time series of historical peak loads:
def process_historical_peak_loads(load_data_input_dir: str, data_output_dir: str, interconnection: str,
                                  start_year: int = 1980, end_year: int = 2019):
    """
    Find the annual, winter, and summer peak loads for one interconnection.

    Reads ``historic.csv`` from ``load_data_input_dir``, keeps the rows whose ``Interconnection``
    column equals ``interconnection``, normalizes each hourly load by that year's mean and standard
    deviation, and then, for every year from ``start_year`` through ``end_year`` (inclusive), records
    the time and magnitude of the maximum load over the whole year, the winter months (1-3, 10-12 of
    that calendar year), and the summer months (4-9).

    Two files are written to ``data_output_dir``:
    ``<interconnection>_Peak_Load_Time_Series.csv`` and ``<interconnection>_Peak_Load_Statistics.csv``.

    :param load_data_input_dir: Directory containing ``historic.csv`` (trailing slash optional).
    :param data_output_dir:     Existing directory to write the two .csv files to (trailing slash optional).
    :param interconnection:     Value of the ``Interconnection`` column to process (e.g., 'WECC').
    :param start_year:          First year to include in the statistics.
    :param end_year:            Last year to include in the statistics (inclusive).
    :return:                    Tuple ``(ics_df, stats_df)``: the hourly time series for the
                                interconnection and the per-year peak statistics (index starts at 1,
                                ``Year`` stored as a string). Years without any data are skipped;
                                a season without data gets missing values.
    :raises ValueError:         If ``historic.csv`` has no rows for ``interconnection``.
    """
    # Read in the historical interconnection loads file:
    ics_df = pd.read_csv(os.path.join(load_data_input_dir, 'historic.csv'))

    # Subset to just the interconnection being processed and rename some variables for simplicity.
    # The explicit copy() makes the column assignments below operate on an independent frame
    # rather than on a slice of the full dataset:
    ics_df = (ics_df.loc[ics_df['Interconnection'] == interconnection]
                    .rename(columns={'Interconnection_Load_MWh': 'Load_MWh', 'Percentile': 'Load_Percentile'})
                    .copy())

    if ics_df.empty:
        raise ValueError("No rows found for interconnection '" + str(interconnection) + "' in historic.csv")

    # Add columns with the year and month values to be used in grouping:
    datetimes = pd.to_datetime(ics_df['Time_UTC'])
    ics_df['Year'] = datetimes.dt.year.astype(int)
    ics_df['Month'] = datetimes.dt.month.astype(int)

    # Compute the annual mean and standard deviation of the load:
    load_by_year = ics_df.groupby('Year')['Load_MWh']
    year_mean_load = load_by_year.transform('mean').round(2)
    year_std_load = load_by_year.transform('std').round(2)
    ics_df['Year_Mean_Load'] = year_mean_load

    # Compute the hourly normalized load by subtracting the annual mean and dividing by the annual standard deviation
    # (the rounded mean and standard deviation are used on purpose so the saved values are unchanged):
    ics_df['Normalized_Load'] = ((ics_df['Load_MWh'] - year_mean_load) / year_std_load).round(2)

    # Subset to just the columns we need:
    ics_df = ics_df[['Time_UTC', 'Year', 'Month', 'Load_MWh', 'Normalized_Load', 'Year_Mean_Load']].copy()

    # Months that make up each season; both seasons are taken from the same calendar year:
    winter_months = [1, 2, 3, 10, 11, 12]
    summer_months = [4, 5, 6, 7, 8, 9]

    # Column order of the statistics table:
    stats_columns = ['Year', 'Mean_Load_MWh',
                     'All_Max_Time', 'All_Max_Load_MWh', 'All_Max_Load_Norm',
                     'Win_Max_Time', 'Win_Max_Load_MWh', 'Win_Max_Load_Norm',
                     'Sum_Max_Time', 'Sum_Max_Load_MWh', 'Sum_Max_Load_Norm']

    # Loop over the years and find the maximum annual, winter, and summer load in each year.
    # The rows are collected in a list and turned into a dataframe once at the end:
    records = []
    for year in range(start_year, end_year + 1):
        year_df = ics_df.loc[ics_df['Year'] == year]

        # Skip years that are not present in the data instead of failing on an empty subset:
        if year_df.empty:
            continue

        record = {'Year': str(year),
                  'Mean_Load_MWh': year_df['Year_Mean_Load'].mean().round(2)}

        season_subsets = [('All', year_df),
                          ('Win', year_df.loc[year_df['Month'].isin(winter_months)]),
                          ('Sum', year_df.loc[year_df['Month'].isin(summer_months)])]

        for prefix, season_df in season_subsets:
            peak_time, peak_load, peak_norm = _peak_load_row(season_df)
            record[prefix + '_Max_Time'] = peak_time
            record[prefix + '_Max_Load_MWh'] = peak_load
            record[prefix + '_Max_Load_Norm'] = peak_norm

        records.append(record)

    # Build the statistics dataframe with a 1-based row index:
    stats_df = pd.DataFrame(records, columns=stats_columns)
    stats_df.index = range(1, len(stats_df) + 1)

    # Write out the dataframes to .csv files:
    ics_df.to_csv(os.path.join(data_output_dir, interconnection + '_Peak_Load_Time_Series.csv'), sep=',', index=False)
    stats_df.to_csv(os.path.join(data_output_dir, interconnection + '_Peak_Load_Statistics.csv'), sep=',', index=False)

    return ics_df, stats_df


In [68]:
# Run the processing for the 'WECC' interconnection, then display the annual peak-load statistics:
ts_df, stats_df = process_historical_peak_loads(
    load_data_input_dir=load_data_input_dir,
    data_output_dir=data_output_dir,
    interconnection='WECC',
)

stats_df


Unnamed: 0,Year,Mean_Load_MWh,All_Max_Time,All_Max_Load_MWh,All_Max_Load_Norm,Win_Max_Time,Win_Max_Load_MWh,Win_Max_Load_Norm,Sum_Max_Time,Sum_Max_Load_MWh,Sum_Max_Load_Norm
1,1980,30914.57,1980-07-28 01:00:00,49354.14,4.24,1980-10-02 00:00:00,41939.69,2.54,1980-07-28 01:00:00,49354.14,4.24
2,1981,34288.05,1981-08-07 01:00:00,52510.3,3.71,1981-12-24 03:00:00,41388.38,1.45,1981-08-07 01:00:00,52510.3,3.71
3,1982,37573.28,1982-09-03 00:00:00,56159.95,3.67,1982-01-07 03:00:00,48504.08,2.16,1982-09-03 00:00:00,56159.95,3.67
4,1983,40843.36,1983-08-08 01:00:00,64112.88,4.13,1983-12-22 03:00:00,52777.76,2.12,1983-08-08 01:00:00,64112.88,4.13
5,1984,44007.92,1984-08-09 01:00:00,66528.09,3.61,1984-01-19 03:00:00,55326.52,1.82,1984-08-09 01:00:00,66528.09,3.61
6,1985,47399.02,1985-07-09 01:00:00,76729.13,4.39,1985-02-05 03:00:00,59361.03,1.79,1985-07-09 01:00:00,76729.13,4.39
7,1986,50678.95,1986-08-20 00:00:00,76601.63,3.7,1986-12-11 03:00:00,61380.81,1.53,1986-08-20 00:00:00,76601.63,3.7
8,1987,53960.87,1987-08-05 01:00:00,80905.86,3.68,1987-10-01 00:00:00,70201.19,2.22,1987-08-05 01:00:00,80905.86,3.68
9,1988,57072.15,1988-07-26 01:00:00,88757.64,3.87,1988-12-27 03:00:00,72163.87,1.84,1988-07-26 01:00:00,88757.64,3.87
10,1989,60522.61,1989-07-20 01:00:00,95155.04,4.16,1989-02-03 04:00:00,79807.79,2.32,1989-07-20 01:00:00,95155.04,4.16
