# Process Interconnection Load Time Series

This notebook processes the time series of hourly loads for each of the three electricity interconnections, for either the historical period or a future scenario.

In [2]:
# Start by importing the packages we need:
import os

import pandas as pd
import numpy as np

from glob import glob


## Set the Directory Structure

In [5]:
# Identify the data input and output directories.
# NOTE: these are absolute, machine-specific paths; edit them to match the local
# directory layout before running the notebook.
# Keep the trailing '/' on each path: file names are appended to metadata_input_dir
# by plain string concatenation when the mapping file is read.
# Directory holding the TELL output files that are read in:
data_input_dir =  '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/outputs/tell_output/'
# Directory holding the BA-to-interconnection mapping file:
metadata_input_dir =  '/Users/burl878/Documents/IMMM/Data/TELL/Analysis/'
# Directory the interconnection-level load time series are written to:
data_output_dir =  '/Users/burl878/Documents/IMMM/Data/TELL/Analysis/Interconnection_Load_Time_Series/'


## Process the TELL Output Files

In [6]:
# Load the table that assigns each balancing authority (BA) to an interconnection.
# The file name is appended directly to the metadata directory path:
ba_mapping_file = metadata_input_dir + 'BA_to_Interconnection_Mapping.csv'
ba_mapping = pd.read_csv(ba_mapping_file)


In [52]:
# Define a function to process the time series of hourly load for each interconnection:
def process_interconnection_load_time_series(data_input_dir: str, scenario: str):

    # Set the start and end years to loop over:
    if scenario == 'historic':
       start_year = 1980; end_year = 2020; interval = 1
    else:
       start_year = 2020; end_year = 2100; interval = 1
        
    # Loop over the years from the start_year to the end_year:
    for year in range(start_year, end_year, interval):
        # Read in the TELL BA output file for that year and scenario:
        tell_df = pd.read_csv(data_input_dir + scenario + '/' + str(year) + '/TELL_Balancing_Authority_Hourly_Load_Data_' + str(year) + '_Scaled_' + str(year) + '.csv')
             
        # Merge in the interconnection mapping using common BA codes:
        merged_df = pd.merge(tell_df, ba_mapping, on='BA_Code')
            
        # Sum the BA-level hourly loads into interconnection-level hourly loads:
        merged_df['Load_MWh'] = merged_df.groupby(['Interconnection', 'Time_UTC'])['Scaled_TELL_BA_Load_MWh'].transform('sum').round(3)  
            
        # Only keep the columns we need and subset to the unique values:
        merged_df = merged_df[['Time_UTC', 'Interconnection', 'Load_MWh']].drop_duplicates()
        
        # Compute the annual percentile for each hourly load value:
        merged_df['Percentile_Load'] = merged_df.groupby(['Interconnection'])['Load_MWh'].rank(pct=True).round(3)
        
        # Compute the normalized load for each hourly load value:
        merged_df['Load_Mean'] = merged_df.groupby(['Interconnection'])['Load_MWh'].transform('mean')
        merged_df['Load_STD'] = merged_df.groupby(['Interconnection'])['Load_MWh'].transform('std')
        merged_df['Normalized_Load'] = ((merged_df['Load_MWh'] - merged_df['Load_Mean']) / merged_df['Load_STD']).round(2)
        
        # Only keep the columns we need:
        merged_df = merged_df[['Time_UTC', 'Interconnection', 'Load_MWh', 'Percentile_Load', 'Normalized_Load']]
        
        # Store the output in a new dataframe:
        if year == start_year:
           output_df = merged_df
        else:
           output_df = pd.concat([output_df, merged_df])
            
        # Clean up:
        del tell_df, merged_df
            
    # Write out the dataframe to a .csv file:
    output_df.to_csv((os.path.join(data_output_dir, (scenario + '_Load_Time_Series.csv'))), sep=',', index=False)

    return output_df


In [60]:
# Build the interconnection-level load time series for one future scenario. This
# also writes the result to a .csv file in the output directory:
output_df = process_interconnection_load_time_series(
    data_input_dir=data_input_dir,
    scenario='rcp85hotter_ssp5',
)

# Show the resulting dataframe:
output_df


Unnamed: 0,Time_UTC,Interconnection,Load_MWh,Percentile_Load,Normalized_Load
0,2020-01-01 01:00:00,EIC,333160.69,0.500,-0.22
1,2020-01-01 02:00:00,EIC,327647.21,0.447,-0.30
2,2020-01-01 03:00:00,EIC,317783.13,0.365,-0.44
3,2020-01-01 04:00:00,EIC,307857.30,0.289,-0.58
4,2020-01-01 05:00:00,EIC,296846.44,0.216,-0.74
...,...,...,...,...,...
113875,2099-12-31 19:00:00,ERCOT,81620.84,0.225,-0.81
113876,2099-12-31 20:00:00,ERCOT,82159.70,0.238,-0.79
113877,2099-12-31 21:00:00,ERCOT,83014.88,0.259,-0.77
113878,2099-12-31 22:00:00,ERCOT,82018.60,0.234,-0.80
