# Process Interconnection Load Time Series

This notebook processes the time series of hourly loads (historical or future-scenario) for each of the three electricity interconnections.

In [1]:
# Start by importing the packages we need:
import os

import pandas as pd
import numpy as np

from glob import glob


## Set the Directory Structure

In [48]:
# Root of the TELL production-run data (the function below reads files under 'outputs/tell_output/' from here):
data_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/'

# Directory holding the analysis metadata (e.g., the BA-to-interconnection mapping file):
metadata_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Analysis/'

# Directory the post-processed interconnection load time series are written to:
data_output_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/outputs/postprocessed/Interconnection_Load_Time_Series/'


## Process the TELL Output Files

In [4]:
# Load the table that maps balancing authorities (BAs) to interconnections:
ba_mapping_file = metadata_input_dir + 'BA_to_Interconnection_Mapping.csv'
ba_mapping = pd.read_csv(ba_mapping_file)


In [75]:
# Define a function to process the time series of hourly load for each interconnection:
def process_interconnection_load_time_series(data_input_dir: str, scenario: str,
                                             mapping_df=None, output_dir=None):
    """Aggregate TELL BA-level hourly loads into interconnection-level loads.

    For each year of the scenario, the TELL balancing-authority output file is
    read, joined to the BA-to-interconnection mapping on 'BA_Code', and the
    'Scaled_TELL_BA_Load_MWh' values are summed by ('Interconnection',
    'Time_UTC'). A 'Percentile' column holding the within-year percentile rank
    of each hourly load is added, and all years are stacked and written to
    '<scenario>.csv' in the output directory.

    Args:
        data_input_dir: Root directory that contains
            'outputs/tell_output/<scenario>/<year>/'.
        scenario: Scenario name. 'historic' loops over 1980-2019 in one-year
            steps; any other value loops over 2020-2095 in five-year steps.
        mapping_df: Optional DataFrame with 'BA_Code' and 'Interconnection'
            columns. Defaults to the notebook-level ``ba_mapping``.
        output_dir: Optional directory for the output file. Defaults to the
            notebook-level ``data_output_dir``. Created if it does not exist.

    Returns:
        None. The result is written to disk.
    """
    # Fall back to the notebook-level objects when no overrides are supplied:
    if mapping_df is None:
        mapping_df = ba_mapping
    if output_dir is None:
        output_dir = data_output_dir

    # Set the start and end years to loop over. Note that range() excludes
    # end_year itself, so 2020 (historic) and 2100 (future) are never read:
    if scenario == 'historic':
        start_year, end_year, interval = 1980, 2020, 1
    else:
        start_year, end_year, interval = 2020, 2100, 5

    # Collect one dataframe per year and concatenate once at the end; growing
    # a dataframe with pd.concat inside the loop re-copies all prior years:
    yearly_frames = []

    # Loop over the years from the start_year to the end_year:
    for year in range(start_year, end_year, interval):
        # Read in the TELL BA output file for that year and scenario:
        tell_file = os.path.join(
            data_input_dir, 'outputs', 'tell_output', scenario, str(year),
            'TELL_Balancing_Authority_Hourly_Load_Data_' + str(year) + '_Scaled_' + str(year) + '.csv')
        tell_df = pd.read_csv(tell_file)

        # Merge in the interconnection mapping using common BA codes:
        merged_df = pd.merge(tell_df, mapping_df, on='BA_Code')

        # Sum the BA-level hourly loads into interconnection-level hourly loads:
        merged_df['Interconnection_Load_MWh'] = (
            merged_df.groupby(['Interconnection', 'Time_UTC'])['Scaled_TELL_BA_Load_MWh']
            .transform('sum')
            .round(3)
        )

        # Only keep the columns we need and subset to the unique values:
        merged_df = merged_df[['Time_UTC', 'Interconnection', 'Interconnection_Load_MWh']].drop_duplicates()

        # Compute the annual percentile for each hourly load value.
        # NOTE(review): the rank is taken over the rows of all interconnections
        # pooled together for the year, not within each interconnection --
        # confirm this is the intended definition.
        merged_df['Percentile'] = merged_df['Interconnection_Load_MWh'].rank(pct=True).round(3)

        yearly_frames.append(merged_df)

    # Stack the yearly results in chronological (loop) order:
    output_df = pd.concat(yearly_frames)

    # Write out the dataframe to a .csv file, creating the directory if needed:
    os.makedirs(output_dir, exist_ok=True)
    output_df.to_csv(os.path.join(output_dir, scenario + '.csv'), sep=',', index=False)


In [76]:
# Run the processing for the 'rcp85hotter_ssp5' scenario:
process_interconnection_load_time_series(
    data_input_dir=data_input_dir,
    scenario='rcp85hotter_ssp5',
)
