# Merge BA Loads

This notebook merges together the time series of hourly electricity demand from transportation and non-transportation sources by Balancing Authority (BA).

In [62]:
# Start by importing the packages we need:
import os
import glob

import pandas as pd


## Set the Directory Structure

In [63]:
# Identify the data input and output directories. Every directory hangs off a single
# base data directory, which can be overridden with the GODEEEP_DATA_DIR environment
# variable so the notebook can run on another machine without editing this cell.
# The default reproduces the original hard-coded locations.
godeeep_data_dir = os.environ.get('GODEEEP_DATA_DIR', '/Users/burl878/Documents/GODEEEP/Data/')

# The trailing '' passed to os.path.join keeps a trailing path separator on each
# directory, because later cells append file names to these strings directly:
trans_data_input_dir = os.path.join(godeeep_data_dir, 'Transportation', 'Raw', '')
trans_data_output_dir = os.path.join(godeeep_data_dir, 'Transportation', '')
tell_data_input_dir = os.path.join(godeeep_data_dir, 'TELL', 'Production_Runs', 'tell_data', 'outputs', 'tell_output', '')
merged_data_output_dir = os.path.join(godeeep_data_dir, 'Merged_BA_Loads', '')


## Set the Scenario and Year You Want to Process

In [64]:
# GCAM scenario to process. It is used as a sub-directory name when locating the
# transportation and TELL input files and is embedded in the output file names:
gcam_scenario_to_process = 'NetZeroNoCCS_Climate'
# Year to process. Kept as a string because it is concatenated into file paths and names:
year_to_process = '2050'


## Merge the Transportation Output Files Together

In [65]:
def merge_transportation_data(gcam_scenario_to_process: str, year_to_process: str, trans_data_input_dir: str, trans_data_output_dir: str) -> pd.DataFrame:
    """
    Combine the raw transportation load files for one scenario and year into a single dataframe.

    The combined time series is cached as a .csv file in "trans_data_output_dir". If that
    file already exists it is read and returned as-is; otherwise it is built from the raw
    files found under "<trans_data_input_dir>/<gcam_scenario_to_process>/rcp85hotter/" whose
    names end in "<year_to_process>.csv", written to the cache, and returned.

    :param gcam_scenario_to_process: Scenario name; used as an input sub-directory and in the output file name
    :param year_to_process: Year to process, as a string; used to select input files and in the output file name
    :param trans_data_input_dir: Directory holding the raw transportation files
    :param trans_data_output_dir: Directory where the combined .csv file is read from or written to
    :return: Dataframe with the columns "BA_Code", "Time_UTC", and "Transportation_Load_MWh"
    :raises FileNotFoundError: If there is no cached file and no raw input files match
    """
    # Build the name of the processed output file once so the existence check, the write, and the read all agree:
    trn_output_file = os.path.join(trans_data_output_dir,
                                   ('Transportation_Time_Series_' + gcam_scenario_to_process + '_rcp85hotter_' + year_to_process + '.csv'))

    # If the processed output file already exists then just read it in and return it:
    if os.path.isfile(trn_output_file):
        return pd.read_csv(trn_output_file)

    # Create a sorted list of all of the transportation output files in the "trans_data_input_dir":
    trans_file_pattern = os.path.join(trans_data_input_dir, gcam_scenario_to_process, 'rcp85hotter', ('*' + year_to_process + '.csv'))
    trans_filelist = sorted(glob.glob(trans_file_pattern))

    # Fail with a clear message rather than an unbound-variable error when nothing matches:
    if not trans_filelist:
        raise FileNotFoundError('No transportation input files match ' + trans_file_pattern)

    # Loop over the list of files and collect one cleaned dataframe per file:
    trn_frames = []
    for trans_file in trans_filelist:
        # Read in the .csv file and rename a few variables for consistency with TELL:
        trn_data = pd.read_csv(trans_file).rename(columns={'balancing_authority': 'BA_Code',
                                                           'time': 'Time_UTC',
                                                           'transportation_load_MWh': 'Transportation_Load_MWh'})

        # Only keep the columns that are needed:
        trn_data = trn_data[['BA_Code', 'Time_UTC', 'Transportation_Load_MWh']].copy()

        # Strip the "+00:00" from the time string by keeping only the text before the first "+":
        trn_data['Time_UTC'] = trn_data['Time_UTC'].astype(str).str.split('+').str[0]

        trn_frames.append(trn_data)

    # Concatenate once (instead of growing a dataframe inside the loop) and reset the index so the
    # freshly built dataframe has the same unique row index as the one read back from the cached file:
    trn_output_df = pd.concat(trn_frames, ignore_index=True)

    # Write out the dataframe to a .csv file:
    trn_output_df.to_csv(trn_output_file, sep=',', index=False)

    return trn_output_df


In [66]:
# Build (or load from the cached .csv file) the combined transportation load time series
# for the scenario and year selected above:
trn_df = merge_transportation_data(gcam_scenario_to_process,
                                   year_to_process,
                                   trans_data_input_dir,
                                   trans_data_output_dir)

# Display the resulting transportation dataframe:
trn_df


Unnamed: 0,BA_Code,Time_UTC,Transportation_Load_MWh
0,AVA,2050-01-01 01:00:00,533.482972
1,AVA,2050-01-01 02:00:00,556.475413
2,AVA,2050-01-01 03:00:00,557.141012
3,AVA,2050-01-01 04:00:00,541.718939
4,AVA,2050-01-01 05:00:00,535.312815
...,...,...,...
8755,WWA,2050-12-31 20:00:00,0.560331
8756,WWA,2050-12-31 21:00:00,0.619543
8757,WWA,2050-12-31 22:00:00,0.621738
8758,WWA,2050-12-31 23:00:00,0.657654


## Read in the TELL Output File and Subset to Just the BAs in the WECC


In [67]:
# Read in the BA-to-Interconnection mapping file. The path is built with os.path.join so it
# is valid whether or not "merged_data_output_dir" ends with a path separator:
ba_mapping_df = pd.read_csv(os.path.join(merged_data_output_dir, 'BA_to_Interconnection_Mapping.csv'))


In [68]:
# Build the path to the TELL BA output file for the year and scenario being processed. Using
# os.path.join keeps the path valid whether or not "tell_data_input_dir" ends with a separator:
tell_file = os.path.join(tell_data_input_dir, gcam_scenario_to_process, year_to_process,
                         ('TELL_Balancing_Authority_Hourly_Load_Data_' + year_to_process + '_Scaled_' + year_to_process + '.csv'))

# Read in the TELL BA output file:
tell_df = pd.read_csv(tell_file)

# Rename a few variables for consistency:
tell_df = tell_df.rename(columns={'Scaled_TELL_BA_Load_MWh': 'Non-Transportation_Load_MWh'})

# Merge the ba_mapping_df and tell_df dataframes based on common "BA_Code":
tell_df = tell_df.merge(ba_mapping_df, on=['BA_Code'])

# Subset to just the WECC BAs:
tell_df = tell_df[tell_df['Interconnection'] == 'WECC'].copy()

# Make the time variable a string so it can be matched against the transportation time strings:
tell_df['Time_UTC'] = tell_df['Time_UTC'].astype(str)

# Only keep the columns that are needed:
tell_df = tell_df[['BA_Code', 'Time_UTC', 'Non-Transportation_Load_MWh']].copy()

# Preview the TELL dataframe:
tell_df


Unnamed: 0,BA_Code,Time_UTC,Non-Transportation_Load_MWh
17520,AVA,2050-01-01 00:00:00,3586.38
17521,AVA,2050-01-01 01:00:00,3656.43
17522,AVA,2050-01-01 02:00:00,3776.45
17523,AVA,2050-01-01 03:00:00,3842.49
17524,AVA,2050-01-01 04:00:00,3738.26
...,...,...,...
473035,WAUW,2050-12-31 19:00:00,249.09
473036,WAUW,2050-12-31 20:00:00,248.07
473037,WAUW,2050-12-31 21:00:00,245.95
473038,WAUW,2050-12-31 22:00:00,244.12


## Merge the TELL and Transportation Dataframes Together


In [69]:
# Join the TELL and transportation dataframes on the shared "BA_Code" and "Time_UTC" keys.
# NOTE(review): DataFrame.merge defaults to an inner join, so rows that appear in only one
# of the two dataframes are dropped here rather than kept with missing values - confirm
# that this is the intended behavior.
merged_df = tell_df.merge(trn_df, on=['BA_Code', 'Time_UTC'])

# The total load is the non-transportation load plus the transportation load:
merged_df['Total_Load_MWh'] = merged_df['Non-Transportation_Load_MWh'] + merged_df['Transportation_Load_MWh']

# For each load column, round to two decimals to make the output file more readable
# and then replace any missing values with the -9999 flag:
for load_column in ('Non-Transportation_Load_MWh', 'Transportation_Load_MWh', 'Total_Load_MWh'):
    merged_df[load_column] = merged_df[load_column].round(2).fillna(-9999)

# Assemble the output file name and write the dataframe out as a .csv file:
merged_output_file = os.path.join(merged_data_output_dir,
                                  ('Total_Load_Time_Series_' + gcam_scenario_to_process + '_' + year_to_process + '.csv'))
merged_df.to_csv(merged_output_file, sep=',', index=False)

# Display the merged dataframe:
merged_df


Unnamed: 0,BA_Code,Time_UTC,Non-Transportation_Load_MWh,Transportation_Load_MWh,Total_Load_MWh
0,AVA,2050-01-01 01:00:00,3656.43,533.48,4189.91
1,AVA,2050-01-01 02:00:00,3776.45,556.48,4332.93
2,AVA,2050-01-01 03:00:00,3842.49,557.14,4399.63
3,AVA,2050-01-01 04:00:00,3738.26,541.72,4279.98
4,AVA,2050-01-01 05:00:00,3607.47,535.31,4142.78
...,...,...,...,...,...
245247,WAUW,2050-12-31 19:00:00,249.09,92.33,341.42
245248,WAUW,2050-12-31 20:00:00,248.07,91.95,340.02
245249,WAUW,2050-12-31 21:00:00,245.95,91.83,337.78
245250,WAUW,2050-12-31 22:00:00,244.12,94.70,338.82
