# Process Interconnection Population-Weighted Meteorology Time Series

This notebook processes the time series of historical population-weighted meteorology for each of the three electricity interconnections.

In [None]:
# Start by importing the packages we need:
import os
import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from glob import glob


## Set the Directory Structure

In [None]:
# Directory holding the county-level meteorology .csv files (organized by scenario and year):
data_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs/tell_data/wrf_tell_counties_output/'

# Directory holding the population and county-to-state metadata files:
metadata_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL_Input_Data/forward_execution/Population_Forcing/'

# Directory that receives the interconnection-level output .csv files:
data_output_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Analysis/Interconnection_Meteorology_Time_Series/'


## Set the Interconnections

In [None]:
# States assigned to the 'WECC' interconnection:
wecc = ['Washington', 'Oregon', 'California', 'Nevada', 'Idaho', 'Montana', 'Arizona', 'Utah', 'New Mexico', 'Colorado', 'Wyoming']

# States assigned to the 'ERCOT' interconnection:
ercot = ['Texas']

# NOTE: despite its name, `eic` holds the states that are *excluded* from the 'EIC' label.
# The population-processing cell tags a row as 'EIC' when its state is NOT in this list
# (`~pop_df['State'].isin(eic)`). It is derived from the two lists above rather than
# retyped so that the three definitions cannot drift out of sync.
eic = wecc + ercot


## Process the Data

In [None]:
# Build the county-level lookup table (FIPS, State, Population, Interconnection) that is
# later merged into each hourly meteorology file.

# Read in the county-level population data (os.path.join works whether or not the
# directory string carries a trailing separator):
pop_df = pd.read_csv(os.path.join(metadata_input_dir, 'county_populations_2000_to_2019_long_format.csv'))

# Subset the dataframe to only the year 2019:
pop_df = pop_df[pop_df['Year'] == 2019].copy()

# Rename the FIPS variable for consistency with the meteorology data:
pop_df = pop_df.rename(columns={'County_FIPS': 'FIPS'})

# Read in the county-to-state mapping file:
state_df = pd.read_csv(os.path.join(metadata_input_dir, 'state_and_county_fips_codes.csv'))

# Rename the FIPS variable for consistency with the meteorology data and shorten the state name variable:
state_df = state_df.rename(columns={'county_FIPS': 'FIPS', 'state_name': 'State'})

# Merge the two dataframes together based on common FIPS values:
pop_df = pop_df.merge(state_df, on=['FIPS'])

# Subset the dataframe and sort by FIPS code. The explicit copy guarantees that the column
# assignments below act on an independent dataframe rather than on a slice of the merge result:
pop_df = pop_df[['FIPS', 'State', 'Population']].sort_values(['FIPS']).copy()

# Create an empty column. It is given object dtype up front because it will hold string
# labels; seeding it as a float NaN column forces an incompatible-dtype upcast on assignment:
pop_df['Interconnection'] = pd.Series(np.nan, index=pop_df.index, dtype=object)

# Assign the interconnection. A single .loc[rows, column] call is used instead of the chained
# pop_df[column].loc[rows] form, which writes to a temporary object and is a no-op under
# pandas Copy-on-Write. Note that `eic` lists the states that are NOT labelled 'EIC':
pop_df.loc[pop_df['State'].isin(wecc), 'Interconnection'] = 'WECC'
pop_df.loc[pop_df['State'].isin(ercot), 'Interconnection'] = 'ERCOT'
pop_df.loc[~pop_df['State'].isin(eic), 'Interconnection'] = 'EIC'

# Return the dataframe:
pop_df


In [None]:
# Define a function to process the interconnection-level time series for a given year
def process_ics_meteorology_time_series(data_input_dir: str, data_output_dir: str, scenario: str, year: int,
                                        population_df: 'pd.DataFrame | None' = None) -> 'pd.DataFrame':
    """Compute population-weighted interconnection meteorology for one scenario and year.

    Every ``*.csv`` file found in ``<data_input_dir>/<scenario>/<year>/`` is read, the 10-m wind
    speed is derived from ``U10`` and ``V10``, and the variables ``T2``, ``Q2``, ``SWDOWN``,
    ``GLW`` and ``WSPD`` are averaged over each interconnection using county population as the
    weight. The combined time series is written to
    ``<data_output_dir>/<scenario>/Interconnection_Meteorology_<year>.csv`` and also returned.

    :param data_input_dir:   Root directory of the county meteorology files.
    :param data_output_dir:  Root directory for the output file; the scenario sub-directory is
                             created if it does not already exist.
    :param scenario:         Name of the scenario sub-directory.
    :param year:             Year sub-directory to process.
    :param population_df:    Table with at least the columns ``FIPS``, ``Population`` and
                             ``Interconnection``. When omitted, the notebook-level ``pop_df``
                             is used, which matches the previous behavior.
    :return:                 Dataframe with the columns ``Time_UTC``, ``Interconnection``, ``T2``,
                             ``Q2``, ``SWDOWN``, ``GLW`` and ``WSPD``, sorted by interconnection
                             and then time.
    :raises FileNotFoundError: If no meteorology files match the scenario and year.
    """

    # Fall back to the notebook-level population table when none is passed explicitly:
    if population_df is None:
        population_df = pop_df

    # Variables that get population-weighted, and the number of decimals kept for each:
    variables = ['T2', 'Q2', 'SWDOWN', 'GLW', 'WSPD']
    decimals = {'T2': 2, 'Q2': 5, 'SWDOWN': 2, 'GLW': 2, 'WSPD': 2}

    # Create a list of all county meteorology files in the input directory:
    search_pattern = os.path.join(data_input_dir, scenario, str(year), '*.csv')
    list_of_files = glob(search_pattern)

    # Fail with a clear message instead of an UnboundLocalError further down:
    if not list_of_files:
        raise FileNotFoundError('No county meteorology files found matching ' + search_pattern)

    # Loop over that list and process each file, collecting one small dataframe per file:
    hourly_frames = []
    for file_path in list_of_files:

        # Extract the filename from the path (independent of the platform's path separator):
        filename = os.path.basename(file_path)

        # Extract the time string from the name of the file:
        filetime = filename.replace("_UTC_County_Mean_Meteorology.csv", "")

        # Read in the .csv file:
        met_df = pd.read_csv(file_path)

        # Compute the 10-m wind speed based on the U10 and V10 variables:
        met_df['WSPD'] = (np.sqrt(np.square(met_df['U10']) + np.square(met_df['V10']))).round(2)

        # Merge the population data into the meteorology dataframe based on common FIPS values:
        met_df = met_df.merge(population_df, on=['FIPS'])

        # Compute the fraction of each interconnection's total population that lives in a given county:
        population_sum = met_df.groupby('Interconnection')['Population'].transform('sum')
        population_fraction = met_df['Population'] / population_sum

        # Population-weight the meteorological variables (row-wise multiplication):
        weighted_df = met_df[variables].mul(population_fraction, axis=0)

        # Sum the weighted variables by interconnection, which yields one row per interconnection:
        temp_df = weighted_df.groupby(met_df['Interconnection']).sum().round(decimals).reset_index()

        # Add in the time variable as the first column:
        temp_df.insert(0, 'Time_UTC', pd.to_datetime(filetime, exact=False, format='%Y_%m_%d_%H'))

        hourly_frames.append(temp_df)

    # Concatenate once at the end; concatenating inside the loop re-copies all earlier rows each pass:
    output_df = pd.concat(hourly_frames, ignore_index=True)

    # Sort by interconnection and then time:
    output_df = output_df.sort_values(['Interconnection', 'Time_UTC']).reset_index(drop=True)

    # Create the output filename, making sure the scenario sub-directory exists:
    output_dir = os.path.join(data_output_dir, scenario)
    os.makedirs(output_dir, exist_ok=True)
    csv_output_filename = os.path.join(output_dir, ('Interconnection_Meteorology_' + str(year) + '.csv'))

    # Write out the dataframe to a .csv file:
    output_df.to_csv(csv_output_filename, sep=',', index=False)

    # Print out the progress:
    print(('Scenario = ' + scenario + ', Year = ' + str(year)))

    return output_df


In [None]:
# Process every year from 2060 through 2099, first for the 'rcp85cooler' scenario and then
# for the 'rcp85hotter' scenario. Each call writes one output .csv file; `output_df` is
# overwritten on every pass and ends up holding the result of the final call.
for scenario_name in ('rcp85cooler', 'rcp85hotter'):
    for year_to_process in range(2060, 2100):
        output_df = process_ics_meteorology_time_series(data_input_dir=data_input_dir,
                                                        data_output_dir=data_output_dir,
                                                        scenario=scenario_name,
                                                        year=year_to_process)
