# TELL Runs for IM3's Experiment Group B 

This notebook executes the initial set of runs of the TELL model for IM3's Experiment Group B.

In [1]:
# Start by importing the TELL package and information about your operating system:
import os 
import tell

import pandas as pd
import matplotlib.pyplot as plt

from tell.package_data import get_ba_abbreviations
from tell import state_metadata_from_state_abbreviation
from scipy import interpolate
from datetime import datetime


## Set the Directory Structure

In [2]:
# Identify the top-level directory and the subdirectory where the data will be stored:
current_dir =  '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2'
tell_data_dir = os.path.join(current_dir, r'tell_data')

# If the "tell_data_dir" subdirectory doesn't exist then create it:
if not os.path.exists(tell_data_dir):
   os.makedirs(tell_data_dir)


## Run the MLP Models

In [None]:
# Generate a list of BA abbreviations to process:
ba_abbrev_list = tell.get_balancing_authority_to_model_dict().keys()

scenario_to_process = 'rcp85hotter_ssp5'

# Run the MLP prediction step for the list of BAs using parallel processing streams:
for year_to_process in range(2049,2050,1):
    pdf = tell.predict_batch(target_region_list = ba_abbrev_list,
                             year = year_to_process,
                             data_dir = os.path.join(tell_data_dir, r'wrf_to_tell_data', scenario_to_process),
                             datetime_field_name = 'Time_UTC',
                             save_prediction = True,
                             prediction_output_directory = os.path.join(tell_data_dir, r'outputs', r'mlp_output', scenario_to_process),
                             n_jobs = -1)


## Test the New Ingest Function for the GCAM-USA Data


In [3]:
gcam_usa_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2/tell_data/gcamusa_data'


In [4]:
def extract_gcam_usa_loads(scenario_to_process: str):
    """Extracts the state-level annual loads from a GCAM-USA output file.

    :param scenario_to_process: Scenario to process
    :type scenario_to_process:  str

    :return:                    DataFrame of state-level annual total electricity loads

    """

    # Cluge the scenario for historical runs:
    if scenario_to_process == 'historic':
       scenario_to_process_gcam = 'rcp45cooler_ssp3'
    else:
       scenario_to_process_gcam = scenario_to_process

    # Create the filename for the needed GCAM run:
    filename = (os.path.join(gcam_usa_input_dir, ('electricity_demand_' + scenario_to_process_gcam + '.csv')))
    
    # Load in the raw GCAM-USA output file:
    gcam_usa_df = pd.read_csv(filename, index_col=None, header=0)
    
    # Make a list of all of the states in the "gcam_usa_df":
    states = gcam_usa_df['subRegion'].unique()

    # Loop over the states and interpolate their loads to an annual time step:
    for i in range(len(states)):
    # for i in range(1):

        # Subset to just the data for the state being processed:
        subset_df = gcam_usa_df[gcam_usa_df['subRegion'].isin([states[i]])].copy()

        # Retrieve the state metadata:
        (state_fips, state_name) = state_metadata_from_state_abbreviation(states[i])

        # Linearly interpolate the 5-year loads from GCAM-USA to an annual time step:
        annual_time_vector = pd.Series(range(subset_df['x'].min(), subset_df['x'].max()))
        interpolation_function = interpolate.interp1d(subset_df['x'], subset_df['value'], kind='linear')
        annual_loads = interpolation_function(annual_time_vector)

        # Create an empty dataframe and store the results:
        state_df = pd.DataFrame()
        state_df['Year'] = annual_time_vector.tolist()
        state_df['GCAM_USA_State_Annual_Load_TWh'] = annual_loads
        state_df['State_FIPS'] = state_fips
        state_df['State_Name'] = state_name
        state_df['State_Abbreviation'] = states[i]

        # Aggregate the output into a new dataframe:
        if i == 0:
            gcam_usa_output_df = state_df
        else:
            gcam_usa_output_df = pd.concat([gcam_usa_output_df, state_df])

    return gcam_usa_output_df


In [6]:
test_df = extract_gcam_usa_loads(scenario_to_process = 'rcp45cooler_ssp3')

test_df


Unnamed: 0,Year,GCAM_USA_State_Annual_Load_TWh,State_FIPS,State_Name,State_Abbreviation
0,1975,0.510868,2000,Alaska,AK
1,1976,0.807987,2000,Alaska,AK
2,1977,1.105106,2000,Alaska,AK
3,1978,1.402225,2000,Alaska,AK
4,1979,1.699344,2000,Alaska,AK
...,...,...,...,...,...
120,2095,43.169166,56000,Wyoming,WY
121,2096,43.198193,56000,Wyoming,WY
122,2097,43.227219,56000,Wyoming,WY
123,2098,43.256246,56000,Wyoming,WY


## Test the New Ingest Function for the Population Data

In [14]:
# Set the input directories based on the "data_input_dir" variable:
map_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2/tell_data/ba_service_territory_data/'
pop_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2/tell_data/population_data/'

ba_code = 'CISO'
year = 2050
scenario = 'ssp3'


In [30]:
# Read in the BA mapping .csv file:
mapping_df = pd.read_csv(os.path.join(map_input_dir, 'ba_service_territory_2019.csv'))

# Only keep the columns that are needed:
mapping_df = mapping_df[['County_FIPS', 'BA_Code']].copy()

# Subset to only the BA you want to process:
mapping_df = mapping_df[mapping_df["BA_Code"] == ba_code]

# Read in the population projection file for the scenario you want to process:
pop_df = pd.read_csv(os.path.join(pop_input_dir, f'{scenario}_county_population.csv'))

# Rename some columns for consistency:
pop_df.rename(columns={"FIPS": "County_FIPS"}, inplace=True)

# Merge the mapping dataframe to the the population dataframe based on county FIPS code:
mapping_df = mapping_df.merge(pop_df, on=['County_FIPS'])

# Only keep the columns that are needed:
df = mapping_df.drop(columns=['County_FIPS', 'BA_Code', 'state_name'])
#df = mapping_df[['2020', '2030', '2040', '2050', '2060', '2070', '2080', '2090', '2100']].copy()

# Sum the population across all counties:
df_sum = df.sum(axis=0)

# Convert the series to a dataframe:
df = pd.DataFrame({'Year': df_sum.index, 'Population': df_sum.values})

# Convert the year to a datetime variable:
df['Year'] = pd.to_datetime(df['Year'], format='%Y')

# Linearly interpolate from an decadal to an hourly resolution:
df_interp = df.set_index('Year').resample('H').interpolate('linear')

# Reset the index variable:
df_interp.reset_index(level=0, inplace=True)

# Set the start and end times for the year you want to process:
rng_start = f'{year}-01-01 00:00:00'
rng_end = f'{year}-12-31 23:00:00'

# Subset to only the year you want to process:
df_interp = df_interp[df_interp["Year"] >= (datetime.strptime(rng_start, "%Y-%m-%d %H:%M:%S"))]
df_interp = df_interp[df_interp["Year"] <= (datetime.strptime(rng_end, "%Y-%m-%d %H:%M:%S"))]

# Rename some columns for consistency:
df_interp.rename(columns={"Year": "Time"}, inplace=True)

# Extract the year, month, day, and hour for each date:
df_interp['Year'] = df_interp['Time'].dt.strftime('%Y')
df_interp['Month'] = df_interp['Time'].dt.strftime('%m')
df_interp['Day'] = df_interp['Time'].dt.strftime('%d')
df_interp['Hour'] = df_interp['Time'].dt.strftime('%H')

# Reorder the columns:
col = df_interp.pop("Year")
df_interp.insert(0, col.name, col)
col = df_interp.pop("Month")
df_interp.insert(1, col.name, col)
col = df_interp.pop("Day")
df_interp.insert(2, col.name, col)
col = df_interp.pop("Hour")
df_interp.insert(3, col.name, col)
col = df_interp.pop("Population")
df_interp.insert(4, col.name, col)

# Drop the index variable:
df_interp = df_interp.drop(columns='Time')

df_interp

Unnamed: 0,Year,Month,Day,Hour,Population
262992,2050,01,01,00,3.928664e+07
262993,2050,01,01,01,3.928662e+07
262994,2050,01,01,02,3.928660e+07
262995,2050,01,01,03,3.928659e+07
262996,2050,01,01,04,3.928657e+07
...,...,...,...,...,...
271747,2050,12,31,19,3.913023e+07
271748,2050,12,31,20,3.913021e+07
271749,2050,12,31,21,3.913019e+07
271750,2050,12,31,22,3.913017e+07


In [11]:
# Read in the raw file:
population_df = pd.read_csv(('/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2/tell_data/population_data/ssp3_county_population.csv'), dtype={'FIPS': str})

# Drop the 'state_name' column and rename the "FIPS" column:
population_df.drop(columns=['state_name'], inplace=True)
population_df.rename(columns={'FIPS': 'County_FIPS'}, inplace=True)

# Set county FIPS code as the index variable:
population_df.set_index('County_FIPS', inplace=True)

# Transpose the dataframe:
population_dft = population_df.T

# Bring the index back into the dataframe:
population_dft.reset_index(inplace=True)

# Rename the index column as "yr":
population_dft.rename(columns={'index': 'yr'}, inplace=True)

# Convert the year to a datetime variable:
population_dft['yr'] = pd.to_datetime(population_dft['yr'])

# Set the year as the index variable:
population_dft.set_index('yr', inplace=True)

# Interpolate the populations to an annual time-step and transpose the results:
population_interp_df = population_dft.resample('1Y').mean().interpolate('linear').T

# Convert the dataframe from a wide format to a long format and name the population variable:
population_interp_df = population_interp_df.stack().reset_index()
population_interp_df.rename(columns={0: 'Population'}, inplace=True)

# Change the time variable to only the year value:
population_interp_df['Year'] = population_interp_df['yr'].dt.year
population_interp_df.drop(columns=['yr'], inplace=True)

population_interp_df

Unnamed: 0,County_FIPS,Population,Year
0,1001,58694.300882,2020
1,1001,58920.459838,2021
2,1001,59140.941194,2022
3,1001,59340.943324,2023
4,1001,59520.830317,2024
...,...,...,...
251743,56045,5922.330839,2096
251744,56045,5862.017232,2097
251745,56045,5800.886737,2098
251746,56045,5738.971775,2099


## Run the Forward Execution

In [3]:
# Run the TELL model forward in time:
for year in range(2060,2100,1):
    summary_df, ba_time_series_df, state_time_series_df = tell.execute_forward(year_to_process = str(year),
                                                                               gcam_target_year = str(year), 
                                                                               scenario_to_process = 'rcp85hotter_ssp5',
                                                                               data_output_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2/tell_data/outputs/tell_output',
                                                                               gcam_usa_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2/tell_data/gcamusa_data',
                                                                               map_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2/tell_data/ba_service_territory_data',
                                                                               mlp_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2/tell_data/outputs/mlp_output',
                                                                               pop_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL/Production_Runs_V2/tell_data/population_data',
                                                                               save_county_data = False)


Scenario =  rcp85hotter_ssp5  , Year =  2060
Elapsed time =  0:01:05.755619
Scenario =  rcp85hotter_ssp5  , Year =  2061
Elapsed time =  0:01:05.598809
Scenario =  rcp85hotter_ssp5  , Year =  2062
Elapsed time =  0:01:04.692725
Scenario =  rcp85hotter_ssp5  , Year =  2063
Elapsed time =  0:01:04.797280
Scenario =  rcp85hotter_ssp5  , Year =  2064
Elapsed time =  0:01:05.068745
Scenario =  rcp85hotter_ssp5  , Year =  2065
Elapsed time =  0:01:04.742813
Scenario =  rcp85hotter_ssp5  , Year =  2066
Elapsed time =  0:01:04.390458
Scenario =  rcp85hotter_ssp5  , Year =  2067
Elapsed time =  0:01:04.569967
Scenario =  rcp85hotter_ssp5  , Year =  2068
Elapsed time =  0:01:05.258785
Scenario =  rcp85hotter_ssp5  , Year =  2069
Elapsed time =  0:01:05.987016
Scenario =  rcp85hotter_ssp5  , Year =  2070
Elapsed time =  0:01:05.984742
Scenario =  rcp85hotter_ssp5  , Year =  2071
Elapsed time =  0:01:06.403690
Scenario =  rcp85hotter_ssp5  , Year =  2072
Elapsed time =  0:01:06.906895
Scenario =  