# Process State-Level Population-Weighted Meteorology Time Series

This notebook processes the time series of historical population-weighted meteorology for each of the 48 contiguous U.S. states and the District of Columbia.

In [53]:
# Start by importing the packages we need:
import os

import pandas as pd
import numpy as np

from glob import glob


## Set the Directory Structure

In [3]:
# Input locations: the county-level meteorology files and the population/FIPS metadata files:
data_input_dir = '/Volumes/LaCie/Big_Data/wrf_to_tell/wrf_tell_counties_output/historic/'
metadata_input_dir = '/Users/burl878/Documents/IMMM/Data/TELL_Input_Data/forward_execution/Population_Forcing/'
# Output location: where the state-level time series .csv files are written:
data_output_dir = '/Users/burl878/Documents/IMMM/Data/TELL/State_Meteorology_Time_Series/historic/'


## Process the Population Data

In [31]:
# Load the long-format county population table:
pop_df = pd.read_csv(metadata_input_dir + 'county_populations_2000_to_2019_long_format.csv')

# Load the county-to-state lookup, renaming its FIPS column to match the meteorology data
# and shortening the state name column:
state_df = (
    pd.read_csv(metadata_input_dir + 'state_and_county_fips_codes.csv')
    .rename(columns={'county_FIPS': 'FIPS', 'state_name': 'State'})
)

# Keep only the 2019 populations, align the FIPS column name, and attach the state names
# by joining on the shared FIPS values:
pop_df = (
    pop_df.loc[pop_df['Year'] == 2019]
    .rename(columns={'County_FIPS': 'FIPS'})
    .merge(state_df, on=['FIPS'])
)

# Express each county's population as a fraction of its state's total population,
# keep only the columns needed downstream, and order the rows by FIPS code:
pop_df = (
    pop_df.assign(
        Population_Fraction=lambda df: df['Population'] / df.groupby('State')['Population'].transform('sum')
    )[['FIPS', 'State', 'Population', 'Population_Fraction']]
    .sort_values(['FIPS'])
)

# Display the resulting dataframe:
pop_df


Unnamed: 0,FIPS,State,Population,Population_Fraction
0,1001,Alabama,55869,0.011394
1,1003,Alabama,223234,0.045528
2,1005,Alabama,24686,0.005035
3,1007,Alabama,22394,0.004567
4,1009,Alabama,57826,0.011794
...,...,...,...,...
3137,56037,Wyoming,42343,0.073162
3138,56039,Wyoming,23464,0.040542
3139,56041,Wyoming,20226,0.034947
3140,56043,Wyoming,7805,0.013486


In [79]:
# Define a function to process the state-level time series for a given year
def process_state_meteorology_time_series(data_input_dir: str, data_output_dir: str, year: int,
                                          population_df: pd.DataFrame = None) -> pd.DataFrame:
    """
    Aggregate hourly county-level meteorology files into population-weighted state means.

    Every '*.csv' file found in '<data_input_dir>/<year>/' is read, the 10-m wind speed is
    derived from U10 and V10, and each variable is averaged over the counties of a state
    using the 'Population_Fraction' weights. The combined result is written to
    '<data_output_dir>/State_Meteorology_<year>.csv' and also returned.

    :param data_input_dir:      Directory that contains one sub-directory of county .csv files per year
    :type data_input_dir:       str

    :param data_output_dir:     Directory the output .csv file is written to
    :type data_output_dir:      str

    :param year:                Year to process (used as the sub-directory name and in the output filename)
    :type year:                 int

    :param population_df:       Table with at least the columns 'FIPS', 'State', and 'Population_Fraction'.
                                If omitted, the notebook-level 'pop_df' dataframe is used.
    :type population_df:        pd.DataFrame

    :return:                    Dataframe with the columns Time_UTC, State, T2, Q2, SWDOWN, GLW, and WSPD,
                                sorted by state and then time
    :raises FileNotFoundError:  If no .csv files are found for the requested year
    """

    # Fall back to the population table built earlier in the notebook:
    if population_df is None:
        population_df = pop_df

    # Number of decimals each output variable is rounded to:
    variable_decimals = {'T2': 2, 'Q2': 5, 'SWDOWN': 2, 'GLW': 2, 'WSPD': 2}
    variables = list(variable_decimals)
    output_columns = ['Time_UTC', 'State'] + variables

    # Create a sorted list of all county meteorology files in the input directory. glob() returns
    # files in arbitrary order, so sort them to make the processing order reproducible:
    list_of_files = sorted(glob(os.path.join(data_input_dir, str(year), '*.csv')))

    # Fail with a clear message instead of a cryptic KeyError further down:
    if not list_of_files:
        raise FileNotFoundError('No county meteorology .csv files found in ' +
                                os.path.join(data_input_dir, str(year)))

    # Collect one small dataframe per file and concatenate once at the end. Growing a dataframe
    # one row at a time reallocates it on every insertion and becomes very slow for a full year:
    state_frames = []

    # Loop over that list and process each file:
    for path in list_of_files:
        # Extract the time string from the name of the file (portable across path separators):
        file_time = os.path.basename(path).replace("_UTC_County_Mean_Meteorology.csv", "")

        # Read in the .csv file:
        met_df = pd.read_csv(path)

        # Compute the 10-m wind speed based on the U10 and V10 variables:
        met_df['WSPD'] = (np.sqrt(np.square(met_df['U10']) + np.square(met_df['V10']))).round(2)

        # Merge the population data in based on common FIPS values:
        met_df = met_df.merge(population_df, on=['FIPS'])

        # Weight every variable by the county's share of its state's population and sum the
        # weighted values per state. sort=False keeps the states in order of first appearance:
        weighted_df = met_df[variables].mul(met_df['Population_Fraction'], axis=0)
        state_df = weighted_df.groupby(met_df['State'], sort=False).sum().round(variable_decimals)

        # Nothing to add if none of the counties in this file matched the population table:
        if state_df.empty:
            continue

        # Turn the state index back into a column and stamp the rows with the file's time:
        state_df = state_df.reset_index()
        state_df.insert(0, 'Time_UTC', pd.to_datetime(file_time, exact=False, format='%Y_%m_%d_%H'))
        state_frames.append(state_df[output_columns])

    # Combine the per-file results into a single dataframe:
    if state_frames:
        output_df = pd.concat(state_frames, ignore_index=True)
    else:
        output_df = pd.DataFrame(columns=output_columns)

    # Label the rows 1..N in processing order (the same labels a running counter would give):
    output_df.index = range(1, len(output_df) + 1)

    # Sort by state and then time:
    output_df = output_df.sort_values(['State', 'Time_UTC'])

    # Create the output filename:
    csv_output_filename = os.path.join(data_output_dir, ('State_Meteorology_' + str(year) + '.csv'))

    # Write out the dataframe to a .csv file:
    output_df.to_csv(csv_output_filename, sep=',', index=False)

    return output_df
   

In [80]:
# Process a single year of county files into the state-level time series:
output_df = process_state_meteorology_time_series(
    data_input_dir=data_input_dir,
    data_output_dir=data_output_dir,
    year=2015,
)

# Display the resulting dataframe:
output_df


Unnamed: 0,Time_UTC,State,T2,Q2,SWDOWN,GLW,WSPD
1,1980-01-01 00:00:00,Alabama,279.21,0.00462,0.00,322.85,2.38
49,1980-01-01 01:00:00,Alabama,278.99,0.00462,0.00,322.51,2.42
97,1980-01-01 02:00:00,Alabama,278.73,0.00462,0.00,321.03,2.71
145,1980-01-01 03:00:00,Alabama,278.42,0.00460,0.00,317.73,2.97
193,1980-01-01 04:00:00,Alabama,278.04,0.00458,0.00,310.53,3.21
...,...,...,...,...,...,...,...
421440,1980-12-31 19:00:00,Wyoming,277.89,0.00453,421.99,247.31,2.18
421488,1980-12-31 20:00:00,Wyoming,278.31,0.00463,400.33,249.72,2.24
421536,1980-12-31 21:00:00,Wyoming,278.40,0.00468,338.31,250.69,2.33
421584,1980-12-31 22:00:00,Wyoming,277.95,0.00464,209.28,251.26,2.40
