### This notebook calculates the emergency performance grant metric sourced from FEMA
* Total amount of emergency performance grant funding received per CA county between 2014-2022 (the data contain no 2015 reporting period)

In [1]:
import pandas as pd
import os
import sys
import math

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))

from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [2]:
# Pull the FEMA emergency management performance grants subset from the AWS bucket.
# The helper saves it locally as 'fema_emergency_management_grants_subset.csv'
# (see the message printed below).
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/governance/community_preparedness/fema/emergency_management_performance_grants/'

pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

Saved DataFrame as 'fema_emergency_management_grants_subset.csv'


In [3]:
# Read in the FEMA emergency performance grants data, then delete the local copy:
# the DataFrame is already in memory, and the next cell previews it.
emergency_grants_data = pd.read_csv('fema_emergency_management_grants_subset.csv')
os.remove('fema_emergency_management_grants_subset.csv')
print(len(emergency_grants_data))

1280


In [4]:
# Preview the raw columns; 'legalAgencyName', 'reportingPeriod' and 'fundingAmount' are used below
emergency_grants_data.head()

Unnamed: 0,id,reportingPeriod,state,legalAgencyName,projectType,projectStartDate,projectEndDate,nameOfProgram,fundingAmount
0,1f318b75-85d7-4be0-bc13-a15358d94810,2014 Close-Out,California,EM- Tule River Tribe,Develop/enhance interoperable communications s...,2014-12-01T00:00:00.000Z,2015-03-01T00:00:00.000Z,EMPG,16345.0
1,fcdcdff4-96a5-40ea-87fa-3b583aaa8f2e,2014 Close-Out,California,EM-Agua Caliente Band of Cahuilla Indians,Establish/enhance sustainable homeland securit...,2014-09-01T00:00:00.000Z,2015-08-01T00:00:00.000Z,EMPG,17500.0
2,ce196d31-8065-4413-8c4a-528353812ff6,2014 Close-Out,California,OA-Alameda County,Establish / enhance citizen awareness of emerg...,2014-10-01T00:00:00.000Z,2015-06-01T00:00:00.000Z,EMPG,452545.0
3,3df463ec-5632-440a-bbb2-1f6ac8e9df55,2014 Close-Out,California,OA-Alpine County,Administer and manage the Homeland Security Gr...,2014-07-01T00:00:00.000Z,2015-06-01T00:00:00.000Z,EMPG,23496.0
4,93c01d7e-9ce3-4a31-8b2e-38bc61cc943f,2014 Close-Out,California,OA-Alpine County,Develop / enhance homeland security / emergenc...,2014-07-01T00:00:00.000Z,2015-06-01T00:00:00.000Z,EMPG,21403.0


In [5]:
# Build the 'county' column from 'legalAgencyName' in one pass:
#   1. take the segment that follows the first '-' (e.g. 'OA-Alameda County' -> 'Alameda County')
#   2. remove ' county', ' city/county' and ', california' (case-insensitive)
#   3. trim leading/trailing whitespace
emergency_grants_data['county'] = (
    emergency_grants_data['legalAgencyName']
    .str.split('-').str[1]
    .str.replace(' county', '', case=False)
    .str.replace(' city/county', '', case=False)
    .str.replace(', california', '', case=False)
    .str.strip()
)

# Display the modified DataFrame
emergency_grants_data.head()

Unnamed: 0,id,reportingPeriod,state,legalAgencyName,projectType,projectStartDate,projectEndDate,nameOfProgram,fundingAmount,county
0,1f318b75-85d7-4be0-bc13-a15358d94810,2014 Close-Out,California,EM- Tule River Tribe,Develop/enhance interoperable communications s...,2014-12-01T00:00:00.000Z,2015-03-01T00:00:00.000Z,EMPG,16345.0,Tule River Tribe
1,fcdcdff4-96a5-40ea-87fa-3b583aaa8f2e,2014 Close-Out,California,EM-Agua Caliente Band of Cahuilla Indians,Establish/enhance sustainable homeland securit...,2014-09-01T00:00:00.000Z,2015-08-01T00:00:00.000Z,EMPG,17500.0,Agua Caliente Band of Cahuilla Indians
2,ce196d31-8065-4413-8c4a-528353812ff6,2014 Close-Out,California,OA-Alameda County,Establish / enhance citizen awareness of emerg...,2014-10-01T00:00:00.000Z,2015-06-01T00:00:00.000Z,EMPG,452545.0,Alameda
3,3df463ec-5632-440a-bbb2-1f6ac8e9df55,2014 Close-Out,California,OA-Alpine County,Administer and manage the Homeland Security Gr...,2014-07-01T00:00:00.000Z,2015-06-01T00:00:00.000Z,EMPG,23496.0,Alpine
4,93c01d7e-9ce3-4a31-8b2e-38bc61cc943f,2014 Close-Out,California,OA-Alpine County,Develop / enhance homeland security / emergenc...,2014-07-01T00:00:00.000Z,2015-06-01T00:00:00.000Z,EMPG,21403.0,Alpine


In [6]:
# Extract the first 4-digit year from 'reportingPeriod' (e.g. '2014 Close-Out' -> '2014')
# into a new 'year' column. expand=False returns a Series rather than a one-column DataFrame.
# The extracted value is always exactly four digits, so no further string cleanup is needed.
emergency_grants_data['year'] = emergency_grants_data['reportingPeriod'].str.extract(r'(\d{4})', expand=False)

emergency_grants_data.head(5)

Unnamed: 0,id,reportingPeriod,state,legalAgencyName,projectType,projectStartDate,projectEndDate,nameOfProgram,fundingAmount,county,year
0,1f318b75-85d7-4be0-bc13-a15358d94810,2014 Close-Out,California,EM- Tule River Tribe,Develop/enhance interoperable communications s...,2014-12-01T00:00:00.000Z,2015-03-01T00:00:00.000Z,EMPG,16345.0,Tule River Tribe,2014
1,fcdcdff4-96a5-40ea-87fa-3b583aaa8f2e,2014 Close-Out,California,EM-Agua Caliente Band of Cahuilla Indians,Establish/enhance sustainable homeland securit...,2014-09-01T00:00:00.000Z,2015-08-01T00:00:00.000Z,EMPG,17500.0,Agua Caliente Band of Cahuilla Indians,2014
2,ce196d31-8065-4413-8c4a-528353812ff6,2014 Close-Out,California,OA-Alameda County,Establish / enhance citizen awareness of emerg...,2014-10-01T00:00:00.000Z,2015-06-01T00:00:00.000Z,EMPG,452545.0,Alameda,2014
3,3df463ec-5632-440a-bbb2-1f6ac8e9df55,2014 Close-Out,California,OA-Alpine County,Administer and manage the Homeland Security Gr...,2014-07-01T00:00:00.000Z,2015-06-01T00:00:00.000Z,EMPG,23496.0,Alpine,2014
4,93c01d7e-9ce3-4a31-8b2e-38bc61cc943f,2014 Close-Out,California,OA-Alpine County,Develop / enhance homeland security / emergenc...,2014-07-01T00:00:00.000Z,2015-06-01T00:00:00.000Z,EMPG,21403.0,Alpine,2014


In [7]:
# Display the unique years present in the data.
# The 'year' column was created in the previous cell, so it is not re-extracted here.
unique_years = emergency_grants_data['year'].unique()
print(unique_years)

['2014' '2016' '2017' '2018' '2019' '2020' '2021' '2022']


In [8]:
# Keep only the columns needed for the CRI metric
metric_columns = ['county', 'year', 'fundingAmount']
cleaned_emergency_grants = emergency_grants_data[metric_columns]
print(len(cleaned_emergency_grants))
cleaned_emergency_grants.head(10)

1280


Unnamed: 0,county,year,fundingAmount
0,Tule River Tribe,2014,16345.0
1,Agua Caliente Band of Cahuilla Indians,2014,17500.0
2,Alameda,2014,452545.0
3,Alpine,2014,23496.0
4,Alpine,2014,21403.0
5,Alpine,2014,21873.0
6,Alpine,2014,37652.0
7,Amador,2014,97376.0
8,Butte,2014,918.0
9,Butte,2014,97997.0


In [9]:
# Call the project helper filter_counties on the 'county' column. It returns two frames:
# filtered_df (rows kept as county results) and omitted_df (rows it left out).
# The omitted rows are inspected in the next cell.
filtered_df, omitted_df = filter_counties(cleaned_emergency_grants, 'county')

In [10]:
# Look at the rows filter_counties returned as omitted.
# In the output below these are Tribes and state-level agencies rather than counties.
print(len(omitted_df))
omitted_df

28


Unnamed: 0,county,year,fundingAmount
0,Tule River Tribe,2014,16345.0
1,Agua Caliente Band of Cahuilla Indians,2014,17500.0
119,CA California Governor's Office of Emergency S...,2014,13036747.0
120,Agua Caliente Band of Cahuilla Indians,2016,21074.0
121,Blue Lake Rancheria,2016,99063.0
122,Blue Lake Rancheria,2016,18623.0
123,Blue Lake Rancheria,2016,1200.0
124,Blue Lake Rancheria,2016,5180.0
182,Rincon Band of Luiseno Indians,2016,8500.0
244,CA California Governor's Office of Emergency S...,2016,13113306.0


In [11]:
# Sum the omitted recipients' funding per (recipient, year) ...
total_omitted = (
    omitted_df
    .groupby(['county', 'year'])
    .agg({'fundingAmount': 'sum'})
    .reset_index()
)

# ... then across all years, and relabel the columns: these rows are Tribes/agencies, not counties
aggregated_omitted = (
    total_omitted
    .groupby(['county'])
    .agg({'fundingAmount': 'sum'})
    .reset_index()
    .rename(columns={'fundingAmount': 'tribe_funding_2014_2022', 'county': 'tribe'})
)

print(len(aggregated_omitted))
aggregated_omitted.head(20)

8


Unnamed: 0,tribe,tribe_funding_2014_2022
0,Agua Caliente Band of Cahuilla Indians,38574.0
1,Barona Band of Mission Indians,101447.0
2,Blue Lake Rancheria,577907.0
3,CA California Governor's Office of Emergency S...,108535584.0
4,California State Library,110409.0
5,Hoopa Valley Tribe,96551.0
6,Rincon Band of Luiseno Indians,8500.0
7,Tule River Tribe,16345.0


#### Looked up which county each of these tribes resides in and built a dictionary attributing each tribe to its county
* tribe names will be retained as a separate column (along with their independent funding values)

In [12]:
# County -> Tribe(s) located in that county. Tribe names are lower-cased.
# A value is a plain string for a single Tribe and a list for several;
# the cell that maps this onto the county table handles both forms.
# The state-level recipients in the omitted rows (Cal OES, California State Library)
# are not assigned to any county here.
indigenous_to_county_dict = {'Riverside' : 'agua caliente band of cahuilla indians',
                            'San Diego': ['barona band of mission indians', 'rincon band of luiseno indians'],
                            'Humboldt': ['blue lake rancheria', 'hoopa valley tribe'],
                             'Tulare' : 'tule river tribe'
                             }

#### Processing the county filtered data

In [13]:
# Continue with the county rows that filter_counties kept (filtered_df from the earlier cell).
# This is only a new name for the same DataFrame; filter_counties is not called again here.
cri_emergency_grants = filtered_df
print(len(cri_emergency_grants))
cri_emergency_grants

1252


Unnamed: 0,county,year,fundingAmount
2,Alameda,2014,452545.0
3,Alpine,2014,23496.0
4,Alpine,2014,21403.0
5,Alpine,2014,21873.0
6,Alpine,2014,37652.0
...,...,...,...
1274,Ventura,2022,20000.0
1275,Ventura,2022,216673.0
1276,Ventura,2022,11250.0
1277,Yolo,2022,181008.0


In [14]:
# Group the DataFrame by 'county' and 'year', then sum 'fundingAmount'
# so there is one row per county per reporting year
cri_emergency_grants = cri_emergency_grants.groupby(['county', 'year']).agg({'fundingAmount': 'sum'}).reset_index()

print(len(cri_emergency_grants))
cri_emergency_grants.head(5)

460


Unnamed: 0,county,year,fundingAmount
0,Alameda,2014,452545.0
1,Alameda,2016,455793.0
2,Alameda,2017,459407.0
3,Alameda,2018,460021.0
4,Alameda,2019,460021.0


In [15]:
# Collapse the per-year totals into one total per county:
# funding received by each county from 2014-2022 (minus 2015)
cri_emergency_grants_total_county = (
    cri_emergency_grants
    .groupby(['county'])
    .agg({'fundingAmount': 'sum'})
    .reset_index()
    .rename(columns={'fundingAmount': 'county_funding_2014_2022'})
)

print(len(cri_emergency_grants_total_county))
cri_emergency_grants_total_county.head(5)

58


Unnamed: 0,county,county_funding_2014_2022
0,Alameda,3835219.0
1,Alpine,1041123.0
2,Amador,968348.0
3,Butte,1361817.0
4,Calaveras,1110376.0


#### Assigning the tribes to their corresponding county from the dictionary above

In [16]:
# Flatten the lookup so every county maps to a single string:
# lists of Tribe names are joined with ', ', single names are kept as they are
processed_dict = {
    county: ', '.join(tribes) if isinstance(tribes, list) else tribes
    for county, tribes in indigenous_to_county_dict.items()
}

# Attach the Tribe name(s) to each county row; counties without an entry get NaN
cri_emergency_grants_total_county['tribe'] = cri_emergency_grants_total_county['county'].map(processed_dict)

#### Manually adding the tribal funding values based on the county they reside in

In [17]:
# Start every county at zero tribal funding; only the counties listed below are overwritten
cri_emergency_grants_total_county['tribe_funding_2014_2022'] = 0

# Tribal funding per county, copied by hand from the `aggregated_omitted` table shown earlier:
#   Riverside = Agua Caliente Band of Cahuilla Indians (38574)
#   San Diego = Barona Band of Mission Indians (101447) + Rincon Band of Luiseno Indians (8500)
#   Humboldt  = Blue Lake Rancheria (577907) + Hoopa Valley Tribe (96551)
#   Tulare    = Tule River Tribe (16345)
# NOTE(review): these values are hard-coded and must be updated by hand if the source data change.
specific_values = {
    'Riverside': 38574.0,
    'San Diego': 109947.0,
    'Humboldt' : 674458.0,
    'Tulare' : 16345.0
}

# Iterate over the specific_values dictionary and write each value into the
# 'tribe_funding_2014_2022' column of the matching county row
for county, value in specific_values.items():
    cri_emergency_grants_total_county.loc[cri_emergency_grants_total_county['county'] == county, 'tribe_funding_2014_2022'] = value

# Display the resulting DataFrame
cri_emergency_grants_total_county.head()

Unnamed: 0,county,county_funding_2014_2022,tribe,tribe_funding_2014_2022
0,Alameda,3835219.0,,0
1,Alpine,1041123.0,,0
2,Amador,968348.0,,0
3,Butte,1361817.0,,0
4,Calaveras,1110376.0,,0


#### Creating a total funding column by adding the county funding values with the tribe funding values

In [18]:
# Final metric: county funding plus tribal funding attributed to that county
cri_emergency_grants_total_county['total_funding'] = cri_emergency_grants_total_county['county_funding_2014_2022'] + cri_emergency_grants_total_county['tribe_funding_2014_2022']

In [19]:
# Inspect the finished metric table (one row per county)
cri_emergency_grants_total_county

Unnamed: 0,county,county_funding_2014_2022,tribe,tribe_funding_2014_2022,total_funding
0,Alameda,3835219.0,,0,3835219.0
1,Alpine,1041123.0,,0,1041123.0
2,Amador,968348.0,,0,968348.0
3,Butte,1361817.0,,0,1361817.0
4,Calaveras,1110376.0,,0,1110376.0
5,Colusa,752142.0,,0,752142.0
6,Contra Costa,2720086.0,,0,2720086.0
7,Del Norte,1036874.0,,0,1036874.0
8,El Dorado,1379943.0,,0,1379943.0
9,Fresno,2755627.0,,0,2755627.0


In [20]:
# Save the metric df to a local .csv file.
# to_csv is called with its defaults, so the DataFrame index is written as the first column.
cri_emergency_grants_total_county.to_csv('governance_emergency_management_metric.csv')

In [21]:
# Upload the final csv file to AWS, then delete the local copy.
# upload_csv_aws is given a list of file names, so the single name is wrapped in a list.
bucket_name = 'ca-climate-index'
file_name = ['governance_emergency_management_metric.csv']
directory = '3_fair_data/index_data'

#@append_metadata
upload_csv_aws(file_name, bucket_name, directory)
os.remove('governance_emergency_management_metric.csv')

governance_emergency_management_metric.csv uploaded to AWS


### Function call for this metric

In [2]:
@append_metadata
def calc_emergency_management_funding(df, export=False, export_filename=None, varname = ''):
    '''
    Calculates the total amount of emergency performance grant funding received per CA 
    county between 2014-2022 from FEMA: 
    https://www.fema.gov/openfema-data-page/emergency-management-performance-grants-v2

    Methods
    -------
    Data was cleaned to isolate funding for California counties and Indigenous tribes. 
    Tribe names were preserved while their funding was added to the total funding column 
    within the county they reside in. Tribal funding is summed from the rows that 
    filter_counties omits, so it follows the data passed in; omitted recipients that are 
    not in the Tribe-to-county lookup (e.g. state agencies) are not attributed to any county.
    
    Parameters
    ----------
    df: pandas DataFrame
        the dataframe containing the initial emergency management performance grant data.
        Must contain the 'legalAgencyName', 'reportingPeriod' and 'fundingAmount' columns.
        The input is copied and is not modified.
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI emergency management grant metric to AWS
        True = will upload resulting df containing CAL CRAI emergency management grant metric to AWS
    export_filename: string
        name of the csv file to be uploaded to AWS (required when export is True)
    varname: string
        not used inside this function; presumably consumed by the append_metadata
        decorator -- confirm against scripts/utils/write_metadata.py

    Returns
    -------
    pandas DataFrame
        one row per county with the columns 'county', 'county_funding_2014_2022',
        'tribe', 'tribe_funding_2014_2022' and 'total_funding'

    Raises
    ------
    ValueError
        if export is True and no export_filename is given

    Script
    ------
    governance_emergency_management_grants.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are 
    stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    if export and not export_filename:
        raise ValueError('export_filename must be provided when export=True.')

    # Use the DataFrame that was passed in (not a notebook global), and work on a copy
    # so the caller's frame is left untouched
    emergency_grants_data = df.copy()

    print('Data transformation: ensure consistent county naming scheme.')
    # Take the segment after the first '-', strip the county/state wording, trim whitespace
    emergency_grants_data['county'] = (
        emergency_grants_data['legalAgencyName']
        .str.split('-').str[1]
        .str.replace(' county', '', case=False)
        .str.replace(' city/county', '', case=False)
        .str.replace(', california', '', case=False)
        .str.strip()
    )

    print('Data transformation: adjust reporting period column to single year.')
    # Extract the first 4-digit year from 'reportingPeriod'; expand=False returns a Series
    emergency_grants_data['year'] = emergency_grants_data['reportingPeriod'].str.extract(r'(\d{4})', expand=False)

    print('Data transformation: isolate desired columns and run "filter_counties" through county column.')
    print('Data transformation: dropping all columns with recipients labeled "Cal OES etc" due to lack of clarity as to distribution at county/census tract scale.')

    # Isolate desired columns for CRI metric
    cleaned_emergency_grants = emergency_grants_data[['county', 'year','fundingAmount']]

    # cri_emergency_grants holds the county rows; omitted_df holds everything filter_counties left out
    cri_emergency_grants, omitted_df = filter_counties(cleaned_emergency_grants, 'county')

    print('Data transformation: group the data by county and sum the funding received for each county from 2014-2022 (minus 2015).')
    cri_emergency_grants_total_county = (
        cri_emergency_grants
        .groupby(['county'])
        .agg({'fundingAmount': 'sum'})
        .reset_index()
        .rename(columns={'fundingAmount': 'county_funding_2014_2022'})
    )

    print('Data transformation: attribute listed Tribes to a county they reside in, while preserving Tribe name and funding amount.')
    # Insert dictionary to assign tribes to the county they reside in
    indigenous_to_county_dict = {'Riverside' : 'agua caliente band of cahuilla indians',
                                'San Diego': ['barona band of mission indians', 'rincon band of luiseno indians'],
                                'Humboldt': ['blue lake rancheria', 'hoopa valley tribe'],
                                'Tulare' : 'tule river tribe'
                                }

    # Build two lookups from the dictionary:
    #   processed_dict  : county -> comma-separated Tribe names (for the 'tribe' column)
    #   tribe_to_county : lower-cased Tribe name -> county (to attribute omitted funding)
    processed_dict = {}
    tribe_to_county = {}
    for county, tribes in indigenous_to_county_dict.items():
        tribe_names = tribes if isinstance(tribes, list) else [tribes]
        processed_dict[county] = ', '.join(tribe_names)
        for tribe_name in tribe_names:
            tribe_to_county[tribe_name] = county

    # Map the processed dictionary to the DataFrame
    cri_emergency_grants_total_county['tribe'] = cri_emergency_grants_total_county['county'].map(processed_dict)

    print('Data transformation: add funding from Tribes to the total received per county.')
    # Attribute each omitted row to a county via its (case-insensitive) Tribe name.
    # Rows that are not a listed Tribe map to NaN, and groupby drops NaN keys by default.
    county_of_tribe = (
        omitted_df['county']
        .astype(str)
        .str.strip()
        .str.lower()
        .map(tribe_to_county)
    )
    tribe_funding_by_county = omitted_df['fundingAmount'].groupby(county_of_tribe).sum()

    # Counties with no attributed tribal funding get 0
    cri_emergency_grants_total_county['tribe_funding_2014_2022'] = (
        cri_emergency_grants_total_county['county']
        .map(tribe_funding_by_county)
        .fillna(0.0)
    )

    cri_emergency_grants_total_county['total_funding'] = cri_emergency_grants_total_county['county_funding_2014_2022'] + cri_emergency_grants_total_county['tribe_funding_2014_2022']

    # export to csv and upload to AWS
    if export:
        cri_emergency_grants_total_county.to_csv(export_filename)
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        upload_csv_aws([export_filename], bucket_name, directory)

        # remove local files to clear up the directory (only those that actually exist)
        for local_file in ('fema_emergency_management_grants_subset.csv', export_filename):
            if os.path.exists(local_file):
                os.remove(local_file)
    else:
        # Nothing is written or uploaded in this branch, so do not report an upload
        print(f'{export_filename} not uploaded to AWS (export=False).')

    return cri_emergency_grants_total_county # returns df

In [3]:
# pull csv from aws
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/governance/community_preparedness/fema/emergency_management_performance_grants/'
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

# load the pulled subset into a DataFrame
emergency_grants_data = pd.read_csv('fema_emergency_management_grants_subset.csv')

# export=False: compute and display the metric without writing or uploading a csv.
# NOTE(review): varname is 'test' here; the trailing comment suggests
# 'governance_fema_emergency_grants' is the intended value -- confirm before a final run.
calc_emergency_management_funding(emergency_grants_data, export=False, export_filename = 'governance_emergency_management_metric.csv', varname='test')#'governance_fema_emergency_grants')

Saved DataFrame as 'fema_emergency_management_grants_subset.csv'


Unnamed: 0,county,county_funding_2014_2022,tribe,tribe_funding_2014_2022,total_funding
0,Alameda,3835219.0,,0,3835219.0
1,Alpine,1041123.0,,0,1041123.0
2,Amador,968348.0,,0,968348.0
3,Butte,1361817.0,,0,1361817.0
4,Calaveras,1110376.0,,0,1110376.0
5,Colusa,752142.0,,0,752142.0
6,Contra Costa,2720086.0,,0,2720086.0
7,Del Norte,1036874.0,,0,1036874.0
8,El Dorado,1379943.0,,0,1379943.0
9,Fresno,2755627.0,,0,2755627.0
