# This notebook calculates the hazard mitigation funding metric sourced from FEMA
* Total amount of hazard mitigation funding received per CA county between 1990 and 2023

In [1]:
import pandas as pd
import os
import sys
import math
import numpy as np

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [2]:
# pull csv from aws
# bucket and key prefix holding the subsetted FEMA hazard mitigation funding data
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/governance/community_preparedness/fema/hazard_mitigation_funding/'

# project helper; per the cell output it saves 'hazard_mitigation_funding_subset.csv'
# into the current working directory
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

Saved DataFrame as 'hazard_mitigation_funding_subset.csv'


In [3]:
# read in the FEMA hazard mitigation funding data that was pulled from AWS
hazard_mitigation_data = pd.read_csv('hazard_mitigation_funding_subset.csv')
print(len(hazard_mitigation_data))
# the local copy is no longer needed once the data is in memory
os.remove('hazard_mitigation_funding_subset.csv')

2131


In [4]:
# list the available columns to decide which ones the metric needs
hazard_mitigation_data.columns

Index(['projectIdentifier', 'programArea', 'programFy', 'region', 'state',
       'stateNumberCode', 'county', 'countyCode', 'disasterNumber',
       'projectCounties', 'projectType', 'status', 'recipient',
       'recipientTribalIndicator', 'subrecipient',
       'subrecipientTribalIndicator', 'dataSource', 'dateApproved',
       'dateClosed', 'dateInitiallyApproved', 'projectAmount',
       'federalShareObligated', 'subrecipientAdminCostAmt', 'srmcObligatedAmt',
       'recipientAdminCostAmt', 'costSharePercentage', 'benefitCostRatio',
       'netValueBenefits', 'numberOfFinalProperties', 'numberOfProperties',
       'id'],
      dtype='object')

In [5]:
# keep only the columns relevant to our CRI metric and give them snake_case names
relevant_columns = [
    'programFy',
    'county',
    'projectAmount',
    'recipientTribalIndicator',
    'subrecipientTribalIndicator',
]
snake_case_names = {
    'programFy': 'funding_year',
    'projectAmount': 'project_amount',
    'subrecipientTribalIndicator': 'subrecipient_tribal_indicator',
    'recipientTribalIndicator': 'recipient_tribal_indicator',
}

# rename returns a new frame, so the raw data is left untouched
hazard_mitigation_columns = hazard_mitigation_data[relevant_columns].rename(columns=snake_case_names)

hazard_mitigation_columns

Unnamed: 0,funding_year,county,project_amount,recipient_tribal_indicator,subrecipient_tribal_indicator
0,2019,Ventura,2159446.00,0.0,0.0
1,2022,Santa Barbara,196315.00,0.0,0.0
2,2018,Marin,1385685.00,0.0,0.0
3,2019,Ventura,1083095.00,0.0,0.0
4,2011,San Luis Obispo,3334701.39,0.0,0.0
...,...,...,...,...,...
2126,1997,Sacramento,3164945.00,,
2127,1992,San Bernardino,1108678.00,,
2128,1993,San Bernardino,170000.00,,
2129,1993,San Bernardino,100000.00,,


#### Assessing if both tribal indicators should be included
* 5 instances where the entries between the indicators differ

In [6]:
def check_tribal_discrepancy(row):
    """Return True when a row's two tribal indicators are both present and differ.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        must provide 'recipient_tribal_indicator' and 'subrecipient_tribal_indicator'

    Returns
    -------
    bool
        False if either indicator is missing or if the two are equal, True otherwise
    """
    recipient = row['recipient_tribal_indicator']
    subrecipient = row['subrecipient_tribal_indicator']

    # a missing indicator cannot be compared, so it never counts as a discrepancy
    if pd.isna(recipient) or pd.isna(subrecipient):
        return False

    return bool(recipient != subrecipient)

# flag every row (axis=1 applies the check row by row) whose two tribal indicators disagree
hazard_mitigation_columns['different_tribal_entries'] = hazard_mitigation_columns.apply(check_tribal_discrepancy, axis=1)

In [7]:
# reuse the 'different_tribal_entries' flag already stored on the frame instead of
# re-running the row-wise check; the flag is True where the two indicators disagree
discrepancy_mask = hazard_mitigation_columns['different_tribal_entries']
discrepancy_count = int(discrepancy_mask.sum())
print("Number of rows with a different recipient tribal and subrecipient tribal entries:", discrepancy_count)
filtered_df = hazard_mitigation_columns[discrepancy_mask]
filtered_df

Number of rows with a different recipient tribal and subrecipient tribal entries: 5


Unnamed: 0,funding_year,county,project_amount,recipient_tribal_indicator,subrecipient_tribal_indicator,different_tribal_entries
293,2018,Riverside,14500.0,0.0,1.0,True
1273,2021,Lake,92479.33,0.0,1.0,True
1426,2018,Del Norte,92011.0,0.0,1.0,True
1741,2017,San Diego,395185.0,0.0,1.0,True
1971,2018,Madera,17087.0,0.0,1.0,True


In [8]:
# identifying all unique entries within the tribal indicators
tribal_indicator = hazard_mitigation_columns['recipient_tribal_indicator'].unique()
print(tribal_indicator)

[ 0.  1. nan]


In [9]:
# running the filter county function to get rid of any non-CA county entries
# NOTE(review): presumably returns (rows kept, rows dropped) - confirm against scripts.utils.file_helpers
filtered_hazard_mitigation, omitted_rows = filter_counties(hazard_mitigation_columns, 'county')

#### Counting how many rows in the filtered dataset have `recipient_tribal_indicator` set to 1
* our final total_tribal_recipient_count should sum to 52

In [10]:
# boolean mask of rows whose recipient is flagged as tribal
count_tribal_indicators = filtered_hazard_mitigation['recipient_tribal_indicator'].eq(1.0)
rows_with_one = filtered_hazard_mitigation.loc[count_tribal_indicators]
print(len(rows_with_one))
rows_with_one.head()

52


Unnamed: 0,funding_year,county,project_amount,recipient_tribal_indicator,subrecipient_tribal_indicator,different_tribal_entries
14,2006,Siskiyou,5464.0,1.0,1.0,False
25,2008,Santa Barbara,10520.0,1.0,1.0,False
26,2013,Siskiyou,3203.0,1.0,1.0,False
29,2013,Siskiyou,84540.0,1.0,1.0,False
161,2003,Mendocino,44590.0,1.0,1.0,False


In [11]:
# inspect the county-filtered data before aggregating
filtered_hazard_mitigation

Unnamed: 0,funding_year,county,project_amount,recipient_tribal_indicator,subrecipient_tribal_indicator,different_tribal_entries
0,2019,Ventura,2159446.00,0.0,0.0,False
1,2022,Santa Barbara,196315.00,0.0,0.0,False
2,2018,Marin,1385685.00,0.0,0.0,False
3,2019,Ventura,1083095.00,0.0,0.0,False
4,2011,San Luis Obispo,3334701.39,0.0,0.0,False
...,...,...,...,...,...,...
2126,1997,Sacramento,3164945.00,,,False
2127,1992,San Bernardino,1108678.00,,,False
2128,1993,San Bernardino,170000.00,,,False
2129,1993,San Bernardino,100000.00,,,False


#### Count the total number of times a county is funded
* this is done before grouping the data so the per-county count is carried through the aggregation

In [12]:
# number of funded project rows per county, repeated on every row of that county
county_row_counts = filtered_hazard_mitigation.groupby('county')['county'].transform('count')
# assign returns a new frame, so no explicit copy is needed before adding the column
filtered_hazard_mitigation = filtered_hazard_mitigation.assign(total_times_funded=county_row_counts)

filtered_hazard_mitigation.tail(5)

Unnamed: 0,funding_year,county,project_amount,recipient_tribal_indicator,subrecipient_tribal_indicator,different_tribal_entries,total_times_funded
2126,1997,Sacramento,3164945.0,,,False,29
2127,1992,San Bernardino,1108678.0,,,False,89
2128,1993,San Bernardino,170000.0,,,False,89
2129,1993,San Bernardino,100000.0,,,False,89
2130,1992,San Bernardino,97200.0,,,False,89


#### Looking at Riverside
* checking how many tribal indicators it has to fact check resulting values

In [13]:
# all funded project rows for Riverside county
is_riverside = filtered_hazard_mitigation['county'].eq('Riverside')
riverside = filtered_hazard_mitigation.loc[is_riverside]

In [14]:
# the row count should match Riverside's total_times_funded value
print(len(riverside))
riverside.tail(5)

62


Unnamed: 0,funding_year,county,project_amount,recipient_tribal_indicator,subrecipient_tribal_indicator,different_tribal_entries,total_times_funded
764,1994,Riverside,277362.0,,,False,62
781,1994,Riverside,28654.0,,,False,62
793,1992,Riverside,452610.0,,,False,62
856,2002,Riverside,67576.0,,,False,62
864,1992,Riverside,109604.0,,,False,62


In [15]:
# Riverside rows whose subrecipient is flagged as tribal
tribe_count = riverside['subrecipient_tribal_indicator'].eq(1.0)
rows_with_one = riverside.loc[tribe_count]
print(len(rows_with_one))
rows_with_one.head(5)

17


Unnamed: 0,funding_year,county,project_amount,recipient_tribal_indicator,subrecipient_tribal_indicator,different_tribal_entries,total_times_funded
255,2019,Riverside,40833.0,1.0,1.0,False,62
276,2011,Riverside,100831.0,1.0,1.0,False,62
277,2015,Riverside,6841.0,1.0,1.0,False,62
278,2019,Riverside,50000.0,1.0,1.0,False,62
292,2011,Riverside,59952.0,1.0,1.0,False,62


#### Group by county
* utilize a function to assign values if any rows indicate indigenous funding

In [16]:
def tribal_indicator_agg(series):
    """Count how many entries of `series` equal 1.0 (missing values never match)."""
    is_flagged = series.eq(1.0)
    return is_flagged.sum()
def first_value(series):
    """Return the first entry of `series` by position.

    Used when grouping per county to carry total_times_funded through the aggregation.
    """
    leading_entry = series.iat[0]
    return leading_entry

# how each column is collapsed within a (county, funding_year) group
county_year_aggregations = {
    'project_amount': 'sum',
    'recipient_tribal_indicator': tribal_indicator_agg,
    'subrecipient_tribal_indicator': tribal_indicator_agg,
    'total_times_funded' : first_value
}

summed_hazard_mitigation = (
    filtered_hazard_mitigation
    .groupby(['county', 'funding_year'])
    .agg(county_year_aggregations)
    .reset_index()
)

summed_hazard_mitigation.head(5)

Unnamed: 0,county,funding_year,project_amount,recipient_tribal_indicator,subrecipient_tribal_indicator,total_times_funded
0,Alameda,1990,43439324.0,0,0,84
1,Alameda,1992,5097938.0,0,0,84
2,Alameda,1993,2904070.0,0,0,84
3,Alameda,1994,57804311.0,0,0,84
4,Alameda,2003,382740.0,0,0,84


In [17]:
# same process, grouping further to get per-county totals over the whole study period
county_aggregations = {
    'project_amount': 'sum',
    'recipient_tribal_indicator': 'sum',
    'subrecipient_tribal_indicator': 'sum',
    'total_times_funded': first_value
}
metric_column_names = {
    'project_amount': 'funding_1990_2023',
    'recipient_tribal_indicator': 'total_tribal_recipient_count',
    'subrecipient_tribal_indicator': 'total_tribal_subrecipient_count',
}

final_hazard_mitigation_funding = (
    summed_hazard_mitigation
    .groupby(['county'])
    .agg(county_aggregations)
    .reset_index()
    .rename(columns=metric_column_names)
)

In [18]:
# preview the per-county totals
final_hazard_mitigation_funding.head()

Unnamed: 0,county,funding_1990_2023,total_tribal_recipient_count,total_tribal_subrecipient_count,total_times_funded
0,Alameda,193501600.0,0,0,84
1,Alpine,319033.9,0,0,2
2,Amador,8581918.0,0,0,11
3,Butte,22708740.0,1,1,30
4,Calaveras,10750390.0,0,0,12


In [19]:
# census tract / county lookup table read straight from S3
county_tract = "s3://ca-climate-index/0_map_data/ca_tract_county_population_2021.csv"
ca_county_tract = (
    pd.read_csv(county_tract)
    .rename(columns={'Census Tract': 'census_tract', 'County':'county'})
    # only the tract and county identifiers are needed for the merge
    .drop(columns=['Unnamed: 0', 'COUNTYFP', 'Total Population 2021'])
)

In [28]:
# attach the county-level totals to every census tract of that county;
# the left join keeps every tract row from ca_county_tract
merged_hazard_mitigation = ca_county_tract.merge(final_hazard_mitigation_funding, on='county', how='left')

# Move column 'funding_1990_2023' to the end
column_to_move = 'funding_1990_2023'
other_columns = [name for name in merged_hazard_mitigation.columns if name != column_to_move]
merged_hazard_mitigation = merged_hazard_mitigation[other_columns + [column_to_move]]

In [29]:
# inspect the tract-level metric table
merged_hazard_mitigation

Unnamed: 0,census_tract,county,total_tribal_recipient_count,total_tribal_subrecipient_count,total_times_funded,funding_1990_2023
0,6085504321,Santa Clara,0,0,34,7.402136e+07
1,6085504410,Santa Clara,0,0,34,7.402136e+07
2,6085507003,Santa Clara,0,0,34,7.402136e+07
3,6085507004,Santa Clara,0,0,34,7.402136e+07
4,6085502204,Santa Clara,0,0,34,7.402136e+07
...,...,...,...,...,...,...
9124,6059001303,Orange,0,0,113,1.509280e+08
9125,6059001304,Orange,0,0,113,1.509280e+08
9126,6059001401,Orange,0,0,113,1.509280e+08
9127,6013367200,Contra Costa,0,0,27,2.929703e+07


In [30]:
# Saving metric df to .csv file
# note: the DataFrame index is written as an unnamed first column (to_csv default)
merged_hazard_mitigation.to_csv('governance_hazard_mitigation_metric.csv')

### Function call for this metric

In [32]:
@append_metadata
def upload_emergency_management_funding(input_csv, export=False, varname=''):
    '''
    Uploads csv containing the total amount of hazard mitigation funding received per CA county between 
    1990-2023 from FEMA: 
    https://www.fema.gov/openfema-data-page/hazard-mitigation-assistance-projects-v3

    Methods
    -------
    Data was cleaned to isolate funding for California counties.
    Funding from 1990-2023 was summed per county. 
    Columns indicating number of tribal recipients are also retained for transparency.

    Parameters
    ----------
    input_csv: string
        path to the csv file containing the calculated hazard mitigation funding metric
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI hazard mitigation funding metric to AWS
        True = will upload resulting df containing CAL CRAI hazard mitigation funding metric to AWS
    varname: string
        not used inside this function; presumably consumed by the append_metadata decorator
        to label the metadata entry (confirm against scripts.utils.write_metadata)

    Script
    ------
    governance_hazard_mitigation_funding.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are 
    stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''

    print('Data transformation: selecting and renaming relevant columns including indicators if funding was recieved by tribes.')
    print('Data transformation: apply "filter_counties" to county column to isolate CA counties.')
    print('Data transformation: dropping all columns with recipients labeled "statewide" due to lack of clarity as to distribution at county/census tract scale.')
    print('Data transformation: adding a column that maintains the total times a county was funded.')
    print('Data transformation: group the data by county and sum funding from 1990-2023.')
    print('Data transformation: count the number of times funding was recieved by a tribe per county.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # upload_csv_aws is given the file name wrapped in a list
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
    else:
        # nothing is uploaded on this path, so the message must say so
        print(f'{os.path.basename(input_csv)} not uploaded to AWS.')

In [33]:
# metric files to upload, paired by position with the variable name of each metric
filenames = [
            'governance_hazard_mitigation_metric.csv'
]

varnames = [
            'governance_fema_hazard_mitigation'
]

# Process the data and export
for filename, varname in zip(filenames, varnames):
    # pass the real variable name rather than the leftover 'test' placeholder
    upload_emergency_management_funding(filename, export=True, varname=varname)
    # clean up the local copy once it has been handled
    if os.path.exists(filename):
        os.remove(filename)