## Cal-CRAI Metric Calculation
Domain: Governance \
Indicator: Community Preparedness

This notebook calculates one metric, sourced from the Federal Emergency Management Agency:
* Metric 1: Total amount of hazard mitigation funding received per CA county between 1990-2023


In [34]:
import pandas as pd
import os
import sys
import math
import numpy as np

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [None]:
# pull the hazard mitigation funding csv from AWS
aws_dir = '2a_subset/governance/community_preparedness/fema/hazard_mitigation_funding/'
bucket_name = 'ca-climate-index'

pull_csv_from_directory(
    bucket_name,
    aws_dir,
    search_zipped=False,
)

In [None]:
# read in the FEMA hazard mitigation funding subset, report its row count,
# then delete the local copy of the csv
subset_csv = 'hazard_mitigation_funding_subset.csv'
hazard_mitigation_data = pd.read_csv(subset_csv)
print(len(hazard_mitigation_data))
os.remove(subset_csv)

In [None]:
# list the column names available in the raw FEMA data before selecting a subset
hazard_mitigation_data.columns

In [None]:
# renaming and selecting relevant columns to our CRI metric
# columns kept for the CRI metric, in the order they should appear
selected_columns = [
    'programFy',
    'county',
    'projectAmount',
    'recipientTribalIndicator',
    'subrecipientTribalIndicator',
]

# snake_case names for the FEMA camelCase columns ('county' keeps its name)
column_renames = {
    'programFy': 'funding_year',
    'projectAmount': 'project_amount',
    'subrecipientTribalIndicator': 'subrecipient_tribal_indicator',
    'recipientTribalIndicator': 'recipient_tribal_indicator',
}

hazard_mitigation_columns = (
    hazard_mitigation_data[selected_columns]
    .copy()
    .rename(columns=column_renames)
)

hazard_mitigation_columns

#### Assessing if both tribal indicators should be included
* 5 instances where the entries between the indicators differ

In [39]:
def check_tribal_discrepancy(row):
    """
    Report whether the recipient and subrecipient tribal indicators of a row disagree.

    Parameters
    ----------
    row: mapping or pandas Series
        must provide 'recipient_tribal_indicator' and 'subrecipient_tribal_indicator'

    Returns
    -------
    bool
        True when both indicators are present and differ; False when they
        match or when either one is missing (NaN/None).
    """
    recipient_flag = row['recipient_tribal_indicator']
    subrecipient_flag = row['subrecipient_tribal_indicator']

    # a missing value on either side is never counted as a discrepancy
    if pd.isna(recipient_flag) or pd.isna(subrecipient_flag):
        return False

    return bool(recipient_flag != subrecipient_flag)

# flag, row by row, where the two tribal indicators disagree and store the flag as a new column
discrepancy_flags = hazard_mitigation_columns.apply(check_tribal_discrepancy, axis=1)
hazard_mitigation_columns['different_tribal_entries'] = discrepancy_flags

In [None]:
# evaluate the row-wise discrepancy check once and reuse the boolean mask
# (previously the same apply() was recomputed three times)
discrepancy_mask = hazard_mitigation_columns.apply(check_tribal_discrepancy, axis=1)

# NOTE: despite its name, false_count holds the number of rows where the two
# indicators differ (i.e. where the mask is True)
false_count = discrepancy_mask.sum()
print("Number of rows with a different recipient tribal and subrecipient tribal entries:", false_count)

# rows where the recipient and subrecipient indicators disagree
filtered_df = hazard_mitigation_columns[discrepancy_mask]
filtered_df

In [None]:
# list every distinct value that appears in the recipient tribal indicator column
recipient_flags = hazard_mitigation_columns['recipient_tribal_indicator']
tribal_indicator = recipient_flags.unique()
print(tribal_indicator)

In [42]:
# running the filter county function to get rid of any non-CA county entries
# NOTE(review): filter_counties is imported from scripts.utils.file_helpers; presumably
# omitted_rows holds the entries it removed -- confirm against the helper
filtered_hazard_mitigation, omitted_rows = filter_counties(hazard_mitigation_columns, 'county')

#### Counting how many rows in the filtered dataset have `recipient_tribal_indicator` equal to 1
* our final total_tribal_recipient_count should total to 52

In [None]:
# rows of the county-filtered data whose recipient is flagged as tribal (indicator equals 1.0)
is_tribal_recipient = filtered_hazard_mitigation['recipient_tribal_indicator'] == 1.0
rows_with_one = filtered_hazard_mitigation.loc[is_tribal_recipient]
print(rows_with_one.shape[0])
rows_with_one.head()

In [None]:
# display the county-filtered funding records
filtered_hazard_mitigation

#### Count the total number of times a county is funded
* this is done before grouping the data so that the per-county count is carried through the aggregation

In [None]:
# count how many funding records each county has and store that count on every row of the county
county_record_counts = filtered_hazard_mitigation.groupby('county')['county'].transform('count')
filtered_hazard_mitigation = filtered_hazard_mitigation.assign(total_times_funded=county_record_counts)

filtered_hazard_mitigation.tail()

#### Looking at Riverside
* checking how many tribal indicators it has to fact check resulting values

In [46]:
# subset the filtered records to Riverside county
is_riverside = filtered_hazard_mitigation['county'] == 'Riverside'
riverside = filtered_hazard_mitigation[is_riverside]

In [None]:
# number of Riverside records, followed by a preview of the last five
print(riverside.shape[0])
riverside.tail()

In [None]:
# Riverside rows whose subrecipient is flagged as tribal (indicator equals 1.0)
is_tribal_subrecipient = riverside['subrecipient_tribal_indicator'] == 1.0
rows_with_one = riverside.loc[is_tribal_subrecipient]
print(rows_with_one.shape[0])
rows_with_one.head()

#### Group by county
* use helper functions to count the rows flagged as tribal funding and to carry the per-county funding count through the grouping

In [None]:
def tribal_indicator_agg(series):
    """Return how many entries of `series` are equal to 1.0."""
    flagged = series == 1.0
    return flagged.sum()
def first_value(series):
    """
    Return the first entry of `series` by position.

    Used when grouping per county to carry total_times_funded through the aggregation.
    """
    leading_entry = series.iloc[0]
    return leading_entry

# per county and funding year: sum the funding, count the tribal flags,
# and carry the per-county funding count through
yearly_aggregations = {
    'project_amount': 'sum',
    'recipient_tribal_indicator': tribal_indicator_agg,
    'subrecipient_tribal_indicator': tribal_indicator_agg,
    'total_times_funded': first_value,
}

county_year_groups = filtered_hazard_mitigation.groupby(['county', 'funding_year'])
summed_hazard_mitigation = county_year_groups.agg(yearly_aggregations).reset_index()

summed_hazard_mitigation.head()

In [50]:
# collapse the county/year table to one row per county: total funding and
# tribal counts over all years, keeping the per-county funding count
county_aggregations = {
    'project_amount': 'sum',
    'recipient_tribal_indicator': 'sum',
    'subrecipient_tribal_indicator': 'sum',
    'total_times_funded': first_value,
}

metric_column_names = {
    'project_amount': 'funding_1990_2023',
    'recipient_tribal_indicator': 'total_tribal_recipient_count',
    'subrecipient_tribal_indicator': 'total_tribal_subrecipient_count',
}

final_hazard_mitigation_funding = (
    summed_hazard_mitigation
    .groupby(['county'])
    .agg(county_aggregations)
    .reset_index()
    .rename(columns=metric_column_names)
)

In [None]:
final_hazard_mitigation_funding.head()

In [52]:
# load the CA tract/county lookup table from s3, standardize the column names,
# and drop the columns that are not needed for the merge
county_tract = "s3://ca-climate-index/0_map_data/ca_tract_county_population_2021.csv"
ca_county_tract = (
    pd.read_csv(county_tract)
    .rename(columns={'Census Tract': 'census_tract', 'County': 'county'})
    .drop(columns=['Unnamed: 0', 'COUNTYFP', 'Total Population 2021'])
)

In [53]:
# attach the county-level funding values to each row of the tract/county table;
# the left merge keeps tracts whose county has no funding record
merged_hazard_mitigation = ca_county_tract.merge(
    final_hazard_mitigation_funding, on='county', how='left'
).rename(columns={'funding_1990_2023': 'total_hazard_mitigation_funding'})

# Move column 'total_hazard_mitigation_funding' to the end
column_to_move = 'total_hazard_mitigation_funding'
leading_columns = [name for name in merged_hazard_mitigation.columns if name != column_to_move]
merged_hazard_mitigation = merged_hazard_mitigation[leading_columns + [column_to_move]]

In [None]:
# display the tract-level metric table
merged_hazard_mitigation

In [55]:
# Saving metric df to .csv file
# NOTE(review): index=False is not passed, so the DataFrame index is written as an
# unnamed first column -- confirm downstream readers expect that column
merged_hazard_mitigation.to_csv('governance_hazard_mitigation_metric.csv')

### Function call for this metric

In [56]:
@append_metadata
def upload_emergency_management_funding(input_csv, export=False, varname=''):
    '''
    Uploads csv containing the total amount of hazard mitigation funding received per CA county between 
    1990-2023 from FEMA: 
    https://www.fema.gov/openfema-data-page/hazard-mitigation-assistance-projects-v3

    Methods
    -------
    Data was cleaned to isolate funding for California counties.
    Funding from 1990-2023 was summed per county. 
    Columns indicating number of tribal recipients are also retained for transparency.

    Parameters
    ----------
    input_csv: string
        path of the csv file containing the calculated hazard mitigation funding metric
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI hazard mitigation funding metric to AWS
        True = will upload resulting df containing CAL CRAI emergency hazard mitigation funding metric to AWS
    varname: string
        not read inside this function body; it is part of the signature wrapped by the
        append_metadata decorator (presumably the metadata variable name -- confirm in
        scripts/utils/write_metadata.py)

    Script
    ------
    governance_hazard_mitigation_funding.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are 
    stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''

    print('Data transformation: selecting and renaming relevant columns including indicators if funding was recieved by tribes.')
    print('Data transformation: apply "filter_counties" to county column to isolate CA counties.')
    print('Data transformation: dropping all columns with recipients labeled "statewide" due to lack of clarity as to distribution at county/census tract scale.')
    print('Data transformation: adding a column that maintains the total times a county was funded.')
    print('Data transformation: group the data by county and sum funding from 1990-2023.')
    print('Data transformation: count the number of times funding was recieved by a tribe per county.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # upload_csv_aws is called with a list of file names
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
        # report the upload only after it has actually been attempted
        # (previously this message was printed when export was False)
        print(f'{os.path.basename(input_csv)} uploaded to AWS.')
    else:
        print(f'{os.path.basename(input_csv)} not uploaded to AWS (export=False).')

In [57]:
# csv files to export, paired positionally with their metadata variable names
filenames = [
            'governance_hazard_mitigation_metric.csv'
]

varnames = [
            'governance_fema_hazard_mitigation'
]

# Process the data and export
for filename, varname in zip(filenames, varnames):
    # pass the paired variable name rather than the leftover 'test' placeholder
    upload_emergency_management_funding(filename, export=True, varname=varname)
    # remove the local csv once it has been handled
    if os.path.exists(filename):
        os.remove(filename)