## Cal-CRAI Metric Calculation
Domain: Governance \
Indicator: Community Preparedness

This notebook calculates one metric, sourced from the Federal Emergency Management Agency:
* Metric 1: Total amount of emergency performance grant funding received per CA county between 2014-2022

In [1]:
import pandas as pd
import os
import sys
import math

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))

from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [None]:
# pull csv from aws
# Bucket and key prefix that hold the subsetted FEMA emergency management performance grants data
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/governance/community_preparedness/fema/emergency_management_performance_grants/'

# Project helper; presumably downloads the csv file(s) found under aws_dir into the
# working directory (the next cell reads one by name) -- confirm against file_helpers
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

In [None]:
# read in FEMA emergency performance grants data
grants_csv = 'fema_emergency_management_grants_subset.csv'
emergency_grants_data = pd.read_csv(grants_csv)

# the data now lives in memory, so the local copy pulled from AWS can be deleted
os.remove(grants_csv)

print(len(emergency_grants_data))
# keep the preview as the final expression of the cell so the notebook actually renders it
emergency_grants_data.head(5)

In [None]:
# Preview the first rows of the raw grants table
emergency_grants_data.head()

In [None]:
# Take the second '-'-delimited segment of the agency name (the text right after the first '-')
county_names = emergency_grants_data['legalAgencyName'].str.split('-').str[1]

# Remove jurisdiction wording from the extracted name, case-insensitively and in this order
for label in (' county', ' city/county', ', california'):
    county_names = county_names.str.replace(label, '', case=False)

# Trim leading/trailing whitespace and store the result as the new 'county' column
emergency_grants_data['county'] = county_names.str.strip()

# Display the modified DataFrame
emergency_grants_data.head()

In [None]:
# Extract only the four-digit year from the 'reportingPeriod' column and place into new 'year' column.
# The pattern captures just the digits, so there is no date suffix left to strip afterwards;
# expand=False returns a Series rather than a one-column DataFrame.
emergency_grants_data['year'] = emergency_grants_data['reportingPeriod'].str.extract(r'(\d{4})', expand=False)

emergency_grants_data.head(5)

In [None]:
# Derive the four-digit year from 'reportingPeriod' (re-running this on the same data gives the same column)
year_matches = emergency_grants_data['reportingPeriod'].str.extract(r'(\d{4})')
emergency_grants_data['year'] = year_matches

# Display unique years
unique_years = emergency_grants_data['year'].unique()
print(unique_years)

In [None]:
# Keep only the columns the CRI metric needs: recipient county, year, and funding amount
metric_columns = ['county', 'year', 'fundingAmount']
cleaned_emergency_grants = emergency_grants_data[metric_columns]

print(len(cleaned_emergency_grants))
cleaned_emergency_grants.head(10)

In [9]:
# call filter_counties function and output county results as filtered_df and the omitted results as omitted_df
# (matching is done on the cleaned 'county' column; the omitted rows are inspected in the next cells)
filtered_df, omitted_df = filter_counties(cleaned_emergency_grants, 'county')

In [None]:
# look at the skipped over rows (entries that filter_counties did not return as counties)
print(len(omitted_df))
omitted_df

In [None]:
# Sum the omitted recipients' funding per recipient and year first...
total_omitted = omitted_df.groupby(['county', 'year'])['fundingAmount'].sum().reset_index()

# ...then collapse the years into one total per recipient over the data date range,
# relabelling the columns to show these recipients are tribes rather than counties
aggregated_omitted = (
    total_omitted.groupby(['county'])['fundingAmount']
    .sum()
    .reset_index()
    .rename(columns={'county': 'tribe', 'fundingAmount': 'tribe_funding_2014_2022'})
)

print(len(aggregated_omitted))
aggregated_omitted.head(20)

#### Searched to find which county each of these tribes resides in, and made a dictionary to attribute each tribe to a county
* tribe names will be retained as a separate column (along with their independent funding values)

In [12]:
# Keys are county names; each value is the tribe (string) or tribes (list of strings)
# attributed to that county
indigenous_to_county_dict = {
    'Riverside': 'agua caliente band of cahuilla indians',
    'San Diego': [
        'barona band of mission indians',
        'rincon band of luiseno indians',
    ],
    'Humboldt': [
        'blue lake rancheria',
        'hoopa valley tribe',
    ],
    'Tulare': 'tule river tribe',
}

#### Processing the county filtered data

In [None]:
# Continue with the county rows returned earlier by filter_counties as filtered_df
# (this is only a new name for filtered_df; no filtering happens in this cell)
cri_emergency_grants = filtered_df
print(len(cri_emergency_grants))
cri_emergency_grants

In [None]:
# Total the funding amount for every (county, year) pair
funding_by_county_year = cri_emergency_grants.groupby(['county', 'year'])['fundingAmount'].sum()
cri_emergency_grants = funding_by_county_year.reset_index()

print(len(cri_emergency_grants))
cri_emergency_grants.head(5)

In [None]:
# Collapse the yearly sums into one total per county: funding received from 2014-2022 (minus 2015)
cri_emergency_grants_total_county = (
    cri_emergency_grants.groupby(['county'])['fundingAmount']
    .sum()
    .reset_index()
    .rename(columns={'fundingAmount': 'county_funding_2014_2022'})
)

print(len(cri_emergency_grants_total_county))
cri_emergency_grants_total_county.head(5)

#### Assigning the tribes to their corresponding county from the dictionary above

In [16]:
# Flatten each county's entry to a single string: lists of tribes are comma-joined,
# values that are not lists are kept unchanged
processed_dict = {
    county: ', '.join(tribes) if isinstance(tribes, list) else tribes
    for county, tribes in indigenous_to_county_dict.items()
}

# Look up each row's county in the flattened dictionary to fill the new 'tribe' column
cri_emergency_grants_total_county['tribe'] = cri_emergency_grants_total_county['county'].map(processed_dict)

# Display the DataFrame
# cri_emergency_grants_total_county

#### Manually adding the tribal funding values based on the county they reside in

In [None]:
# Start every county at zero tribal funding
cri_emergency_grants_total_county['tribe_funding_2014_2022'] = 0

# Tribal funding totals to add, keyed by the county the tribe(s) were attributed to
# NOTE(review): these totals are hard-coded -- presumably copied from the aggregated omitted
# results; confirm they still match whenever the source data is refreshed
specific_values = {
    'Riverside': 38574.0,
    'San Diego': 109947.0,
    'Humboldt': 674458.0,
    'Tulare': 16345.0,
}

# Write each total into the 'tribe_funding_2014_2022' column of the matching county row(s)
for county_name, funding in specific_values.items():
    is_county = cri_emergency_grants_total_county['county'] == county_name
    cri_emergency_grants_total_county.loc[is_county, 'tribe_funding_2014_2022'] = funding

# Display the resulting DataFrame
cri_emergency_grants_total_county.head(5)

#### Creating a total funding column by adding the county funding values with the tribe funding values

In [18]:
# Total funding per county = the county's own funding plus the tribal funding attributed to it
county_funding = cri_emergency_grants_total_county['county_funding_2014_2022']
tribe_funding = cri_emergency_grants_total_county['tribe_funding_2014_2022']
cri_emergency_grants_total_county['total_funding'] = county_funding + tribe_funding

In [None]:
# Preview the per-county table now that 'total_funding' has been added
cri_emergency_grants_total_county.head(5)

In [None]:
# Tract/county table for California, read straight from S3
county_tract = "s3://ca-climate-index/0_map_data/ca_tract_county_population_2021.csv"

# Normalise the column names used downstream and drop the columns not needed here
ca_county_tract = (
    pd.read_csv(county_tract)
    .rename(columns={'Census Tract': 'census_tract', 'County': 'county'})
    .drop(columns=['Unnamed: 0', 'COUNTYFP', 'Total Population 2021'])
)
ca_county_tract

In [None]:
# Left-join the county totals onto the tract table on 'county' (every tract row is kept),
# then keep only the combined total under its final metric column name
merged_emergency_grants = (
    ca_county_tract.merge(cri_emergency_grants_total_county, on='county', how='left')
    .drop(columns=['county_funding_2014_2022', 'tribe', 'tribe_funding_2014_2022'])
    .rename(columns={'total_funding': 'total_emergency_management_funding'})
)

merged_emergency_grants

In [23]:
# Saving metric df to .csv file
# NOTE(review): no index=False is passed, so the DataFrame index is written as an extra
# unnamed first column -- confirm downstream readers expect that
merged_emergency_grants.to_csv('governance_emergency_management_metric.csv')

### Function call for this metric

In [24]:
@append_metadata
def calc_emergency_management_upload(input_csv, export=False, varname=''):
    '''
    Uploads the csv file containing the total amount of emergency performance grant funding received per CA 
    county between 2014-2022 from FEMA: 
    https://www.fema.gov/openfema-data-page/emergency-management-performance-grants-v2

    Methods
    -------
    Data was cleaned to isolate funding for California counties and Indigenous tribes. 
    Tribe names were preserved while their funding was added to the total funding column 
    within the county they reside in.
    Data was grouped by county and year and funding was summed to calculate the final metric.

    Parameters
    ----------
    input_csv: string
        path of the local csv file containing the calculated metric; it is the file
        that gets uploaded, and it is deleted locally before the function returns
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI emergency management grant metric to AWS
        True = will upload resulting df containing CAL CRAI emergency management grant metric to AWS
    varname: string
        not used inside the function body; presumably consumed by the append_metadata
        decorator to label the metadata entry (confirm in scripts/utils/write_metadata)

    Script
    ------
    governance_emergency_management_grants.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are 
    stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    # Processing log describing the steps performed earlier in the notebook
    print('Data transformation: ensure consistent county naming scheme.')
    print('Data transformation: adjust reporting period column to single year.')
    print('Data transformation: isolate desired columns and run "filter_counties" through county column.')
    print('Data transformation: dropping all columns with recipients labeled "Cal OES etc" due to lack of clarity')
    print('Data transformation: group the data by county and sum the funding received for each county from 2014-2022 minus 2015.')
    print('Data transformation: attribute listed Tribes to a county they reside in, while preserving Tribe name and funding.')
    print('Data transformation: add funding from Tribes to the total received per county.')

    bucket_name = 'ca-climate-index'
    directory = '3_fair_data/index_data'
    # the upload helper is handed a list of file names
    export_filename = [input_csv]

    if export:
        upload_csv_aws(export_filename, bucket_name, directory)
    else:
        # nothing was sent, so do not report an upload
        print(f'{export_filename} not uploaded to AWS (export=False).')

    # remove the local copy of the metric file whether or not it was uploaded
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [25]:
# Metric csv file(s) written earlier in this notebook
input_csv = ['governance_emergency_management_metric.csv'
            ]

# Metric variable name(s), paired positionally with input_csv
varnames = [
    'governance_fema_emergency_grants'
]

# Upload each metric file
# NOTE(review): varname is hard-coded to 'test', so `var` (and therefore `varnames`) is
# currently unused -- pass varname=var if the real metric name is intended
for csv, var in zip(input_csv, varnames):
    calc_emergency_management_upload(csv, export=True, varname='test')#var)