### Cal-CRAI metric calculation for flood insurance policies
* Enrollment in national flood insurance program -- community preparedness
* num. of NFIP participants -- personal preparedness

In [1]:
import pandas as pd
import os
import sys
import math
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))

from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [None]:
# pull csv from aws
# bucket and object key of the subsetted FEMA NFIP community status file
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/governance/community_preparedness/fema/nfip_community_status/fema_community_status_subset.csv'

# NOTE(review): presumably downloads the csv into the working directory, since the
# next cell reads 'fema_community_status_subset.csv' locally -- confirm in file_helpers
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

## Metric 1:
* Enrollment in national flood insurance program -- community preparedness

In [None]:
# Load the community status subset from the local csv, then report its row
# count and preview the first five rows
community_flood_insurance_data = pd.read_csv('fema_community_status_subset.csv')
print(len(community_flood_insurance_data))
community_flood_insurance_data.head(5)
# removal of the local csv is currently disabled
# os.remove('fema_community_status_subset.csv')

In [None]:
community_flood_insurance_data.columns

## Adjust the columns and entries within for consistency

In [None]:
# Lower-case the column headers and every string cell so that later string
# comparisons and merges do not depend on the source file's capitalisation.
community_flood_insurance_data.columns = community_flood_insurance_data.columns.str.lower()
# isinstance is the idiomatic type check and also covers str subclasses;
# non-string cells (numbers, NaN) are passed through untouched.
community_flood_insurance_data = community_flood_insurance_data.applymap(
    lambda s: s.lower() if isinstance(s, str) else s
)
# Remove the ' county' text so the column holds the bare county name
community_flood_insurance_data['county'] = community_flood_insurance_data['county'].str.replace(' county', '', case=False)

community_flood_insurance_data

In [None]:
unique_names = community_flood_insurance_data['communityname'].unique()
unique_names

## Utilize the asterisk at the end of communityname entries, which indicates an entry describing a community's overall NFIP involvement
* look at just the entries with the asterisk and identify counties with zero NFIP participation

In [7]:
# Define the conditions
# Entries whose communityname ends with an asterisk are the ones kept for the county-level metric.
# na=False keeps the mask strictly boolean when a community name is missing;
# a NaN in the mask would otherwise make the row selection below raise.
condition_1 = community_flood_insurance_data['communityname'].str.endswith('*', na=False)
# San Francisco is matched by its full name instead of by the asterisk
condition_2 = community_flood_insurance_data['communityname'].str.lower() == 'san francisco, city and county of'

# Combine the conditions using the | (or) operator
isolated_flood_insurance_counties = community_flood_insurance_data[condition_1 | condition_2]

In [None]:
len(isolated_flood_insurance_counties)

##  Identify counties with no participation

In [None]:
isolated_flood_insurance_counties.loc[isolated_flood_insurance_counties.participatinginnfip == 0]

## Isolate relevant columns and adjust the entries within for increased transparency
* remove asterisks
* rename communityname column to county

In [None]:
# Keep only the community name and the participation flag; .copy() detaches the
# result from isolated_flood_insurance_counties so it can be edited safely.
flood_insurance_counties = isolated_flood_insurance_counties[['communityname', 'participatinginnfip']].copy()

# Strip the county suffixes so only the bare county name remains.
# regex=False is explicit because the patterns contain '*': read as a regular
# expression, '*' is a quantifier and the literal asterisk would be left behind.
# Being explicit removes the dependence on the installed pandas version's default.
flood_insurance_counties['communityname'] = flood_insurance_counties['communityname'].str.replace(' county *', '', regex=False)
flood_insurance_counties['communityname'] = flood_insurance_counties['communityname'].str.replace(' county*', '', regex=False)
flood_insurance_counties['communityname'] = flood_insurance_counties['communityname'].str.replace(', city and county of', '', regex=False)

flood_insurance_counties = flood_insurance_counties.rename(columns={'communityname':'county', 'participatinginnfip':'nfip_participation'})

flood_insurance_counties.tail(5)

## Look at the original dataset to identify how many tribal participation entries there are

In [None]:
tribe_checking = community_flood_insurance_data[community_flood_insurance_data['tribal'] > 0]
tribe_checking

## As there are only three separate counties with tribal nfip participation, we can manually add the flag for participation

In [None]:
# Work on a copy so adding the flag column does not also modify flood_insurance_counties
community_flood_metric = flood_insurance_counties.copy()
# Add new column with default value
community_flood_metric['at_least_one_tribe_enrolled_within_county'] = 0

# List of specified counties to set to 1
specified_counties = ['lake', 'san bernardino', 'riverside']

# Set 'at_least_one_tribe_enrolled_within_county' to 1 for the specified counties
community_flood_metric.loc[community_flood_metric['county'].isin(specified_counties), 'at_least_one_tribe_enrolled_within_county'] = 1
community_flood_metric

## Merge the dataset with California census tract data, assigning the county values to each tract within its respective county

In [None]:
# Load the CA tract/county lookup file from S3
ca_tract_county = gpd.read_file("s3://ca-climate-index/0_map_data/ca_tracts_county.csv")

# Discard the 'field_1' and 'geometry' columns, then lower-case the headers
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'])
ca_tract_county.columns = ca_tract_county.columns.str.lower()

# Lower-case every string cell; any other value is returned unchanged
ca_tract_county = ca_tract_county.applymap(
    lambda cell: cell.lower() if type(cell) is str else cell
)

ca_tract_county

In [None]:
# Left-join the county-level NFIP flags onto the tract list, matching on county
ca_flood_community_metric = ca_tract_county.merge(community_flood_metric, how='left', on='county')

# Reorder the columns so that 'nfip_participation' comes last
column_to_move = 'nfip_participation'
leading_columns = [col for col in ca_flood_community_metric.columns if col != column_to_move]
ca_flood_community_metric = ca_flood_community_metric[leading_columns + [column_to_move]]

# Strip the leading 0's from the tract identifiers
ca_flood_community_metric['tract'] = ca_flood_community_metric['tract'].str.lstrip('0')

print(len(ca_flood_community_metric))
ca_flood_community_metric.head()

In [35]:
# save as a csv for upload to s3 bucket
# (this file name is the first entry later passed to flood_metrics_upload)
ca_flood_community_metric.to_csv('governance_community_flood_participation_metric.csv')

## Metric 2:
* num. of NFIP participants -- personal preparedness

In [None]:
# pull csv from aws
# this dataset is quite large
# bucket and object key of the FEMA redacted NFIP policies file
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/governance/personal_preparedness/fema/fema_fima_nfip_policies/fema_flood_redacted_policies.csv'

# NOTE(review): presumably downloads the csv into the working directory, since the
# next cell reads 'fema_flood_redacted_policies.csv' locally -- confirm in file_helpers
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

In [None]:
# Load the policy records from the local csv and report the row count
fema_flood_policy_data = pd.read_csv('fema_flood_redacted_policies.csv')
print(len(fema_flood_policy_data))
# removal of the local csv is currently disabled
# os.remove('fema_flood_redacted_policies.csv')
# show every column when previewing (this display option stays set for later cells)
pd.set_option('display.max_columns', None)
fema_flood_policy_data.tail(5)

In [None]:
# look at the datasets columns
fema_flood_policy_data.columns

In [None]:
# Display the number of non-NaN values in each column
non_nan_counts = fema_flood_policy_data.count()

# Set display option to show all rows
pd.set_option('display.max_rows', None)

# Display the counts
print(non_nan_counts)

## Isolate the dataset to policies whose termination date falls after 2023, as a recent indication of flood coverage
* also exclude termination dates in the year 2203, which is likely a typo

In [None]:
# Parse 'policyTerminationDate' as datetimes (errors='coerce' is passed to the parser)
parsed_termination = pd.to_datetime(fema_flood_policy_data['policyTerminationDate'], errors='coerce')
fema_flood_policy_data['policyTerminationDate'] = parsed_termination

# Keep termination years later than 2023, leaving out the year 2203
termination_year = fema_flood_policy_data['policyTerminationDate'].dt.year
mask = (termination_year > 2023) & (termination_year != 2203)

# Select the rows that satisfy the mask
current_flood_policy = fema_flood_policy_data[mask]

print(len(current_flood_policy))
current_flood_policy.head(5)

In [None]:
# look at how many unique census tracts are within the dataset
len(current_flood_policy['censusTract'].unique())

## Select relevant columns, adjust tract column entries to match Cal-CRAIs standardized tract data

In [None]:
# Columns needed to count policies per tract
column_list = ['censusTract', 'countyCode', 'policyTerminationDate', 'id']

# .copy() makes this an independent frame, so the column assignment below acts on
# the frame itself rather than on a slice of current_flood_policy
# (this avoids pandas' SettingWithCopyWarning).
current_flood_policy_filter = current_flood_policy[column_list].copy()

# Convert each tract to an integer string with a '0' prepended; missing values are left as-is
current_flood_policy_filter['censusTract'] = current_flood_policy_filter['censusTract'].apply(
    lambda x: '0' + str(int(x)) if pd.notna(x) else x
)
# Display floats without decimals (this display option stays set for later cells)
pd.set_option('display.float_format', lambda x: '%.0f' % x)
print(len(current_flood_policy_filter))
current_flood_policy_filter.head(5)

## Drop duplicates if applicable, based on location, flood policy ID, and policy termination date
* no rows dropped

In [None]:
# Rows sharing the same policy id, termination date and census tract are treated as duplicates
current_flood_policy_filter = current_flood_policy_filter.drop_duplicates(subset=['id', 'policyTerminationDate', 'censusTract'])
# row count after de-duplication, for comparison with the count printed in the previous cell
print(len(current_flood_policy_filter))

In [None]:
# Count the policies in each census tract.
# The output columns are named explicitly instead of renaming whatever
# value_counts().reset_index() produces, because those default column names
# differ between pandas versions ('count' only exists from pandas 2.0 onward).
tract_flood_policy_count = (
    current_flood_policy_filter['censusTract']
    .value_counts()
    .rename_axis('tract')
    .reset_index(name='num_flood_policies')
)

print(len(tract_flood_policy_count))
tract_flood_policy_count.head()

## Read in CA tract and county dataset and merge it with our flood policy data
* merge based on tract column

In [None]:
# Load the CA tract/county lookup file from S3
ca_tract_county = gpd.read_file("s3://ca-climate-index/0_map_data/ca_tracts_county.csv")

# Discard the 'field_1' and 'geometry' columns, then lower-case the headers
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'])
ca_tract_county.columns = ca_tract_county.columns.str.lower()

# Lower-case every string cell; any other value is returned unchanged
ca_tract_county = ca_tract_county.applymap(
    lambda cell: cell.lower() if type(cell) is str else cell
)

ca_tract_county.head()

In [None]:
# Left-join the per-tract policy counts onto the tract/county lookup, matching on 'tract';
# how='left' keeps every row of ca_tract_county, including tracts without a count
tract_policy_merge = pd.merge(ca_tract_county, tract_flood_policy_count, on='tract', how='left')
tract_policy_merge.head()

In [None]:
trinity = tract_policy_merge[tract_policy_merge['county'] == 'trinity']
trinity

## Count the number of policies when grouping the dataset by county

In [None]:
# Sum the per-tract policy counts within each county; as_index=False keeps 'county' as a regular column
county_flood_policy_count = tract_policy_merge.groupby('county', as_index=False)['num_flood_policies'].sum()
county_flood_policy_count

## Merge the county totals back onto the CA tract/county dataset to attribute the counts to each CA tract

In [None]:
# Left-join the county policy totals onto the tract list, matching on county
ca_flood_policy_county_metric = ca_tract_county.merge(county_flood_policy_count, how='left', on='county')

# Strip the leading 0's from the tract identifiers
tracts_without_leading_zeros = ca_flood_policy_county_metric['tract'].str.lstrip('0')
ca_flood_policy_county_metric['tract'] = tracts_without_leading_zeros

print(len(ca_flood_policy_county_metric))
ca_flood_policy_county_metric.head()

In [37]:
# save as a csv for upload to s3 bucket
# (this file name is the second entry later passed to flood_metrics_upload)
ca_flood_policy_county_metric.to_csv('governance_flood_policy_metric.csv')

## Function Call

In [38]:
@append_metadata
def flood_metrics_upload(input_csv, export=False, varname=''):
    '''
    Uploads flood related metrics within the CAL-CRAI governance domain to S3 bucket. The metrics are:

    * whether a county is enrolled in the national flood insurance program (NFIP)
    * # of national flood insurance program participants per county

    Data for these metrics were sourced from the Federal Emergency Management Agency at:
    https://www.fema.gov/about/openfema/data-sets#nfip

    Note: For the number of participants per county, we were unable to distinguish policies
    per resident/house/rental/business and get proportions of policies to county population.

    Methods
    -------
    Relevant data columns were isolated, some were renamed and adjusted.
    Duplicate entries based on location were dropped.
    Columns were created to count or flag desired metric output at the county level.
    Data was then merged to California county and tract data to eventually extrapolate results to CA tracts.

    Parameters
    ----------
    input_csv: string
        file name of the csv containing the calculated flood metric data
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI flood metric to AWS
        True = will upload resulting df containing CAL CRAI flood metric to AWS
    varname: string
        name of the metric variable; not used inside this function body.
        NOTE(review): presumably consumed by the append_metadata decorator -- confirm in write_metadata.

    Script
    ------
    governance_flood_insurance.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: duplicate entries by location were dropped.')
    print('Data transformation: number of rows per county were totalled.')
    print('Data transformation: data was merged with CA county and tract data to generate final metric data.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
        # Report the upload only after it has actually been requested
        print(f'{input_csv} uploaded to AWS.')
    else:
        # Nothing was sent to S3, so do not claim an upload happened
        print(f'{input_csv} was not uploaded to AWS (export=False).')

    # Removal of the local csv is left disabled:
    # if os.path.exists(input_csv):
    #     os.remove(input_csv)

In [39]:
# csv files written earlier in this notebook, one per metric
input_csvs = ['governance_community_flood_participation_metric.csv',
            'governance_flood_policy_metric.csv']

# variable names paired one-to-one with input_csvs by the zip below
varnames = ['governance_fema_communitiy_flood',
            'governance_fema_flood_policy_participants']

# Process the data and export
# NOTE(review): the loop variable `varname` is never used -- the literal 'test' is
# passed for every file. Confirm whether varname=varname is intended for a real run.
for input_csv, varname in zip(input_csvs, varnames):
    flood_metrics_upload(input_csv, export=True, varname='test')