# This notebook calculates the personal preparedness metrics sourced from FEMA
* % of survey respondents who can live in their homes for at least 3 days without running water
* % of survey respondents who can live in their homes for at least 3 days without power
* % of survey respondents who have engaged in at least one preparedness action in the last year

In [1]:
import pandas as pd
import os
import sys
import math
import numpy as np
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [None]:
# Pull the FEMA National Household Survey subset from the project's S3 bucket.
# NOTE(review): presumably the helper saves the CSV into the current working
# directory (the following cell reads it by bare file name) -- confirm in
# scripts/utils/file_helpers.
bucket_name = 'ca-climate-index'
aws_dir = '2a_subset/governance/personal_preparedness/fema/fema_national_household_survey/'

pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

In [3]:
# Load the survey subset into a DataFrame.
# NOTE(review): the file name is spelled "prepareness"; presumably this matches
# the name of the file on disk, so do not correct it without renaming the file.
fema_household_data = pd.read_csv('fema_prepareness_survey_subset.csv')

In [None]:
# Preview the raw survey table (rendered as the notebook cell output)
fema_household_data

In [None]:
# Lift the column display limit so wide frames are shown in full,
# then list every survey question (column) available in the subset
pd.options.display.max_columns = None
print(fema_household_data.columns)

In [None]:
# Survey questions used by the three metrics, plus the respondent's county.
# Several names end with a space; they are reproduced exactly because they
# are used as column lookup keys.
selected_columns = [
    'What county in [state] do you live in? ',
    'How long could you live in your home without power?',
    'How long could you live in your home without running water? ',
    'Thinking about preparing yourself for a disaster, which of the following best represents your degree of preparedness? ',
]

selected_fema_household_data = fema_household_data.loc[:, selected_columns]
selected_fema_household_data

In [None]:
# Pull out the Alameda respondents as a hand-checkable sample for the
# metric calculations further down
alameda = selected_fema_household_data.loc[
    selected_fema_household_data['What county in [state] do you live in? '].eq('Alameda')
]
alameda

In [None]:
# Map each selected question to the distinct answers it received, to decide
# which responses should be isolated/counted
unique_values = {
    question: answers.unique()
    for question, answers in selected_fema_household_data.items()
}
unique_values

In [None]:
# Answers that are scored as "not prepared" for each survey question;
# any other value is scored as prepared
conditions = {
    'How long could you live in your home without power?': ['1 to 3 days', "Don't know", 'Less than 1 day'],
    'How long could you live in your home without running water? ': ["Don't know", 'Less than 1 day', '1 to 3 days'],
    'Thinking about preparing yourself for a disaster, which of the following best represents your degree of preparedness? ': [
        'I am NOT prepared, but I intend to get prepared in the next six months',
        'I am NOT prepared, and I do not intend to prepare in the next year',
        'I am NOT prepared, but I intend to start preparing in the next year',
        "Don't know"
    ]
}

# Start the scored table from the county column so rows keep their county label
county_column = 'What county in [state] do you live in? '
boolean_df = pd.DataFrame({county_column: selected_fema_household_data[county_column]})

# Score each question: 0 when the answer is in its "not prepared" list, 1 otherwise.
# NOTE(review): a missing answer (NaN) is not in any list, so it is scored as 1
# (prepared) -- confirm this is intended.
for question, unprepared_answers in conditions.items():
    is_unprepared = selected_fema_household_data[question].isin(unprepared_answers)
    boolean_df[question] = (~is_unprepared).astype('int64')

boolean_df

In [10]:
# Swap the verbose survey questions for short, code-friendly column names
boolean_df = boolean_df.rename(
    {
        'What county in [state] do you live in? ': 'county',
        'How long could you live in your home without power?': 'prepared_without_power',
        'How long could you live in your home without running water? ': 'prepared_without_running_water',
        'Thinking about preparing yourself for a disaster, which of the following best represents your degree of preparedness? ': 'general_preparedness',
    },
    axis='columns',
)

In [None]:
# Preview the scored (0/1) responses (rendered as the notebook cell output)
boolean_df

In [None]:
# Aggregate the 0/1 scores per county in a single pass:
#   * sum of each score column  -> number of "prepared" respondents
#   * group size                -> total number of respondents in the county
# (The cell previously computed this twice; the first result was overwritten
# by the second, so only one aggregation is kept.)
grouped_df = (
    boolean_df
    .groupby('county')
    .agg(
        prepared_without_power=('prepared_without_power', 'sum'),
        prepared_without_running_water=('prepared_without_running_water', 'sum'),
        general_preparedness=('general_preparedness', 'sum'),
        # 'size' counts every row of the group, whichever column it is applied to
        county_count=('prepared_without_power', 'size'),
    )
    # move 'county' from the index back to a regular column
    .reset_index()
)

grouped_df

In [None]:
# Turn the per-county "prepared" totals into percentages of that county's
# respondents, keeping the county name and respondent count alongside
fema_household_metric = grouped_df[['county', 'county_count']].assign(
    percent_prepared_without_power=(
        grouped_df['prepared_without_power'].div(grouped_df['county_count']).mul(100)
    ),
    percent_prepared_without_water=(
        grouped_df['prepared_without_running_water'].div(grouped_df['county_count']).mul(100)
    ),
    percent_prepared_for_general_disaster=(
        grouped_df['general_preparedness'].div(grouped_df['county_count']).mul(100)
    ),
)

fema_household_metric

In [None]:
# Verify county entries are valid
# NOTE(review): presumably filter_counties splits the frame into rows whose
# 'county' is a recognized California county and rows that are not -- confirm
# in scripts/utils/file_helpers.
# Only omitted_rows is inspected here; the merge with the tract table uses
# fema_household_metric, not filtered_counties.
filtered_counties, omitted_rows = filter_counties(fema_household_metric, 'county')
omitted_rows

In [15]:
# Load the California tract-to-county lookup from S3, discard the columns that
# are not needed for the join, and align the column names with the metric table
ca_tract_county = (
    gpd.read_file("s3://ca-climate-index/0_map_data/ca_tracts_county.csv")
    .drop(columns=['field_1', 'geometry', 'COUNTYFP'])
    .rename(columns={'TRACT': 'tract', 'County': 'county'})
)

In [None]:
# Preview the tract/county lookup table (rendered as the notebook cell output)
ca_tract_county

In [None]:
# Left-join the county-level metrics onto the tract table so that every CA tract
# carries its county's values (tracts whose county has no survey rows get NaN)
fema_household_merge = ca_tract_county.merge(
    fema_household_metric,
    how='left',
    on='county',
)
fema_household_merge

In [18]:
# One output table per metric: tract, county, respondent count and the metric
# itself. Blank / whitespace-only entries are converted to NaN in the same step.
df_percent_power = (
    fema_household_merge[['tract', 'county', 'county_count', 'percent_prepared_without_power']]
    .replace(r'^\s*$', np.nan, regex=True)
)
df_percent_water = (
    fema_household_merge[['tract', 'county', 'county_count', 'percent_prepared_without_water']]
    .replace(r'^\s*$', np.nan, regex=True)
)
df_percent_general = (
    fema_household_merge[['tract', 'county', 'county_count', 'percent_prepared_for_general_disaster']]
    .replace(r'^\s*$', np.nan, regex=True)
)

In [None]:
# Write every metric table to its own CSV (row index omitted)
for metric_frame, metric_filename in (
    (df_percent_power, 'governance_percent_prepared_without_power_metric.csv'),
    (df_percent_water, 'governance_percent_prepared_without_water_metric.csv'),
    (df_percent_general, 'governance_percent_prepared_for_general_disaster_metric.csv'),
):
    metric_frame.to_csv(metric_filename, index=False)

## Function Call

In [20]:
@append_metadata
def fema_household_survey_upload(input_csv, export=False, varname=''):
    '''
    Uploads the disaster preparation metrics to S3 bucket. The metrics are:
    
    * % of survey respondents who can live in their homes for at least 3 days without running water
    * % of survey respondents who can live in their homes for at least 3 days without power
    * % of survey respondents who have engaged in at least one preparedness action in the last year

    Note: Sample size for this dataset is fairly small

    Data for this metric was sourced from the Federal Emergency Management Agency at:
    https://www.fema.gov/about/openfema/data-sets#emergency

    Methods
    -------
    Relevant data columns were isolated, some were renamed for later merging with California tract data.
    Specific answers to selected survey questions were isolated to represent participant preparedness.
    Answers were grouped by county and summed for total 'preparedness' and total answers.
    Percentage was calculated for the three survey questions and merged to California tract data.
    
    Parameters
    ----------
    input_csv: string
        csv disaster preparedness survey metric data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI disaster preparedness survey metric to AWS
        True = will upload resulting df containing CAL CRAI disaster preparedness survey metric to AWS
    varname: string
        Not used inside this function.
        NOTE(review): presumably consumed by the append_metadata decorator to label
        the metadata entry -- confirm in scripts/utils/write_metadata.

    Returns
    -------
    None

    Script
    ------
    governance_fema_household_survey.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    # Summary of the processing steps performed in the notebook
    print('Data transformation: relevant columns were isolated and renamed.')
    print('Data transformation: responses to specific columns were summed, including total responses.')
    print('Data transformation: percent preparedness was calculated for each metric.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # upload_csv_aws is given a list of file names, so wrap the single csv
        upload_csv_aws([input_csv], bucket_name, directory)
        # Report the upload only on the path that actually performed it
        print(f'{input_csv} uploaded to AWS.')
    else:
        print(f'{input_csv} was not uploaded to AWS (export=False).')

In [None]:
# Metric CSV files, paired by position with their metadata variable names
input_csv = [
    'governance_percent_prepared_without_water_metric.csv',
    'governance_percent_prepared_without_power_metric.csv',
    'governance_percent_prepared_for_general_disaster_metric.csv',
]

varnames = [
    'governance_fema_household_water_resilience',
    'governance_fema_household_power_resilience',
    'governance_fema_household_preparedness',
]

# NOTE(review): these two names are not passed to the upload function, which
# sets its own bucket and directory when export is enabled.
bucket_name = 'ca-climate-index'
directory = '3_fair_data/index_data'

# NOTE(review): every call passes varname='test' instead of the paired name from
# varnames (kept in the trailing comment) -- confirm whether the real name should
# be passed before running a production export.
for metric_csv, metric_varname in zip(input_csv, varnames):
    fema_household_survey_upload(metric_csv, export=True, varname='test')  # metric_varname)