### This notebook calculates the Cal-CRAI governance metric for emergency response
* Number of fire stations per 10,000 people, per California county

In [1]:
import pandas as pd
import os
import sys
import boto3

import io
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, upload_csv_aws, pull_csv_from_directory
from scripts.utils.write_metadata import append_metadata

In [None]:
# Pull the reprojected USGS fire station GeoPackage file(s) from AWS S3.
# NOTE(review): presumably pull_gpkg_from_directory downloads into the current
# working directory (the following cell reads the .gpkg by bare filename) - confirm.
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/governance/emergency_response/usgs/'

pull_gpkg_from_directory(bucket_name, aws_dir)

In [None]:
# Load the fire station layer from the local GeoPackage and list its columns
# to see which fields are available.
fire_station_data = gpd.read_file('governance_usgs_fire_stations.gpkg')
fire_station_data.columns

In [None]:
# Rename the county code column to 'countyfp' so it matches the key used when
# merging with the CA tract/county table later in the notebook.
fire_station_data = fire_station_data.rename(columns={'USCB_COUNTYFP':'countyfp'})
fire_station_data

In [None]:
# Spot-check: show only the rows whose county code is '037'
# (presumably Los Angeles County, FIPS 06037 - confirm).
# The result is only displayed; none of the visible later cells use this variable.
fire_station_data_county_count = fire_station_data[fire_station_data['countyfp'] == '037']
fire_station_data_county_count

In [None]:
# Drop rows that repeat the same GEOID / INTPTLAT / INTPTLON combination, keeping the first.
# NOTE(review): the USCB_INTPTLAT / USCB_INTPTLON names look like Census Bureau
# "internal point" coordinates of the tract rather than the station's own location.
# If so, this keeps at most one station per tract and undercounts stations in
# tracts that contain several - confirm against the column definitions, and
# consider de-duplicating on the station geometry instead.
filtered_fire_station_data = fire_station_data.drop_duplicates(subset=['USCB_GEOID', 'USCB_INTPTLAT', 'USCB_INTPTLON'])
filtered_fire_station_data

In [None]:
# Count the de-duplicated fire station rows per county code.
# Both output columns are named explicitly (rename_axis + reset_index(name=...))
# instead of renaming the 'count' column afterwards: that column name only exists
# on pandas >= 2.0, and on older versions the rename would silently do nothing
# and the later merge on 'countyfp' would fail.
county_count_fire_stations = (
    filtered_fire_station_data['countyfp']
    .value_counts()
    .rename_axis('countyfp')
    .reset_index(name='num_fire_stations')
)
county_count_fire_stations.head(50)

In [None]:
# Read in the CA census tract-to-county lookup table.
# NOTE(review): gpd.read_file (rather than pd.read_csv) presumably keeps code
# columns such as countyfp as strings with their leading zeros, which the merge
# on 'countyfp' relies on - confirm before swapping readers.
ca_tract_county = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county)
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'])
ca_tract_county.columns = ca_tract_county.columns.str.lower()
# Lowercase every string cell so county names match the lowercased 'county'
# column of the population table used in a later merge. isinstance is the
# idiomatic type check (it also covers str subclasses).
# NOTE: DataFrame.applymap is deprecated in pandas >= 2.1 in favour of DataFrame.map.
ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if isinstance(s, str) else s)

ca_tract_county

In [None]:
# Attach each county's fire station count to every tract in that county.
# A left join keeps all tracts, including those in counties without a count.
fire_stations_merged = pd.merge(
    left=ca_tract_county,
    right=county_count_fire_stations,
    on='countyfp',
    how='left',
)
fire_stations_merged

In [None]:
# Load the county population estimates, drop the leftover index column written
# by a previous to_csv, and lowercase county names for the merge on 'county'.
county_pop = (
    pd.read_csv("s3://ca-climate-index/0_map_data/county_est_pop_2022.csv")
    .drop(columns=['Unnamed: 0'])
    .assign(county=lambda df: df['county'].str.lower())
)

county_pop

In [None]:
# Add the county population columns to every tract row, matching on the county name.
fire_stations_per_population = pd.merge(
    left=fire_stations_merged,
    right=county_pop,
    how='left',
    on='county',
)
fire_stations_per_population

In [None]:
# Metric: county fire station count divided by county population estimate,
# scaled to stations per 10,000 people.
fire_stations_per_population['num_fire_stations_per_10000_people'] = (
    fire_stations_per_population['num_fire_stations']
    .div(fire_stations_per_population['est_total_pop'])
    .mul(10000)
)
fire_stations_per_population

In [20]:
# Write the metric table to a local CSV (without the index); this is the file
# name later passed to fire_station_upload.
fire_stations_per_population.to_csv('governance_fire_stations_metric.csv', index=False)

In [17]:
@append_metadata
def fire_station_upload(input_csv, export=False, varname=''):
    '''
    Uploads the California fire station metric to the S3 bucket. The metric is:
    
    * Number of fire stations per CA county per 10,000 people
    
    Fire stations are likely municipal and CalFire stations, though we are unable to isolate them within the data.

    Data for this metric was sourced from the United States Geological Survey at:
    https://azgeo-open-data-agic.hub.arcgis.com/ though the original dataset has been replaced with similar datasets

    Methods
    -------
    Relevant data columns were isolated, some were renamed for later merging with California tract data.
    Duplicate stations that matched another's tract ID, latitude, and longitude were dropped.
    Number of fire stations per county was calculated by grouping countyfp's together and generating a count.
    Data was then merged with CA tract/county data to attribute each county total to all CA tracts.
    Data was once again merged with CA county population totals so number of fire stations per 10,000 could be calculated.
    
    Parameters
    ----------
    input_csv: string
        path to the local csv file containing the calculated fire station metric
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI Fire Station metric to AWS
        True = will upload resulting df containing CAL CRAI Fire Station metric to AWS
    varname: string
        not used inside this function; presumably consumed by the append_metadata
        decorator to label the metadata entry (confirm in scripts/utils/write_metadata.py)

    Script
    ------
    governance_fire_stations.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    The local input_csv file is deleted at the end of the call whether or not it was uploaded.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: duplicate entries by location were dropped.')
    print('Data transformation: number of rows per county were totalled.')
    print('Data transformation: data was merged with CA county and population data to generate final metric data.') 
 
    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # upload_csv_aws is called with a list of file names
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
        # Report the upload only after it has actually been attempted; the
        # original printed this message in the export=False branch instead.
        print(f'{input_csv} uploaded to AWS.')
    else:
        print(f'{input_csv} was not uploaded to AWS (export=False).')
 
    # Clean up the local copy of the csv (runs for both export settings).
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [None]:
# Upload the metric csv written earlier in the notebook.
# NOTE(review): `varname` is assigned here but the call below passes the literal
# 'test' instead - presumably a dry-run label for the metadata; confirm whether
# varname=varname is intended for the final run.
input_csv = 'governance_fire_stations_metric.csv'
varname = 'governance_usgs_fire_stations'

fire_station_upload(input_csv, export=True, varname='test')