### This notebook calculates the Cal-CRAI governance metric for emergency response
* Num of fire stations per 10,000 people

In [1]:
import pandas as pd
import os
import sys
import boto3

import io
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, upload_csv_aws, pull_csv_from_directory
from scripts.utils.write_metadata import append_metadata

In [2]:
# pull csv from aws
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/governance/emergency_response/usgs/'

pull_gpkg_from_directory(bucket_name, aws_dir)

Saved GeoPackage as 'governance_usgs_fire_stations.gpkg' locally


In [3]:
fire_station_data = gpd.read_file('governance_usgs_fire_stations.gpkg')
fire_station_data.columns

Index(['USCB_STATEFP', 'USCB_COUNTYFP', 'USCB_TRACTCE', 'USCB_GEOID',
       'USCB_NAME', 'USCB_NAMELSAD', 'USCB_MTFCC', 'USCB_FUNCSTAT',
       'USCB_ALAND', 'USCB_AWATER', 'USCB_INTPTLAT', 'USCB_INTPTLON',
       'geometry'],
      dtype='object')

In [4]:
fire_station_data = fire_station_data.rename(columns={'USCB_COUNTYFP':'countyfp'})
fire_station_data

Unnamed: 0,USCB_STATEFP,countyfp,USCB_TRACTCE,USCB_GEOID,USCB_NAME,USCB_NAMELSAD,USCB_MTFCC,USCB_FUNCSTAT,USCB_ALAND,USCB_AWATER,USCB_INTPTLAT,USCB_INTPTLON,geometry
0,06,037,401902,06037401902,4019.02,Census Tract 4019.02,G5020,S,2656563,3536,+34.1011641,-117.7249135,POINT (-117.72358 34.09620)
1,06,037,400205,06037400205,4002.05,Census Tract 4002.05,G5020,S,23546199,341777,+34.1521556,-117.7176795,POINT (-117.70773 34.13628)
2,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.38491 33.92406)
3,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.37953 33.92064)
4,06,115,040901,06115040901,409.01,Census Tract 409.01,G5020,S,263666616,4473094,+39.1852102,-121.3585368,POINT (-121.28916 39.20226)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145,06,071,010806,06071010806,108.06,Census Tract 108.06,G5020,S,12526996,0,+34.2587545,-117.3129594,POINT (-117.30146 34.24676)
3146,06,071,006603,06071006603,66.03,Census Tract 66.03,G5020,S,1829958,0,+34.0821080,-117.3367255,POINT (-117.33472 34.08379)
3147,06,037,102107,06037102107,1021.07,Census Tract 1021.07,G5020,S,12908558,13753,+34.2409052,-118.3395988,POINT (-118.36711 34.23625)
3148,06,037,101110,06037101110,1011.10,Census Tract 1011.10,G5020,S,1142401,0,+34.2594737,-118.2929869,POINT (-118.30154 34.25842)


In [5]:
fire_station_data_county_count = fire_station_data[fire_station_data['countyfp'] == '037']
fire_station_data_county_count

Unnamed: 0,USCB_STATEFP,countyfp,USCB_TRACTCE,USCB_GEOID,USCB_NAME,USCB_NAMELSAD,USCB_MTFCC,USCB_FUNCSTAT,USCB_ALAND,USCB_AWATER,USCB_INTPTLAT,USCB_INTPTLON,geometry
0,06,037,401902,06037401902,4019.02,Census Tract 4019.02,G5020,S,2656563,3536,+34.1011641,-117.7249135,POINT (-117.72358 34.09620)
1,06,037,400205,06037400205,4002.05,Census Tract 4002.05,G5020,S,23546199,341777,+34.1521556,-117.7176795,POINT (-117.70773 34.13628)
2,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.38491 33.92406)
3,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.37953 33.92064)
135,06,037,261104,06037261104,2611.04,Census Tract 2611.04,G5020,S,4446940,151199,+34.1126013,-118.4072679,POINT (-118.40628 34.12698)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3139,06,037,930400,06037930400,9304,Census Tract 9304,G5020,S,1489696833,4805908,+34.3144808,-117.9304341,POINT (-118.36110 34.29468)
3140,06,037,103300,06037103300,1033,Census Tract 1033,G5020,S,6141955,0,+34.2573786,-118.3554785,POINT (-118.33687 34.26400)
3147,06,037,102107,06037102107,1021.07,Census Tract 1021.07,G5020,S,12908558,13753,+34.2409052,-118.3395988,POINT (-118.36711 34.23625)
3148,06,037,101110,06037101110,1011.10,Census Tract 1011.10,G5020,S,1142401,0,+34.2594737,-118.2929869,POINT (-118.30154 34.25842)


In [6]:
filtered_fire_station_data = fire_station_data.drop_duplicates(subset=['USCB_GEOID', 'USCB_INTPTLAT', 'USCB_INTPTLON'])
filtered_fire_station_data

Unnamed: 0,USCB_STATEFP,countyfp,USCB_TRACTCE,USCB_GEOID,USCB_NAME,USCB_NAMELSAD,USCB_MTFCC,USCB_FUNCSTAT,USCB_ALAND,USCB_AWATER,USCB_INTPTLAT,USCB_INTPTLON,geometry
0,06,037,401902,06037401902,4019.02,Census Tract 4019.02,G5020,S,2656563,3536,+34.1011641,-117.7249135,POINT (-117.72358 34.09620)
1,06,037,400205,06037400205,4002.05,Census Tract 4002.05,G5020,S,23546199,341777,+34.1521556,-117.7176795,POINT (-117.70773 34.13628)
2,06,037,980013,06037980013,9800.13,Census Tract 9800.13,G5020,S,5308102,0,+33.9164970,-118.3871461,POINT (-118.38491 33.92406)
4,06,115,040901,06115040901,409.01,Census Tract 409.01,G5020,S,263666616,4473094,+39.1852102,-121.3585368,POINT (-121.28916 39.20226)
6,06,013,385100,06013385100,3851,Census Tract 3851,G5020,S,2390590,0,+37.9242312,-122.2957499,POINT (-122.29975 37.92984)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145,06,071,010806,06071010806,108.06,Census Tract 108.06,G5020,S,12526996,0,+34.2587545,-117.3129594,POINT (-117.30146 34.24676)
3146,06,071,006603,06071006603,66.03,Census Tract 66.03,G5020,S,1829958,0,+34.0821080,-117.3367255,POINT (-117.33472 34.08379)
3147,06,037,102107,06037102107,1021.07,Census Tract 1021.07,G5020,S,12908558,13753,+34.2409052,-118.3395988,POINT (-118.36711 34.23625)
3148,06,037,101110,06037101110,1011.10,Census Tract 1011.10,G5020,S,1142401,0,+34.2594737,-118.2929869,POINT (-118.30154 34.25842)


In [7]:
county_count_fire_stations = filtered_fire_station_data['countyfp'].value_counts().reset_index()
county_count_fire_stations = county_count_fire_stations.rename(columns={'count':'num_fire_stations'})
county_count_fire_stations.head(50)

Unnamed: 0,countyfp,num_fire_stations
0,37,376
1,73,165
2,71,147
3,59,136
4,65,128
5,1,92
6,85,85
7,67,80
8,13,64
9,29,61


In [8]:
# read in CA census tiger file
ca_tract_county = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county)
ca_tract_county = ca_tract_county.drop(columns={'field_1', 'geometry'})
ca_tract_county.columns = ca_tract_county.columns.str.lower()
ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if type(s) == str else s)

ca_tract_county

Unnamed: 0,tract,countyfp,county
0,06085504321,085,santa clara
1,06085504410,085,santa clara
2,06085507003,085,santa clara
3,06085507004,085,santa clara
4,06085502204,085,santa clara
...,...,...,...
9124,06059001303,059,orange
9125,06059001304,059,orange
9126,06059001401,059,orange
9127,06013367200,013,contra costa


In [9]:
fire_stations_merged = pd.merge(ca_tract_county, county_count_fire_stations, how='left', on='countyfp')
fire_stations_merged

Unnamed: 0,tract,countyfp,county,num_fire_stations
0,06085504321,085,santa clara,85
1,06085504410,085,santa clara,85
2,06085507003,085,santa clara,85
3,06085507004,085,santa clara,85
4,06085502204,085,santa clara,85
...,...,...,...,...
9124,06059001303,059,orange,136
9125,06059001304,059,orange,136
9126,06059001401,059,orange,136
9127,06013367200,013,contra costa,64


In [10]:
county_pop = "s3://ca-climate-index/0_map_data/county_est_pop_2022.csv"
county_pop = pd.read_csv(county_pop)
county_pop = county_pop.drop({'Unnamed: 0'}, axis=1)
county_pop['county'] = county_pop['county'].str.lower()

county_pop

Unnamed: 0,county,est_total_pop
0,alameda,1663823
1,alpine,1515
2,amador,40577
3,butte,213605
4,calaveras,45674
5,colusa,21811
6,contra costa,1162648
7,del norte,27462
8,el dorado,191713
9,fresno,1008280


In [11]:
fire_stations_per_population = pd.merge(fire_stations_merged, county_pop, on='county', how='left')
fire_stations_per_population

Unnamed: 0,tract,countyfp,county,num_fire_stations,est_total_pop
0,06085504321,085,santa clara,85,1916831
1,06085504410,085,santa clara,85,1916831
2,06085507003,085,santa clara,85,1916831
3,06085507004,085,santa clara,85,1916831
4,06085502204,085,santa clara,85,1916831
...,...,...,...,...,...
9124,06059001303,059,orange,136,3175227
9125,06059001304,059,orange,136,3175227
9126,06059001401,059,orange,136,3175227
9127,06013367200,013,contra costa,64,1162648


In [12]:
fire_stations_per_population['num_fire_stations_per_10000_people'] = (fire_stations_per_population['num_fire_stations'] / fire_stations_per_population['est_total_pop']) * 10000
fire_stations_per_population

Unnamed: 0,tract,countyfp,county,num_fire_stations,est_total_pop,num_fire_stations_per_10000_people
0,06085504321,085,santa clara,85,1916831,0.443440
1,06085504410,085,santa clara,85,1916831,0.443440
2,06085507003,085,santa clara,85,1916831,0.443440
3,06085507004,085,santa clara,85,1916831,0.443440
4,06085502204,085,santa clara,85,1916831,0.443440
...,...,...,...,...,...,...
9124,06059001303,059,orange,136,3175227,0.428316
9125,06059001304,059,orange,136,3175227,0.428316
9126,06059001401,059,orange,136,3175227,0.428316
9127,06013367200,013,contra costa,64,1162648,0.550468


In [20]:
fire_stations_per_population.to_csv('governance_fire_stations_metric.csv', index=False)

In [17]:
@append_metadata
def fire_station_upload(input_csv, export=False, varname=''):
    '''
    Uploads the number of California fire station metric to S3 bucket. The metric is:
    
    * Number of fire stations per CA county per 10,000 people
    
    Fire stations are likely municipal and CalFire stations, though we are unable to isoate them within the data.

    Data for this metric was sourced from the United States Geological Service at:
    https://azgeo-open-data-agic.hub.arcgis.com/ though the original dataset has been replaced with similar datasets

    Methods
    -------
    Relevant data columns were isolated, some were renamed for later merging with California tract data.
    Duplicate stations that matched anothers tract ID, latitude, and longitude were dropped.
    Number of fire stations per county was calculated by grouping countyfp's together and generating a count.
    Data was then merged with CA tract/county data to attribute each county total to all CA tracts.
    Data was once again merged with CA county population totals so number of fire stations per 10,000 could be calculated.
    
    Parameters
    ----------
    input_csv: string
        csv PSPS data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI Fire Station metric to AWS
        True = will upload resulting df containing CAL CRAI Fire Station metric to AWS

    Script
    ------
    governance_fire_stations.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: duplicate entries by location were dropped.')
    print('Data transformation: number of rows per county were totalled.')
    print('Data transformation: data was merged with CA county and population data to generate final metric data.') 
 
    if export == True:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)

    if export == False:
        print(f'{input_csv} uploaded to AWS.')
 
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [21]:
input_csv = 'governance_fire_stations_metric.csv'
varname = 'governance_usgs_fire_stations'

fire_station_upload(input_csv, export=True, varname='test')

Data transformation: relevant columns were isolated and renamed
Data transformation: duplicate entries by location were dropped.
Data transformation: number of rows per county were totalled.
Data transformation: data was merged with CA county and population data to generate final metric data.
governance_fire_stations_metric.csv uploaded to AWS
