## Cal-CRAI Metric Calculation
Domain: Governance \
Indicator: Emergency Response

This notebook calculates four metrics, sourced from the California Employment Development Department:
* Metric 1: Number of firefighters per 10,000 people
* Metric 2: Number of registered nurses per 10,000 people
* Metric 3: Number of paramedics and emergency medical technicians per 10,000 people
* Metric 4: Number of police officers per 10,000 people

In [1]:
import pandas as pd
import os
import sys
import math
import re
import shutil

# suppress pandas PerformanceWarning messages (informational only)
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))

from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, filter_counties
from scripts.utils.write_metadata import append_metadata

In [None]:
# Pull the raw emergency-responder employment CSV(s) for this indicator from AWS.
# NOTE(review): pull_csv_from_directory is a project helper not shown here; it
# presumably downloads into the working directory, since the following cell reads
# 'ca_emergency_employment.csv' through a relative path -- confirm.
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/governance/emergency_response/ca_employment_development_dept/'

# NOTE(review): search_zipped=False presumably skips looking inside zip archives -- confirm against the helper
pull_csv_from_directory(bucket_name, aws_dir, search_zipped=False)

In [None]:
# Load the employment CSV, discard the columns that are not needed,
# and give the value column a descriptive name.
emergency_employment_data = (
    pd.read_csv('ca_emergency_employment.csv')
    .drop(columns=['Unnamed: 0', 'Subject'])
    .rename(columns={'Value': 'number_employed'})
)
# Normalise every column label to lowercase.
emergency_employment_data.columns = emergency_employment_data.columns.str.lower()
emergency_employment_data

In [None]:
# Check whether any entries are non-California counties or use a different naming scheme.
# filter_counties is a project helper (not shown here) that returns two objects:
# the first is bound to filter_county_employ, the second to omitted_data.
# NOTE(review): filter_county_employ is not referenced again in the visible cells --
# the later merge uses the unfiltered emergency_employment_data; confirm that is intended.
filter_county_employ, omitted_data = filter_counties(emergency_employment_data, 'county')
# Display the second return value for manual inspection.
omitted_data

In [None]:
# Load the estimated population per California county and discard the
# 'Unnamed: 0' column that comes with the CSV.
county_pop_link = "s3://ca-climate-index/0_map_data/county_est_pop_2022.csv"
county_pop = pd.read_csv(county_pop_link).drop(columns=['Unnamed: 0'])
county_pop.head()

In [None]:
# Attach county population to every employment record, matching on 'county'.
# A right join keeps all employment rows, including any whose county has no
# match in the population table.
ca_emergency_responder_population = county_pop.merge(
    emergency_employment_data,
    how='right',
    on='county',
)
ca_emergency_responder_population

In [None]:
# Metric: number employed per 10,000 residents, computed as
# (number_employed / est_total_pop) * 10000 for every row.
ca_emergency_responder_population['number_employed_per_10000'] = (
    ca_emergency_responder_population['number_employed']
    .div(ca_emergency_responder_population['est_total_pop'])
    .mul(10000)
)
ca_emergency_responder_population

## Separate the data into four different dataframes, one for each occupation
* Save each dataframe as a CSV, adding a 'governance_' prefix and a '_metric' suffix to the filename

In [15]:
# Load the census tract / county table, standardise the two key column names,
# and keep only what is needed for the merge by dropping the remaining extras.
county_tract = "s3://ca-climate-index/0_map_data/ca_tract_county_population_2021.csv"
ca_county_tract = (
    pd.read_csv(county_tract)
    .rename(columns={'Census Tract': 'census_tract', 'County': 'county'})
    .drop(columns=['Unnamed: 0', 'COUNTYFP', 'Total Population 2021'])
)

In [None]:
# Split the merged employment/population table into one dataframe per occupation,
# expand each to census-tract level, and write it to its own CSV in the current directory.
occupation_column = ca_emergency_responder_population['occupation code']
unique_occupations = occupation_column.unique()

# Dictionary of per-occupation dataframes, keyed by the 'occupation code' value.
# That value doubles as the occupation name used to build the filename below.
# Missing codes are skipped: they cannot be matched with `==` and cannot be
# turned into a filename.
occupation_dfs = {
    occupation_code: ca_emergency_responder_population[occupation_column == occupation_code].copy()
    for occupation_code in unique_occupations
    if pd.notna(occupation_code)
}

# Loop invariants: pattern for stand-alone numbers, and the output directory.
standalone_number = re.compile(r'\b\d+\b')
save_path = './'  # Saves in the current directory

for occupation_name, occupation_df in occupation_dfs.items():
    # Clean up the occupation name: remove numbers, then lowercase and snake_case it
    occupation_name_clean = standalone_number.sub('', occupation_name).strip()
    occupation_name_clean = occupation_name_clean.lower().replace(' ', '_')

    # Define a filename based on the cleaned occupation name
    filename = f"governance_{occupation_name_clean}_metric.csv"
    file_path = os.path.join(save_path, filename)

    # Left-merge onto 'ca_county_tract' so every tract row is kept and receives
    # the values of its county, then give the metric column an occupation-specific name
    occupation_df = pd.merge(ca_county_tract, occupation_df, how='left', on='county')
    occupation_df = occupation_df.rename(
        columns={'number_employed_per_10000': f'num_{occupation_name_clean}_employed_per_10000'}
    )

    # Save the dataframe as a CSV file
    occupation_df.to_csv(file_path, index=False)

    print(f"Saved {filename} successfully.")

## Function Call

In [38]:
@append_metadata
def first_responders_upload(input_csv, export=False, varname=''):
    '''
    Uploads the calculated emergency responder metrics to S3 bucket. The metrics are:
    - # of firefighters per 10,000 people
    - # of registered nurses per 10,000 people
    - # of paramedics and emergency medical technicians per 10,000 people
    - # of police officers per 10,000 people

    Data for this metric was sourced from the California Employment Development Department at:
    https://labormarketinfo.edd.ca.gov/geography/demoaa.html

    Methods
    -------
    Data columns were renamed for better transparency.
    Population per California county data were merged with the employment data.
    A new column was calculated by dividing each emergency responder employment by each county's population.
    The dataset was split into four different datasets, one for each emergency responder position, then saved as their own csv's.
    Each dataset was merged to California census tract data.

    Parameters
    ----------
    input_csv: string
        csv emergency responder employment data
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI emergency responder metric to AWS
        True = will upload resulting df containing CAL CRAI emergency responder metric to AWS
    varname: string
        variable name associated with this metric; it is not read by the function body
        itself (presumably consumed by the append_metadata decorator)

    Script
    ------
    governance_emergency_responders.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are 
    stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: columns were renamed for transparency.')
    print('Data transformation: population data were merged into the dataset')
    print('Data transformation: data were separated into four different datasets, one for each emergency responder position.')
    print('Data transformation: data were merged to CA census tract data.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
        # Confirm only after the upload call has returned
        print(f'{os.path.basename(input_csv)} uploaded to AWS.')
    else:
        # Previously this branch claimed the file had been uploaded
        print(f'{os.path.basename(input_csv)} was not uploaded to AWS (export=False).')

In [39]:
# CSV files written by the per-occupation split cell, paired by position with
# the variable name each one is uploaded under.
filenames = [
    'governance_emergency_medical_technicians_and_paramedics_metric.csv',
    'governance_firefighting_and_prevention_workers_metric.csv',
    'governance_police_officers_metric.csv',
    'governance_registered_nurses_metric.csv',
]

varnames = [
    'governance_edd_responder_parametics',
    'governance_edd_responder_firefighter',
    'governance_edd_responder_police',
    'governance_edd_responder_nurse',
]

# Process the data and export
for filename, varname in zip(filenames, varnames):
    # Pass the matching variable name instead of the 'test' placeholder, which
    # left the loop's varname unused.
    # NOTE(review): if 'test' was a deliberate guard against writing metadata, revert this line.
    first_responders_upload(filename, export=True, varname=varname)

    # Remove the local copy once the upload call has returned
    if os.path.exists(filename):
        os.remove(filename)