## Cal-CRAI Metric Calculation
Domain: Climate Risks \
Indicator: Heat Loss

This notebook calculates one metric, sourced from the Public Health Institute's Tracking California program:
* Metric 1: Average number of age-adjusted heat-related hospitalizations per 10,000

In [1]:
import os
import sys
import pandas as pd
import io
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.write_metadata import (
    append_metadata
)
from scripts.utils.file_helpers import (
    pull_csv_from_directory, upload_csv_aws
) 
pd.set_option('display.max_columns', None)

In [None]:
# S3 bucket and directory holding the source data, plus the local folder name handed to the pull helper
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/extreme_heat/loss/public_health_institute/emergency_department_visits/'
folder = 'csv_folder'

# NOTE(review): presumably downloads the csv files found under aws_dir into `folder`
# (the next cell reads from csv_folder/) -- confirm against scripts.utils.file_helpers
pull_csv_from_directory(bucket_name, aws_dir, folder, search_zipped=False)

In [3]:
# load the emergency department visits csv from the local folder into a dataframe
heat_dept_visits_data = pd.read_csv(r'csv_folder/heat_related_illness - emergency department visits.csv')

In [None]:
# drop the confidence-limit/total columns, rename the rate column, eliminate statewide data,
# and make all strings (cell values and column names) lowercase
heat_dept_visits_cleaned = (
    heat_dept_visits_data
    .drop(columns=['Lower\n95% Limit', 'Upper\n95% Limit', 'Total'])
    .rename(columns={'Age-adjusted\nrate per 100,000': 'age_adjust_per_100000'})
    .loc[lambda df: df['County'] != 'California/Statewide']
    # lowercase string cells one column at a time; Series.map is used because
    # DataFrame.applymap is deprecated as of pandas 2.1, and Series.map works on old and new versions
    .apply(lambda column: column.map(lambda value: value.lower() if isinstance(value, str) else value))
    .rename(columns=str.lower)
)

In [None]:
# preview the cleaned data
heat_dept_visits_cleaned

In [None]:
# spot check: isolate the Alameda county rows so the grouped result below can be verified by hand
alameda = heat_dept_visits_cleaned.loc[lambda df: df['county'] == 'alameda']
alameda

In [None]:
# group the data by county and average the age-adjusted rate (reported per 100,000) across each county's rows
heat_dept_visits_grouped = (
    heat_dept_visits_cleaned
    .groupby('county')['age_adjust_per_100000']
    .mean()
    .reset_index(name='avg_age_adjust_per_100000')
)

# convert the rate from per 100,000 to per 10,000: a population ten times smaller
# has ten times fewer events, so DIVIDE by 10 (e.g. 50 per 100,000 == 5 per 10,000)
heat_dept_visits_grouped['avg_age_adjust_heat_hospitalizations_per_10000'] = (
    heat_dept_visits_grouped['avg_age_adjust_per_100000'] / 10
)

# drop the intermediate per-100,000 column
heat_dept_visits_grouped = heat_dept_visits_grouped.drop(columns=['avg_age_adjust_per_100000'])

print(len(heat_dept_visits_grouped))
heat_dept_visits_grouped.head(15)

In [None]:
# read in the CA census tract / county lookup table
# (the path gets its own name so `ca_tract_county` only ever refers to the dataframe)
ca_tract_county_path = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county_path)

# drop the index/geometry columns; errors='ignore' because, depending on the geopandas
# version/engine, a csv with no geometry may come back without a `geometry` column
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'], errors='ignore')
ca_tract_county.columns = ca_tract_county.columns.str.lower()

# lowercase string cells one column at a time; Series.map is used because
# DataFrame.applymap is deprecated as of pandas 2.1
ca_tract_county = ca_tract_county.apply(
    lambda column: column.map(lambda value: value.lower() if isinstance(value, str) else value)
)

ca_tract_county

In [None]:
# left-merge the county-level metric onto the CA county/tract table:
# every row of ca_tract_county is kept, and counties with no match get NaN
heat_dept_visits_merge = ca_tract_county.merge(
    heat_dept_visits_grouped,
    on='county',
    how='left',
)
heat_dept_visits_merge

In [10]:
# save the merged metric to a local csv (without the index); this file name is passed to the upload function below
heat_dept_visits_merge.to_csv('climate_heat_hospitalizations_metric.csv', index=False)

## Function Call

In [11]:
@append_metadata
def heat_hospitalizations_upload(input_csv, export=False, varname=''):
    '''
    Uploads the heat loss metric to the S3 bucket. The metric is:
    
    * average number of age adjusted heat related hospitalizations per 10,000
    
    Data for this metric was sourced from Public Health Institute - Tracking California:
    https://www.phi.org/our-work/programs/tracking-california/

    Methods
    -------
    Relevant columns were isolated and renamed.
    California total rows were omitted.
    Data was grouped by county and had its heat hospitalizations averaged.
    Values were converted from a per 100,000 rate to a per 10,000 rate.
    Data was merged to California census tract data.
    
    Parameters
    ----------
    input_csv: string
        csv heat loss data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI heat loss metrics to AWS
        True = will upload resulting df containing CAL CRAI heat loss metrics to AWS
    varname: string
        not read in the function body; presumably consumed by the append_metadata
        decorator -- confirm against scripts.utils.write_metadata

    Script
    ------
    climate_heat_loss_hospitalizations.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: data was grouped by county and averaged.')
    print('Data transformation: data was merged with California census tracts.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
        # only report an upload once the upload call has actually run
        print(f'{input_csv} uploaded to AWS.')
    else:
        # previously this branch claimed the file was uploaded even though nothing was sent
        print(f'{input_csv} not uploaded to AWS (export=False).')

In [13]:
# local csv written by the earlier to_csv cell
input_csv = 'climate_heat_hospitalizations_metric.csv'

# variable name handed to the function via `varname` (not read in the function body itself)
var = 'climate_phi_heat_emergency_dept_visits'

# Call the upload function; export=False skips the AWS upload branch (set export=True to upload)
heat_hospitalizations_upload(input_csv, export=False, varname=var)
