## Cal-CRAI Metric Calculation for: Climate Risk / extreme heat warnings
* Median annual number of days with excessive heat warnings

In [1]:
import geopandas as gpd
import s3fs
import pandas as pd
import boto3
import dask_geopandas
import dask.dataframe as dd
import matplotlib.pyplot as plt
import os
import sys
import numpy as np

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import upload_csv_aws, pull_gpkg_from_directory
from scripts.utils.write_metadata import append_metadata

### Retrieve data

In [None]:
# pull gpkg data from AWS
# S3 bucket and key prefix that hold the heat-warning GeoPackage
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/climate_risk/extreme_heat/exposure/iowa_state_environmental_mesonet/'

# Project helper from scripts.utils.file_helpers. Presumably downloads the .gpkg
# file(s) under `aws_dir` into the working directory (the next cell reads the
# file by bare name) -- TODO confirm against the helper.
pull_gpkg_from_directory(bucket_name, aws_dir)

In [3]:
# Load the heat-warning GeoPackage from the working directory.
heat_warnings_data = gpd.read_file('climate_iowa_mesonet_heat_warnings.gpkg')

# Keep only the columns the metric needs: the issue timestamp, the census
# identifier that is later merged against tract GEOIDs, and the geometry.
heat_warnings_data = heat_warnings_data[['ISSUED', 'USCB_GEOID', 'geometry']]

In [None]:
# Same column selection as in the previous cell; re-running it is idempotent.
heat_warnings_data = heat_warnings_data[['ISSUED', 'USCB_GEOID', 'geometry']]
# display the subsetted data
heat_warnings_data

### Clean and count the number of warning days

In [None]:
def clean_warnings(df):
    """
    Reduce raw warning records to one row per location and warning day.

    Parameters
    ----------
    df: DataFrame
        Warning records with at least the columns 'ISSUED' and 'USCB_GEOID'.
        'ISSUED' must be a string column; its first 8 characters are used as
        the day and its first 4 as the year (presumably a YYYYMMDD... stamp --
        confirm against the source data).

    Returns
    -------
    DataFrame
        Columns 'USCB_GEOID', 'ISSUED_day', 'ISSUED_year' and
        'number_warnings' (the count of retained records per group).

    Notes
    -----
    The input frame is left untouched: the derived columns are added to a new
    frame instead of being written into the caller's object.
    """
    issued = df['ISSUED']

    # shave off the time issued so we only have days (and the year);
    # `assign` returns a new frame, so the caller's data is not mutated
    df = df.assign(
        ISSUED_day=issued.str.slice(0, 8),
        ISSUED_year=issued.str.slice(0, 4),
    )

    # several warnings on the same day for the same location count once
    df = df.drop_duplicates(subset=['ISSUED_day', 'USCB_GEOID'], keep='first')

    # reduce by counting the # of events per location / day / year
    df_out = (
        df.groupby(['USCB_GEOID', 'ISSUED_day', 'ISSUED_year'])['ISSUED']
        .count()
        .reset_index()
        .rename(columns={'ISSUED': 'number_warnings'})
    )
    return df_out

# one row per location and warning day
df_cleaned = clean_warnings(heat_warnings_data)
# display the cleaned records
df_cleaned

In [None]:
# count the number of days per year
# (each row of df_cleaned is one location/day, so the row count per
# location and year is the number of warning days in that year)
dfg = df_cleaned.groupby(['USCB_GEOID', 'ISSUED_year']).count()

# keep a single count column and pivot the years into columns;
# location/year combinations with no rows become NaN after unstack
dfg = dfg.drop(columns='ISSUED_day').unstack()
dfg

In [None]:
# aggregate across years: median of the yearly warning-day counts per location
# NOTE(review): median() skips NaN by default, so years without any warning
# (NaN after the unstack) are ignored rather than counted as 0 -- confirm this
# is the intended definition of the metric.
df_agg = (
    dfg.median(axis=1)
    .to_frame()
    .reset_index()
    .rename(columns={'USCB_GEOID': 'GEOID', 0: "median_warning_days"})
)
df_agg

In [8]:
# read in the CA census tract boundaries (2021 TIGER) from S3
census_shp_dir = "s3://ca-climate-index/0_map_data/2021_tiger_census_tract/2021_ca_tract/"
ca_boundaries = gpd.read_file(census_shp_dir)

# inner join on GEOID: only tracts that have a median value are kept
heat_warning_df = df_agg.merge(ca_boundaries, on="GEOID")

# wrap the merged table as a GeoDataFrame so it can be mapped
heat_gdf = gpd.GeoDataFrame(heat_warning_df, geometry=heat_warning_df["geometry"])

In [9]:
# right join onto the CA boundaries so every tract in the boundary file is
# present (tracts without a median value get NaN), then keep only the
# identifier, the metric and the geometry
merged_heat_data = (
    df_agg.merge(ca_boundaries, on='GEOID', how='right')
    [['GEOID', 'median_warning_days', 'geometry']]
)

In [None]:
# set the island tract (GEOID 06075980401) to nan
is_island = merged_heat_data['GEOID'] == '06075980401'
merged_heat_data.loc[is_island, 'median_warning_days'] = np.nan

# check island tract: display its row to confirm the value is now nan
island = merged_heat_data[is_island]
island

In [None]:
# range of the metric across tracts (min/max skip NaN by default)
merged_heat_data['median_warning_days'].min(), merged_heat_data['median_warning_days'].max()

### Visualizing the data

In [None]:
# class breaks for the choropleth legend
bins = [2, 4, 6, 8, 10]

# NOTE(review): heat_gdf comes from the inner merge made before the island
# tract was set to nan on merged_heat_data, so the map does not reflect that
# masking -- confirm whether the plot should use merged_heat_data instead.
fig, ax = plt.subplots(figsize=(10, 10))
heat_gdf.plot(
    column="median_warning_days",
    scheme='user_defined',
    classification_kwds={'bins': bins},
    legend=True,
    ax=ax,
)
ax.set_title("Median annual extreme heat warning days")
plt.show()

In [None]:
# display the metric table (still including geometry) before export
merged_heat_data

### Export the data

In [14]:
# the exported metric is a plain table: drop the geometry and give the metric
# column its final name
merged_heat_data = (
    merged_heat_data
    .drop(columns=['geometry'])
    .rename(columns={'median_warning_days': 'median_heat_warning_days'})
)

# write the metric to a local csv (no index column)
merged_heat_data.to_csv('climate_heat_warning_metric.csv', index=False)

## Function Call

In [16]:
# NOTE: the docstring below is kept verbatim; the @append_metadata decorator may
# consume it when writing metadata -- confirm before rewording it.
# `varname` is not referenced in the function body; presumably it is read by
# the @append_metadata decorator -- confirm against scripts.utils.write_metadata.
@append_metadata
def isu_extreme_heat_warning_upload(input_csv, export=False, varname=''):
    '''
    Uploads the calculated extreme heat warning metric to S3 bucket. The metric is:
    Median number of excessive heat warnings days
    
    Data for this metric was sourced from ISU's Environmental Mesonet at:
    https://mesonet.agron.iastate.edu/request/gis/watchwarn.phtml

    Methods
    -------
    Data files were read in.
    Excessive heat warning date, location, and count columns were retained.
    Duplicate entries for a given location and date were dropped.
    Data was grouped by location and warning year.
    Number of warnings per year were summed per census tract.
    The median number of warnings were calculated for each census tract.
    
    Parameters
    ----------
    input_csv: string
        csv excessive heat warning data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI excessive heat warning metric to AWS
        True = will upload resulting df containing CAL CRAI excessive heat warning metric to AWS

    Script
    ------
    climate_risk_heat_warning.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    # summary of the transformations applied earlier in the notebook
    print('Data transformation: relevant metric columns were isolated.')
    print('Data transformation: duplicate entries per location and date were dropped.')
    print('Data transformation: GEOID 06075980401 (Farallon Islands, San Francisco County) filled with nan.') 

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # the upload helper is called with a list of file names
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
    else:
        # previously this branch claimed the file had been uploaded even though
        # no upload happens when export is False
        print(f'{input_csv} was not uploaded to AWS (export=False).')

    # the local csv is removed in both cases, as in the original behaviour
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [17]:
# csv written by the export cell above
input_csv = 'climate_heat_warning_metric.csv'
varname = 'climate_iowa_mesonet_heat_warnings'

# NOTE(review): `varname` is defined above but the literal 'test' is passed
# here while export=True -- confirm whether varname=varname was intended for
# the real upload.
isu_extreme_heat_warning_upload(input_csv, export=True, varname='test')