## Cal-CRAI Metric Calculation
Domain: Climate Risks \
Indicator: Flood Exposure

This notebook calculates one metric, sourced from the Iowa State University Environmental Mesonet:
* Metric 1: Median annual number of flood warning days per census tract

In [1]:
import geopandas as gpd
import s3fs
import pandas as pd
import boto3
import dask_geopandas
import dask.dataframe as dd
import matplotlib.pyplot as plt
import os
import sys

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

### The biggest dataset: ISU Mesonet's flood warning database

In [2]:
# the dataset is split across several files (10 at the time of writing),
# so we list every object stored under the folder prefix
def build_isu_mesonet_file_list(
    path='2b_reproject/climate_risk/flood/exposure/isu_environmental_mesonet',
    bucket_name='ca-climate-index',
):
    """
    List the keys of every S3 object stored under a folder prefix.

    Parameters
    ----------
    path: string
        key prefix (folder) to list; defaults to the reprojected
        ISU Environmental Mesonet flood warning folder
    bucket_name: string
        name of the S3 bucket to search

    Returns
    -------
    list of strings
        object keys relative to the bucket (NOT full URIs); callers must
        prepend 's3://<bucket_name>/' themselves before reading a file

    Note:
    Every object matching the prefix is returned, whatever its file type.
    AWS credentials are resolved by the default boto3 session.
    """
    # a default session/resource gives access to the bucket's object collection
    bucket = boto3.Session().resource('s3').Bucket(bucket_name)
    # the filtered collection is iterated lazily; collect the keys into a list
    return [obj.key for obj in bucket.objects.filter(Prefix=path)]

In [3]:
# S3 location of the reprojected ISU Mesonet flood warning parquet files
bucket = 'ca-climate-index'
path = '2b_reproject/climate_risk/flood/exposure/isu_environmental_mesonet'
pqt_list = build_isu_mesonet_file_list(path)

# columns that are not needed to count warning days per tract
to_drop = ['hilbert_distance', 'WFO', 'EXPIRED', 'INIT_ISS', 'INIT_EXP',
           'PHENOM', 'GTYPE', 'SIG', 'ETN', 'STATUS', 'NWS_UGC',
           'UPDATED', 'HV_NWSLI', 'HV_SEV', 'HV_CAUSE', 'HV_REC',
           'POLY_BEG', 'POLY_END', 'WINDTAG', 'HAILTAG', 'TORNTAG',
           'DAMAGTAG', 'index_right', 'USCB_NAME', 'AREA_KM2', 'EMERGENC',
           'geometry']

# one reduced (tract, day) table per parquet file
df_list = []
for key in pqt_list:
    bucket_uri = f's3://{bucket}/{key}'
    # read lazily as a dask geopandas dataframe and
    # reduce memory use by dropping unneeded columns right away
    events = dask_geopandas.read_parquet(bucket_uri).drop(columns=to_drop)
    # shave off the time issued so we only have days (first 8 characters)
    events['ISSUED_day'] = events['ISSUED'].str.slice(0, 8)
    # keep a single record per tract and day
    events = events.drop_duplicates(
        subset=['ISSUED_day', 'USCB_GEOID'], keep='first'
    )
    # materialise the reduced table in pandas; only this small result is kept
    df_out = (
        events.groupby(['USCB_GEOID', 'ISSUED_day'])['ISSUED']
        .count()
        .compute()
        .reset_index()
        .rename(columns={'ISSUED': 'number_warnings'})
    )
    df_list.append(df_out)

In [None]:
# stack the per-file tables, drop the (tract, day) duplicates that only
# show up once the files are combined, then derive the year from the day string
df_merged = (
    pd.concat(df_list)
    .drop_duplicates(subset=['ISSUED_day', 'USCB_GEOID'], keep='first')
    .assign(ISSUED_year=lambda merged: merged['ISSUED_day'].str.slice(0, 4))
)
df_merged

In [None]:
# count rows (one per warning day) for each tract and year, then pivot the
# years into columns so that every tract becomes a single row
# NOTE(review): tract/year pairs without any record become NaN after the
# unstack rather than 0 -- confirm this is the intended treatment
dfg = (
    df_merged
    .groupby(['USCB_GEOID', 'ISSUED_year'])
    .count()
    .drop(columns='ISSUED_day')
    .unstack()
)
dfg

In [None]:
# median across the year columns for each tract, returned as a two-column
# table keyed by GEOID
df_agg = (
    dfg.median(axis=1)
    .to_frame("median_warning_days")
    .reset_index()
    .rename(columns={'USCB_GEOID': 'GEOID'})
)
df_agg

In [7]:
# load the 2021 CA census tract boundaries (TIGER shapefile) from S3
census_shp_dir = "s3://ca-climate-index/0_map_data/2021_tiger_census_tract/2021_ca_tract/"
ca_boundaries = gpd.read_file(census_shp_dir)

# inner join on GEOID: only tracts that have a metric value are kept here
flood_warning_df = df_agg.merge(ca_boundaries, on="GEOID")

# promote the merged table to a GeoDataFrame so that it can be mapped
tract_shapes = flood_warning_df["geometry"]
flood_gdf = gpd.GeoDataFrame(flood_warning_df, geometry=tract_shapes)

## Adding the missing island tract
The Farallon Islands tract (GEOID 06075980401) has no flood warning records, so we merge with the 2021 census tracts to add it to the metric with a NaN value.

In [None]:
# look up the island tract in the aggregated metric table
is_island_tract = df_agg['GEOID'] == '06075980401'
island_tract = df_agg[is_island_tract]
island_tract

In [None]:
# keep only the tract identifiers for the merge below; selecting with a list
# of columns keeps this cell safe to re-run, whereas selecting the single
# column as a Series would fail the second time the cell is executed
ca_boundaries = ca_boundaries[['GEOID']]
ca_boundaries

In [None]:
# right join: every census tract is kept, and tracts without a metric
# value come through with NaN
merged_flood_data = df_agg.merge(ca_boundaries, how='right', on='GEOID')
merged_flood_data

In [None]:
# look up the island tract again, this time in the merged table
is_island_tract = merged_flood_data['GEOID'] == '06075980401'
island_tract = merged_flood_data[is_island_tract]
island_tract

## Visualizing the data

In [None]:
# class breaks for the map legend: every 2 days, from 2 through 20
bins = list(range(2, 21, 2))

fig, ax = plt.subplots(figsize=(10, 10))
# choropleth of the per-tract metric using the breaks defined above
flood_gdf.plot(
    ax=ax,
    column="median_warning_days",
    scheme='user_defined',
    classification_kwds={'bins': bins},
    legend=True,
)
ax.set_title("Median annual flood warning days")
plt.show()

In [15]:
# give the metric column its final, flood-specific name
merged_flood_data = merged_flood_data.rename(
    columns={'median_warning_days': 'median_flood_warning_days'}
)
# write the metric to a local CSV without the dataframe index
merged_flood_data.to_csv('climate_flood_warning_metric.csv', index=False)

## Function Call

In [16]:
@append_metadata
def isu_flood_warning_upload(input_csv, export=False, varname=''):
    '''
    Uploads the calculated flood warning metric to S3 bucket. The metric is:
    Median number of flood (including coastal and flash) warning days
    
    Data for this metric was sourced from ISU's Environmental Mesonet at:
    https://mesonet.agron.iastate.edu/request/gis/watchwarn.phtml

    Methods
    -------
    Data parquet files were read in and merged together.
    Flood warning date, location, and count columns were retained.
    Duplicate entries for a given location and date were dropped.
    Data was grouped by location and flood warning year.
    Number of flood warnings per year were summed per census tract.
    The median number of flood warnings were calculated for each census tract.
    
    Parameters
    ----------
    input_csv: string
        csv flood warning data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI flood warning metric to AWS
        True = will upload resulting df containing CAL CRAI flood warning metric to AWS
    varname: string
        not used in the function body; presumably consumed by the
        append_metadata decorator (verify against scripts/utils/write_metadata.py)

    Script
    ------
    geoparquet-open.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    The local input_csv file is deleted at the end of the call, whether or not it was uploaded.
    '''
    print('Data transformation: previously reprojected data parquets were read in and merged together.')
    print('Data transformation: relevant metric columns were isolated.')
    print('Data transformation: duplicate entries per location and date were dropped.')
    print('Data transformation: GEOID 06075980401 (Farallon Islands, San Francisco County) filled with nan.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # upload_csv_aws is given a list of file names
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
    else:
        # nothing was sent to S3 on this path, so do not claim an upload
        print(f'{input_csv} not uploaded to AWS.')

    # clean up the local copy of the CSV, if it is still there
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [17]:
input_csv = 'climate_flood_warning_metric.csv'
varname = 'climate_iowa_mesonet_flash_flood_warnings'

# pass the metric's variable name defined above instead of a 'test'
# placeholder, since export=True performs the real upload
# NOTE(review): confirm append_metadata expects this variable name
isu_flood_warning_upload(input_csv, export=True, varname=varname)