## This notebook calculates the following Climate Domain Flood Loss metrics:
- number of direct flood/flash flood fatalities per county since 1996
- estimated monetary crop damage

In [2]:
import os
import sys
import pandas as pd
import io
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.write_metadata import (
    append_metadata
)
from scripts.utils.file_helpers import (
    pull_csv_from_directory, upload_csv_aws
) 
pd.set_option('display.max_columns', None)

In [3]:
# S3 bucket and prefix handed to the project helper that pulls the source CSV
bucket_name = 'ca-climate-index'
aws_dir = '1_pull_data/climate_risk/flood/loss/noaa/downloaded_files/all_events/'
# local directory name handed to the helper; the next cell reads the saved CSV from this directory
folder = 'csv_folder'

# download step (helper imported from scripts.utils.file_helpers)
pull_csv_from_directory(bucket_name, aws_dir, folder, search_zipped=False)

Saved DataFrame as 'csv_folder\all_noaa_storm_events_ca.csv'


In [4]:
# read in data
# build the path from `folder` (set in the download cell) instead of repeating the directory name
noaa_storm_event_data = pd.read_csv(os.path.join(folder, 'all_noaa_storm_events_ca.csv'))
noaa_storm_event_data

Unnamed: 0,BEGIN_YEARMONTH,BEGIN_DAY,BEGIN_TIME,END_YEARMONTH,END_DAY,END_TIME,EPISODE_ID,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,CZ_TIMEZONE,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,MAGNITUDE,MAGNITUDE_TYPE,FLOOD_CAUSE,CATEGORY,TOR_F_SCALE,TOR_LENGTH,TOR_WIDTH,TOR_OTHER_WFO,TOR_OTHER_CZ_STATE,TOR_OTHER_CZ_FIPS,TOR_OTHER_CZ_NAME,BEGIN_RANGE,BEGIN_AZIMUTH,BEGIN_LOCATION,END_RANGE,END_AZIMUTH,END_LOCATION,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EPISODE_NARRATIVE,EVENT_NARRATIVE,DATA_SOURCE
0,199707,27,2032,199707,27,2032,1048394,5617146,CALIFORNIA,6,1997,July,Thunderstorm Wind,C,65,RIVERSIDE,PSR,27-JUL-97 20:32:00,MST,27-JUL-97 20:32:00,0,0,0,0,,,,52.00,,,,,,,,,,,,,BLYTHE,,,BLYTHE,33.6200,-114.6000,33.6200,-114.6000,,Thunderstorm wind gust was measured at the Bly...,PDC
1,199707,28,1700,199707,28,1700,2070479,5622118,CALIFORNIA,6,1997,July,Flash Flood,C,93,SISKIYOU,MFR,28-JUL-97 17:00:00,PST,28-JUL-97 17:00:00,0,0,0,0,,,,,,,,,,,,,,,,,HAPPY CAMP,,,HAPPY CAMP,,,,,,FOREST SERVICE REPORTED A FLASH FLOOD ON GRANI...,PDC
2,199707,27,1600,199707,27,1800,2070480,5622119,CALIFORNIA,6,1997,July,Hail,C,93,SISKIYOU,MFR,27-JUL-97 16:00:00,PST,27-JUL-97 18:00:00,0,0,0,0,,,,0.75,,,,,,,,,,,8.0,S,ETNA,8.0,S,ETNA,41.3300,-122.9000,41.3300,-122.9000,,,PDC
3,199707,27,1600,199707,27,1800,2070480,5622120,CALIFORNIA,6,1997,July,Flash Flood,C,93,SISKIYOU,MFR,27-JUL-97 16:00:00,PST,27-JUL-97 18:00:00,0,0,0,0,,,,,,,,,,,,,,,4.0,S,ETNA,4.0,S,ETNA,,,,,,1.5 INCHES OF RAIN IN 2 HRS RESULTED IN MINOR ...,PDC
4,199707,30,1600,199707,30,1600,2070481,5622121,CALIFORNIA,6,1997,July,Flash Flood,C,93,SISKIYOU,MFR,30-JUL-97 16:00:00,PST,30-JUL-97 16:00:00,0,0,0,0,,,,,,,,,,,,,,,2.0,S,FT JONES,2.0,S,FT JONES,,,,,,2 INCHES OF RAIN IN 30 MIN. RESULTED IN 2 FT. ...,PDC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26959,202004,26,900,202004,30,1900,162838,983437,CALIFORNIA,6,2020,April,Excessive Heat,Z,566,IMPERIAL COUNTY WEST,PSR,26-APR-20 09:00:00,PST-8,30-APR-20 19:00:00,0,0,0,0,0.00K,0.00K,County Official,,,,,,,,,,,,,,,,,,,,,,Strong high pressure over the Desert Southwest...,,CSV
26960,202010,26,330,202010,26,345,152864,920678,CALIFORNIA,6,2020,October,High Wind,Z,568,CHIRIACO SUMMIT,PSR,26-OCT-20 03:30:00,PST-8,26-OCT-20 03:45:00,0,0,0,0,20.00K,0.00K,Department of Highways,35.00,ES,,,,,,,,,,,,,,,,,,,,"During the morning hours on October 26th, a st...","Following a cold frontal passage, strong gusty...",CSV
26961,202008,15,1230,202008,15,1234,152203,916656,CALIFORNIA,6,2020,August,Tornado,C,35,LASSEN,REV,15-AUG-20 12:30:00,PST-8,15-AUG-20 12:34:00,0,0,0,0,0.00K,0.00K,NWS Storm Survey,,,,,EF1,0.43,137.0,,,,,3.0,WSW,PLUMAS,3.0,WSW,PLUMAS,39.7313,-120.1284,39.7301,-120.1209,Dry conditions and anomalously low fuel moistu...,A mesoanticyclone embedded within a deep pyroc...,CSV
26962,202008,15,1304,202008,15,1308,152203,916709,CALIFORNIA,6,2020,August,Tornado,C,35,LASSEN,REV,15-AUG-20 13:04:00,PST-8,15-AUG-20 13:08:00,0,0,0,0,0.00K,0.00K,NWS Storm Survey,,,,,EF1,0.09,20.0,,,,,32.0,ENE,COYOTEVILLE,32.0,ENE,COYOTEVILLE,39.7239,-120.1303,39.7226,-120.1299,Dry conditions and anomalously low fuel moistu...,A mesoanticyclone embedded within a deep pyroc...,CSV


In [5]:
# list the available columns before choosing the subset used for the metrics
noaa_storm_event_data.columns

Index(['BEGIN_YEARMONTH', 'BEGIN_DAY', 'BEGIN_TIME', 'END_YEARMONTH',
       'END_DAY', 'END_TIME', 'EPISODE_ID', 'EVENT_ID', 'STATE', 'STATE_FIPS',
       'YEAR', 'MONTH_NAME', 'EVENT_TYPE', 'CZ_TYPE', 'CZ_FIPS', 'CZ_NAME',
       'WFO', 'BEGIN_DATE_TIME', 'CZ_TIMEZONE', 'END_DATE_TIME',
       'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
       'DEATHS_INDIRECT', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'SOURCE',
       'MAGNITUDE', 'MAGNITUDE_TYPE', 'FLOOD_CAUSE', 'CATEGORY', 'TOR_F_SCALE',
       'TOR_LENGTH', 'TOR_WIDTH', 'TOR_OTHER_WFO', 'TOR_OTHER_CZ_STATE',
       'TOR_OTHER_CZ_FIPS', 'TOR_OTHER_CZ_NAME', 'BEGIN_RANGE',
       'BEGIN_AZIMUTH', 'BEGIN_LOCATION', 'END_RANGE', 'END_AZIMUTH',
       'END_LOCATION', 'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
       'EPISODE_NARRATIVE', 'EVENT_NARRATIVE', 'DATA_SOURCE'],
      dtype='object')

In [6]:
# keep only the event id, year, event type, zone keys, loss fields and begin coordinates
columns = [
    'EVENT_ID', 'YEAR', 'EVENT_TYPE',
    'CZ_TYPE', 'CZ_FIPS',
    'DEATHS_DIRECT', 'DAMAGE_CROPS',
    'BEGIN_LAT', 'BEGIN_LON',
]
noaa_storm_columns = noaa_storm_event_data[columns]
noaa_storm_columns

Unnamed: 0,EVENT_ID,YEAR,EVENT_TYPE,CZ_TYPE,CZ_FIPS,DEATHS_DIRECT,DAMAGE_CROPS,BEGIN_LAT,BEGIN_LON
0,5617146,1997,Thunderstorm Wind,C,65,0,,33.6200,-114.6000
1,5622118,1997,Flash Flood,C,93,0,,,
2,5622119,1997,Hail,C,93,0,,41.3300,-122.9000
3,5622120,1997,Flash Flood,C,93,0,,,
4,5622121,1997,Flash Flood,C,93,0,,,
...,...,...,...,...,...,...,...,...,...
26959,983437,2020,Excessive Heat,Z,566,0,0.00K,,
26960,920678,2020,High Wind,Z,568,0,0.00K,,
26961,916656,2020,Tornado,C,35,0,0.00K,39.7313,-120.1284
26962,916709,2020,Tornado,C,35,0,0.00K,39.7239,-120.1303


In [7]:
# count the rows that have no begin latitude
nan_count = noaa_storm_columns['BEGIN_LAT'].isna().sum()
print(f"Number of NaN values in BEGIN_LAT: {nan_count}")

Number of NaN values in BEGIN_LAT: 20517


In [8]:
# list every distinct value recorded in the EVENT_TYPE column
noaa_storm_columns['EVENT_TYPE'].unique()

array(['Thunderstorm Wind', 'Flash Flood', 'Hail', 'Wildfire',
       'High Surf', 'Heavy Rain', 'Dust Storm', 'Heat', 'Tornado',
       'Lightning', 'Heavy Snow', 'Flood', 'Winter Storm', 'Rip Current',
       'Funnel Cloud', 'High Wind', 'Waterspout', 'Cold/Wind Chill',
       'Winter Weather', 'Storm Surge/Tide', 'Strong Wind', 'Dust Devil',
       'Dense Fog', 'Tropical Storm', 'Frost/Freeze', 'Ice Storm',
       'Coastal Flood', 'Debris Flow', 'Avalanche', 'Blizzard', 'Drought',
       'Extreme Cold/Wind Chill', 'Tsunami', 'Excessive Heat',
       'Dense Smoke', 'Sneakerwave', 'Freezing Fog',
       'Astronomical Low Tide'], dtype=object)

In [9]:
# event types treated as flood related for these metrics
flood_types = ['Flash Flood', 'Heavy Rain', 'Flood', 'Coastal Flood', 'Tsunami']

# CZ_TYPE value(s) kept by the county filter in the following cell
cz_type_county = ['C']

# boolean mask over EVENT_TYPE, then keep only the matching rows
is_flood_event = noaa_storm_columns['EVENT_TYPE'].isin(flood_types)
flood_events = noaa_storm_columns[is_flood_event]
flood_events

Unnamed: 0,EVENT_ID,YEAR,EVENT_TYPE,CZ_TYPE,CZ_FIPS,DEATHS_DIRECT,DAMAGE_CROPS,BEGIN_LAT,BEGIN_LON
1,5622118,1997,Flash Flood,C,93,0,,,
3,5622120,1997,Flash Flood,C,93,0,,,
4,5622121,1997,Flash Flood,C,93,0,,,
7,5622040,1997,Flash Flood,C,93,0,,,
9,5600706,1997,Heavy Rain,C,83,0,,,
...,...,...,...,...,...,...,...,...,...
26622,930949,2020,Flash Flood,C,7,0,1.00K,39.6600,-121.3100
26623,930952,2020,Flood,C,89,0,0.00K,40.4403,-122.2917
26675,924368,2020,Coastal Flood,Z,505,0,0.00K,,
26676,924370,2020,Coastal Flood,Z,506,0,0.00K,,


In [10]:
# only a small share of rows carry lat/lon values, so location is taken from CZ_TYPE / CZ_FIPS instead
# further filter for county events (CZ_TYPE == 'C'), working on an independent copy
county_mask = flood_events['CZ_TYPE'].isin(cz_type_county)
flood_county_events = flood_events[county_mask].copy()

# the coordinate columns are no longer needed once rows are keyed by county
flood_county_event_filtered = flood_county_events.drop(columns=['BEGIN_LAT', 'BEGIN_LON'])
flood_county_event_filtered

Unnamed: 0,EVENT_ID,YEAR,EVENT_TYPE,CZ_TYPE,CZ_FIPS,DEATHS_DIRECT,DAMAGE_CROPS
1,5622118,1997,Flash Flood,C,93,0,
3,5622120,1997,Flash Flood,C,93,0,
4,5622121,1997,Flash Flood,C,93,0,
7,5622040,1997,Flash Flood,C,93,0,
9,5600706,1997,Heavy Rain,C,83,0,
...,...,...,...,...,...,...,...
26446,932481,2020,Heavy Rain,C,35,0,0.00K
26500,915118,2020,Flash Flood,C,37,0,0.00K
26622,930949,2020,Flash Flood,C,7,0,1.00K
26623,930952,2020,Flood,C,89,0,0.00K


In [11]:
# flag repeated storm events by EVENT_ID (boolean mask: True marks a repeat of an earlier row)
selected_columns = ['EVENT_ID']
flood_county_event_drop_duplicates = flood_county_event_filtered.duplicated(subset=selected_columns, keep='first')
duplicate_count = flood_county_event_drop_duplicates.sum()

# actually remove the flagged rows so a repeated event cannot be double counted in the county sums;
# this is a no-op when duplicate_count is 0. `.copy()` keeps later column assignments from
# warning about writing to a slice.
flood_county_event_filtered = flood_county_event_filtered[~flood_county_event_drop_duplicates].copy()

# number of duplicate rows that were found (and removed)
duplicate_count

0

In [12]:
# re-display the county-level flood events that the damage conversion below operates on
flood_county_event_filtered

Unnamed: 0,EVENT_ID,YEAR,EVENT_TYPE,CZ_TYPE,CZ_FIPS,DEATHS_DIRECT,DAMAGE_CROPS
1,5622118,1997,Flash Flood,C,93,0,
3,5622120,1997,Flash Flood,C,93,0,
4,5622121,1997,Flash Flood,C,93,0,
7,5622040,1997,Flash Flood,C,93,0,
9,5600706,1997,Heavy Rain,C,83,0,
...,...,...,...,...,...,...,...
26446,932481,2020,Heavy Rain,C,35,0,0.00K
26500,915118,2020,Flash Flood,C,37,0,0.00K
26622,930949,2020,Flash Flood,C,7,0,1.00K
26623,930952,2020,Flood,C,89,0,0.00K


In [13]:
# inspect the raw DAMAGE_CROPS strings to see which suffixes need converting
flood_county_event_filtered['DAMAGE_CROPS'].unique()

array([nan, '0', '0M', '200M', '73.6M', '7.2M', '10.2M', '5.9M', '5.3M',
       '200K', '7.1M', '242K', '280K', '4.2M', '14.1M', '500K', '1.5M',
       '5.4M', '1.8M', '1M', '1.4M', '6.9M', '5.7M', '159K', '100K',
       '300K', '7.8M', '8.9M', '13.2M', '9.6M', '7.81M', '5.5M', '2M',
       '10.8M', '8M', '190M', '21.94M', '671K', '8.6M', '4M', '3M',
       '32.5M', '16.6M', '1.96M', '50K', '400K', '9.1M', '16M', '2.2M',
       '5.8M', '0.00K', '610.00K', '150.00K', '3.20M', '20.00M', '50.00K',
       '1.55M', '745.00K', '5.00K', '10.00K', '1.00K', '15.00M'],
      dtype=object)

In [14]:
# function to convert 'K', 'M' and 'B' suffixes while properly adjusting value
def convert_to_numeric(value):
    """Convert a damage entry such as '1.5M' or '242K' to a float.

    Parameters
    ----------
    value : str, int, float or missing
        Damage entry. Strings may have surrounding whitespace and a trailing
        magnitude suffix ('K' = thousands, 'M' = millions, 'B' = billions,
        case-insensitive). Numbers are passed through unchanged, so applying
        the function to an already converted column is harmless.

    Returns
    -------
    float
        The amount with the suffix expanded, or NaN when the entry is missing.

    Raises
    ------
    ValueError
        If the text left after removing the suffix is not a valid number.
    """
    if pd.isna(value):
        return np.nan
    # already numeric (e.g. the conversion cell was re-run): nothing to parse
    if isinstance(value, (int, float, np.integer, np.floating)):
        return float(value)
    value = value.strip()
    multipliers = {'K': 1e3, 'M': 1e6, 'B': 1e9}
    # slicing (not indexing) so an empty string falls through to float() and raises ValueError
    suffix = value[-1:].upper()
    if suffix in multipliers:
        return float(value[:-1]) * multipliers[suffix]
    return float(value)
    
# NOTE: plain assignment creates an alias, not a copy -- the column overwrite below also changes
# flood_county_event_filtered, which the following cells read DAMAGE_CROPS from
flood_county_event_adjusted = flood_county_event_filtered
# Apply the function to the DAMAGE_CROPS column
flood_county_event_adjusted['DAMAGE_CROPS'] = flood_county_event_adjusted['DAMAGE_CROPS'].apply(convert_to_numeric)

# Display the DataFrame to verify the conversion
flood_county_event_adjusted

Unnamed: 0,EVENT_ID,YEAR,EVENT_TYPE,CZ_TYPE,CZ_FIPS,DEATHS_DIRECT,DAMAGE_CROPS
1,5622118,1997,Flash Flood,C,93,0,
3,5622120,1997,Flash Flood,C,93,0,
4,5622121,1997,Flash Flood,C,93,0,
7,5622040,1997,Flash Flood,C,93,0,
9,5600706,1997,Heavy Rain,C,83,0,
...,...,...,...,...,...,...,...
26446,932481,2020,Heavy Rain,C,35,0,0.0
26500,915118,2020,Flash Flood,C,37,0,0.0
26622,930949,2020,Flash Flood,C,7,0,1000.0
26623,930952,2020,Flood,C,89,0,0.0


In [15]:
# total_fatalities is a copy of DEATHS_DIRECT only (DEATHS_INDIRECT is not among the selected columns)
flood_county_event_filtered['total_fatalities'] = flood_county_event_filtered['DEATHS_DIRECT']
# make sure DAMAGE_CROPS is numeric before it is summed per county in the next cell
flood_county_event_filtered['DAMAGE_CROPS'] = pd.to_numeric(flood_county_event_filtered['DAMAGE_CROPS'])

flood_county_event_filtered

Unnamed: 0,EVENT_ID,YEAR,EVENT_TYPE,CZ_TYPE,CZ_FIPS,DEATHS_DIRECT,DAMAGE_CROPS,total_fatalities
1,5622118,1997,Flash Flood,C,93,0,,0
3,5622120,1997,Flash Flood,C,93,0,,0
4,5622121,1997,Flash Flood,C,93,0,,0
7,5622040,1997,Flash Flood,C,93,0,,0
9,5600706,1997,Heavy Rain,C,83,0,,0
...,...,...,...,...,...,...,...,...
26446,932481,2020,Heavy Rain,C,35,0,0.0,0
26500,915118,2020,Flash Flood,C,37,0,0.0,0
26622,930949,2020,Flash Flood,C,7,0,1000.0,0
26623,930952,2020,Flood,C,89,0,0.0,0


In [16]:
# per-county totals of fatalities and crop damage, keyed by the county FIPS code
columns_to_sum = ['total_fatalities','DAMAGE_CROPS']
flood_loss_sums = (
    flood_county_event_filtered
    .groupby('CZ_FIPS')[columns_to_sum]
    .sum()
    .reset_index()
    .rename(columns={'CZ_FIPS': 'countyfp', 'DAMAGE_CROPS':'estimated_crop_loss_cost'})
)
# zero-pad the county code to three characters so it can be joined on as a string
flood_loss_sums['countyfp'] = flood_loss_sums['countyfp'].astype(str).str.zfill(3)

flood_loss_sums
                                                   

Unnamed: 0,countyfp,total_fatalities,estimated_crop_loss_cost
0,1,1,0.0
1,3,0,0.0
2,5,0,0.0
3,7,0,1000.0
4,9,0,0.0
5,11,0,0.0
6,13,0,50000.0
7,15,0,0.0
8,17,0,0.0
9,19,0,70440000.0


In [17]:
# read in CA census tiger file
# keep the path under its own name so `ca_tract_county` only ever refers to the table
ca_tract_county_path = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = gpd.read_file(ca_tract_county_path)
ca_tract_county = ca_tract_county.drop(columns=['field_1', 'geometry'])
ca_tract_county.columns = ca_tract_county.columns.str.lower()
# lower-case every string cell, column by column; Series.map avoids the deprecated DataFrame.applymap
ca_tract_county = ca_tract_county.apply(
    lambda column: column.map(lambda s: s.lower() if isinstance(s, str) else s)
)

ca_tract_county

  ca_tract_county = ca_tract_county.applymap(lambda s: s.lower() if type(s) == str else s)


Unnamed: 0,tract,countyfp,county
0,06085504321,085,santa clara
1,06085504410,085,santa clara
2,06085507003,085,santa clara
3,06085507004,085,santa clara
4,06085502204,085,santa clara
...,...,...,...
9124,06059001303,059,orange
9125,06059001304,059,orange
9126,06059001401,059,orange
9127,06013367200,013,contra costa


In [18]:
# attach the county totals to every census tract; the left join keeps all tract rows,
# so tracts whose county has no entry in flood_loss_sums get NaN in the metric columns
flood_loss_merge = ca_tract_county.merge(flood_loss_sums, how='left', on='countyfp')
flood_loss_merge

Unnamed: 0,tract,countyfp,county,total_fatalities,estimated_crop_loss_cost
0,06085504321,085,santa clara,2,5000.0
1,06085504410,085,santa clara,2,5000.0
2,06085507003,085,santa clara,2,5000.0
3,06085507004,085,santa clara,2,5000.0
4,06085502204,085,santa clara,2,5000.0
...,...,...,...,...,...
9124,06059001303,059,orange,3,722000.0
9125,06059001304,059,orange,3,722000.0
9126,06059001401,059,orange,3,722000.0
9127,06013367200,013,contra costa,0,50000.0


In [19]:
# split the merged table into one frame per metric, rename the metric column, and write each to its own csv
flood_crop_loss_metric = (
    flood_loss_merge[['tract', 'countyfp', 'estimated_crop_loss_cost']]
    .rename(columns={'estimated_crop_loss_cost':'estimated_flood_crop_loss_cost'})
)
flood_fatality_loss_metric = (
    flood_loss_merge[['tract', 'countyfp', 'total_fatalities']]
    .rename(columns={'total_fatalities':'total_flood_fatalities'})
)

# local files consumed by the upload step
flood_crop_loss_metric.to_csv('climate_flood_crop_loss_metric.csv', index=False)
flood_fatality_loss_metric.to_csv('climate_flood_fatality_loss_metric.csv', index=False)

In [23]:
@append_metadata
def flood_loss_upload(input_csv, export=False, varname=''):
    '''
    Uploads the flood loss metrics to S3 bucket. The metrics are:
    
    * number of direct flood/flash flood fatalities per county since 1996
    * estimated monetary crop damage
    
    Data for this metric was sourced from NOAA's - Storm event database:
    https://www.ncdc.noaa.gov/stormevents/

    Methods
    -------
    Relevant columns to our data metrics were isolated, various spatial options were considered, but county fips had the fewest NaN data.
    Data was isolated to include flooding and flood related storm events.
    Data was isolated to include county-type entries (CZ_TYPE 'C').
    Duplicate storm event IDs were checked for.
    Direct fatalities were used to create the total fatalities column.
    Data were grouped by county and summed to calculate final metrics.
    
    Parameters
    ----------
    input_csv: string
        csv flood loss data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI flood loss metrics to AWS
        True = will upload resulting df containing CAL CRAI flood loss metrics to AWS
    varname: string
        not used inside this function body; presumably consumed by the
        append_metadata decorator -- TODO confirm against scripts.utils.write_metadata

    Script
    ------
    climate_flood_loss.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    The local input_csv file is deleted at the end of the call whether or not it was uploaded.
    '''
    print('Data transformation: relevant columns were isolated and renamed')
    print('Data transformation: duplicate entries by event ID were dropped.')
    print('Data transformation: data was grouped by county and summed.')
    print('Data transformation: data was merged with California census tracts.') 

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
        # report the upload only on the branch that actually performs it
        print(f'{input_csv} uploaded to AWS.')
    else:
        print(f'{input_csv} not uploaded to AWS (export=False).')

    # clean up the local copy (unconditional, as before)
    if os.path.exists(input_csv):
        os.remove(input_csv)

In [24]:
# local metric files written by the save cell, paired by position with their variable names
input_csvs = ['climate_flood_crop_loss_metric.csv',
               'climate_flood_fatality_loss_metric.csv']

varnames = ['climate_noaa_flood_crop_cost',
            'climate_noaa_flood_fatalities']

# Process the data and export
for input_csv, varname in zip(input_csvs, varnames):
    print(f'Processing {input_csv} with varname {varname}')
    # pass the paired varname through so the call matches what the progress messages report
    flood_loss_upload(input_csv, export=True, varname=varname)
    print(f'Completed uploading {input_csv} with varname {varname}')

Processing climate_flood_crop_loss_metric.csv with varname climate_noaa_flood_crop_cost
Completed uploading climate_flood_crop_loss_metric.csv with varname climate_noaa_flood_crop_cost
Processing climate_flood_fatality_loss_metric.csv with varname climate_noaa_flood_fatalities
Completed uploading climate_flood_fatality_loss_metric.csv with varname climate_noaa_flood_fatalities
