# Exercise 1 - Hazard Data

Generate hazard module from source data

...








### Import python libraries

In [None]:
# standard python libraries
import requests
import re
import os
from math import sin, cos, sqrt, atan2, radians

# non-standard python libraries
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd

from shapely.geometry import Point
from geopandas import GeoDataFrame

# constants

#import geopy.distance
#km_nm_factor = 1.852
#earth_radius = 6373.0


### Download event data from hurdat

In [None]:
# get data from URL and write to file
raw_fn = 'source_data/raw_hurdat_data.txt'
url = 'https://www.nhc.noaa.gov/data/hurdat/hurdat2-1851-2020-052921.txt'

# fail fast on a hung connection or an HTTP error response instead of
# writing an error page to disk and parsing it as track data
response = requests.get(url, timeout=60)
response.raise_for_status()

# make sure the target folder exists before writing into it
os.makedirs(os.path.dirname(raw_fn), exist_ok=True)
with open(raw_fn, 'w') as fw:
    fw.write(response.text)
    
# create an empty list to temporarily store the parsed track records
data = []

# 0-based, end-exclusive character positions of the fixed-width fields on a
# data line, listed in output column order
data_line_fields = [
    slice(0, 8),     # date
    slice(10, 14),   # time
    slice(16, 17),   # record_id
    slice(19, 21),   # system_status
    slice(23, 27),   # latitude
    slice(27, 28),   # lat_hem
    slice(30, 35),   # longitude
    slice(35, 36),   # lon_hem
    slice(38, 41),   # max_windspeed: 3 characters, so 100+ and -99 are kept whole
    slice(43, 47),   # min_pressure
]
# twelve wind radii (34/50/64 kt, each NE/SE/SW/NW): 4 characters wide,
# one field every 6 characters starting at position 49
data_line_fields += [slice(start, start + 4) for start in range(49, 120, 6)]

# loop through downloaded data and parse by row
with open(raw_fn, 'r') as fr:
    for row in fr:
        if not row.strip():
            # tolerate blank lines (e.g. a trailing newline at end of file)
            continue
        if row[0:2] == 'AL':
            # header line: its values apply to every data line that follows,
            # until the next header line
            event_id = row[0:8]
            event_name = row[18:28]   # full 10-character name field
            records = row[33:36]      # full 3-character record count
        else:
            track_fields = [row[field] for field in data_line_fields]
            data.append([event_id, event_name, records] + track_fields)
    
# create a pandas dataframe with the list data
cols = ['id','name','records','date','time','record_id','system_status','latitude',
        'lat_hem','longitude','lon_hem','max_windspeed','min_pressure',
        'radii_34kt_ne','radii_34kt_se','radii_34kt_sw','radii_34kt_nw',
        'radii_50kt_ne','radii_50kt_se','radii_50kt_sw','radii_50kt_nw',
        'radii_64kt_ne','radii_64kt_se','radii_64kt_sw','radii_64kt_nw']

df_data = pd.DataFrame(data=data, columns=cols)

# every field is parsed as text; convert the numeric columns
dtypes = {'id':str,'name':str,'records':int,'date':int,'time':int,
          'record_id':str,'system_status':str,'latitude':float,'lat_hem':str,
          'longitude':float,'lon_hem':str,'max_windspeed':int,'min_pressure':int,
          'radii_34kt_ne':int,'radii_34kt_se':int,'radii_34kt_sw':int,'radii_34kt_nw':int,
          'radii_50kt_ne':int,'radii_50kt_se':int,'radii_50kt_sw':int,'radii_50kt_nw':int,
          'radii_64kt_ne':int,'radii_64kt_se':int,'radii_64kt_sw':int,'radii_64kt_nw':int}

df_data = df_data.astype(dtypes)

# strip the padding around the storm name
df_data['name'] = df_data['name'].str.strip()

# convert to signed coordinates using the hemisphere flags: only southern
# latitudes and western longitudes become negative (eastern longitudes
# must stay positive)
df_data.loc[df_data['lat_hem'] == 'S', 'latitude'] *= -1
df_data.loc[df_data['lon_hem'] == 'W', 'longitude'] *= -1

# write the dataframe out to csv
formatted_fn = 'source_data/formatted_hurdat_data.csv'
df_data.to_csv(formatted_fn, index=False)

df_data

### Inspect the data for Hurricane HARVEY

In [None]:
# optional: reload the formatted data instead of re-running the download
# df_data = pd.read_csv('source_data/formatted_hurdat_data.csv')

# first 20 track records of event AL092017
df_data.loc[df_data['id'].eq('AL092017')].head(20)

## Get model area grid

In [None]:
# keep only the track records dated on or after 1 January 2000
df_data_in_period = df_data.loc[df_data['date'].ge(20000101)]

df_data_in_period

In [None]:
# one row per distinct storm id, in order of first appearance
df_events = df_data_in_period[['id']].drop_duplicates().reset_index(drop=True)

# number the events sequentially, starting at 1
df_events['event_id'] = df_events.index + 1

df_events

In [None]:
# attach the numeric event_id to every track record via the storm id
df_data_in_period = pd.merge(df_data_in_period, df_events, on='id')

In [None]:
# load the model grid points from the source data folder
df_grid = pd.read_csv(filepath_or_buffer='source_data/us_grid.csv')

df_grid

In [None]:
# show grid
# build one shapely Point per grid row, ordered as (longitude, latitude)
geometry = [Point(xy) for xy in zip(df_grid['longitude'], df_grid['latitude'])]
gdf = GeoDataFrame(df_grid, geometry=geometry)   

#this is a simple map that comes with geopandas
# NOTE(review): gpd.datasets appears to be deprecated/removed in recent
# GeoPandas releases - confirm against the installed version
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# draw the grid points as small red markers on top of the world map
gdf.plot(ax=world.plot(figsize=(20, 12)), marker='o', color='red', markersize=1);

In [None]:
# create cartesian indices: number the distinct latitudes and longitudes
# from 1, in order of first appearance in the grid
df_x = df_grid[['latitude']].drop_duplicates().reset_index(drop=True)
df_x['x_index'] = df_x.index + 1

df_y = df_grid[['longitude']].drop_duplicates().reset_index(drop=True)
df_y['y_index'] = df_y.index + 1

# attach both indices to every grid point
df_grid_coord = df_grid.merge(df_x, on='latitude').merge(df_y, on='longitude')

# create area peril id for grid (sequential, starting at 1)
df_grid_coord['areaperil_id'] = df_grid_coord.index + 1

# drop the plotting geometry if the grid carries one; errors='ignore' keeps
# this cell working when the plotting cell has not added that column
df_grid_coord = df_grid_coord.drop(columns='geometry', errors='ignore')

df_grid_coord

In [None]:
# find the areaperil cell of each track point: inner join on the exact
# coordinates, so only track points that coincide with a grid point are kept
df_events_ap = pd.merge(df_data_in_period, df_grid_coord, on=['latitude', 'longitude'])

df_events_ap

In [None]:
# remove records below the windspeed threshold
v_thresh = 45
df_events_thresh = df_events_ap.loc[df_events_ap['max_windspeed'].ge(v_thresh)]

df_events_thresh

In [None]:
# distinct (id, name) pairs that still have records after thresholding
df_events_thresh.drop_duplicates(subset=['id', 'name'])[['id', 'name']]

In [None]:
# number each distinct windspeed, lowest first, starting at 1
df_intensity = (
    df_events_thresh['max_windspeed']
    .drop_duplicates()
    .sort_values()
    .reset_index(drop=True)
    .to_frame()
)
df_intensity.insert(0, 'intensity_bin_index', df_intensity.index + 1)

df_intensity

In [None]:
# attach the intensity bin to every above-threshold record, in date/time order
df_events_intensity = pd.merge(
    df_events_thresh, df_intensity, on='max_windspeed'
).sort_values(['date', 'time'])
df_events_intensity

In [None]:
# attach the intensity bin to every above-threshold record, in date/time order
df_events_intensity = df_events_thresh.merge(df_intensity,on='max_windspeed').sort_values(by=['date','time'])

# generate intensity values per event & areaperil: one footprint row per
# track record, taken straight from the columns instead of looping over rows
# TODO: extend the footprint to neighbouring cells using the wind radii
df_footprint = (
    df_events_intensity[['event_id', 'areaperil_id', 'intensity_bin_index']]
    .rename(columns={'areaperil_id': 'area_peril_id'})
    .astype('int')
    .reset_index(drop=True)
)
df_footprint['probability'] = 1

df_footprint
    

In [None]:
# write model files out, one csv per table, without the dataframe index
df_events.loc[:, ['event_id', 'id']].to_csv(path_or_buf='model_data/events.csv', index=False)
df_footprint.to_csv(path_or_buf='model_data/footprint.csv', index=False)
df_intensity.to_csv(path_or_buf='model_data/intensity_bin_dict.csv', index=False)

In [None]:
# write keys data out
# df_grid_coord names its id column 'areaperil_id'; rename it so that the
# 'area_peril_id' column selected for the export actually exists
df_areaperil_dict = df_grid_coord.rename(columns={'areaperil_id': 'area_peril_id'})
df_areaperil_dict.loc[:, ['area_peril_id', 'latitude', 'longitude']].to_csv(
    'keys_data/areaperil_dict.csv', index=False
)

In [None]:
# Pair every track point of one event with the nearby grid cells and compute
# the distance between each pair.
# NOTE: get_distance is defined in a later cell of this notebook; run that
# cell before this one.

# constant helper column used as the cross-join key
df_events['key']=1
df_grid['key']=1

lon_lat_tollerance = 1

e_id = 'AL092017'

# df_events only holds 'id' and 'event_id'; the coordinates live in the track
# data, so the event's track points are taken from df_data_in_period
df_track_e = df_data_in_period[df_data_in_period['id']==e_id].assign(key=1)

# bounding box around the track, widened by the tolerance (degrees)
min_lat = df_track_e['latitude'].min() - lon_lat_tollerance
max_lat = df_track_e['latitude'].max() + lon_lat_tollerance
min_lon = df_track_e['longitude'].min() - lon_lat_tollerance
max_lon = df_track_e['longitude'].max() + lon_lat_tollerance

print(min_lat,max_lat,min_lon,max_lon)

# track longitudes are already signed, so the box is applied directly
# NOTE(review): assumes the grid file uses the same signed-degree
# convention as the track data - confirm against us_grid.csv
df_grid_e = df_grid[
    (df_grid['latitude']>=min_lat) &
    (df_grid['latitude']<=max_lat) &
    (df_grid['longitude']>=min_lon) &
    (df_grid['longitude']<=max_lon)
].rename(columns={'latitude':'grid_latitude','longitude':'grid_longitude'})

# cross join: every track point against every grid cell inside the box
df_e = df_track_e.merge(df_grid_e, on='key')

df_e['distance'] = df_e.apply(lambda x: get_distance(
    x['latitude'],
    x['longitude'],
    x['grid_latitude'],
    x['grid_longitude']),axis=1)

df_e

In [None]:
# list the track-point / grid-cell pairs, nearest first
df_e.sort_values('distance', ascending=True)

In [None]:
# sample call: two points on the same latitude, 0.1 degrees of longitude apart
get_distance(lat1=44.95, lon1=-67.25, lat2=44.95, lon2=-67.15)

In [None]:
# largest 34 kt north-west radius on record; the radii columns live in
# df_data (df_events only holds 'id' and 'event_id')
df_data['radii_34kt_nw'].astype('float').max()

In [None]:
# records whose 34 kt north-west radius equals 600; the column lives in
# df_data and is integer-typed there, so compare against a number
# (only the last expression of a notebook cell is displayed)
df_data[df_data['radii_34kt_nw'] == 600]
df_data[df_data['id'].str.strip()=='AL012016'].head(42)

In [None]:
# last 50 distinct events in the modelled period; the storm name lives in
# the track data (df_events only holds 'id' and 'event_id')
df_data_in_period[['id','name']].drop_duplicates().tail(50)

In [None]:
# create function to calculate distance between two lat-lon points

def get_distance(lat1, lon1, lat2, lon2, earth_radius=6373.0):
    """Return the great-circle distance between two points (haversine formula).

    lat1, lon1   -- latitude / longitude of the first point, signed decimal degrees
    lat2, lon2   -- latitude / longitude of the second point, signed decimal degrees
    earth_radius -- radius of the sphere; the result is in the same unit
                    (default 6373.0, i.e. kilometres)
    """
    # keep the signs: taking abs() would fold the hemispheres onto each other
    # and collapse distances across the equator or the 0/180 degree meridians
    lat1_r = radians(lat1)
    lon1_r = radians(lon1)
    lat2_r = radians(lat2)
    lon2_r = radians(lon2)

    dlon = lon2_r - lon1_r
    dlat = lat2_r - lat1_r

    a = sin(dlat / 2)**2 + cos(lat1_r) * cos(lat2_r) * sin(dlon / 2)**2
    # rounding can push a marginally outside [0, 1] for coincident or
    # antipodal points, which would make a square root fail
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return earth_radius * c