# Starter API Access Pipeline
---

## Dependencies

In [601]:
# Main ETL Libraries
import pandas as pd
import requests
from time import sleep

# Formating and Display Libraries
import hvplot.pandas
from pprint import pprint

# Turn off warnings for hvplot.pandas
import warnings
warnings.filterwarnings('ignore')

# File Library
from pathlib import Path

# Personal API Key File (Please use your own or comment out)!
import api_keys

In [602]:
fema_decl = pd.read_csv('Assets/CSVs/DisasterDeclarationsSummaries.csv', low_memory = False)
fema_decl.head()

Unnamed: 0,femaDeclarationString,disasterNumber,state,declarationType,declarationDate,fyDeclared,incidentType,declarationTitle,ihProgramDeclared,iaProgramDeclared,...,placeCode,designatedArea,declarationRequestNumber,lastIAFilingDate,incidentId,region,designatedIncidentTypes,lastRefresh,hash,id
0,FM-5530-NV,5530,NV,FM,2024-08-12T00:00:00.000Z,2024,Fire,GOLD RANCH FIRE,0,0,...,99031,Washoe (County),24123,,2024081201,9,R,2024-08-27T18:22:14.800Z,5d07e7c51bb300bfbec94a699a1e1ab1d61a97cd,f15a7a79-f1c3-41bb-8a5c-c05fbae34423
1,FM-5529-OR,5529,OR,FM,2024-08-09T00:00:00.000Z,2024,Fire,LEE FALLS FIRE,0,0,...,99067,Washington (County),24122,,2024081001,10,R,2024-08-27T18:22:14.800Z,ae87cf3c6ed795015b714af7166c7c295b2b67c7,09e3f81a-5e16-4b72-b317-1c64e0cfa59c
2,FM-5528-OR,5528,OR,FM,2024-08-06T00:00:00.000Z,2024,Fire,ELK LANE FIRE,0,0,...,99031,Jefferson (County),24116,,2024080701,10,R,2024-08-27T18:22:14.800Z,432cf0995c47e3895cea696ede5621b810460501,59983f89-30bf-4888-b21b-62e8d57d9aac
3,FM-5527-OR,5527,OR,FM,2024-08-02T00:00:00.000Z,2024,Fire,MILE MARKER 132 FIRE,0,0,...,99017,Deschutes (County),24111,,2024080301,10,R,2024-08-27T18:22:14.800Z,2f21d90cb6bc64b0d4121aa3f18d852bbb4b11fa,8d13ecf0-bc2f-496b-8c9f-b2e73da832a0
4,FM-5522-CA,5522,CA,FM,2024-07-27T00:00:00.000Z,2024,Fire,BOREL FIRE,0,0,...,99029,Kern (County),24102,,2024072701,9,R,2024-08-27T18:22:14.800Z,51ec819011c936f387edf10b9d5839b35419ca95,9b73e19b-d326-4992-8da1-7e658d97607c


In [603]:
# Removing unwanted/unusable columns
fema_decl_clean = fema_decl[
    [
        'disasterNumber', 'declarationRequestNumber', 'declarationTitle', 'incidentType', 
        'declarationType', 'designatedArea', 'state', 'region', 'declarationDate', 
        'incidentBeginDate', 'tribalRequest', 'ihProgramDeclared', 'iaProgramDeclared', 
        'paProgramDeclared', 'hmProgramDeclared', 'id'
    ]
].copy()
fema_decl_clean['declarationDate'] = fema_decl_clean[
    'declarationDate'
    ].str.slice_replace(10)
fema_decl_clean['incidentBeginDate'] = fema_decl_clean[
    'incidentBeginDate'
    ].str.slice_replace(10)
display(fema_decl_clean)

Unnamed: 0,disasterNumber,declarationRequestNumber,declarationTitle,incidentType,declarationType,designatedArea,state,region,declarationDate,incidentBeginDate,tribalRequest,ihProgramDeclared,iaProgramDeclared,paProgramDeclared,hmProgramDeclared,id
0,5530,24123,GOLD RANCH FIRE,Fire,FM,Washoe (County),NV,9,2024-08-12,2024-08-11,0,0,0,1,1,f15a7a79-f1c3-41bb-8a5c-c05fbae34423
1,5529,24122,LEE FALLS FIRE,Fire,FM,Washington (County),OR,10,2024-08-09,2024-08-08,0,0,0,1,1,09e3f81a-5e16-4b72-b317-1c64e0cfa59c
2,5528,24116,ELK LANE FIRE,Fire,FM,Jefferson (County),OR,10,2024-08-06,2024-08-04,0,0,0,1,1,59983f89-30bf-4888-b21b-62e8d57d9aac
3,5527,24111,MILE MARKER 132 FIRE,Fire,FM,Deschutes (County),OR,10,2024-08-02,2024-08-02,0,0,0,1,1,8d13ecf0-bc2f-496b-8c9f-b2e73da832a0
4,5522,24102,BOREL FIRE,Fire,FM,Kern (County),CA,9,2024-07-27,2024-07-25,0,0,0,1,1,9b73e19b-d326-4992-8da1-7e658d97607c
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67351,9,53010,FLOOD,Flood,DR,Statewide,TX,6,1953-06-19,1953-06-19,0,0,1,1,1,6ef68fa6-5889-466b-9e3b-ee4c06da0876
67352,8,53008,FLOOD,Flood,DR,Statewide,IA,7,1953-06-11,1953-06-11,0,0,1,1,1,f100b5c1-0f3f-4c3d-ad3c-50040e98ed8d
67353,7,53009,TORNADO,Tornado,DR,Statewide,MA,1,1953-06-11,1953-06-11,0,0,1,1,1,fca83efc-524a-45a0-ac98-2d43b954a5ea
67354,2,53003,TORNADO & HEAVY RAINFALL,Tornado,DR,Statewide,TX,6,1953-05-15,1953-05-15,0,0,1,1,1,ff821327-6b90-4246-b19f-fff8c4b288a8


In [604]:
# Unique FEMA Disaster Types
disaster_types = fema_decl_clean['incidentType'].unique()
pprint(disaster_types)

array(['Fire', 'Severe Storm', 'Straight-Line Winds', 'Flood',
       'Winter Storm', 'Hurricane', 'Tornado', 'Mud/Landslide',
       'Tropical Storm', 'Snowstorm', 'Earthquake', 'Coastal Storm',
       'Other', 'Severe Ice Storm', 'Biological', 'Dam/Levee Break',
       'Typhoon', 'Volcanic Eruption', 'Freezing', 'Toxic Substances',
       'Chemical', 'Terrorist', 'Drought', 'Human Cause',
       'Fishing Losses', 'Tsunami'], dtype=object)


In [605]:
# Filter for just natural disaster types
disaster_natural = pd.DataFrame(
    {
        'incidentType': [
            'Fire', 'Severe Storm', 'Straight-Line Winds', 
            'Flood', 'Winter Storm', 'Hurricane', 'Tornado',
            'Tropical Storm', 'Snowstorm', 'Coastal Storm', 
            'Severe Ice Storm', 'Typhoon', 'Freezing', 'Drought', 
            'Tsunami'
        ]
    }
)
fema_disasters = pd.merge(fema_decl_clean, disaster_natural, how = 'right', on = 'incidentType')
fema_disasters.to_csv('Assets/CSVs/femaDisasters_dirty.csv', index = False)

display(fema_disasters)

Unnamed: 0,disasterNumber,declarationRequestNumber,declarationTitle,incidentType,declarationType,designatedArea,state,region,declarationDate,incidentBeginDate,tribalRequest,ihProgramDeclared,iaProgramDeclared,paProgramDeclared,hmProgramDeclared,id
0,5530,24123,GOLD RANCH FIRE,Fire,FM,Washoe (County),NV,9,2024-08-12,2024-08-11,0,0,0,1,1,f15a7a79-f1c3-41bb-8a5c-c05fbae34423
1,5529,24122,LEE FALLS FIRE,Fire,FM,Washington (County),OR,10,2024-08-09,2024-08-08,0,0,0,1,1,09e3f81a-5e16-4b72-b317-1c64e0cfa59c
2,5528,24116,ELK LANE FIRE,Fire,FM,Jefferson (County),OR,10,2024-08-06,2024-08-04,0,0,0,1,1,59983f89-30bf-4888-b21b-62e8d57d9aac
3,5527,24111,MILE MARKER 132 FIRE,Fire,FM,Deschutes (County),OR,10,2024-08-02,2024-08-02,0,0,0,1,1,8d13ecf0-bc2f-496b-8c9f-b2e73da832a0
4,5522,24102,BOREL FIRE,Fire,FM,Kern (County),CA,9,2024-07-27,2024-07-25,0,0,0,1,1,9b73e19b-d326-4992-8da1-7e658d97607c
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58774,1967,11042,TSUNAMI WAVES,Tsunami,DR,Honolulu (County),HI,9,2011-04-08,2011-03-11,0,0,0,1,1,9ce5ad8d-f0e1-4d12-9907-a704fe2f4511
58775,1967,11042,TSUNAMI WAVES,Tsunami,DR,Maui (County),HI,9,2011-04-08,2011-03-11,0,0,0,1,1,d64d8673-10bc-4799-809b-70317d714d68
58776,1964,11029,TSUNAMI WAVE SURGE,Tsunami,DR,Coos (County),OR,10,2011-03-25,2011-03-11,0,0,0,1,1,35f729e0-73ff-42aa-9b80-8bcc901be612
58777,1964,11029,TSUNAMI WAVE SURGE,Tsunami,DR,Curry (County),OR,10,2011-03-25,2011-03-11,0,0,0,1,1,89c45999-d563-423f-a2dd-e69b96103b55


In [606]:
# Counts of declaration types 
# DR = Major Disasters, EM = Emergency Declaratation, FM = Fire Management)
# For more info see README (in progress)
fema_disasters['declarationType'].value_counts()

declarationType
DR    40878
EM    15907
FM     1994
Name: count, dtype: int64

In [607]:
# Counts of natural disaster types
fema_disasters['incidentType'].value_counts()

incidentType
Severe Storm           18399
Hurricane              13721
Flood                  11093
Fire                    3759
Snowstorm               3707
Severe Ice Storm        2942
Tornado                 1623
Drought                 1292
Tropical Storm          1047
Coastal Storm            637
Freezing                 301
Typhoon                  130
Winter Storm             117
Tsunami                    9
Straight-Line Winds        2
Name: count, dtype: int64

In [608]:
# Validating no NaNs
fema_disasters.count()

disasterNumber              58779
declarationRequestNumber    58779
declarationTitle            58779
incidentType                58779
declarationType             58779
designatedArea              58779
state                       58779
region                      58779
declarationDate             58779
incidentBeginDate           58779
tribalRequest               58779
ihProgramDeclared           58779
iaProgramDeclared           58779
paProgramDeclared           58779
hmProgramDeclared           58779
id                          58779
dtype: int64

In [609]:
# Filter: Natural disasters per State
fema_disasters['state'].value_counts()

state
TX    4779
FL    2635
KY    2554
MO    2534
LA    2398
GA    2333
OK    2305
VA    2298
NC    2109
PR    1835
MS    1777
IA    1719
KS    1607
AL    1603
TN    1532
CA    1525
AR    1472
MN    1406
NE    1336
ND    1280
IN    1279
NY    1272
SD    1181
WV    1127
PA    1118
IL    1109
OH    1100
SC    1039
WA     827
WI     725
MI     595
NJ     556
CO     531
OR     530
MT     507
ME     426
MD     399
NM     380
VT     377
MA     365
NH     300
AZ     271
ID     265
CT     245
NV     197
AK     187
UT     181
RI     106
WY      86
HI      85
VI      79
MP      67
AS      61
MH      53
DE      47
FM      31
GU      19
DC      18
PW       1
Name: count, dtype: int64

In [610]:
# Filter: Natural disasters per hit area
fema_disasters['designatedArea'].value_counts()

designatedArea
Washington (County)                           507
Jefferson (County)                            460
Jackson (County)                              450
Statewide                                     446
Franklin (County)                             439
                                             ... 
Alabama and Coushatta Indian Reservation        1
Ysleta del Sur Pueblo (Indian Reservation)      1
Nondalton (ANV/ANVSA)                           1
Kusilvak Census Area                            1
Namorik (County-equivalent)                     1
Name: count, Length: 2187, dtype: int64

In [611]:
# Filter: How far back does this data set go?
pre_y2k = fema_disasters.loc[fema_disasters['declarationDate'] < '2000', :]
display(pre_y2k.sort_values('declarationDate'))

Unnamed: 0,disasterNumber,declarationRequestNumber,declarationTitle,incidentType,declarationType,designatedArea,state,region,declarationDate,incidentBeginDate,tribalRequest,ihProgramDeclared,iaProgramDeclared,paProgramDeclared,hmProgramDeclared,id
48713,1,53013,TORNADO,Tornado,DR,Statewide,GA,4,1953-05-02,1953-05-02,0,0,1,1,1,8943dfcf-9786-4e51-8889-d62014034bb2
48712,2,53003,TORNADO & HEAVY RAINFALL,Tornado,DR,Statewide,TX,6,1953-05-15,1953-05-15,0,0,1,1,1,ff821327-6b90-4246-b19f-fff8c4b288a8
26294,3,53005,FLOOD,Flood,DR,Statewide,LA,6,1953-05-29,1953-05-29,0,0,1,1,1,cd461e08-5ac9-4e70-8507-9c7a3cbff265
47640,4,53004,TORNADO,Tornado,DR,Statewide,MI,5,1953-06-02,1953-06-02,0,0,1,1,1,53be0c04-d2ae-42fb-b070-a01b0a50b7f6
26177,5,53006,FLOODS,Flood,DR,Statewide,MT,8,1953-06-06,1953-06-06,0,0,1,1,1,4b3ed0ac-299b-49f0-80d4-9a2a6bacd5a4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2589,2287,99153,JORDAN CREEK FIRE,Fire,FM,Fayette (County),TX,6,1999-11-22,1999-11-22,0,0,0,1,0,eabe2b0e-8d7a-431c-a9cf-dbe26ab4dead
45625,1309,99152,HURRICANE LENNY MAJOR DECLARATION,Hurricane,DR,St. John (Island) (County-equivalent),VI,2,1999-11-23,1999-11-17,0,0,1,1,1,89d1e548-2eb3-413c-af44-62d36637f17d
45624,1309,99152,HURRICANE LENNY MAJOR DECLARATION,Hurricane,DR,St. Croix (Island) (County-equivalent),VI,2,1999-11-23,1999-11-17,0,0,1,1,1,4db33a9d-c8cc-4e04-bf39-a22aa71638f0
45626,1309,99152,HURRICANE LENNY MAJOR DECLARATION,Hurricane,DR,St. Thomas (Island) (County-equivalent),VI,2,1999-11-23,1999-11-17,0,0,1,1,1,141bc5e4-ee78-457f-927a-656d46262c0a


## Cleaning Area Data for Geocoding

In [612]:
fema_disasters.head(10)

Unnamed: 0,disasterNumber,declarationRequestNumber,declarationTitle,incidentType,declarationType,designatedArea,state,region,declarationDate,incidentBeginDate,tribalRequest,ihProgramDeclared,iaProgramDeclared,paProgramDeclared,hmProgramDeclared,id
0,5530,24123,GOLD RANCH FIRE,Fire,FM,Washoe (County),NV,9,2024-08-12,2024-08-11,0,0,0,1,1,f15a7a79-f1c3-41bb-8a5c-c05fbae34423
1,5529,24122,LEE FALLS FIRE,Fire,FM,Washington (County),OR,10,2024-08-09,2024-08-08,0,0,0,1,1,09e3f81a-5e16-4b72-b317-1c64e0cfa59c
2,5528,24116,ELK LANE FIRE,Fire,FM,Jefferson (County),OR,10,2024-08-06,2024-08-04,0,0,0,1,1,59983f89-30bf-4888-b21b-62e8d57d9aac
3,5527,24111,MILE MARKER 132 FIRE,Fire,FM,Deschutes (County),OR,10,2024-08-02,2024-08-02,0,0,0,1,1,8d13ecf0-bc2f-496b-8c9f-b2e73da832a0
4,5522,24102,BOREL FIRE,Fire,FM,Kern (County),CA,9,2024-07-27,2024-07-25,0,0,0,1,1,9b73e19b-d326-4992-8da1-7e658d97607c
5,5521,24098,RETREAT FIRE,Fire,FM,Yakima (County),WA,10,2024-07-26,2024-07-23,0,0,0,1,1,bc02a243-2458-4af3-84e3-b9928bd1d41d
6,5520,24099,GWEN FIRE,Fire,FM,Nez Perce Indian Reservation,ID,10,2024-07-26,2024-07-25,0,0,0,1,1,85cccdec-5e22-436f-a3e7-acd7340172cf
7,5520,24099,GWEN FIRE,Fire,FM,Latah (County),ID,10,2024-07-26,2024-07-25,0,0,0,1,1,d0e4282c-5d39-4af2-814c-2192d748f16c
8,5520,24099,GWEN FIRE,Fire,FM,Nez Perce (County),ID,10,2024-07-26,2024-07-25,0,0,0,1,1,b058f371-8e3e-49dc-bb08-aac48229a85f
9,5519,24097,PARK FIRE,Fire,FM,Butte (County),CA,9,2024-07-25,2024-07-24,0,0,0,1,1,00045754-7992-448a-a8ae-181fb748a576


In [613]:
# Splitting up area from the areaType in designatedArea
geocode_df = fema_disasters[['designatedArea', 'state', 'id']].copy()
geocode_df[['area', 'areaType']] = geocode_df['designatedArea'].str.split('(', n = 1, expand = True)
geocode_df = geocode_df.drop(axis = 0, columns = 'designatedArea')
areas = geocode_df.pop('area')
areaTypes = geocode_df.pop('areaType')
areaTypes = '(' + areaTypes
geocode_df.insert(0, 'area', areas)
geocode_df.insert(1, 'areaType', areaTypes)

In [614]:
# Regex for cleaning up text in areaType
geocode_df['areaType'] = geocode_df['areaType'].str.extract(r'\(([^)]+)\)')
geocode_df['areaType'] = geocode_df['areaType'].str.replace(r'\b(of|Also|and)\b', '', regex = True).str.strip()
display(geocode_df)

Unnamed: 0,area,areaType,state,id
0,Washoe,County,NV,f15a7a79-f1c3-41bb-8a5c-c05fbae34423
1,Washington,County,OR,09e3f81a-5e16-4b72-b317-1c64e0cfa59c
2,Jefferson,County,OR,59983f89-30bf-4888-b21b-62e8d57d9aac
3,Deschutes,County,OR,8d13ecf0-bc2f-496b-8c9f-b2e73da832a0
4,Kern,County,CA,9b73e19b-d326-4992-8da1-7e658d97607c
...,...,...,...,...
58774,Honolulu,County,HI,9ce5ad8d-f0e1-4d12-9907-a704fe2f4511
58775,Maui,County,HI,d64d8673-10bc-4799-809b-70317d714d68
58776,Coos,County,OR,35f729e0-73ff-42aa-9b80-8bcc901be612
58777,Curry,County,OR,89c45999-d563-423f-a2dd-e69b96103b55


In [615]:
geocode_df['areaType'].unique()

array(['County', nan, 'Parish', 'Borough', 'Census Area',
       'Indian Reservation', 'Municipio', 'ANV/ANVSA', 'NM',
       'County-equivalent', 'OTSA', 'NM  UT', 'Sisseton', 'Reservation',
       'District', 'Island', 'AZ  UT', 'Township',
       'Unorganized Territory', 'Police Jury Ward', 'Nett Lake', 'TDSA',
       'KS', 'Municipality', 'CCD', 'State', 'Census Subarea'],
      dtype=object)

In [616]:
# Setting up the Data Frame containing Area Types with 1
# or more states instead of classic 'County' or 'Reservation'
# --> Result is duplicated incidents but for the additional states

# Multi-state regex location in 'areaType' column
multiState_regex = r'\b[A-Z]{2}(?:\s[A-Z]{2})*\b'
states_to_split = geocode_df['areaType'].str.contains(multiState_regex, na = False)
new_rows = []

# Iterate through just the Area Types that need to be split and duplicated
for index, row in geocode_df[states_to_split].iterrows():
    states = row['areaType'].split()
    for state in states:
        new_row = row.copy()
        new_row['state'] = state
        new_rows.append(new_row)

geocode_df = pd.concat(     # Concatenate duplicated rows for multiple states to the main dataframe
    [geocode_df, pd.DataFrame(new_rows)], ignore_index = True
    ).reset_index(drop = True).copy()

In [617]:
# Set Non-Capture Regex and location rows that can have areaType set to 'Reservation'
reservation_regex = r'(?:Indian)|(?:Reservation)|(?:Tribe)|(?:Trust Lands)|(?:TDSA)'
reservation_loc = geocode_df['area'].str.contains(reservation_regex, case = False, regex = True)
geocode_df.loc[reservation_loc, 'areaType'] = 'Native Land'

In [618]:
# Nitty-Gritty edits
#-----------------------------------------------
state_dict = {
    'AK': 'Alaska', 'AL': 'Alabama', 'AR': 'Arkansas', 'AS': 'American Samoa', 'AZ': 'Arizona', 
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DC': 'District of Columbia', 
    'DE': 'Delaware', 'FL': 'Florida', 'GA': 'Georgia', 'GU': 'Guam', 'HI': 'Hawaii', 'IA': 'Iowa', 
    'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana', 'KS': 'Kansas', 'KY': 'Kentucky', 'LA': 'Louisiana', 
    'MA': 'Massachusetts', 'MD': 'Maryland', 'ME': 'Maine', 'MI': 'Michigan', 'MN': 'Minnesota', 
    'MO': 'Missouri', 'MP': 'Northern Mariana Islands', 'MS': 'Mississippi', 'MT': 'Montana', 
    'NC': 'North Carolina', 'ND': 'North Dakota', 'NE': 'Nebraska', 'NH': 'New Hampshire', 'NJ': 'New Jersey', 
    'NM': 'New Mexico', 'NV': 'Nevada', 'NY': 'New York', 'OH': 'Ohio', 'OK': 'Oklahoma', 'OR': 'Oregon', 
    'PA': 'Pennsylvania', 'PR': 'Puerto Rico', 'RI': 'Rhode Island', 'SC': 'South Carolina', 
    'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah', 'VA': 'Virginia', 
    'VI': 'U.S. Virgin Islands', 'VT': 'Vermont', 'WA': 'Washington', 'WI': 'Wisconsin', 
    'WV': 'West Virginia', 'WY': 'Wyoming'
}

# Found under Null types (NaN)
# Location Statewide listed areas, copying abbreviations from `state` column
# to the `area` column, extending them to full length names and then setting 
# the `areaType` to State
statewide_locs = (geocode_df['area'] == 'Statewide')    # Bool location
geocode_df.loc[statewide_locs, 'areaType'] = 'State'    # Set type to State
geocode_df.loc[statewide_locs, 'area'] = geocode_df.loc[statewide_locs, 'state']    # Copy State value to area
geocode_df['area'] = geocode_df['area'].replace(state_dict)   # Transform abbr states to Full name

# Using geocode_df['areaType'].unique() location type 'City  Borough'
# Only 1 entry of Wranell, AK (Alaska) -> Correct to 'Borough'
geocode_df.loc[geocode_df['areaType'] == 'City  Borough', 'areaType'] = 'Borough'

# Regional Educational Attendance Area `area` with Null `areaType` (it's the same with or without null spec)
reaa_regex = r'Regional Educational.*$'
reaa_loc = geocode_df['area'].str.contains(reaa_regex, case = False, regex = True)
geocode_df.loc[reaa_loc, 'areaType'] = 'County'
geocode_df.loc[reaa_loc, 'area'] = geocode_df.loc[reaa_loc, 'area'].str.replace(reaa_regex, '', regex = True)

In [619]:
# Organizing `areaType` into bins for granularity
areaType_bins = {
    'State': ['State', 'Territory', 'Unorganized Territory', 'Island'],
    'County': ['County', 'County-equivalent', 'Parish', 'Borough', 'Census Area'],
    'City': ['City', 'Township', 'Municipality', 'Municipio', 'Census Subarea', 'CCD'],
    'Native Land': ['Native Land', 'Reservation', 'Indian Reservation', 'ANV/ANVSA', 'OTSA', 'TDSA', 'Native Regional Corporation', 'Nett Lake'],
    'Other': ['Other', 'District', 'Police Jury Ward', 'Sisseton']
}

# Converting bins into reverse-lookup for mapping
# (Treats dictionary created with list-comprehension)
areaType_map = {
    old_type: new_type  # Specifying key:value pair setup
    for new_type, old_types in areaType_bins.items()    # Outer-iteration of type_key -> [type_val1, type_val2, ...]
    for old_type in old_types   # Inner-iteration of type in [type1, type2, type3, ...]
}

# Set mapping to DataFrame for corrections
geocode_df['areaType'] = geocode_df['areaType'].map(areaType_map, na_action = 'ignore')

# Drop null rows after filtering for null corrections
# Drop columns used for accurate location derivation
geocode_df = geocode_df.dropna()
fema_disasters = fema_disasters.drop(axis = 0, columns = ['designatedArea', 'state'])

# Finally correcting whitespace issues for file saving
geocode_df['area'] = geocode_df['area'].str.strip().str.title()
geocode_df['state'] = geocode_df['state'].str.strip()

## Geoapify API Call for Geocoding Data
FEMA has historical data from the 1960's in this set, and it's simply too big to do be able to do an API call for every distinct area, state pair
Data from 

## Geoapify API Call - Geocoding into Coordinate Pairs

In [620]:
# To prevent unnecessary API runs, only runs when location_geocode.csv doesn't exist
geocode_file = Path('Assets/CSVs/location_geocode.csv')
if not Path.exists(geocode_file):

    # Setting up key list for switching between keys for each loop
    geoapify_base = 'https://api.geoapify.com/v1/geocode/search?'
    keys = [
        api_keys.geoapify_NEEL_1,
        api_keys.geoapify_NEEL_2,
        api_keys.geoapify_NEEL_3,
        api_keys.geoapify_NEEL_4,
        api_keys.geoapify_KENDALL_1
    ]
    # Setting up DataFrame of unique areas to iterate through
    # TODO: MAKE SURE SORTING WORKED PROPERLY
    unique_loc = geocode_df[['area', 'state']].drop_duplicates().sort_values(['state', 'area'])
    api_calls = len(unique_loc)

    # Calculating sleep time and wait time by number of keys, and initializing empty dictionary
    sleep_time = (0.2 / len(keys))
    runtime_factor = 6.746798603026774  # Found from previous runs
    wait_time = api_calls * sleep_time * runtime_factor
    wait_time = round((wait_time / 60), 2)
    loc_dict = {}

    # Country Codes: fm -> Micronesia, mp -> Norhtern Mariana Islands,
    # gu -> Guam, as -> American Samoa, mh -> Marshall Islands, pr -> Puerto Rico,
    # um -> US Minor Outlying Islands, us -> USA, vi -> Virgin Islands, 
    box = {
        'lat_min': 16,
        'lat_max': 70,
        'lng_min': -168,
        'lng_max': -53
    }
    countryCodes = 'us,um,pr,vi'
    rectangle = f'{box['lng_min']},{box['lat_min']},{box['lng_max']},{box['lat_max']}'
    
    geoapify_params = {
        'filter': f'rect:{rectangle}|countrycode:{countryCodes}',
        'format': 'json'
    }
    
    print(
        'BEGIN GEOAPIFY API FETCH:\n' +
        f'Locating {api_calls} unique places...\n' + 
        f'Est. wait of {wait_time} minutes...\n' + 
        '*************************************\n'
        )
    
    for i, (_, row) in enumerate(unique_loc.iterrows()):
        area = row['area']
        state = row['state']
        geoapify_params = {     # Setting key parameter by iteration number to reduce rate limit
            'apiKey': keys[i % len(keys)],
            'text': f'{area}, {state}'
        }
        try:
            geo_response = requests.get(geoapify_base, geoapify_params).json()
            if geo_response['features']:
                lng, lat = geo_response['features'][0]['geometry']['coordinates']
                # TODO: CHECK THIS OUT TOO PLEASE
                loc_dict[(area, state)] = list(lat, lng)
                print(f'Fetching {area}, {state} -> ({lat}, {lng})')
            else:
                print(f'No result for: {area}, {state}')
        except Exception as e:
            print(f'Error return for {area}, {state}: {e}')
        sleep(sleep_time)   # Respecting rate limiting by number of keys

    print(
        '\n*************************************\n' + 
        f'{len(loc_dict)} locations geocoded successfully!\n' +
        'END FROM GEOAPIFY API FETCH\n'
        )
    # TODO: MAKE SURE THIS WORKS!!! Originally was applied to new column 'coords'
    geocode_df[['latitude', 'longitude']] = geocode_df.apply(
        lambda row: loc_dict.get((row['area'], row['state'])), axis = 1
    )
    geocode_df.to_csv(geocode_file, index = False)
else:
    print(
        '*****************\n' +
        'API CALL SKIPPED!\n' +
        '*****************\n' +
        'File created from combined API results and DataFrame already exists.\n' +
        f'In order to run API call, please delete {geocode_file.name} and try again.'
        )
    geocode_df = pd.read_csv(geocode_file)

*****************
API CALL SKIPPED!
*****************
File created from combined API results and DataFrame already exists.
In order to run API call, please delete location_geocode.csv and try again.


In [621]:
# Displaying newly retrieved data
geocode_df.head()

Unnamed: 0,area,areaType,state,id,coords
0,Washoe,County,NV,f15a7a79-f1c3-41bb-8a5c-c05fbae34423,"(40.5849048, -119.6131606)"
1,Washington,County,OR,09e3f81a-5e16-4b72-b317-1c64e0cfa59c,"(45.5601883, -123.0587907)"
2,Jefferson,County,OR,59983f89-30bf-4888-b21b-62e8d57d9aac,"(44.7224335, -123.007389)"
3,Deschutes,County,OR,8d13ecf0-bc2f-496b-8c9f-b2e73da832a0,"(44.1569232, -121.2586995)"
4,Kern,County,CA,9b73e19b-d326-4992-8da1-7e658d97607c,"(35.3145701, -118.753822)"


In [622]:
# Merge with DataFrame -> Work from fema_disasters now
# Drop id's and Nulls of any kind, as result will be rows with coordinates to them
fema_disasters = fema_disasters.merge(geocode_df, how = 'left', on = 'id').copy()
fema_disasters = fema_disasters[['disasterNumber', 'declarationRequestNumber', 'declarationTitle', 
                                'area', 'areaType', 'state', 'incidentType', 'declarationType', 
                                'declarationDate', 'incidentBeginDate', 'id', 'coords']]
fema_disasters = fema_disasters.drop(axis = 0, columns = 'id')
fema_disasters = fema_disasters.dropna(how = 'any')
# fema_disasters['latitude'] = fema_disasters['latitude'].astype(float)
# fema_disasters['longitude'] = fema_disasters['longitude'].astype(float)
# fema_disasters['declarationDate'] = fema_disasters['declarationDate'].astype('datetime64[ns]')
# fema_disasters['incidentBeginDate'] = fema_disasters['incidentBeginDate'].astype('datetime64[ns]')

# fema_disasters.to_csv('Assets/CSVs/femaDisasters_clean.csv', index = False)

In [623]:
# TODO: REMOVE THIS WHEN COORDINATE SPLITTING WORKS IN API LOOP
fema_disasters[['latitude', 'longitude']] = fema_disasters['coords'].str.lstrip('(').str.rstrip(')').str.split(', ', n = 1, expand = True)
fema_disasters = fema_disasters.drop(axis = 0, columns = 'coords')
fema_disasters['latitude'] = fema_disasters['latitude'].astype(float)
fema_disasters['longitude'] = fema_disasters['longitude'].astype(float)
fema_disasters['declarationDate'] = fema_disasters['declarationDate'].astype('datetime64[ns]')
fema_disasters['incidentBeginDate'] = fema_disasters['incidentBeginDate'].astype('datetime64[ns]')
fema_disasters.to_csv('Assets/CSVs/femaDisasters_clean.csv', index = False)

## Creating Tabular Summary of Disaster Numbers

Please note the unique structure of the disaster sequencing (due to a numbering system that originated in the 1950's-1970's):

0001-1999 Major Disaster Declaration

2000-2999 Fire Management

3000-3999 Emergency Declaration (Special Emergency)

4000-4999 Major Disaster Declaration

5000-5999 Fire Management

In [624]:
# Grouping to find summaries by disaster number
disaster_byNumber = fema_disasters.groupby('disasterNumber')

titles_byDisaster = disaster_byNumber['declarationTitle'].first()
incidents_byDisaster = disaster_byNumber['incidentType'].first()

declRequest_byDisaster_NUM = disaster_byNumber['declarationRequestNumber'].count()
declRequest_byDisaster = disaster_byNumber['declarationRequestNumber'].min().astype(str)

declDate_byDisaster_MIN = disaster_byNumber['declarationDate'].min()
declDate_byDisaster_MAX = disaster_byNumber['declarationDate'].max()

incDate_byDisaster_MIN = disaster_byNumber['incidentBeginDate'].min()
incDate_byDisaster_MAX = disaster_byNumber['incidentBeginDate'].max()

lat_byDisaster = disaster_byNumber['latitude'].mean()
lng_byDisaster = disaster_byNumber['longitude'].mean()

disaster_summary = pd.DataFrame(
    {
        'Title': titles_byDisaster,
        'Number of Requests': declRequest_byDisaster_NUM,
        'Declaration Request Number': declRequest_byDisaster,
        'Incident Type': incidents_byDisaster,
        'Incident Begin Date (First)': incDate_byDisaster_MIN,
        'Incident Begin Date (Last)': incDate_byDisaster_MAX,
        'Declaration Date (First)': declDate_byDisaster_MIN,
        'Declaration Date (Last)': declDate_byDisaster_MAX,
        'Latitude': lat_byDisaster,
        'Longitude': lng_byDisaster
    }
)
disaster_summary.index.names = ['Disaster Number']
disaster_summary.to_csv('Assets/CSVs/disasters_summary.csv')

In [648]:
end = ((disaster_summary['Incident Begin Date (Last)'].dt.year & disaster_summary['Declaration Date (Last)'].dt.year) <= )
disaster_summary.loc[end].sort_values('Declaration Date (Last)', ascending = False)

Unnamed: 0_level_0,Title,Number of Requests,Declaration Request Number,Incident Type,Incident Begin Date (First),Incident Begin Date (Last),Declaration Date (First),Declaration Date (Last),Latitude,Longitude
Disaster Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
4259,SEVERE STORMS AND FLOODING,34,16016,Severe Storm,2015-12-22,2015-12-22,2016-02-26,2016-02-26,31.091413,-77.294508
4258,"SEVERE WINTER STORMS, STRAIGHT-LINE WINDS, FLO...",14,16015,Severe Storm,2015-12-06,2015-12-06,2016-02-17,2016-02-17,42.557584,-105.324483
4257,SEVERE STORM,1,16014,Severe Storm,2015-12-12,2015-12-12,2016-02-17,2016-02-17,51.567222,178.877600
4256,SEVERE WINTER STORMS AND FLOODING,42,16012,Severe Storm,2015-12-26,2015-12-26,2016-02-10,2016-02-10,36.515604,-92.337483
4255,"SEVERE WINTER STORMS, TORNADOES, STRAIGHT-LINE...",51,16009,Severe Storm,2015-12-26,2015-12-26,2016-02-09,2016-02-09,32.687898,-98.151299
...,...,...,...,...,...,...,...,...,...,...
5,FLOODS,1,53006,Flood,1953-06-06,1953-06-06,1953-06-06,1953-06-06,46.540855,-111.946345
4,TORNADO,1,53004,Tornado,1953-06-02,1953-06-02,1953-06-02,1953-06-02,42.233092,-84.327177
3,FLOOD,1,53005,Flood,1953-05-29,1953-05-29,1953-05-29,1953-05-29,6.433931,-10.704273
2,TORNADO & HEAVY RAINFALL,1,53003,Tornado,1953-05-15,1953-05-15,1953-05-15,1953-05-15,29.396013,-94.917548


In [634]:
start_filter = ((disaster_summary['Incident Begin Date (First)'].dt.year & disaster_summary['Declaration Date (First)'].dt.year) >= 2000)
disaster_summary.loc[start_filter].sort_values('Declaration Date (First)')

Unnamed: 0_level_0,Title,Number of Requests,Declaration Request Number,Incident Type,Incident Begin Date (First),Incident Begin Date (Last),Declaration Date (First),Declaration Date (Last),Latitude,Longitude
Disaster Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2289,SADDLEBACK FIRE,1,3,Fire,2000-01-03,2000-01-03,2000-01-03,2000-01-03,32.513471,-96.035248
2290,PURGATORY FIRE,1,4,Fire,2000-01-04,2000-01-04,2000-01-04,2000-01-04,29.797970,-98.269079
2291,BOB'S TRAIL FIRE,1,5,Fire,2000-01-05,2000-01-05,2000-01-05,2000-01-05,30.110495,-97.315270
1310,"TORNADOES, SEVERE STORMS, TORRENTIAL RAINS, AN...",11,7,Tornado,2000-01-03,2000-01-03,2000-01-10,2000-01-10,37.938882,-85.580858
1311,SEVERE WINTER STORM,51,10,Severe Storm,2000-01-22,2000-01-22,2000-01-28,2000-01-28,33.367541,-83.935210
...,...,...,...,...,...,...,...,...,...,...
4853,SEVERE STORM AND FLOODING,1,24196,Severe Storm,2024-08-16,2024-08-16,2025-01-01,2025-01-01,59.936343,-164.040926
4855,"SEVERE STORMS, TORNADOES, STRAIGHT-LINE WINDS,...",14,24201,Severe Storm,2024-11-03,2024-11-03,2025-01-01,2025-01-01,37.631092,-92.073829
5549,PALISADES FIRE,1,25002,Fire,2025-01-07,2025-01-07,2025-01-07,2025-01-07,34.053691,-118.242766
5550,EATON FIRE,1,25003,Fire,2025-01-07,2025-01-07,2025-01-08,2025-01-08,34.053691,-118.242766


In [464]:
incident = ['Fire', 'Hurricane']
incident_filter = disaster_summary['Incident Type'].isin(incident)
disaster_summary.loc[incident_filter]

Unnamed: 0_level_0,Title,Number of Requests,Declaration Request Number,Incident Type,Incident Begin Date (First),Incident Begin Date (Last),Declaration Date (First),Declaration Date (Last),Latitude,Longitude
Disaster Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
11,FOREST FIRE,1,53011,Fire,1953-07-02,1953-07-02,1953-07-02,1953-07-02,43.644386,-71.586283
22,HURRICANES,1,54008,Hurricane,1954-09-02,1954-09-02,1954-09-02,1954-09-02,42.358752,-71.531484
23,HURRICANES,1,54009,Hurricane,1954-09-02,1954-09-02,1954-09-02,1954-09-02,27.207152,-80.826073
24,HURRICANES,1,54011,Hurricane,1954-09-13,1954-09-13,1954-09-13,1954-09-13,46.820043,-67.923079
25,HURRICANES,1,54013,Hurricane,1954-09-17,1954-09-17,1954-09-17,1954-09-17,41.843706,-73.329285
...,...,...,...,...,...,...,...,...,...,...
5547,JENNINGS CREEK FIRE,1,24199,Fire,2024-11-08,2024-11-08,2024-11-15,2024-11-15,41.539816,-74.098199
5548,FRANKLIN FIRE,1,24204,Fire,2024-12-09,2024-12-09,2024-12-10,2024-12-10,34.053691,-118.242766
5549,PALISADES FIRE,1,25002,Fire,2025-01-07,2025-01-07,2025-01-07,2025-01-07,34.053691,-118.242766
5550,EATON FIRE,1,25003,Fire,2025-01-07,2025-01-07,2025-01-08,2025-01-08,34.053691,-118.242766


In [465]:
def create_maps(df, incident = None, startYear = None, endYear = None, font_scale = 1.5):
    box = {
        'lat_min': 16,
        'lat_max': 70,
        'lng_min': -168,
        'lng_max': -53
    }
    filters = []
    if incident:
        incident = list(incident)
        if len(incident) > 1:
            incident_filter = df['Incident Type'].isin(incident)
        else:
            incident_filter = (df['Incident Type'] == incident)
        filters.append(incident_filter)
    if startYear:
        start_filter = ((df['Incident Begin Date (First)'] & df['Declaration Date (First)']) >= startYear)
        filters.append(start_filter)
    if endYear:
        end_filter = ((df['Incident Begin Date (Last)'] & df['Declaration Date (Last)']) <= endYear)
        filters.append(end_filter)
    
    for filter in filters:
        df = df.loc[filter, :]

    xLimit = [box['lng_min'], box['lng_max']]
    yLimit = [box['lat_min'], box['lat_max']]

    map = df.hvplot.points(
        'Longitude',
        'Latitude',
        geo = True, tiles = 'OSM',  # Map format
        size = 'Number of Requests', scale = 1,     # Dot size alg. and scale
        color = 'Incident Type', alpha = 0.5,       # Color by incident type and 50% opacity
        title = 'FEMA Disaster Declaration Summary Mapping', fontscale = font_scale,
        hover_cols = ['Title', 'Disaster Number', 'Declaration Request Number'],
        xlim = xLimit, ylim = yLimit,       # Set x-y limits to ensure no incorrect data is listed
        responsive = True
    )
    return map

incident_question = (
    'Please list the types of incidents you\'d like to map.\n' +
    'Leave blank to show all: '
)
start_question = (
    'Please list the 4 digit year you want to start with.\n' +
    'Leave blank to show all: '
)
end_question = (
    'Please list the 4 digit year you want to end with.\n' +
    'Leave blank to show all: '
)

incidents = input(incident_question)
startingYear = input(start_question)
endingYear = input(end_question)

fema_map = create_maps(disaster_summary, incident_question, start_question, end_question)

TypeError: unsupported operand type(s) for &: 'str' and 'str'

In [None]:
fema_map