# Wildfire Database Project

In [2]:
import pandas as pd

In [3]:
# Locations of the raw input CSVs, relative to the notebook's working directory.
economic_file, number_file, fema_file = (
    "raw_data/economic_data/economic-damage-from-natural-disasters.csv",
    "raw_data/economic_data/number-of-natural-disaster-events.csv",
    "raw_data/fema_data/database.csv",
)

## Natural Disaster Economic Database

In [4]:
# Load the raw economic-damage CSV and preview its first rows.
# No filtering or column clean-up happens in this cell.
economic_df = pd.read_csv(filepath_or_buffer=economic_file)
economic_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Total economic damage from natural disasters (US$)
0,All natural disasters,,1900,30000000
1,All natural disasters,,1901,0
2,All natural disasters,,1902,0
3,All natural disasters,,1903,480000000
4,All natural disasters,,1904,0


In [5]:
# Rebuild the wildfire-only cost table straight from the raw CSV:
#   1. give the damage column a short, Postgres-friendly name,
#   2. keep only the rows whose Entity is 'Wildfire',
#   3. discard the Code and Entity columns.
damage_column = 'Total economic damage from natural disasters (US$)'
economic_df = (
    pd.read_csv(economic_file)
    .rename(columns={damage_column: 'total_wildfire_cost'})
    .loc[lambda frame: frame['Entity'] == 'Wildfire']
    .drop(columns=['Code', 'Entity'])
)
economic_df.head()

Unnamed: 0,Year,total_wildfire_cost
517,1918,100000000
518,1922,8000000
519,1947,30000000
520,1965,1000000
521,1967,68500000


In [6]:
# Optional check: uncomment the next line to see summary statistics for economic_df.
# economic_df.describe()

In [7]:
# Load the raw disaster-count CSV and preview its first rows.
# No filtering or column clean-up happens in this cell.
number_df = pd.read_csv(filepath_or_buffer=number_file)
number_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Number of reported natural disasters (reported disasters)
0,All natural disasters,,1900,5
1,All natural disasters,,1901,2
2,All natural disasters,,1902,9
3,All natural disasters,,1903,8
4,All natural disasters,,1904,2


In [8]:
# Build the wildfire-only event-count table.
# The CSV is re-read here (the same way the wildfire cost cell re-reads its
# CSV) so that this cell is idempotent: it always starts from the raw columns
# and can be re-run without first re-running the load cell.
count_column = 'Number of reported natural disasters (reported disasters)'
number_df = (
    pd.read_csv(number_file)
    # shorten the bulky column name for postgres
    .rename(columns={count_column: 'total_wildfire'})
    # keep only the rows whose Entity is 'Wildfire'
    .loc[lambda frame: frame['Entity'] == 'Wildfire']
    # Entity is constant after the filter and Code is not needed
    .drop(columns=['Code', 'Entity'])
)
number_df

Unnamed: 0,Year,total_wildfire
766,1911,1
767,1918,1
768,1922,1
769,1929,1
770,1939,1
...,...,...
823,2014,4
824,2015,12
825,2016,10
826,2017,13


In [9]:
# Combine the yearly cost and yearly event-count tables. The outer join on
# "Year" keeps years present in only one table (the other side becomes NaN),
# and the result is ordered by year.
annual_data = economic_df.merge(number_df, how="outer", on="Year").sort_values(by="Year")
# Renumber the rows from 0 after sorting; the old index is discarded.
final_annual_data = annual_data.reset_index(drop=True)
final_annual_data

Unnamed: 0,Year,total_wildfire_cost,total_wildfire
0,1911,,1
1,1918,1.000000e+08,1
2,1922,8.000000e+06,1
3,1929,,1
4,1939,,1
...,...,...,...
57,2014,2.590000e+08,4
58,2015,3.439820e+09,12
59,2016,6.287000e+09,10
60,2017,1.019000e+09,13


## FEMA Natural Disaster Dataset

In [20]:
# Load the complete FEMA disaster-declaration CSV and display it for a first look.
fema_df = pd.read_csv(filepath_or_buffer=fema_file)
fema_df

Unnamed: 0,Declaration Number,Declaration Type,Declaration Date,State,County,Disaster Type,Disaster Title,Start Date,End Date,Close Date,Individual Assistance Program,Individuals & Households Program,Public Assistance Program,Hazard Mitigation Program
0,DR-1,Disaster,05/02/1953,GA,,Tornado,Tornado,05/02/1953,05/02/1953,06/01/1954,Yes,No,Yes,Yes
1,DR-2,Disaster,05/15/1953,TX,,Tornado,Tornado and Heavy Rainfall,05/15/1953,05/15/1953,01/01/1958,Yes,No,Yes,Yes
2,DR-3,Disaster,05/29/1953,LA,,Flood,Flood,05/29/1953,05/29/1953,02/01/1960,Yes,No,Yes,Yes
3,DR-4,Disaster,06/02/1953,MI,,Tornado,Tornado,06/02/1953,06/02/1953,02/01/1956,Yes,No,Yes,Yes
4,DR-5,Disaster,06/06/1953,MT,,Flood,Floods,06/06/1953,06/06/1953,12/01/1955,Yes,No,Yes,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46180,DR-4301,Disaster,02/14/2017,CA,Sutter County,Storm,"Severe Winter Storms, Flooding, and Mudslides",01/03/2017,01/12/2017,,No,No,Yes,Yes
46181,DR-4301,Disaster,02/14/2017,CA,Trinity County,Storm,"Severe Winter Storms, Flooding, and Mudslides",01/03/2017,01/12/2017,,No,No,Yes,Yes
46182,DR-4301,Disaster,02/14/2017,CA,Tuolumne County,Storm,"Severe Winter Storms, Flooding, and Mudslides",01/03/2017,01/12/2017,,No,No,Yes,Yes
46183,DR-4301,Disaster,02/14/2017,CA,Yolo County,Storm,"Severe Winter Storms, Flooding, and Mudslides",01/03/2017,01/12/2017,,No,No,Yes,Yes


In [29]:
# Reload the FEMA data and list the distinct values of 'Declaration Type'.
# The filter below was an experiment and is intentionally left disabled:
# fema_df = fema_df.loc[fema_df['Declaration Type'] == 'Fire']
fema_df = pd.read_csv(filepath_or_buffer=fema_file)
declaration_types = fema_df['Declaration Type']
declaration_types.unique()

array(['Disaster', 'Emergency', 'Fire'], dtype=object)

In [10]:
# Build the cleaned, fire-only FEMA table from the raw CSV.
fema_df = pd.read_csv(fema_file)
# keep only the rows whose 'Disaster Type' is 'Fire'
fema_df = fema_df.loc[fema_df['Disaster Type'] == 'Fire']
# drop the identifier, type/title and assistance-program columns; only the
# dates and the location columns are kept
fema_df = fema_df.drop(columns=['Declaration Number', 'Declaration Type',
                                'Disaster Type', 'Disaster Title',
                                'Individual Assistance Program',
                                'Individuals & Households Program',
                                'Public Assistance Program',
                                'Hazard Mitigation Program'])
# Add a 'Year' column: the third '/'-separated piece of 'Declaration Date'
# (dates in the data shown are MM/DD/YYYY). The value stays a string.
# .assign returns a new frame instead of writing into the filtered one.
fema_df = fema_df.assign(
    Year=fema_df['Declaration Date'].str.split('/', n=2, expand=True)[2]
)
# Rename columns to lowercase snake_case for Postgres, then renumber the rows
# from 0 (the old index is discarded).
final_fema_df = fema_df.rename(columns={"Declaration Date": "declaration_date",
                                        "State": "state", "County": "county",
                                        "Start Date": "start_date",
                                        "End Date": "end_date",
                                        "Close Date": "close_date",
                                        "Year": "year"}).reset_index(drop=True)
final_fema_df.head()

Unnamed: 0,declaration_date,state,county,start_date,end_date,close_date,year
0,07/02/1953,NH,,07/02/1953,07/02/1953,02/01/1956,1953
1,12/29/1956,CA,,12/29/1956,12/29/1956,04/01/1959,1956
2,07/22/1960,ID,,07/22/1960,07/22/1960,09/20/1961,1960
3,11/16/1961,CA,,11/16/1961,11/16/1961,08/01/1963,1961
4,08/30/1967,ID,Benewah County,08/30/1967,08/30/1967,06/17/1970,1967


In [11]:
# Write both cleaned tables to clean_data/ as CSV, using pandas' default
# to_csv options.
for frame, destination in (
    (final_fema_df, 'clean_data/final_fema.csv'),
    (final_annual_data, 'clean_data/final_yearly_data.csv'),
):
    frame.to_csv(destination)