# emdat dataset 

Credits: EM-DAT: The Emergency Events Database - Université catholique de Louvain (UCL) - CRED, www.emdat.be, Brussels, Belgium

Thanks Akash!

In [1]:
import numpy as np
import pandas as pd
import shapefile as shp
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('./datasets/emdat/Cyclones_1980_2020.csv')

In [3]:
#number of records
print('numer of records in the dataset:',len(df))

numer of records in the dataset: 2293


In [4]:
#columns
df.columns

Index(['Start date', 'End date', 'Country', 'ISO', 'Location', 'Latitude',
       'Longitude', 'Magnitude value', 'Magnitude scale', 'Disaster type',
       'Disaster subtype', 'Associated disaster', 'Associated disaster2',
       'Total deaths', 'Total affected', 'Total damage ('000 US$)',
       'Insured losses ('000 US$)', 'Disaster name', 'Disaster No.'],
      dtype='object')

In [5]:
df.head(5)

Unnamed: 0,Start date,End date,Country,ISO,Location,Latitude,Longitude,Magnitude value,Magnitude scale,Disaster type,Disaster subtype,Associated disaster,Associated disaster2,Total deaths,Total affected,Total damage ('000 US$),Insured losses ('000 US$),Disaster name,Disaster No.
0,10/01/1989,10/01/1989,American Samoa,ASM,,,,,Kph,Storm,Tropical cyclone,--,--,0,0,5000.0,0.0,Gina,1989-0561
1,05/01/2004,05/01/2004,American Samoa,ASM,American Samoa,,,310.0,Kph,Storm,Tropical cyclone,--,--,0,23060,150000.0,0.0,Heta,2004-0004
2,16/02/2005,16/02/2005,American Samoa,ASM,Manu'a island,,,,Kph,Storm,Tropical cyclone,--,--,0,0,0.0,0.0,Olaf,2005-0151
3,02/01/1955,02/01/1955,Anguilla,AIA,,,,,Kph,Storm,Tropical cyclone,--,--,0,0,0.0,0.0,Alice,1955-0025
4,04/09/1960,04/09/1960,Anguilla,AIA,South Hille and West End,,,,Kph,Storm,Tropical cyclone,--,--,5,1250,35000.0,0.0,Donna,1960-0026


# Some data wrangling

## convertion of dates from str to datetime

In [6]:
#output of dates: yyyy-mm-dd
df['Start date'] = pd.to_datetime(df['Start date'],errors='ignore')
df['End date']   = pd.to_datetime(df['End date'],errors='ignore')

## Sort by date (ascending)

In [7]:
df.sort_values(by=['Start date'],inplace=True)

## Selection of events from 1980+
The dataset includes info from 1900

In [8]:
df = df[df['Start date'] > '1980-01-01']
df.reset_index(drop=True,inplace=True)

## Column dropping

- 'Magnitude scale' has a single value (Kph); we append Kph to 'Magnitude value'
- 'Latitude' and 'Longitude' are not need since we have info on the affected areas (see 'Location')
- 'Disaster type' has a single value ('Storm')
- 'Disaster subtype' has a single value ('Tropical cyclone')
- 'Associated disaster' and 'Associated disaster2' are mostly filled with '--' (Thanks Beata!)

In [9]:
df.rename(columns={'Magnitude value':'Magnitude value (Kph)'},inplace=True)

In [11]:
df.drop(columns=['Latitude','Longitude','Magnitude scale',
                  'Disaster type','Disaster subtype',
                  'Associated disaster','Associated disaster2'],inplace=True)

In [12]:
df

Unnamed: 0,Start date,End date,Country,ISO,Location,Magnitude value (Kph),Total deaths,Total affected,Total damage ('000 US$),Insured losses ('000 US$),Disaster name,Disaster No.
0,1980-01-27,1980-01-27,Réunion,REU,,,25,7000,67000.0,0.0,Hyacinthe,1980-0029
1,1980-02-01,1980-02-01,Australia,AUS,Pilbara,,0,0,40026.0,0.0,Dean,1980-0287
2,1980-02-01,1980-02-01,Australia,AUS,Pilbara,,0,0,4605.0,0.0,Enid,1980-0289
3,1980-02-15,1980-02-15,Indonesia,IDN,West Java,,0,800,0.0,0.0,,1980-0315
4,1980-03-12,1980-03-12,India,IND,Tamil Nadu,,0,0,0.0,0.0,,1980-0228
...,...,...,...,...,...,...,...,...,...,...,...,...
1749,2019-12-10,2019-10-17,Japan,JPN,"Tokyo, Fukushima, Miyagi, Shizuoka, Kanawanga,...",160.0,99,390470,17000000.0,10000000.0,Tropical cylone 'Hagibis',2019-0492
1750,2019-12-11,2019-11-13,Viet Nam,VNM,"Phu Yen, Binh Dinh Provinces",,3,2150,4000.0,0.0,Tropical cyclone 'Nakri',2019-0549
1751,2019-12-24,2019-12-28,Philippines (the),PHL,"Salcedo, Eastern Samar; Tacloban City, Leyte; ...",150.0,69,2656862,28000.0,0.0,Tropical cyclone 'Phanfone' (Ursula),2019-0634
1752,2019-12-26,2019-12-26,Fiji,FJI,"Ba, Nadroga, Lau, Kadavu",,1,7780,0.0,0.0,Tropical cyclone 'Sarai',2019-0642
