### Credit: Hannah Ritchie and Max Roser (2014) - "Natural Disasters". Published online at OurWorldInData.org. Retrieved from: 'https://ourworldindata.org/natural-disasters' [Online Resource]

#### Reuse our work freely
#### All visualizations, data, and code produced by Our World in Data are completely open access under the Creative Commons BY license. You have the permission to use, distribute, and reproduce these in any medium, provided the source and authors are credited.

#### The data produced by third parties and made available by Our World in Data is subject to the license terms from the original third-party authors. We will always indicate the original source of the data in our documentation, so you should always check the license of any such third-party data before use and redistribution.

#### All of our charts can be embedded in any site.

In [1]:
import pandas as pd

In [2]:
# Load the significant-earthquakes CSV into a DataFrame and preview the first rows
earthquakes_csv = '../../resources/significant-earthquakes.csv'
earth_df = pd.read_csv(earthquakes_csv)
earth_df.head()

Unnamed: 0,Entity,Code,Year,Significant earthquake events (NGDC-NASA)
0,Afghanistan,AFG,-2150,0
1,Afghanistan,AFG,-2000,0
2,Afghanistan,AFG,-1610,0
3,Afghanistan,AFG,-1566,0
4,Afghanistan,AFG,-1450,0


In [3]:
# Keep only the records dated 2008 or later, then preview the result
earth_df = earth_df.loc[earth_df['Year'] >= 2008]
earth_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Significant earthquake events (NGDC-NASA)
935,Afghanistan,AFG,2008,1
936,Afghanistan,AFG,2009,2
937,Afghanistan,AFG,2010,1
938,Afghanistan,AFG,2011,0
939,Afghanistan,AFG,2012,1


In [4]:
# Renumber the rows from 0 after filtering; the previous index labels are discarded
earth_df = earth_df.reset_index(drop=True)

In [5]:
# Load the significant-volcanic-eruptions CSV into a DataFrame and preview the first rows
eruptions_csv = '../../resources/significant-volcanic-eruptions.csv'
volcano_df = pd.read_csv(eruptions_csv)
volcano_df.head()

Unnamed: 0,Entity,Code,Year,Number of significant volcanic eruptions (NGDC-WDS)
0,Antarctica,ATA,-1750,0
1,Antarctica,ATA,-1645,0
2,Antarctica,ATA,-1610,0
3,Antarctica,ATA,-1550,0
4,Antarctica,ATA,-1460,0


In [6]:
# Keep only the records dated 2008 or later and renumber the rows from 0
volcano_df = (
    volcano_df
    .loc[volcano_df['Year'] >= 2008]
    .reset_index(drop=True)
)

# Preview the filtered frame
volcano_df.head()

Unnamed: 0,Entity,Code,Year,Number of significant volcanic eruptions (NGDC-WDS)
0,Antarctica,ATA,2008,0
1,Antarctica,ATA,2009,0
2,Antarctica,ATA,2010,0
3,Antarctica,ATA,2011,0
4,Antarctica,ATA,2012,0


In [7]:
# Combine the two datasets with a left join keyed on Entity/Code/Year:
# every earthquake row is kept, and the volcanic-eruption count is attached
# where a matching row exists (otherwise it is left missing).
join_keys = ['Entity', 'Code', 'Year']
nh_df = pd.merge(earth_df, volcano_df, how='left', on=join_keys)

In [8]:
# Preview the first rows of the merged frame
nh_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Significant earthquake events (NGDC-NASA),Number of significant volcanic eruptions (NGDC-WDS)
0,Afghanistan,AFG,2008,1,
1,Afghanistan,AFG,2009,2,
2,Afghanistan,AFG,2010,1,
3,Afghanistan,AFG,2011,0,
4,Afghanistan,AFG,2012,1,


In [9]:
# Count the missing values in each column of the merged frame
nh_df.isna().sum()

Entity                                                    0
Code                                                     80
Year                                                      0
Significant earthquake events (NGDC-NASA)                 0
Number of significant volcanic eruptions (NGDC-WDS)    1080
dtype: int64

In [10]:
# Replace missing volcanic-eruption counts with 0. Entity/Year rows without a
# match in the volcano data come out of the left merge as NaN.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the `nh_df[...]` selection: the in-place call acts on
# an intermediate Series, which pandas flags as chained assignment and which
# leaves `nh_df` unchanged once Copy-on-Write is active.
nh_df['Number of significant volcanic eruptions (NGDC-WDS)'] = (
    nh_df['Number of significant volcanic eruptions (NGDC-WDS)'].fillna(0)
)

In [11]:
# Cast the eruption counts to pandas' nullable integer dtype ('Int64')
eruptions_col = 'Number of significant volcanic eruptions (NGDC-WDS)'
nh_df[eruptions_col] = nh_df[eruptions_col].astype('Int64')

In [12]:
# Preview the frame after filling and casting the eruption counts
nh_df.head(n=5)

Unnamed: 0,Entity,Code,Year,Significant earthquake events (NGDC-NASA),Number of significant volcanic eruptions (NGDC-WDS)
0,Afghanistan,AFG,2008,1,0
1,Afghanistan,AFG,2009,2,0
2,Afghanistan,AFG,2010,1,0
3,Afghanistan,AFG,2011,0,0
4,Afghanistan,AFG,2012,1,0


In [13]:
# Give the columns shorter names for downstream use
column_renames = {
    'Entity': 'Country',
    'Significant earthquake events (NGDC-NASA)': 'significant_earthquakes',
    'Number of significant volcanic eruptions (NGDC-WDS)': 'significant_volcanic_eruptions',
}
nh_df = nh_df.rename(columns=column_renames)

In [14]:
# Count the missing values in each column after renaming
nh_df.isna().sum()

Country                            0
Code                              80
Year                               0
significant_earthquakes            0
significant_volcanic_eruptions     0
dtype: int64

In [15]:
# List the distinct 'Country' values whose 'Code' is missing
nh_df.loc[nh_df['Code'].isna(), 'Country'].unique()

array(['Atlantic Ocean', 'Azores', 'Bering Sea', 'Indian Ocean',
       'Kermadec Islands', 'Micronesia (region)', 'Pacific Ocean',
       'Solomon Sea'], dtype=object)

In [16]:
# Drop rows where 'Code' is null.
# The check is restricted to 'Code' so that a missing value in any other
# column cannot silently remove extra rows; a bare dropna() discards a row
# when ANY column is null.
nh_df = nh_df.dropna(subset=['Code'])

In [17]:
# Re-count the missing values per column after dropping rows
nh_df.isna().sum()

Country                           0
Code                              0
Year                              0
significant_earthquakes           0
significant_volcanic_eruptions    0
dtype: int64

In [18]:
# Preview the first 25 rows of the cleaned frame
nh_df.head(n=25)

Unnamed: 0,Country,Code,Year,significant_earthquakes,significant_volcanic_eruptions
0,Afghanistan,AFG,2008,1,0
1,Afghanistan,AFG,2009,2,0
2,Afghanistan,AFG,2010,1,0
3,Afghanistan,AFG,2011,0,0
4,Afghanistan,AFG,2012,1,0
5,Afghanistan,AFG,2013,1,0
6,Afghanistan,AFG,2014,0,0
7,Afghanistan,AFG,2015,2,0
8,Afghanistan,AFG,2016,1,0
9,Afghanistan,AFG,2017,0,0


In [19]:
# Write the cleaned table to CSV, omitting the DataFrame index
output_csv = '../../dataset/natural_hazards_cleaned.csv'
nh_df.to_csv(output_csv, index=False)