### Libraries

In [2]:
import pandas as pd
import os

### Load the CSV file into a DataFrame

In [3]:
# Load the EM-DAT export; the file name is resolved against the current
# working directory.
file_name = 'public_emdat_project.csv'
file_path = os.path.abspath(file_name)
# The file is decoded as ISO-8859-1 (Latin-1) and parsed as comma-separated.
df = pd.read_csv(file_path, encoding='ISO-8859-1', sep=',')


### Checking the first few rows

In [4]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,DisNo.,Historic,Classification Key,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,External IDs,Event Name,ISO,...,Reconstruction Costs ('000 US$),"Reconstruction Costs, Adjusted ('000 US$)",Insured Damage ('000 US$),"Insured Damage, Adjusted ('000 US$)",Total Damage ('000 US$),"Total Damage, Adjusted ('000 US$)",CPI,Admin Units,Entry Date,Last Update
0,1999-9388-DJI,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,DJI,...,,,,,,,58.111474,"[{""adm1_code"":1093,""adm1_name"":""Ali Sabieh""},{...",2006-03-01,2023-09-25
1,1999-9388-SDN,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,SDN,...,,,,,,,56.514291,"[{""adm1_code"":2757,""adm1_name"":""Northern Darfu...",2006-03-08,2023-09-25
2,1999-9388-SOM,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,SOM,...,,,,,,,56.514291,"[{""adm1_code"":2691,""adm1_name"":""Bay""},{""adm1_c...",2006-03-08,2023-09-25
3,2000-0001-AGO,No,tec-tra-roa-roa,Technological,Transport,Road,Road,,,AGO,...,,,,,,,56.514291,,2004-10-27,2023-09-25
4,2000-0002-AGO,No,nat-hyd-flo-riv,Natural,Hydrological,Flood,Riverine flood,,,AGO,...,,,,,10000.0,17695.0,56.514291,"[{""adm2_code"":4214,""adm2_name"":""Baia Farta""},{...",2005-02-03,2023-09-25


### Death toll data


In [5]:
# Columns kept for the death-toll analysis.
death_columns = [
    'Country',
    'Region',
    'Total Deaths',
    'Event Name',
    'Start Year',
    'End Year',
    'Disaster Group',
    'Disaster Type',
]
df_death = df[death_columns]
df_death.head()

Unnamed: 0,Country,Region,Total Deaths,Event Name,Start Year,End Year,Disaster Group,Disaster Type
0,Djibouti,Africa,,,2001,2001,Natural,Drought
1,Sudan,Africa,,,2000,2001,Natural,Drought
2,Somalia,Africa,21.0,,2000,2001,Natural,Drought
3,Angola,Africa,14.0,,2000,2000,Technological,Road
4,Angola,Africa,31.0,,2000,2000,Natural,Flood


### The deadliest disaster

In [6]:
# idxmax() returns the row label of the largest 'Total Deaths' value (missing
# values are skipped). Passing the label inside a list makes .loc return a
# one-row DataFrame rather than a Series.
deadliest_label = df_death['Total Deaths'].idxmax()
df_death.loc[[deadliest_label]]

Unnamed: 0,Country,Region,Total Deaths,Event Name,Start Year,End Year,Disaster Group,Disaster Type
7663,Haiti,Americas,222570.0,,2010,2010,Natural,Earthquake


### Additional libraries

In [7]:
import plotly.express as px

### Death toll by country

In [8]:
# Sum the recorded deaths per country, then rank countries from highest to
# lowest total. sum() skips missing values, so a country with no recorded
# deaths shows up as 0.0.
deaths_per_country = df.groupby('Country')['Total Deaths'].sum()
df_dtoll_by_country = deaths_per_country.sort_values(ascending=False)
df_dtoll_by_country

Country
Haiti                        241207.0
Indonesia                    196343.0
Myanmar                      141286.0
China                        133988.0
India                        102593.0
                               ...   
Isle of Man                       0.0
Cook Islands                      0.0
Wallis and Futuna Islands         0.0
Saint Kitts and Nevis             0.0
Iceland                           0.0
Name: Total Deaths, Length: 222, dtype: float64

In [9]:
# Keep only the countries whose total death toll is strictly above 10,000.
is_above_threshold = df_dtoll_by_country > 10000
df_dtoll_by_country_filtered = df_dtoll_by_country[is_above_threshold]
df_dtoll_by_country_filtered

Country
Haiti                               241207.0
Indonesia                           196343.0
Myanmar                             141286.0
China                               133988.0
India                               102593.0
Pakistan                             92445.0
Russian Federation                   63045.0
Türkiye                              55248.0
Italy                                41644.0
Sri Lanka                            38228.0
Iran (Islamic Republic of)           33755.0
France                               33173.0
Philippines                          30118.0
Nigeria                              27587.0
Spain                                27467.0
Somalia                              24749.0
Democratic Republic of the Congo     24293.0
Japan                                23768.0
Bangladesh                           19672.0
Afghanistan                          18232.0
Germany                              18209.0
Libya                                17457.0
Ne

In [10]:
# Export the filtered per-country totals; the 'Country' index is written as
# the first CSV column.
df_dtoll_by_country_filtered.to_csv(path_or_buf='dtoll_by_country.csv')

In [11]:

# Bar chart of total deaths for the countries that passed the > 10000 filter.
dtoll_by_country_plot = px.bar(
    df_dtoll_by_country_filtered,
    x=df_dtoll_by_country_filtered.index,
    y=df_dtoll_by_country_filtered.values,
    title="Total Deaths by Country (> 10000)",
    labels={'x': 'Country', 'y': 'Total Deaths'},
)
# Centre the title horizontally and enlarge its font.
dtoll_by_country_plot.update_layout(title_x=0.5, title_font_size=25)
dtoll_by_country_plot.show()

### Death toll by region

In [12]:
# Sum the recorded deaths per region and rank regions from highest to lowest.
deaths_per_region = df.groupby('Region')['Total Deaths'].sum()
df_dtoll_by_region = deaths_per_region.sort_values(ascending=False)
df_dtoll_by_region

Region
Asia        953831.0
Americas    299746.0
Europe      232255.0
Africa      231059.0
Oceania       3896.0
Name: Total Deaths, dtype: float64

In [13]:
# Export the per-region totals; the 'Region' index is written as the first
# CSV column.
df_dtoll_by_region.to_csv(path_or_buf='dtoll_by_region.csv')

In [14]:
# Bar chart of total deaths per region.
dtoll_by_region_plot = px.bar(
    df_dtoll_by_region,
    x=df_dtoll_by_region.index,
    y=df_dtoll_by_region.values,
    title="Total Deaths by Region",
    labels={'x': 'Region', 'y': 'Total Deaths'},
)
# Centre the title horizontally and enlarge its font.
dtoll_by_region_plot.update_layout(title_x=0.5, title_font_size=25)
dtoll_by_region_plot.show()

### Number of disasters over years

In [32]:
# Count the non-null 'DisNo.' entries for each start year.
disasters_per_year = df.groupby('Start Year')['DisNo.'].count()
# 2024 is excluded because its data is not complete.
is_complete_year = disasters_per_year.index != 2024
df_disasters_over_years = disasters_per_year[is_complete_year]

# Export the yearly counts; the 'Start Year' index becomes the first column.
df_disasters_over_years.to_csv('disasters_over_years.csv')

In [31]:
# Bar chart of the number of disasters per start year.
# NOTE: the variable keeps its original spelling ("distasters") so that any
# other cell referring to it continues to work.
distasters_over_years_plot = px.bar(
    df_disasters_over_years,
    x=df_disasters_over_years.index,
    y=df_disasters_over_years.values,
    labels={'x': 'Year', 'y': 'Number of disasters'},
    title='Number of disasters over years',
)
# Centre and enlarge the title, use linear tick placement on the x axis, and
# set the x-axis title explicitly.
distasters_over_years_plot.update_layout(
    title_x=0.5,
    title_font_size=25,
    xaxis_tickmode='linear',
    xaxis_title='Year',
)
distasters_over_years_plot.show()

### Number of disasters by region by year

In [47]:
# Count the non-null 'DisNo.' entries per (Region, Start Year) pair and turn
# the grouped result back into a flat DataFrame.
disasters_by_region_year = (
    df.groupby(['Region', 'Start Year'])['DisNo.']
    .count()
    .reset_index()
)

# 2024 is excluded because its data is not complete.
is_complete_year = disasters_by_region_year['Start Year'] != 2024
df_no_of_disasters_by_region_by_year = disasters_by_region_year[is_complete_year]
df_no_of_disasters_by_region_by_year.to_csv(
    'no_of_disasters_by_region_by_year.csv',
    index=False,
)

In [46]:
# Grouped bar chart: one bar per region for every start year.
df_no_of_disasters_by_region_by_year_plot = px.bar(
    df_no_of_disasters_by_region_by_year,
    x='Start Year',
    y='DisNo.',
    color='Region',
    barmode='group',
    title='Number of disasters by region and year',
)
# Centre and enlarge the title, use linear tick placement on the x axis, and
# replace the raw column names with readable axis titles.
df_no_of_disasters_by_region_by_year_plot.update_layout(
    title_x=0.5,
    title_font_size=25,
    xaxis_tickmode='linear',
    xaxis_title='Year',
    yaxis_title='Number of disasters',
)
df_no_of_disasters_by_region_by_year_plot.show()

### Number of disasters by group by year

In [63]:
# Count the non-null 'DisNo.' entries per (Disaster Group, Region, Start Year)
# combination and turn the grouped result back into a flat DataFrame.
disasters_by_group_year = (
    df.groupby(['Disaster Group', 'Region', 'Start Year'])['DisNo.']
    .count()
    .reset_index()
)

# 2024 is excluded because its data is not complete.
is_complete_year = disasters_by_group_year['Start Year'] != 2024
df_no_of_disasters_by_group_by_year = disasters_by_group_year[is_complete_year]
df_no_of_disasters_by_group_by_year.to_csv(
    'no_of_disasters_by_group_by_year.csv',
    index=False,
)

In [62]:
# Bar chart with one facet column per disaster group; bars are coloured by
# region and placed by start year.
df_no_of_disasters_by_group_by_year_plot = px.bar(
    df_no_of_disasters_by_group_by_year,
    x='Start Year',
    y='DisNo.',
    color='Region',
    facet_col='Disaster Group',
    title='Number of disasters by group, region and year',
)
# Centre and enlarge the title and give the y axis a readable title.
df_no_of_disasters_by_group_by_year_plot.update_layout(
    title_x=0.5,
    title_font_size=25,
    yaxis_title='Number of disasters',
)
# update_xaxes applies the same title and tick mode to every facet's x axis.
df_no_of_disasters_by_group_by_year_plot.update_xaxes(
    tickmode='linear',
    title_text='Year',
)
df_no_of_disasters_by_group_by_year_plot.show()