### Libraries

In [24]:
import pandas as pd
import os

### Load the CSV file into a DataFrame

In [25]:
# Load the dataset CSV that sits in the current working directory.
file_name = 'public_emdat_project.csv'
# abspath() resolves a relative name against the current working directory.
file_path = os.path.abspath(file_name)
df = pd.read_csv(
    file_path,
    sep=',',
    encoding='ISO-8859-1',
)


### Checking the first few rows

In [None]:
# Preview the first five rows of the loaded frame.
df.head(5)

### Death toll data


In [None]:
# Columns kept for the death-toll analysis.
death_toll_columns = [
    'Country',
    'Region',
    'Total Deaths',
    'Event Name',
    'Start Year',
    'End Year',
    'Disaster Group',
    'Disaster Type',
]
df_death = df[death_toll_columns]
df_death.head()

### The deadliest disaster

In [None]:
# Index label of the row with the largest 'Total Deaths' value.
deadliest_idx = df_death['Total Deaths'].idxmax()
# The list selector makes .loc return a one-row DataFrame instead of a Series.
df_death.loc[[deadliest_idx]]

### Additional libraries

In [29]:
import plotly.express as px

### Death toll by country

In [None]:
# Sum 'Total Deaths' per country and order the result from largest to smallest.
df_dtoll_by_country = (
    df.groupby('Country')['Total Deaths']
    .sum()
    .sort_values(ascending=False)
)
df_dtoll_by_country

In [None]:
# Keep only the countries whose summed death toll is above 10000.
above_10000 = df_dtoll_by_country > 10000
df_dtoll_by_country_filtered = df_dtoll_by_country[above_10000]
df_dtoll_by_country_filtered

In [32]:
# Export the filtered per-country totals for the dashboard.
# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
dashboard_csv_dir = os.path.join(os.getcwd(), 'CSV_files_for_dashboard')
os.makedirs(dashboard_csv_dir, exist_ok=True)
df_dtoll_by_country_filtered.to_csv(os.path.join(dashboard_csv_dir, 'dtoll_by_country.csv'))

In [None]:

# Bar chart of the filtered per-country death totals.
# update_layout returns the figure it was called on, so the calls can be chained.
dtoll_by_country_plot = px.bar(
    df_dtoll_by_country_filtered,
    x=df_dtoll_by_country_filtered.index,
    y=df_dtoll_by_country_filtered.values,
    labels={'x': 'Country', 'y': 'Total Deaths'},
    title="Total Deaths by Country (> 10000)",
).update_layout(
    title_x=0.5,  # centre the title horizontally
    title_font={'size': 25},
)
dtoll_by_country_plot.show()

### Death toll by region

In [None]:
# Sum 'Total Deaths' per region and order the result from largest to smallest.
df_dtoll_by_region = (
    df.groupby('Region')['Total Deaths']
    .sum()
    .sort_values(ascending=False)
)
df_dtoll_by_region

In [35]:
# Export the per-region totals for the dashboard.
# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
dashboard_csv_dir = os.path.join(os.getcwd(), 'CSV_files_for_dashboard')
os.makedirs(dashboard_csv_dir, exist_ok=True)
df_dtoll_by_region.to_csv(os.path.join(dashboard_csv_dir, 'dtoll_by_region.csv'))

In [None]:
# Bar chart of the per-region death totals.
# update_layout returns the figure it was called on, so the calls can be chained.
dtoll_by_region_plot = px.bar(
    df_dtoll_by_region,
    x=df_dtoll_by_region.index,
    y=df_dtoll_by_region.values,
    labels={'x': 'Region', 'y': 'Total Deaths'},
    title="Total Deaths by Region",
).update_layout(
    title_x=0.5,  # centre the title horizontally
    title_font={'size': 25},
)
dtoll_by_region_plot.show()

### Number of disasters over years

In [37]:
# Count the non-null 'DisNo.' entries for every start year.
df_disasters_over_years = df.groupby('Start Year')['DisNo.'].count()
# Drop 2024 because the data for that year is not complete.
df_disasters_over_years = df_disasters_over_years[df_disasters_over_years.index != 2024]

# Export the yearly counts for the dashboard.
# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
dashboard_csv_dir = os.path.join(os.getcwd(), 'CSV_files_for_dashboard')
os.makedirs(dashboard_csv_dir, exist_ok=True)
df_disasters_over_years.to_csv(os.path.join(dashboard_csv_dir, 'disasters_over_years.csv'))

In [None]:
# Bar chart of the number of disasters per start year.
# The variable keeps its original spelling so that any other cell referring to it still works.
# update_layout returns the figure it was called on, so the calls can be chained.
distasters_over_years_plot = px.bar(
    df_disasters_over_years,
    x=df_disasters_over_years.index,
    y=df_disasters_over_years.values,
    labels={'x': 'Year', 'y': 'Number of disasters'},
    title='Number of disasters over years',
).update_layout(
    title_x=0.5,  # centre the title horizontally
    title_font={'size': 25},
    xaxis={'tickmode': 'linear'},
    xaxis_title='Year',
)
distasters_over_years_plot.show()

### Number of disasters by region by year

In [39]:
# Count the non-null 'DisNo.' entries per (region, start year) pair and
# turn the grouping keys back into ordinary columns.
df_no_of_disasters_by_region_by_year = df.groupby(['Region', 'Start Year'])['DisNo.'].count().reset_index()

# Drop 2024 because the data for that year is not complete.
df_no_of_disasters_by_region_by_year = df_no_of_disasters_by_region_by_year[df_no_of_disasters_by_region_by_year['Start Year'] != 2024]

# Export the counts for the dashboard.
# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
dashboard_csv_dir = os.path.join(os.getcwd(), 'CSV_files_for_dashboard')
os.makedirs(dashboard_csv_dir, exist_ok=True)
df_no_of_disasters_by_region_by_year.to_csv(os.path.join(dashboard_csv_dir, 'no_of_disasters_by_region_by_year.csv'), index=False)

In [None]:
# Grouped bar chart: one bar per region for every start year.
# update_layout returns the figure it was called on, so the calls can be chained.
df_no_of_disasters_by_region_by_year_plot = px.bar(
    df_no_of_disasters_by_region_by_year,
    x='Start Year',
    y='DisNo.',
    color='Region',
    barmode='group',
    title='Number of disasters by region and year',
).update_layout(
    title_x=0.5,  # centre the title horizontally
    title_font={'size': 25},
    xaxis={'tickmode': 'linear'},
    xaxis_title='Year',
    yaxis_title='Number of disasters',
)
df_no_of_disasters_by_region_by_year_plot.show()

### Number of disasters by group by year

In [41]:
# Count the non-null 'DisNo.' entries per (disaster group, region, start year)
# combination and turn the grouping keys back into ordinary columns.
df_no_of_disasters_by_group_by_year = df.groupby(['Disaster Group', 'Region', 'Start Year'])['DisNo.'].count().reset_index()

# Drop 2024 because the data for that year is not complete.
df_no_of_disasters_by_group_by_year = df_no_of_disasters_by_group_by_year[df_no_of_disasters_by_group_by_year['Start Year'] != 2024]

# Export the counts for the dashboard.
# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
dashboard_csv_dir = os.path.join(os.getcwd(), 'CSV_files_for_dashboard')
os.makedirs(dashboard_csv_dir, exist_ok=True)
df_no_of_disasters_by_group_by_year.to_csv(os.path.join(dashboard_csv_dir, 'no_of_disasters_by_group_by_year.csv'), index=False)

In [None]:
# Bar chart with one facet column per disaster group, coloured by region.
df_no_of_disasters_by_group_by_year_plot = px.bar(
    df_no_of_disasters_by_group_by_year,
    x='Start Year',
    y='DisNo.',
    color='Region',
    facet_col='Disaster Group',
    title='Number of disasters by group, region and year',
)
df_no_of_disasters_by_group_by_year_plot.update_layout(
    title_x=0.5,  # centre the title horizontally
    title_font={'size': 25},
    yaxis_title='Number of disasters',
)
# update_xaxes is called without a selector, so the settings go to the x-axis of each facet.
df_no_of_disasters_by_group_by_year_plot.update_xaxes(
    title_text='Year',
    tickmode='linear',
)
df_no_of_disasters_by_group_by_year_plot.show()