In [None]:
import pandas as pd

In [None]:
# Read the disaster records from the CSV export (file name suggests EM-DAT data, 1900-2021).
# An alternative file name, '1970_2021_DISASTERS.csv', is kept here for reference only.
disasters = pd.read_csv(
    filepath_or_buffer='1900_2021_DISASTERS.xlsx - emdat data.csv'
)
disasters

In [None]:
# Normalise the headers in one pass: lower-case them, then turn spaces into underscores.
disasters.columns = disasters.columns.str.lower().str.replace(' ', '_')
disasters.columns

In [None]:
# Count the missing values in every column of the freshly loaded frame.
disasters.isna().sum()

In [None]:
# Columns removed before the analysis. "reconstruction_costs_('000_us$)" and 'dis_no'
# are not in this list, so they stay in the frame.
unused_columns = [
    'glide', 'disaster_subsubtype', 'event_name', 'location', 'associated_dis2',
    'ofda_response', 'appeal', 'declaration', 'aid_contribution', 'dis_mag_value',
    'dis_mag_scale', 'latitude', 'longitude', 'local_time', 'river_basin',
    'start_day', 'end_day', 'no_injured', 'no_affected', 'no_homeless',
    "insured_damages_('000_us$)", "total_damages_('000_us$)",
    'geo_locations', 'admin2_code', 'admin1_code', 'adm_level', 'cpi', 'seq',
    'iso', 'associated_dis', 'origin', 'disaster_subtype', 'total_affected',
    'start_year', 'end_year',
]
disasters = disasters.drop(columns=unused_columns)
disasters

In [None]:
# Re-check the missing values per column now that the unused columns are gone.
disasters.isna().sum()

In [None]:
# Frequency of each 'end_month' value across the records.
disasters.value_counts(subset='end_month')

In [None]:
# Disaster subgroups kept for the analysis.
desired_subgroups = ['Hydrological', 'Meteorological', 'Geophysical', 'Climatological']

# Keep only the rows that belong to one of those subgroups. The explicit .copy()
# makes the result an independent frame, so the column updates performed in
# later cells do not trigger SettingWithCopyWarning.
disasters = disasters[disasters['disaster_subgroup'].isin(desired_subgroups)].copy()

# Preview the first few rows of the filtered frame.
disasters.head()

In [None]:
# Print the column dtypes and non-null counts of the current frame.
disasters.info()

In [None]:
# Fill missing 'total_deaths' values with the column median.
# The filled column is assigned back to the frame instead of calling
# fillna(inplace=True) on the selected column. The in-place form works on an
# intermediate object: it emits a chained-assignment warning on pandas 2.x and
# leaves the frame unchanged once Copy-on-Write is enabled.
disasters = disasters.assign(
    total_deaths=disasters['total_deaths'].fillna(disasters['total_deaths'].median())
)

In [None]:
# Fill missing 'start_month' and 'end_month' values with each column's mode
# (the first mode when several values tie).
# The results are assigned back to the frame rather than using
# fillna(inplace=True) on a selected column, which is chained assignment:
# it warns on pandas 2.x and has no effect under Copy-on-Write.
disasters = disasters.assign(
    start_month=disasters['start_month'].fillna(disasters['start_month'].mode()[0]),
    end_month=disasters['end_month'].fillna(disasters['end_month'].mode()[0]),
)

In [None]:
# Confirm how many missing values remain per column after the imputation.
disasters.isna().sum()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter plot of 'total_deaths' against another variable (here: 'start_month').
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=disasters, x='start_month', y='total_deaths', ax=ax)
ax.set_title('Relação entre Mês de Início e Número de Mortes')
plt.show()

In [None]:
# Group by year and chart the number of records in each one
# (counted as the non-null 'total_deaths' entries per year).
dados_por_ano = disasters.groupby('year')['total_deaths'].count()

fig, ax = plt.subplots(figsize=(12, 8))
dados_por_ano.plot(kind='bar', ax=ax)
ax.set_title('Número de Ocorrências por Ano')
ax.set_xlabel('Ano')
ax.set_ylabel('Número de Ocorrências')
plt.show()

In [None]:
import geopandas as gpd

# Load the Natural Earth low-resolution country geometries.
# `gpd.datasets.get_path` was deprecated in GeoPandas 0.14 and raises
# AttributeError from GeoPandas 1.0 onwards, so fall back to the public
# Natural Earth 110m "Admin 0 - Countries" archive (requires network access)
# when the bundled dataset is unavailable.
try:
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
except AttributeError:
    world = gpd.read_file(
        'https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip'
    )
    # The notebook joins on a lower-case 'name' column.
    # NOTE(review): assumes the upstream file names that field 'NAME' - confirm.
    world = world.rename(columns={'NAME': 'name'})

In [None]:
# Join the disaster rows onto the country geometries by country name.
# A left join keeps every country from `world`, including those with no match.
merged_data = world.merge(
    right=disasters,
    left_on='name',
    right_on='country',
    how='left',
)
merged_data

In [None]:
#Map Distribution of Different Types of Disasters:

import matplotlib.pyplot as plt

# 'disaster_type' is taken to be the column holding the category of each disaster.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
merged_data.plot(
    column='disaster_type',
    ax=ax,
    legend=True,
    legend_kwds=dict(bbox_to_anchor=(1, 1)),
)
ax.set_title('Distribution of Different Types of Disasters')
plt.show()

In [None]:
import plotly.express as px

# Interactive choropleth coloured by disaster type; rows are matched to their
# shapes through the merged frame's index.
fig = px.choropleth(
    merged_data,
    geojson=merged_data.geometry,
    locations=merged_data.index,
    color='disaster_type',
    projection='natural earth',
)
fig.update_geos(
    showcoastlines=True,
    coastlinecolor="black",
    showland=True,
    landcolor="lightgray",
).update_layout(title='Distribution of Different Types of Disasters')
fig.show()

In [None]:
# Most frequent disaster type in each year.

# Count the records for every (year, disaster_type) pair.
grouped_data = disasters.groupby(['year', 'disaster_type']).size().reset_index(name='occurrences')

# Keep the rows whose count equals the yearly maximum; ties keep every tied type.
# The string alias 'max' is used because passing the builtin `max` to
# transform is deprecated in pandas 2.1+.
idx = grouped_data.groupby(['year'])['occurrences'].transform('max') == grouped_data['occurrences']
most_occurred_per_year = grouped_data[idx]

In [None]:
# Example visualization using matplotlib
import matplotlib.pyplot as plt

# One bar per year; the legend entry names the first top-ranked disaster type of that year.
fig, ax = plt.subplots(figsize=(12, 8))
for year, year_rows in most_occurred_per_year.groupby('year'):
    top_type = year_rows["disaster_type"].values[0]
    ax.bar(year, year_rows['occurrences'], label=f'{year}: {top_type}')

ax.set_xlabel('Year')
ax.set_ylabel('Number of Occurrences')
ax.set_title('Most Occurred Disaster in Each Year')
ax.legend()
plt.show()

In [None]:
# Most frequent disaster type in each country in each year.

# Count the records for every (year, country, disaster_type) combination.
grouped_data = disasters.groupby(['year', 'country', 'disaster_type']).size().reset_index(name='occurrences')

# Keep the rows whose count equals the maximum of their (year, country) group;
# ties keep every tied type. The string alias 'max' replaces the builtin `max`,
# which is deprecated as a transform argument in pandas 2.1+.
idx = grouped_data.groupby(['year', 'country'])['occurrences'].transform('max') == grouped_data['occurrences']
most_occurred_per_country_year = grouped_data[idx]

In [None]:
import geopandas as gpd
import plotly.express as px

# Count the records for every (year, country, disaster_type) combination.
grouped_data = disasters.groupby(['year', 'country', 'disaster_type']).size().reset_index(name='occurrences')

# Identify the most frequent disaster type per (year, country); ties keep every
# tied type. The string alias 'max' replaces the builtin `max`, which is
# deprecated as a transform argument in pandas 2.1+.
idx = grouped_data.groupby(['year', 'country'])['occurrences'].transform('max') == grouped_data['occurrences']
most_occurred_per_country_year = grouped_data[idx]

# Load the Natural Earth low-resolution country geometries.
# `gpd.datasets.get_path` was deprecated in GeoPandas 0.14 and raises
# AttributeError from GeoPandas 1.0 onwards, so fall back to the public
# Natural Earth 110m "Admin 0 - Countries" archive (requires network access)
# when the bundled dataset is unavailable.
try:
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
except AttributeError:
    world = gpd.read_file(
        'https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip'
    )
    # The merge below joins on a lower-case 'name' column.
    # NOTE(review): assumes the upstream file names that field 'NAME' - confirm.
    world = world.rename(columns={'NAME': 'name'})

# Draw one choropleth per year, visiting the years in their order of first appearance.
for year, year_rows in most_occurred_per_country_year.groupby('year', sort=False):
    # Attach that year's top disaster types to the country geometries.
    merged_data = world.merge(
        right=year_rows,
        left_on='name',
        right_on='country',
        how='left',
    )

    # Interactive map coloured by disaster type.
    fig = px.choropleth(
        merged_data,
        geojson=merged_data.geometry,
        locations=merged_data.index,
        color='disaster_type',
        projection='natural earth',
        title=f'Most Occurred Disaster Type in Each Country in {year}',
    )
    fig.update_geos(
        showcoastlines=True,
        coastlinecolor="black",
        showland=True,
        landcolor="lightgray",
    )
    fig.show()

In [None]:
# Read the same CSV again into a separate, unmodified frame.
disasters2 = pd.read_csv(
    filepath_or_buffer='1900_2021_DISASTERS.xlsx - emdat data.csv'
)
disasters2

In [None]:
# Missing values per column in the untouched copy of the data.
disasters2.isna().sum()