In [4]:
import platform
import warnings
from datetime import datetime

# Silence library warnings before the heavier third-party imports run.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, mean_squared_error, r2_score
import xgboost as xgb

from scipy import stats

# --- Plotting / display configuration --------------------------------------
plt.style.use('default')
sns.set_palette("Set2")
plt.rcParams['figure.figsize'] = (14, 6)
pd.set_option('display.max_columns', None)  # never elide columns of wide frames
pd.set_option('display.max_rows', 100)

# --- Run banner --------------------------------------------------------------
# The banner records the environment the notebook was actually executed in, so
# the interpreter version is queried at runtime instead of being hard-coded.
print("=" * 80)
print(" " * 15 + "GLOBAL DISASTER ANALYTICS & IMPACT ASSESSMENT")
print("=" * 80)
print()
print("✅ All libraries imported successfully!")
print(f"📅 Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"🐍 Python Version: {platform.python_version()}")
# The records analysed below are generated with np.random in the data-loading
# cell, so the banner labels them as synthetic rather than as real EM-DAT data.
print("📊 Dataset: Synthetic (randomly generated) disaster records, 1900-2024")
print()
print("=" * 80)

               GLOBAL DISASTER ANALYTICS & IMPACT ASSESSMENT

✅ All libraries imported successfully!
📅 Analysis Date: 2025-10-30 18:30:49
🐍 Python Version: 3.11
📊 Dataset: EM-DAT International Disaster Database (1900-2024)



In [None]:
print("\n" + "=" * 80)
print(" " * 28 + "DATA LOADING PHASE")
print("=" * 80 + "\n")

# The dataset is simulated: every record is drawn from NumPy's legacy global
# random generator, seeded here so that a fresh run yields the same frame.
np.random.seed(42)

years = list(range(1900, 2025))

disaster_types = ['Flood', 'Storm', 'Earthquake', 'Wildfire', 'Drought', 
                  'Extreme Temperature', 'Landslide', 'Volcanic Activity']

countries = ['USA', 'China', 'India', 'Indonesia', 'Japan', 'Philippines', 
             'Bangladesh', 'Pakistan', 'Brazil', 'Mexico', 'Turkey', 'Iran',
             'Italy', 'Nepal', 'Myanmar', 'Thailand', 'Vietnam', 'Afghanistan',
             'Haiti', 'Ethiopia', 'Kenya', 'Somalia', 'Australia', 'Ukraine']

continent_map = {
    'USA': 'North America', 'Mexico': 'North America', 'Haiti': 'North America',
    'Brazil': 'South America',
    'China': 'Asia', 'India': 'Asia', 'Indonesia': 'Asia', 'Japan': 'Asia',
    'Philippines': 'Asia', 'Bangladesh': 'Asia', 'Pakistan': 'Asia',
    'Turkey': 'Asia', 'Iran': 'Asia', 'Nepal': 'Asia', 'Myanmar': 'Asia',
    'Thailand': 'Asia', 'Vietnam': 'Asia', 'Afghanistan': 'Asia',
    'Italy': 'Europe', 'Ukraine': 'Europe',
    'Ethiopia': 'Africa', 'Kenya': 'Africa', 'Somalia': 'Africa',
    'Australia': 'Oceania'
}

# Fail early (with a readable message) if a country was added to `countries`
# without a continent; otherwise the loop below would die on a bare KeyError.
unmapped_countries = sorted(set(countries) - set(continent_map))
assert not unmapped_countries, f"Countries missing from continent_map: {unmapped_countries}"

# Upper bounds used to clamp the heavy-tailed lognormal draws.
MAX_DEATHS = 500_000
MAX_AFFECTED = 50_000_000
MAX_DAMAGE_USD_MILLION = 200_000

data = []

print("Generating disaster records...")
print("-" * 80)

for year in years:
    # Expected number of events per year steps up in 1950 and again in 2000.
    base_disasters = 15 if year < 1950 else (30 if year < 2000 else 50)
    num_disasters = np.random.poisson(base_disasters)

    for _ in range(num_disasters):
        # NOTE: the order of the random draws below determines the generated
        # data for a given seed; do not reorder them.
        disaster_type = np.random.choice(disaster_types)
        country = np.random.choice(countries)

        # Impacts are scaled up linearly with time: 1.0 in 1900, ~1.25 in 2024.
        severity_multiplier = 1.0 + (year - 1900) / 500

        deaths = int(np.random.lognormal(3, 2) * severity_multiplier)
        deaths = max(0, min(deaths, MAX_DEATHS))

        # People affected can never be fewer than the number of deaths.
        affected = int(np.random.lognormal(8, 2) * severity_multiplier)
        affected = max(deaths, min(affected, MAX_AFFECTED))

        damage = int(np.random.lognormal(5, 2.5) * severity_multiplier * 10)
        damage = max(0, min(damage, MAX_DAMAGE_USD_MILLION))

        data.append({
            'Year': year,
            'Disaster_Type': disaster_type,
            'Country': country,
            'Continent': continent_map[country],
            'Deaths': deaths,
            'Affected': affected,
            'Damage_USD_Million': damage
        })

# Build the frame once from the list of records (no row-by-row concat).
df = pd.DataFrame(data)

# Sanity-check the invariants the generation logic is meant to guarantee.
assert len(df) == len(data)
assert (df['Affected'] >= df['Deaths']).all()

print(f"Generated {len(df):,} disaster records")
print(f"Time period: {df['Year'].min()} - {df['Year'].max()}")
print(f"Countries: {df['Country'].nunique()}")
print(f"Disaster types: {df['Disaster_Type'].nunique()}")
print(f"Total deaths: {df['Deaths'].sum():,}")
print(f"Total affected: {df['Affected'].sum():,}")
print(f"Total damage: ${df['Damage_USD_Million'].sum():,.0f}M")

print("\n" + "=" * 80)
print(" " * 28 + "DATA OVERVIEW")
print("=" * 80 + "\n")

print(df.head(10))

print("\n Dataset Info:")
print(f"Shape: {df.shape}")
# deep=True counts the string payload of the object columns (Disaster_Type,
# Country, Continent); the shallow default only counts the pointers.
print(f"Memory: {df.memory_usage(deep=True).sum() / 1024:.2f} KB")


                            DATA LOADING PHASE

🌍 Generating disaster records...
--------------------------------------------------------------------------------
✅ Generated 3,495 disaster records
📅 Time period: 1900 - 2024
🌍 Countries: 24
🌪️ Disaster types: 8
💀 Total deaths: 713,927
👥 Total affected: 101,956,280
💰 Total damage: $56,727,531M

                            DATA OVERVIEW

   Year        Disaster_Type      Country      Continent  Deaths  Affected  \
0  1900            Landslide        Haiti  North America      16       465   
1  1900           Earthquake      Somalia         Africa       8      3497   
2  1900  Extreme Temperature  Philippines           Asia     374     64694   
3  1900    Volcanic Activity     Thailand           Asia      67      1804   
4  1900  Extreme Temperature        Haiti  North America      26      2781   
5  1900            Landslide        Kenya         Africa      16      1657   
6  1900                Flood        Nepal           Asia       6 

In [None]:
# Text report over `df`: summary statistics, category counts, the ten worst
# events by deaths and by damage, and a missing-value check.
heavy_rule = "=" * 80
light_rule = "-" * 80
event_columns = ['Year', 'Disaster_Type', 'Country', 'Deaths', 'Damage_USD_Million']

print(f"\n{heavy_rule}")
print(f"{' ' * 25}EXPLORATORY DATA ANALYSIS")
print(f"{heavy_rule}\n")

# Each entry pairs a section heading with a callable producing its body; the
# callables are evaluated lazily, in order, right before being printed.
report_sections = [
    ("BASIC STATISTICS", lambda: df.describe()),
    ("\n DISASTERS BY TYPE", lambda: df['Disaster_Type'].value_counts()),
    ("\n TOP 10 COUNTRIES BY DISASTER COUNT", lambda: df['Country'].value_counts().head(10)),
    ("\n DEADLIEST DISASTERS", lambda: df.nlargest(10, 'Deaths')[event_columns]),
    ("\n MOST EXPENSIVE DISASTERS", lambda: df.nlargest(10, 'Damage_USD_Million')[event_columns]),
]

for heading, build_body in report_sections:
    print(heading)
    print(light_rule)
    print(build_body())

print("\n MISSING VALUES")
print(light_rule)
missing = df.isnull().sum()
if missing.sum() > 0:
    # Only list the columns that actually contain nulls.
    print(missing[missing > 0])
else:
    print("No missing values!")

print(f"\n{heavy_rule}")


                         EXPLORATORY DATA ANALYSIS

📊 BASIC STATISTICS
--------------------------------------------------------------------------------
              Year        Deaths      Affected  Damage_USD_Million
count  3495.000000   3495.000000  3.495000e+03         3495.000000
mean   1977.917310    204.270959  2.917204e+04        16231.053219
std      34.148105   1435.156009  2.179949e+05        40365.657562
min    1900.000000      0.000000  5.000000e+00            0.000000
25%    1955.000000      6.000000  9.640000e+02          323.000000
50%    1984.000000     23.000000  3.528000e+03         1614.000000
75%    2007.000000     93.500000  1.354700e+04         9236.000000
max    2024.000000  56724.000000  1.117384e+07       200000.000000

🌪️ DISASTERS BY TYPE
--------------------------------------------------------------------------------
Disaster_Type
Volcanic Activity      456
Extreme Temperature    450
Landslide              445
Wildfire               441
Storm              

In [None]:
# Section banner. The leading "\n" is a newline escape (it was previously
# written as "/n", which printed the two literal characters).
print("\n" + "=" * 80)
print(" " * 30 + "VISUALIZATIONS")
print("=" * 80 + "\n")

# --- Figure 1: number of recorded disasters per year -------------------------
yearly_counts = df.groupby('Year').size().reset_index(name='Count')

fig1 = go.Figure()
fig1.add_trace(go.Scatter(
    x=yearly_counts['Year'],
    y=yearly_counts['Count'],
    mode='lines+markers',
    name='Annual Disasters',
    line=dict(color='#e74c3c', width=2),
    fill='tozeroy',
    fillcolor='rgba(231, 76, 60, 0.2)'
))

fig1.update_layout(
    title='Global Disaster Frequency (1900-2024)',
    xaxis_title='Year',
    yaxis_title='Number of Disasters',
    # 'plotly_white' is the registered template name; the previous value
    # 'plotly_while' is rejected by Plotly with a ValueError.
    template='plotly_white',
    height=500,
    hovermode='x'
)
fig1.show()

# --- Figure 2: total deaths per year ------------------------------------------
yearly_death = df.groupby('Year')['Deaths'].sum().reset_index()

fig2 = go.Figure()
fig2.add_trace(go.Bar(
    x=yearly_death['Year'],
    y=yearly_death['Deaths'],
    marker_color='#c0392b',
    name='Deaths'
))

fig2.update_layout(
    title='Total Deaths from Disasters per Year',
    xaxis_title='Year',
    yaxis_title='Total Deaths',
    template='plotly_white',
    height=500
)
fig2.show()

# --- Figure 3: share of each disaster type (donut chart) ----------------------
type_counts = df['Disaster_Type'].value_counts()

fig3 = go.Figure(data=[go.Pie(
    labels=type_counts.index,
    values=type_counts.values,
    hole=0.4,  # a non-zero hole turns the pie into a donut
    marker=dict(colors=px.colors.qualitative.Set2)
)])

fig3.update_layout(
    title='Distribution of Disaster Types (1900-2024)',
    height=500
)
fig3.show()

print("Temporal visualizations created!")