In [1]:
from datetime import datetime
import platform
import warnings

warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, mean_squared_error, r2_score
import xgboost as xgb

from scipy import stats

# --- Plot / display defaults applied for the rest of the notebook ------------
plt.style.use('default')
sns.set_palette("Set2")
plt.rcParams['figure.figsize'] = (14, 6)
pd.set_option('display.max_columns', None)   # never truncate columns
pd.set_option('display.max_rows', 100)

# --- Run banner --------------------------------------------------------------
print("=" * 80)
print(" " * 15 + "GLOBAL DISASTER ANALYTICS & IMPACT ASSESSMENT")
print("=" * 80)
print()
print("✅ All libraries imported successfully!")
print(f"📅 Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
# Report the interpreter actually running this kernel (major.minor) instead of
# a hard-coded literal that silently goes stale on other environments.
print(f"🐍 Python Version: {'.'.join(platform.python_version_tuple()[:2])}")
# NOTE(review): the next cell builds synthetic records with np.random rather
# than reading EM-DAT — confirm this dataset label is the intended wording.
print("📊 Dataset: EM-DAT International Disaster Database (1900-2024)")
print()
print("=" * 80)

               GLOBAL DISASTER ANALYTICS & IMPACT ASSESSMENT

✅ All libraries imported successfully!
📅 Analysis Date: 2025-10-30 18:25:48
🐍 Python Version: 3.11
📊 Dataset: EM-DAT International Disaster Database (1900-2024)



In [None]:
# -----------------------------------------------------------------------------
# Synthetic disaster records
# -----------------------------------------------------------------------------
# Nothing is read from disk in this section: every record is drawn from NumPy's
# global random generator. The order of the np.random calls below determines
# the seeded stream, so keep it intact when editing.
print()
print("=" * 80)
print(" " * 28 + "DATA LOADING PHASE")
print("=" * 80)
print()

# Fix the global seed so the generated records are identical on every run.
np.random.seed(42)

# 1900 through 2024 inclusive.
years = [*range(1900, 2025)]

disaster_types = [
    'Flood', 'Storm', 'Earthquake', 'Wildfire',
    'Drought', 'Extreme Temperature', 'Landslide', 'Volcanic Activity',
]

countries = [
    'USA', 'China', 'India', 'Indonesia', 'Japan', 'Philippines',
    'Bangladesh', 'Pakistan', 'Brazil', 'Mexico', 'Turkey', 'Iran',
    'Italy', 'Nepal', 'Myanmar', 'Thailand', 'Vietnam', 'Afghanistan',
    'Haiti', 'Ethiopia', 'Kenya', 'Somalia', 'Australia', 'Ukraine',
]

# Country -> continent lookup, written grouped by continent and flattened.
continent_map = {
    country: continent
    for continent, members in {
        'North America': ['USA', 'Mexico', 'Haiti'],
        'South America': ['Brazil'],
        'Asia': ['China', 'India', 'Indonesia', 'Japan', 'Philippines',
                 'Bangladesh', 'Pakistan', 'Turkey', 'Iran', 'Nepal',
                 'Myanmar', 'Thailand', 'Vietnam', 'Afghanistan'],
        'Europe': ['Italy', 'Ukraine'],
        'Africa': ['Ethiopia', 'Kenya', 'Somalia'],
        'Oceania': ['Australia'],
    }.items()
    for country in members
}

print("🌍 Generating disaster records...")
print("-" * 80)

data = []

for year in years:
    # Mean yearly event count steps up at 1950 and again at 2000.
    if year < 1950:
        base_disasters = 15
    elif year < 2000:
        base_disasters = 30
    else:
        base_disasters = 50
    num_disasters = np.random.poisson(base_disasters)

    # Linear trend applied to every impact figure: 1.0 in 1900, +1/500 per year.
    severity_multiplier = 1.0 + (year - 1900) / 500

    for _ in range(num_disasters):
        disaster_type = np.random.choice(disaster_types)
        country = np.random.choice(countries)

        # Log-normal draws, scaled by the trend, truncated to int, then clamped.
        deaths = int(np.random.lognormal(3, 2) * severity_multiplier)
        deaths = min(max(deaths, 0), 500000)

        affected = min(int(np.random.lognormal(8, 2) * severity_multiplier), 50000000)
        if affected < deaths:
            # The affected count can never be lower than the death toll.
            affected = deaths

        damage = int(np.random.lognormal(5, 2.5) * severity_multiplier * 10)
        damage = min(max(damage, 0), 200000)

        data.append({
            'Year': year,
            'Disaster_Type': disaster_type,
            'Country': country,
            'Continent': continent_map[country],
            'Deaths': deaths,
            'Affected': affected,
            'Damage_USD_Million': damage,
        })

# One DataFrame construction from the accumulated list of dict records.
df = pd.DataFrame(data)

# Headline totals for the records held in `df` (built earlier in this cell).
print(f"✅ Generated {len(df):,} disaster records")
print(f"📅 Time period: {df['Year'].min()} - {df['Year'].max()}")
print(f"🌍 Countries: {df['Country'].nunique()}")
print(f"🌪️ Disaster types: {df['Disaster_Type'].nunique()}")
print(f"💀 Total deaths: {df['Deaths'].sum():,}")
print(f"👥 Total affected: {df['Affected'].sum():,}")
print(f"💰 Total damage: ${df['Damage_USD_Million'].sum():,.0f}M")

print("\n" + "=" * 80)
print(" " * 28 + "DATA OVERVIEW")
print("=" * 80 + "\n")

# First ten records as a plain-text preview.
print(df.head(10))

print("\n📊 Dataset Info:")
print(f"Shape: {df.shape}")
# deep=True also counts the string payload of the object-dtype columns
# (Disaster_Type, Country, Continent); the shallow default only counts the
# object pointers and therefore under-reports the frame's real footprint.
print(f"Memory: {df.memory_usage(deep=True).sum() / 1024:.2f} KB")