# India Disaster Vulnerability Analysis - Visualization Dashboard

This notebook provides comprehensive visualizations of the integrated disaster risk data for Indian districts, including:
- Geographic distribution of disaster risks
- Earthquake event analysis
- Population exposure mapping
- Multi-hazard vulnerability indices
- Statistical insights and rankings

## 1. Import Required Libraries

In [None]:
# Import libraries for data manipulation and visualization
import os
import warnings
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots
# Silence every warning category for the rest of the session so cell output
# stays readable. NOTE: this hides deprecation notices as well; comment the
# line out when debugging unexpected plot or data behaviour.
warnings.filterwarnings('ignore')

# Set visualization styles
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'.
# Pick the first name this installation knows about (falling back to the
# default style) so the cell does not raise OSError on older Matplotlib.
PLOT_STYLE = next(
    (style_name
     for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
     if style_name in plt.style.available),
    'default',
)
plt.style.use(PLOT_STYLE)
sns.set_palette("husl")

print("✅ All libraries imported successfully!")

## 2. Load Integrated Datasets

In [None]:
# Load the district GeoJSON layers and the CSV tables used by the rest of the
# notebook, then print a short summary of what was read.

# Folder that holds the CSV inputs. Set the SPATIAL_DISASTER_DATA_DIR
# environment variable before starting the kernel to point at another copy of
# the data; the fallback is the location the notebook was originally run from.
DATA_DIR = Path(os.environ.get(
    'SPATIAL_DISASTER_DATA_DIR',
    'C:\\Users\\tanwa\\Downloads\\spatialDisaster\\spatialDisaster',
))

# Load the integrated geospatial data (path is relative to the working directory)
districts_gdf = gpd.read_file('india_disaster_risk_integrated.geojson')

# Load the simplified version
districts_simple = gpd.read_file('india_disaster_risk_simplified.geojson')

# Load CSV tables
# NOTE(review): `vulnerability_rankings` is read from the population/centroid
# table, while the ranking cells look for an 'MHVI' column in it — confirm this
# is the intended file.
vulnerability_rankings = pd.read_csv(DATA_DIR / 'district wise population and centroids.csv')
earthquake_events = pd.read_csv(DATA_DIR / 'Indian_earthquake_data.csv')

print(f"📊 Loaded {len(districts_gdf)} districts")
print(f"📊 Loaded {len(earthquake_events)} earthquake events")
print(f"\nDataset columns: {list(districts_gdf.columns)[:10]}...")
print("\nFirst few rows of vulnerability rankings:")
# Last expression of the cell: rendered as the cell's rich output
vulnerability_rankings.head()

In [None]:
# Preview the first rows of the earthquake events table (shown as the cell output)
earthquake_events.head()

## 3. Data Overview and Summary Statistics

In [None]:
# Print describe() tables for whichever risk, earthquake and population
# columns are present in the districts layer.
banner = "=" * 80
print(banner)
print("DATASET SUMMARY STATISTICS")
print(banner)

# Keep only the expected columns that the loaded layer actually contains
available_cols = districts_gdf.columns.tolist()
risk_cols = [name for name in ('MHVI', 'eq_hazard_score', 'exposure_score')
             if name in available_cols]
eq_cols = [name for name in ('eq_count_total', 'eq_magnitude_max', 'eq_magnitude_mean')
           if name in available_cols]
pop_cols = [name for name in ('population_2011', 'population_density')
            if name in available_cols]

# One heading + describe() table per non-empty column group
for heading, group_cols in (
    ("\n📊 Risk Indices Summary:", risk_cols),
    ("\n🌍 Earthquake Statistics:", eq_cols),
    ("\n👥 Population Statistics:", pop_cols),
):
    if not group_cols:
        continue
    print(heading)
    print(districts_gdf[group_cols].describe())

## 4. Geographic Visualization - Interactive Maps

In [None]:
# Interactive choropleth of the Multi-Hazard Vulnerability Index (MHVI), one polygon per district
if 'MHVI' not in districts_gdf.columns:
    print("⚠️ MHVI column not found in dataset")
else:
    # Extra hover fields are attached only when every one of them is present
    mhvi_hover = ['STATE', 'population_2011', 'eq_count_total']
    if any(field not in districts_gdf.columns for field in mhvi_hover):
        mhvi_hover = None

    fig = px.choropleth(
        districts_gdf,
        geojson=districts_gdf.geometry,
        locations=districts_gdf.index,
        color='MHVI',
        hover_name='DISTRICT',
        hover_data=mhvi_hover,
        color_continuous_scale='Reds',
        title='Multi-Hazard Vulnerability Index (MHVI) Across India Districts',
        labels={'MHVI': 'Vulnerability Index'},
    )

    # Fit the view to the plotted locations; the geo layer itself is not drawn
    fig.update_geos(fitbounds="locations", visible=False)
    fig.update_layout(height=600, width=1000)
    fig.show()

In [None]:
# Population density choropleth, coloured per district
if 'population_density' in districts_gdf.columns:
    # Hover extras are all-or-nothing: drop them if any column is absent
    density_hover = ['STATE', 'population_2011']
    density_hover_ok = all(name in districts_gdf.columns for name in density_hover)

    density_map_options = dict(
        geojson=districts_gdf.geometry,
        locations=districts_gdf.index,
        color='population_density',
        hover_name='DISTRICT',
        hover_data=density_hover if density_hover_ok else None,
        color_continuous_scale='Viridis',
        title='Population Density by District',
        labels={'population_density': 'People per sq km'},
    )
    fig = px.choropleth(districts_gdf, **density_map_options)

    fig.update_geos(fitbounds="locations", visible=False)
    fig.update_layout(height=600, width=1000)
    fig.show()
else:
    print("⚠️ Population density column not found")

In [None]:
# Choropleth of the total number of earthquake events recorded per district
district_columns = set(districts_gdf.columns)

if 'eq_count_total' not in district_columns:
    print("⚠️ Earthquake count column not found")
else:
    # Show state and maximum magnitude on hover only if both columns exist
    if {'STATE', 'eq_magnitude_max'} <= district_columns:
        eq_count_hover = ['STATE', 'eq_magnitude_max']
    else:
        eq_count_hover = None

    fig = px.choropleth(
        districts_gdf,
        geojson=districts_gdf.geometry,
        locations=districts_gdf.index,
        color='eq_count_total',
        hover_name='DISTRICT',
        hover_data=eq_count_hover,
        color_continuous_scale='YlOrRd',
        title='Total Earthquake Events by District',
        labels={'eq_count_total': 'Number of Earthquakes'},
    )

    fig.update_geos(fitbounds="locations", visible=False)
    fig.update_layout(height=600, width=1000)
    fig.show()

## 5. Earthquake Event Analysis

In [None]:
# Interactive scatter map of earthquake events, coloured and sized by magnitude.
# All three columns below are needed by the plot, so all three are checked up
# front (the magnitude column is used for dropna, colour and marker size).
eq_map_required = ['latitude', 'longitude', 'mag']
eq_map_missing = [name for name in eq_map_required if name not in earthquake_events.columns]

if not eq_map_missing:
    # Rows lacking a coordinate or a magnitude cannot be placed or sized
    eq_sample = earthquake_events.dropna(subset=eq_map_required)

    # Optional hover fields: shown only when every one of them is available
    eq_hover = ['DISTRICT', 'depth', 'year']
    if not all(name in eq_sample.columns for name in eq_hover):
        eq_hover = None

    # NOTE(review): marker size is driven directly by 'mag'; this assumes the
    # catalogue holds no negative magnitudes — confirm against the data.
    fig = px.scatter_geo(eq_sample,
                         lat='latitude',
                         lon='longitude',
                         color='mag',
                         size='mag',
                         hover_data=eq_hover,
                         color_continuous_scale='thermal',
                         size_max=15,
                         title='Earthquake Events Distribution (2019-2025)',
                         labels={'mag': 'Magnitude'})

    fig.update_geos(scope='asia', fitbounds="locations")
    fig.update_layout(height=600, width=1000)
    fig.show()
else:
    print(f"⚠️ Required columns not found in earthquake events: {eq_map_missing}")

In [None]:
# Magnitude distribution histogram followed by a few headline statistics
if 'mag' in earthquake_events.columns:
    fig = px.histogram(earthquake_events.dropna(subset=['mag']),
                       x='mag',
                       nbins=50,
                       title='Distribution of Earthquake Magnitudes',
                       labels={'mag': 'Magnitude', 'count': 'Frequency'},
                       color_discrete_sequence=['#E74C3C'])

    # Set the y-axis title explicitly so it reads 'Frequency' regardless of how
    # `labels` is applied to the histogram's implicit count axis.
    fig.update_layout(height=400, width=800, showlegend=False, yaxis_title='Frequency')
    fig.show()

    # Statistics
    magnitudes = earthquake_events['mag']
    print("\n📊 Magnitude Statistics:")
    # Counts every row, including rows whose magnitude is missing
    print(f"Total Events: {len(earthquake_events)}")
    print(f"Mean Magnitude: {magnitudes.mean():.2f}")
    print(f"Max Magnitude: {magnitudes.max():.2f}")
    # The thresholds are inclusive, so the labels say "≥" to match the comparison
    print(f"Events ≥ 5.0: {(magnitudes >= 5.0).sum()}")
    print(f"Events ≥ 6.0: {(magnitudes >= 6.0).sum()}")
else:
    print("⚠️ Magnitude column not found")

In [None]:
# Line chart of how many earthquake events were recorded in each year
if 'year' not in earthquake_events.columns:
    print("⚠️ Year column not found")
else:
    # One row per year with the number of events in that year
    events_per_year = earthquake_events.groupby('year').size()
    yearly_counts = events_per_year.reset_index(name='count')

    fig = px.line(
        yearly_counts,
        x='year',
        y='count',
        markers=True,
        title='Earthquake Frequency Over Time (2019-2025)',
        labels={'year': 'Year', 'count': 'Number of Events'},
    )

    fig.update_traces(line=dict(color='#3498DB', width=3), marker=dict(size=8))
    fig.update_layout(height=400, width=800)
    fig.show()

## 6. Vulnerability Rankings and Comparisons

In [None]:
# Horizontal bar chart of the 20 districts with the highest MHVI
if 'MHVI' not in vulnerability_rankings.columns:
    print("⚠️ MHVI column not found in rankings")
else:
    top_20 = vulnerability_rankings.nlargest(20, 'MHVI')

    fig = px.bar(
        top_20,
        x='MHVI',
        y='DISTRICT',
        orientation='h',
        title='Top 20 Most Vulnerable Districts by MHVI',
        labels={'MHVI': 'Multi-Hazard Vulnerability Index', 'DISTRICT': 'District'},
        color='MHVI',
        color_continuous_scale='Reds',
    )

    # Order the category axis by bar total
    fig.update_layout(height=600, width=900, yaxis=dict(categoryorder='total ascending'))
    fig.show()

In [None]:
# Horizontal bar chart of the 15 states with the highest mean district MHVI
ranking_columns = vulnerability_rankings.columns

if 'STATE' not in ranking_columns or 'MHVI' not in ranking_columns:
    print("⚠️ STATE or MHVI column not found")
else:
    # Mean MHVI per state, highest first, truncated to the top 15
    state_means = vulnerability_rankings.groupby('STATE')['MHVI'].mean()
    state_avg = state_means.sort_values(ascending=False).head(15)

    fig = px.bar(
        x=state_avg.values,
        y=state_avg.index,
        orientation='h',
        title='Top 15 States by Average MHVI',
        labels={'x': 'Average MHVI', 'y': 'State'},
        color=state_avg.values,
        color_continuous_scale='OrRd',
    )

    fig.update_layout(height=500, width=800, yaxis=dict(categoryorder='total ascending'))
    fig.show()

## 7. Multi-Dimensional Analysis with Matplotlib & Seaborn

In [None]:
# Annotated heatmap of pairwise correlations between the risk indicators
risk_indicators = [
    'MHVI', 'eq_hazard_score', 'exposure_score', 'eq_count_total',
    'eq_magnitude_max', 'population_density', 'eq_magnitude_mean',
]
# Restrict to the indicators the districts layer actually provides
available_indicators = [name for name in risk_indicators if name in districts_gdf.columns]

if len(available_indicators) <= 2:
    print("⚠️ Not enough risk indicators available for correlation analysis")
else:
    correlation_data = districts_gdf[available_indicators].corr()

    plt.figure(figsize=(10, 8))
    sns.heatmap(
        correlation_data,
        annot=True,
        cmap='coolwarm',
        center=0,
        square=True,
        linewidths=1,
        cbar_kws={"shrink": 0.8},
    )
    plt.title('Correlation Matrix of Risk Indicators', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()

In [None]:
# Scatter of district population against earthquake count, restricted to
# districts with at least one recorded event
scatter_required = ('population_2011', 'eq_count_total')

if not all(name in districts_gdf.columns for name in scatter_required):
    print("⚠️ Required columns not found for scatter plot")
else:
    plot_data = districts_gdf[districts_gdf['eq_count_total'] > 0].copy()

    # Colour by maximum magnitude when available, otherwise a flat blue
    has_max_magnitude = 'eq_magnitude_max' in plot_data.columns
    if has_max_magnitude:
        point_colors = plot_data['eq_magnitude_max']
    else:
        point_colors = 'blue'

    plt.figure(figsize=(12, 6))
    scatter = plt.scatter(
        plot_data['population_2011'],
        plot_data['eq_count_total'],
        c=point_colors,
        s=100,
        alpha=0.6,
        cmap='YlOrRd',
        edgecolors='black',
        linewidth=0.5,
    )

    plt.xlabel('Population (2011)', fontsize=12, fontweight='bold')
    plt.ylabel('Total Earthquake Count', fontsize=12, fontweight='bold')
    plt.title('Population vs Earthquake Frequency', fontsize=14, fontweight='bold')
    plt.grid(True, alpha=0.3)

    # A colour bar is only drawn when the points are colour-mapped
    if has_max_magnitude:
        cbar = plt.colorbar(scatter)
        cbar.set_label('Max Magnitude', fontsize=10)

    plt.tight_layout()
    plt.show()

In [None]:
# Box plot comparison of vulnerability scores by coastal vs inland districts.
# The coastal flag is translated into explicit 'Inland'/'Coastal' labels and the
# category order is fixed, so the tick labels come from the data itself rather
# than from an assumed tick order.
if 'coastal_district' in districts_gdf.columns and 'MHVI' in districts_gdf.columns:
    coastal_order = ['Inland', 'Coastal']

    # assumes coastal_district is a 0/1 (or boolean) flag — TODO confirm.
    # Missing flags are passed through untouched (na_action='ignore'); any other
    # value becomes NaN and is therefore left out of the plots.
    coastal_labels = districts_gdf['coastal_district'].map(
        lambda flag: 'Coastal' if flag == 1 else ('Inland' if flag == 0 else np.nan),
        na_action='ignore',
    )

    if coastal_labels.notna().any():
        has_eq_counts = 'eq_count_total' in districts_gdf.columns

        # Plain frame holding just the columns the two panels need
        coastal_plot_df = pd.DataFrame({
            'coastal_label': coastal_labels,
            'MHVI': districts_gdf['MHVI'],
        })
        if has_eq_counts:
            coastal_plot_df['eq_count_total'] = districts_gdf['eq_count_total']

        fig, axes = plt.subplots(1, 2, figsize=(14, 5))

        # MHVI comparison
        sns.boxplot(data=coastal_plot_df, x='coastal_label', y='MHVI',
                    order=coastal_order, ax=axes[0], palette='Set2')
        axes[0].set_xlabel('Coastal District', fontsize=11, fontweight='bold')
        axes[0].set_ylabel('MHVI', fontsize=11, fontweight='bold')
        axes[0].set_title('Vulnerability: Coastal vs Inland', fontsize=13, fontweight='bold')

        # Earthquake count comparison
        if has_eq_counts:
            sns.boxplot(data=coastal_plot_df, x='coastal_label', y='eq_count_total',
                        order=coastal_order, ax=axes[1], palette='Set3')
            axes[1].set_xlabel('Coastal District', fontsize=11, fontweight='bold')
            axes[1].set_ylabel('Earthquake Count', fontsize=11, fontweight='bold')
            axes[1].set_title('Earthquake Frequency: Coastal vs Inland', fontsize=13, fontweight='bold')
        else:
            # Nothing to draw on the right: hide the panel instead of showing empty axes
            axes[1].set_visible(False)

        plt.tight_layout()
        plt.show()
    else:
        print("⚠️ coastal_district holds no 0/1 flags; skipping coastal comparison")
else:
    print("⚠️ Required columns not found for coastal comparison")

## 8. Comprehensive Dashboard - Multiple Subplots

In [None]:
# Create a comprehensive 2x2 dashboard.
# Each panel is drawn only when the columns it needs are present; a panel whose
# inputs are missing is left as empty axes.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. Top 10 districts by earthquake count
if 'DISTRICT' in districts_gdf.columns and 'eq_count_total' in districts_gdf.columns:
    top_eq = districts_gdf.nlargest(10, 'eq_count_total')[['DISTRICT', 'eq_count_total']]
    axes[0, 0].barh(top_eq['DISTRICT'], top_eq['eq_count_total'], color='#E67E22')
    axes[0, 0].set_xlabel('Earthquake Count', fontweight='bold')
    axes[0, 0].set_title('Top 10 Districts by Earthquake Count', fontweight='bold', fontsize=12)
    # Largest count at the top of the panel
    axes[0, 0].invert_yaxis()

# 2. Distribution of MHVI
if 'MHVI' in districts_gdf.columns:
    axes[0, 1].hist(districts_gdf['MHVI'].dropna(), bins=30, color='#9B59B6', edgecolor='black', alpha=0.7)
    axes[0, 1].set_xlabel('MHVI', fontweight='bold')
    axes[0, 1].set_ylabel('Frequency', fontweight='bold')
    axes[0, 1].set_title('Distribution of Multi-Hazard Vulnerability Index', fontweight='bold', fontsize=12)
    axes[0, 1].axvline(districts_gdf['MHVI'].mean(), color='red', linestyle='--', linewidth=2, label='Mean')
    axes[0, 1].legend()

# 3. Magnitude categories pie chart
if 'mag' in earthquake_events.columns:
    # Colour is keyed by category so each class keeps its colour no matter how
    # many events fall into it.
    mag_palette = {
        'Minor (<4)': '#52BE80',
        'Moderate (4-5)': '#F39C12',
        'Strong (5-6)': '#E74C3C',
        'Major (>6)': '#8E44AD',
    }
    mag_labels = list(mag_palette)

    # pd.cut uses right-closed intervals by default: (0, 4], (4, 5], (5, 6], (6, 10]
    mag_categories = pd.cut(earthquake_events['mag'],
                            bins=[0, 4, 5, 6, 10],
                            labels=mag_labels)

    # Put the counts back into category order (value_counts sorts by frequency)
    # and drop empty classes, which would otherwise add zero-width wedges.
    mag_counts = mag_categories.value_counts().reindex(mag_labels, fill_value=0)
    mag_counts = mag_counts[mag_counts > 0]

    if not mag_counts.empty:
        axes[1, 0].pie(mag_counts,
                       labels=mag_counts.index,
                       autopct='%1.1f%%',
                       colors=[mag_palette[label] for label in mag_counts.index],
                       startangle=90)
    axes[1, 0].set_title('Earthquake Events by Magnitude Category', fontweight='bold', fontsize=12)

# 4. Population density distribution
if 'population_density' in districts_gdf.columns:
    pop_density = districts_gdf['population_density'].dropna()
    # Remove outliers for better visualization: keep values up to the 95th percentile
    q95 = pop_density.quantile(0.95)
    pop_density_filtered = pop_density[pop_density <= q95]
    axes[1, 1].hist(pop_density_filtered, bins=40, color='#3498DB', edgecolor='black', alpha=0.7)
    axes[1, 1].set_xlabel('Population Density (per sq km)', fontweight='bold')
    axes[1, 1].set_ylabel('Frequency', fontweight='bold')
    axes[1, 1].set_title('Population Density Distribution (95th percentile)', fontweight='bold', fontsize=12)

plt.suptitle('India Disaster Vulnerability - Comprehensive Overview', fontsize=16, fontweight='bold', y=1.00)
plt.tight_layout()
plt.show()