In [None]:
# COVID-19 DASHBOARD - Summary Overview
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
# Keep the dashboard output readable.
# NOTE: this silences *every* warning, including library deprecation notices.
warnings.filterwarnings('ignore')

# Set up styling.
# The 'seaborn-v0_8' style sheet name was introduced in matplotlib 3.6; older
# releases ship the same sheet as 'seaborn'. Use whichever one is installed
# instead of failing on the first line of the dashboard.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")
print("📊 COVID-19 SUMMARY DASHBOARD")
print("=" * 50)

# Location of the raw dataset, relative to the current working directory.
DATA_PATH = '../data/raw/covid_data.csv'

try:
    # Load data with error handling. The summary prints stay inside the try so
    # that a missing 'date' or 'country' column is reported like any other
    # load error instead of surfacing as a raw traceback.
    df = pd.read_csv(DATA_PATH)
    df['date'] = pd.to_datetime(df['date'])

    print("✅ Data loaded successfully!")
    print(f"📅 Dataset range: {df['date'].min().date()} to {df['date'].max().date()}")
    print(f"🌍 Countries: {df['country'].nunique()}")

except FileNotFoundError:
    # Report the exact path that was tried so the hint is actionable.
    print(f"❌ Error: Data file not found. Make sure '{DATA_PATH}' exists.")
    print("💡 Run create_sample_data.py first if you haven't created the data yet.")
    # `exit()` is a helper injected by the `site` module for interactive use and
    # is not meant for programs; raising SystemExit is the portable way to stop,
    # and the non-zero status marks the run as failed.
    raise SystemExit(1)
except Exception as e:
    # Top-level boundary: anything else (parse errors, missing columns, ...)
    # is reported and stops the dashboard, since nothing below can run without df.
    print(f"❌ Error loading data: {e}")
    raise SystemExit(1)

# Calculate latest data: keep only the rows stamped with the most recent date.
latest_date = df['date'].max()
latest_data = df[df['date'] == latest_date].copy()

# Calculate metrics (both expressed as a percentage of confirmed cases).
# A zero denominator would give NaN for 0/0 but +inf for x/0, and fillna(0)
# only catches the former. Masking zeros to NaN first makes both cases end up
# as 0 instead of leaking inf into the rankings and histograms.
confirmed_nonzero = latest_data['confirmed'].mask(latest_data['confirmed'] == 0)
latest_data['mortality_rate'] = (latest_data['deaths'] / confirmed_nonzero * 100).fillna(0)
latest_data['recovery_rate'] = (latest_data['recovered'] / confirmed_nonzero * 100).fillna(0)

# Global totals: a Series indexed by column name holding the sum over all rows.
global_totals = latest_data[['confirmed', 'deaths', 'recovered', 'active']].sum()

# KEY METRICS DISPLAY
# Prints the headline totals for the latest date, then the global mortality
# and recovery percentages derived from those totals.
section_rule = "=" * 50
print("\n" + section_rule)
print("🌍 GLOBAL OVERVIEW")
print(section_rule)

confirmed_sum = global_totals['confirmed']
deaths_sum = global_totals['deaths']
recovered_sum = global_totals['recovered']
active_sum = global_totals['active']

print(f"📅 As of: {latest_date.date()}")
print(f"✅ Total Countries with Data: {len(latest_data)}")
print(f"🦠 Total Confirmed Cases: {confirmed_sum:,.0f}")
print(f"💀 Total Deaths: {deaths_sum:,.0f}")
print(f"💚 Total Recovered: {recovered_sum:,.0f}")
print(f"🟡 Active Cases: {active_sum:,.0f}")

# Both rates fall back to 0 when there are no confirmed cases to divide by.
if confirmed_sum > 0:
    global_mortality = deaths_sum / confirmed_sum * 100
    global_recovery = recovered_sum / confirmed_sum * 100
else:
    global_mortality = 0
    global_recovery = 0

print(f"📈 Global Mortality Rate: {global_mortality:.2f}%")
print(f"📈 Global Recovery Rate: {global_recovery:.2f}%")

# TOP COUNTRIES SUMMARY
# Lists the five rows with the most confirmed cases on the latest date.
top5_rule = "=" * 50
print("\n" + top5_rule)
print("🏆 TOP 5 COUNTRIES")
print(top5_rule)

top_countries = latest_data.nlargest(5, 'confirmed')[['country', 'confirmed', 'deaths', 'mortality_rate']]
# Plain tuples come back in the column order selected above.
for country_name, case_count, death_count, death_pct in top_countries.itertuples(index=False, name=None):
    print(f"   {country_name}: {case_count:,.0f} cases, {death_count:,.0f} deaths ({death_pct:.2f}%)")

# COMPREHENSIVE DASHBOARD VISUALIZATION
viz_rule = "=" * 50
print("\n" + viz_rule)
print("📊 DASHBOARD VISUALIZATIONS")
print(viz_rule)

# One figure holding a 2x3 grid of panels; later plots index into `axes`.
fig, axes = plt.subplots(2, 3, figsize=(20, 12))

# Plot 1: Global timeline
# Sum the three counters over all rows that share a date.
global_daily = df.groupby('date')[['confirmed', 'deaths', 'recovered']].sum().reset_index()

timeline_ax = axes[0, 0]
timeline_series = (
    ('confirmed', 'Confirmed', 'blue'),
    ('deaths', 'Deaths', 'red'),
    ('recovered', 'Recovered', 'green'),
)
for column_name, legend_label, line_color in timeline_series:
    timeline_ax.plot(global_daily['date'], global_daily[column_name],
                     label=legend_label, linewidth=2, color=line_color)
timeline_ax.set_title('Global COVID-19 Timeline', fontweight='bold', fontsize=14)
timeline_ax.set_ylabel('Cases')
timeline_ax.legend()
timeline_ax.tick_params(axis='x', rotation=45)
timeline_ax.grid(True, alpha=0.3)

# Plot 2: Top 10 countries by confirmed cases
top_10_confirmed = latest_data.nlargest(10, 'confirmed')
confirmed_ax = axes[0, 1]
confirmed_ax.barh(top_10_confirmed['country'], top_10_confirmed['confirmed'], color='lightblue')
confirmed_ax.set_title('Top 10 Countries - Confirmed Cases', fontweight='bold', fontsize=14)
confirmed_ax.set_xlabel('Confirmed Cases')
# Label each bar just past its end (1% of the bar length). ',.0f' matches the
# number format of the printed totals and avoids a trailing '.0' when pandas
# parsed the column as float.
for bar_pos, case_count in enumerate(top_10_confirmed['confirmed']):
    confirmed_ax.text(case_count + case_count * 0.01, bar_pos, f'{case_count:,.0f}',
                      va='center', fontsize=9)

def _plot_rate_histogram(ax, rates, title, xlabel, bar_color, mean_color):
    """Draw a 20-bin histogram of *rates* on *ax* with a dashed line at the mean.

    Missing values are dropped before plotting. When nothing is left to plot
    the mean line and legend are skipped, because the mean would be NaN and
    the legend would read 'Mean: nan%'.
    """
    rates = rates.dropna()
    ax.hist(rates, bins=20, alpha=0.7, color=bar_color, edgecolor='black')
    if not rates.empty:
        mean_rate = rates.mean()  # computed once, used for both line and label
        ax.axvline(mean_rate, color=mean_color, linestyle='--', linewidth=2,
                   label=f'Mean: {mean_rate:.2f}%')
        ax.legend()
    ax.set_title(title, fontweight='bold', fontsize=14)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Number of Countries')
    ax.grid(True, alpha=0.3)


# Only rows with more than 1000 confirmed cases are included, so that rates
# computed from tiny case counts do not skew the distributions.
significant_countries = latest_data[latest_data['confirmed'] > 1000]

# Plot 3: Mortality rate distribution
_plot_rate_histogram(axes[0, 2], significant_countries['mortality_rate'],
                     'Mortality Rate Distribution', 'Mortality Rate (%)',
                     bar_color='red', mean_color='darkred')

# Plot 4: Recovery rate distribution
_plot_rate_histogram(axes[1, 0], significant_countries['recovery_rate'],
                     'Recovery Rate Distribution', 'Recovery Rate (%)',
                     bar_color='green', mean_color='darkgreen')

# Plot 5: Mortality vs Recovery scatter
scatter_ax = axes[1, 1]
# Marker area grows with the confirmed count. The floor keeps rows with few
# confirmed cases from shrinking to a sub-point dot that cannot be seen.
marker_sizes = (significant_countries['confirmed'] / 10000).clip(lower=5)
# `cmap` only takes effect when colour values are supplied through `c`;
# without them matplotlib ignores the colormap. Colour by confirmed cases so
# the requested 'viridis' map is actually applied.
scatter = scatter_ax.scatter(significant_countries['mortality_rate'],
                             significant_countries['recovery_rate'],
                             s=marker_sizes,
                             c=significant_countries['confirmed'],
                             alpha=0.6, cmap='viridis')
# A colorbar is only meaningful when at least one point was drawn.
if not significant_countries.empty:
    fig.colorbar(scatter, ax=scatter_ax, label='Confirmed Cases')
scatter_ax.set_xlabel('Mortality Rate (%)')
scatter_ax.set_ylabel('Recovery Rate (%)')
scatter_ax.set_title('Mortality vs Recovery Rate', fontweight='bold', fontsize=14)
scatter_ax.grid(True, alpha=0.3)

# Plot 6: Active cases by country (top 10)
top_10_active = latest_data.nlargest(10, 'active')
active_ax = axes[1, 2]
active_ax.barh(top_10_active['country'], top_10_active['active'], color='orange')
active_ax.set_title('Top 10 Countries - Active Cases', fontweight='bold', fontsize=14)
active_ax.set_xlabel('Active Cases')
# Label each bar just past its end (1% of the bar length). ',.0f' matches the
# number format of the printed totals and avoids a trailing '.0' when pandas
# parsed the column as float.
for bar_pos, active_count in enumerate(top_10_active['active']):
    active_ax.text(active_count + active_count * 0.01, bar_pos, f'{active_count:,.0f}',
                   va='center', fontsize=9)

# Fit all six panels into the figure without overlap, then render it.
plt.tight_layout()
plt.show()

# KEY INSIGHTS SECTION
insight_rule = "=" * 50
print("\n" + insight_rule)
print("💡 KEY INSIGHTS")
print(insight_rule)

# Insight 1: Global trends
total_cases = global_totals['confirmed']
total_deaths = global_totals['deaths']
print(f"1. Global Impact: {total_cases:,.0f} total cases with {total_deaths:,.0f} deaths worldwide")

# Insights 2 and 3 both rank only rows with more than 10,000 confirmed cases.
large_outbreaks = latest_data[latest_data['confirmed'] > 10000]

# Insight 2: Country performance
best_recovery = large_outbreaks.nlargest(1, 'recovery_rate')
if not best_recovery.empty:
    recovery_leader = best_recovery.iloc[0]
    print(f"2. Best Recovery: {recovery_leader['country']} has the highest recovery rate ({recovery_leader['recovery_rate']:.1f}%)")

# Insight 3: Mortality analysis
high_mortality = large_outbreaks.nlargest(1, 'mortality_rate')
if not high_mortality.empty:
    mortality_leader = high_mortality.iloc[0]
    print(f"3. Highest Mortality: {mortality_leader['country']} has the highest mortality rate ({mortality_leader['mortality_rate']:.1f}%)")

# Insight 4: Data quality
# Pick the coverage message from the number of rows on the latest date.
countries_with_data = len(latest_data)
if countries_with_data > 100:
    coverage_message = "4. Data Coverage: Excellent global coverage with data from many countries"
elif countries_with_data > 50:
    coverage_message = "4. Data Coverage: Good global coverage"
else:
    coverage_message = "4. Data Coverage: Limited country coverage - consider expanding dataset"
print(coverage_message)

# RECOMMENDATIONS
# Static closing section: a fixed list of follow-up suggestions and a banner.
closing_rule = "=" * 50
print("\n" + closing_rule)
print("🎯 RECOMMENDATIONS")
print(closing_rule)

recommendation_lines = (
    "• 📈 Monitor countries with high mortality rates for healthcare insights",
    "• 🌍 Analyze recovery rates to identify effective treatment protocols",
    "• 🔍 Investigate countries with outstanding performance metrics",
    "• 📊 Continue tracking global trends for pandemic response planning",
)
for recommendation in recommendation_lines:
    print(recommendation)

print("\n" + closing_rule)
print("✅ DASHBOARD COMPLETED SUCCESSFULLY!")
print(closing_rule)

# Optional: Save summary data
# Best-effort export of the per-country table for the latest date; a failure
# here is reported but must not abort the dashboard.
import os

SUMMARY_PATH = '../data/processed/dashboard_summary.csv'

try:
    summary_data = latest_data[['country', 'confirmed', 'deaths', 'recovered', 'active', 'mortality_rate', 'recovery_rate']]
    # to_csv does not create missing directories, so make sure the target
    # folder exists; otherwise the save fails on a fresh checkout.
    os.makedirs(os.path.dirname(SUMMARY_PATH), exist_ok=True)
    summary_data.to_csv(SUMMARY_PATH, index=False)
    # Report the path that was actually written.
    print(f"\n💾 Summary data saved to: {SUMMARY_PATH}")
except Exception as e:
    # Deliberately broad: the export is optional, so any failure only warns.
    print(f"\n⚠️  Could not save summary data: {e}")

FileNotFoundError: [Errno 2] No such file or directory: '..data/raw/covid_data.csv'