# In [None]:
# COVID-19 MAIN ANALYSIS: Global Trends & Country Comparisons
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Set up styling.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old names, so choose whichever name this
# installation actually ships instead of failing with OSError; fall back to
# Matplotlib's built-in 'default' style if neither is present.
_plot_style = next(
    (name for name in ('seaborn-v0_8', 'seaborn') if name in plt.style.available),
    'default',
)
plt.style.use(_plot_style)
sns.set_palette("husl")

# Print the report banner: title line followed by a 70-character rule.
for banner_line in (
    "🌍 COVID-19 MAIN ANALYSIS: Global Trends & Country Comparisons",
    "=" * 70,
):
    print(banner_line)

# Load data: read the CSV and convert its 'date' column to datetimes so the
# later date-based grouping and plotting operate on real timestamps.
print("📁 Loading data...")
df = pd.read_csv('../data/raw/covid_data.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Report how many rows were read and how many distinct countries they cover.
print(f"✅ Dataset: {df.shape[0]:,} records, {df['country'].nunique()} countries")

# GLOBAL TRENDS ANALYSIS
print("\n" + "="*50)
print("1. GLOBAL TRENDS ANALYSIS")
print("="*50)

# Global aggregation: add up every country's figures for each date.
# groupby() sorts its keys, so the resulting rows are in chronological
# order, which the diff()/rolling() calls below depend on.
global_daily = df.groupby('date').agg({
    'confirmed': 'sum',
    'deaths': 'sum',
    'recovered': 'sum',
    'active': 'sum'
}).reset_index()

# Calculate daily changes and moving averages.
# 'confirmed' and 'deaths' are treated as running totals here: the
# day-over-day difference is used as the count of new cases / new deaths.
# The first row has no predecessor, so its diff is NaN, and rolling(7)
# yields NaN until a full 7-row window is available.
global_daily['new_cases'] = global_daily['confirmed'].diff()
global_daily['new_deaths'] = global_daily['deaths'].diff()
global_daily['cases_7d_avg'] = global_daily['new_cases'].rolling(7).mean()
global_daily['deaths_7d_avg'] = global_daily['new_deaths'].rolling(7).mean()

print(f"📅 Analysis period: {global_daily['date'].min().date()} to {global_daily['date'].max().date()}")
# Format counts with ',.0f' (as the key-insights section does) so that sums
# stored as floats print as whole numbers rather than e.g. "1,234.0".
print(f"📈 Peak global cases: {global_daily['confirmed'].max():,.0f}")
print(f"💀 Total global deaths: {global_daily['deaths'].max():,.0f}")

# COUNTRY COMPARISON ANALYSIS
print("\n" + "="*50)
print("2. COUNTRY COMPARISON ANALYSIS")
print("="*50)

# Get latest data for each country: one row per country, taken at the row
# holding that country's maximum date. copy() so the new columns below are
# added to an independent frame rather than a slice of df.
latest_data = df.loc[df.groupby('country')['date'].idxmax()].copy()

# Rates are expressed as a percentage of confirmed cases. Dividing by a
# zero case count produces inf (or NaN for 0/0), and fillna(0) only replaces
# the NaN, so blank out zero denominators first; those rows get a rate of 0.
confirmed_nonzero = latest_data['confirmed'].mask(latest_data['confirmed'] == 0)
latest_data['mortality_rate'] = (latest_data['deaths'] / confirmed_nonzero * 100).fillna(0)
latest_data['recovery_rate'] = (latest_data['recovered'] / confirmed_nonzero * 100).fillna(0)

# Filter countries with significant data (more than 1,000 confirmed cases)
significant_countries = latest_data[latest_data['confirmed'] > 1000]
print(f"🌍 Countries with >1,000 cases: {len(significant_countries)}")

# COMPREHENSIVE VISUALIZATION DASHBOARD
# Draws a 3x3 grid: global time series, per-country rankings taken from
# significant_countries, and growth trajectories for a fixed set of countries.
print("\n" + "="*50)
print("3. COMPREHENSIVE VISUALIZATION DASHBOARD")
print("="*50)


def _finish_time_panel(ax, title, ylabel, legend=True):
    """Apply the title, y-label, rotated date ticks and grid shared by the time-series panels.

    Call after the data has been plotted so that the optional legend picks up
    the labelled artists already on ``ax``.
    """
    ax.set_title(title, fontweight='bold', fontsize=12)
    ax.set_ylabel(ylabel)
    if legend:
        ax.legend()
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, alpha=0.3)


def _ranking_panel(ax, ranked, column, title, xlabel, color):
    """Draw ``ranked[column]`` as horizontal bars labelled by ``ranked['country']``.

    ``ranked`` is expected to be sorted best-first (e.g. the result of
    ``nlargest``).
    """
    ax.barh(ranked['country'], ranked[column], color=color)
    # barh places the first row at the bottom of the axis; flip it so the
    # top-ranked country is the top bar and the chart reads top-to-bottom.
    ax.invert_yaxis()
    ax.set_title(title, fontweight='bold', fontsize=12)
    ax.set_xlabel(xlabel)


fig, axes = plt.subplots(3, 3, figsize=(20, 15))

# Plot 1: Global cumulative cases
for column, color, label in (
    ('confirmed', 'blue', 'Confirmed'),
    ('deaths', 'red', 'Deaths'),
    ('recovered', 'green', 'Recovered'),
):
    axes[0, 0].plot(global_daily['date'], global_daily[column], linewidth=2, color=color, label=label)
_finish_time_panel(axes[0, 0], 'Global Cumulative Cases', 'Cases')

# Plot 2: Global daily new cases with 7-day average
axes[0, 1].bar(global_daily['date'], global_daily['new_cases'], alpha=0.3, color='lightblue', label='Daily Cases')
axes[0, 1].plot(global_daily['date'], global_daily['cases_7d_avg'], linewidth=2, color='darkblue', label='7-day Average')
_finish_time_panel(axes[0, 1], 'Global Daily New Cases', 'New Cases')

# Plot 3: Top 15 countries by confirmed cases
top_confirmed = significant_countries.nlargest(15, 'confirmed')
_ranking_panel(axes[0, 2], top_confirmed, 'confirmed',
               'Top 15 Countries - Confirmed Cases', 'Confirmed Cases', 'lightblue')

# Plot 4: Top 15 countries by deaths
top_deaths = significant_countries.nlargest(15, 'deaths')
_ranking_panel(axes[1, 0], top_deaths, 'deaths',
               'Top 15 Countries - Deaths', 'Deaths', 'salmon')

# Plots 5 and 6 rank rates only among countries with more than 10,000
# confirmed cases; the filter is computed once and shared.
large_outbreaks = significant_countries[significant_countries['confirmed'] > 10000]

# Plot 5: Highest mortality rates
high_mortality = large_outbreaks.nlargest(15, 'mortality_rate')
_ranking_panel(axes[1, 1], high_mortality, 'mortality_rate',
               'Highest Mortality Rates (%)', 'Mortality Rate (%)', 'red')

# Plot 6: Highest recovery rates
high_recovery = large_outbreaks.nlargest(15, 'recovery_rate')
_ranking_panel(axes[1, 2], high_recovery, 'recovery_rate',
               'Highest Recovery Rates (%)', 'Recovery Rate (%)', 'green')

# Plot 7: Country growth trajectories (selected countries)
# NOTE(review): names must match the 'country' column exactly; a name with no
# rows is skipped silently. Confirm the dataset's spelling (e.g. 'UK' vs
# 'United Kingdom').
selected_countries = ['United States', 'India', 'Brazil', 'Germany', 'Japan', 'UK']
for country in selected_countries:
    # Sort chronologically so the line is drawn left-to-right even when the
    # CSV rows for a country are not stored in date order.
    country_df = df[df['country'] == country].sort_values('date')
    if not country_df.empty:
        axes[2, 0].plot(country_df['date'], country_df['confirmed'], label=country, linewidth=2)
# Log scale so countries with very different case counts share one panel.
axes[2, 0].set_yscale('log')
_finish_time_panel(axes[2, 0], 'Country Growth Trajectories', 'Confirmed Cases')

# Plot 8: Active cases comparison
top_active = significant_countries.nlargest(10, 'active')
_ranking_panel(axes[2, 1], top_active, 'active',
               'Top 10 Countries - Active Cases', 'Active Cases', 'orange')

# Plot 9: Global active cases over time (single series, so no legend)
axes[2, 2].plot(global_daily['date'], global_daily['active'], linewidth=2, color='orange')
_finish_time_panel(axes[2, 2], 'Global Active Cases Over Time', 'Active Cases', legend=False)

plt.tight_layout()
plt.show()

# KEY INSIGHTS AND METRICS
# Prints the global totals for the most recent date, the five countries with
# the most confirmed cases, and a week-over-week trend in new cases.
print("\n" + "="*50)
print("4. KEY INSIGHTS AND METRICS")
print("="*50)

# global_daily is ordered by date, so the last row is the most recent day.
latest_global = global_daily.iloc[-1]
print("🌍 GLOBAL SUMMARY:")
print(f"   Total Confirmed Cases: {latest_global['confirmed']:,.0f}")
print(f"   Total Deaths: {latest_global['deaths']:,.0f}")
print(f"   Total Recovered: {latest_global['recovered']:,.0f}")
print(f"   Active Cases: {latest_global['active']:,.0f}")
print(f"   Global Mortality Rate: {(latest_global['deaths']/latest_global['confirmed']*100):.2f}%")

print("\n🏆 TOP 5 COUNTRIES BY CONFIRMED CASES:")
top_5 = significant_countries.nlargest(5, 'confirmed')[['country', 'confirmed', 'deaths', 'mortality_rate']]
# itertuples avoids building a Series per row and the unused index that
# iterrows would hand back.
for row in top_5.itertuples(index=False):
    print(f"   {row.country}: {row.confirmed:,.0f} cases, {row.deaths:,.0f} deaths ({row.mortality_rate:.2f}%)")

print("\n📊 GROWTH ANALYSIS:")
current_growth = global_daily['new_cases'].iloc[-7:].mean()  # Last 7 days average
previous_growth = global_daily['new_cases'].iloc[-14:-7].mean()  # Previous 7 days

if previous_growth > 0:
    # Week-over-week change in percent; within +/-5% counts as stable.
    growth_change = (current_growth - previous_growth) / previous_growth * 100
    if growth_change > 5:
        growth_trend = '↑ Increasing'
    elif growth_change < -5:
        growth_trend = '↓ Decreasing'
    else:
        growth_trend = '→ Stable'
else:
    # A percentage is undefined without a positive baseline (a zero or
    # negative previous week, or NaN when there is too little history).
    # Keep growth_change at 0 as before, but derive the label from the sign
    # of the absolute change so a rise from zero is not reported as stable.
    # NaN compares False both ways and therefore still reads as stable.
    growth_change = 0
    if current_growth > previous_growth:
        growth_trend = '↑ Increasing'
    elif current_growth < previous_growth:
        growth_trend = '↓ Decreasing'
    else:
        growth_trend = '→ Stable'

print(f"   Current 7-day average: {current_growth:,.0f} new cases/day")
print(f"   Growth trend: {growth_trend}")

print(f"\n{'='*70}")
print("✅ MAIN ANALYSIS COMPLETED SUCCESSFULLY!")
print(f"{'='*70}")