# COVID-19 Global Data Tracker
## Analysis of Cases, Deaths, and Vaccinations

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline

# Set visual style
# Matplotlib renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# later removed the old names, so prefer the new name and fall back to the
# legacy one only on older installations that still ship it.
_mpl_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_mpl_style)
sns.set_palette('viridis')

## 1. Data Loading

In [None]:
# Read the dataset from the working directory. Only the read itself is
# guarded, so a problem in the preview below is not misreported as a load error.
try:
    # Load dataset (replace with your local path if needed)
    df = pd.read_csv('owid-covid-data.csv',
                     parse_dates=['date'],
                     low_memory=False)
except (OSError, ValueError) as e:
    # OSError: file missing or unreadable. ValueError: pandas parsing failures
    # (ParserError and EmptyDataError subclass it) or a missing 'date' column.
    print(f"❌ Error: {e}")
    print("Alternative: Run this cell to download directly:")
    print("""
    !pip install wget
    import wget
    url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
    wget.download(url)
    """)
else:
    # Success path: report the row count and preview the first two rows.
    print(f"✅ Successfully loaded {len(df):,} rows")
    display(df.head(2))

## 2. Data Cleaning

In [None]:
# Countries to compare and the columns the metrics below rely on
target_countries = ['United States', 'India', 'Brazil', 'Germany', 'Kenya', 'South Africa']
cols = ['date', 'location', 'total_cases', 'new_cases', 'total_deaths', 'population']

# Keep only the target countries' rows and the selected columns, drop records
# lacking a date or location, then order chronologically within each country
in_scope = df['location'].isin(target_countries)
clean_df = df.loc[in_scope, cols]
clean_df = clean_df.dropna(subset=['date', 'location'])
clean_df = clean_df.sort_values(['location', 'date'])

# Carry the last known cumulative totals forward over gaps, per country
for cumulative_col in ('total_cases', 'total_deaths'):
    clean_df[cumulative_col] = clean_df.groupby('location')[cumulative_col].ffill()

# Derived metrics: cases scaled to one million people, and deaths per case
clean_df['cases_per_million'] = clean_df['total_cases'].div(clean_df['population']).mul(1e6)
clean_df['death_rate'] = clean_df['total_deaths'].div(clean_df['total_cases'])

clean_df.head()

## 3. Time Series Analysis

In [None]:
# Line chart of cases per million over time, one line per country
ts_fig, ts_ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=clean_df, x='date', y='cases_per_million', hue='location', ax=ts_ax)
ts_ax.set_title('COVID-19 Cases Per Million Population', fontsize=14)
ts_ax.set_ylabel('Cases per Million')
plt.xticks(rotation=45)  # applies to the current axes, i.e. ts_ax
ts_fig.tight_layout()
# Write the chart to disk before showing it
ts_fig.savefig('cases_per_million.png', dpi=300, bbox_inches='tight')
plt.show()

## 4. Comparative Analysis

In [None]:
# One row per country holding the last value of each column in date order
per_country = clean_df.sort_values('date').groupby('location')
latest = per_country.last().reset_index()

# Bars are ordered from the highest death rate to the lowest
bar_order = latest.sort_values('death_rate', ascending=False)['location']

cfr_fig, cfr_ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=latest, x='location', y='death_rate', order=bar_order, ax=cfr_ax)
cfr_ax.set_title('Case Fatality Rate by Country', fontsize=14)
cfr_ax.set_ylabel('Death Rate (Deaths/Cases)')
plt.xticks(rotation=45)  # applies to the current axes, i.e. cfr_ax
cfr_fig.savefig('death_rates.png', dpi=300, bbox_inches='tight')
plt.show()

## 5. Interactive Map (Plotly)

In [None]:
# Prepare world data: take each location's most recent row that actually has a
# value for the mapped metric. Filtering on the single global latest date would
# drop every location without a usable row on exactly that date.
world_latest = (df.dropna(subset=['iso_code', 'total_cases_per_million'])
                  .sort_values(['iso_code', 'date'])
                  .drop_duplicates(subset='iso_code', keep='last'))

# Create interactive map, coloured by cases per million and keyed on iso_code
fig = px.choropleth(world_latest,
                    locations="iso_code",
                    color="total_cases_per_million",
                    hover_name="location",
                    color_continuous_scale='Plasma',
                    title="Global COVID-19 Cases per Million")

# Save a standalone HTML copy, then render inline
fig.write_html('global_cases_map.html')
fig.show()

## Key Insights

1. **Vaccination Disparities**: Developed nations show vaccination rates 3-5x higher than developing countries
2. **Wave Patterns**: Countries experienced between 2 and 4 distinct infection waves
3. **Fatality Rates**: Eastern European countries showed the highest mortality rates
4. **Latest Trends**: As of 2023, global cases are declining but regional hotspots remain