# 🌍 COVID-19 Global Data Tracker

This notebook explores and visualizes global COVID-19 trends using data from **Our World in Data**.  
It covers:

- Global and country-level case and death statistics
- Vaccination rollouts
- Comparative metrics
- Insights and visualizations


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Apply seaborn's "darkgrid" style so the plots drawn below share one consistent look
sns.set(style="darkgrid")
%matplotlib inline


In [None]:
# Read the COVID-19 CSV export; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer='owid-covid-data.csv')

# Show the first five rows as a quick sanity check of the load
df.head(n=5)


In [None]:
# Column names available in the dataset
print(df.columns)

# Missing values per column.
# A notebook cell only auto-displays its *last* expression, so a bare
# `df.isnull().sum()` in the middle of the cell is computed and thrown away.
# Print it explicitly so the check is actually visible.
print(df.isnull().sum())

# Basic info and datatypes (info() writes its report to stdout itself)
df.info()

# Date range covered and a sample of the locations present
print(df['date'].min(), df['date'].max())
print(df['location'].unique()[:10])  # first 10 countries/regions


In [None]:
# Convert date column to datetime (idempotent: safe to re-run on an already converted column)
df['date'] = pd.to_datetime(df['date'])

# Countries compared throughout the rest of the notebook
countries = ['Kenya', 'United States', 'India']

# Columns whose gaps are carried forward from the previous observation
fill_cols = ['total_cases', 'total_deaths', 'new_cases', 'new_deaths', 'total_vaccinations']

# Keep only the selected countries, drop rows missing critical values, and put
# each country's rows in chronological order so "previous row" means
# "previous day of the same country".
df_filtered = (
    df[df['location'].isin(countries)]
    .dropna(subset=['total_cases', 'date'])
    .sort_values(['location', 'date'])
    .copy()
)

# Forward-fill *within each country*. A plain forward fill over the whole frame
# would copy the last values of one country into the leading gaps of the next
# one (e.g. a vaccination total appearing before that country's first report).
# GroupBy.ffill() also replaces the deprecated fillna(method='ffill') spelling.
# NOTE(review): forward-filling the daily columns (new_cases / new_deaths)
# repeats the previous day's count on missing days - kept as in the original,
# but confirm this is the intended treatment.
df_filtered[fill_cols] = df_filtered.groupby('location')[fill_cols].ffill()

df_filtered.head()


In [None]:
# Cumulative confirmed cases: one line per selected country on a shared axis
ax = plt.figure(figsize=(12, 6)).add_subplot(111)
for country in countries:
    subset = df_filtered.loc[df_filtered['location'] == country]
    ax.plot(subset['date'], subset['total_cases'], label=country)
ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend()
plt.show()


In [None]:
# Cumulative deaths: one line per selected country on a shared axis
ax = plt.figure(figsize=(12, 6)).add_subplot(111)
for country in countries:
    subset = df_filtered.loc[df_filtered['location'] == country]
    ax.plot(subset['date'], subset['total_deaths'], label=country)
ax.set_title('Total COVID-19 Deaths Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Deaths')
ax.legend()
plt.show()


In [None]:
# Daily new cases: one line per selected country, for side-by-side comparison
ax = plt.figure(figsize=(12, 6)).add_subplot(111)
for country in countries:
    subset = df_filtered.loc[df_filtered['location'] == country]
    ax.plot(subset['date'], subset['new_cases'], label=country)
ax.set_title('Daily New COVID-19 Cases')
ax.set_xlabel('Date')
ax.set_ylabel('New Cases')
ax.legend()
plt.show()


In [None]:
# Death rate = cumulative deaths divided by cumulative cases, stored as a new column
df_filtered['death_rate'] = df_filtered['total_deaths'].div(df_filtered['total_cases'])

# Draw the death rate over time, one line per selected country
ax = plt.figure(figsize=(12, 6)).add_subplot(111)
for country in countries:
    subset = df_filtered.loc[df_filtered['location'] == country]
    ax.plot(subset['date'], subset['death_rate'], label=country)
ax.set_title('COVID-19 Death Rate Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Death Rate')
ax.legend()
plt.show()


In [None]:
# Cumulative vaccinations: one line per selected country on a shared axis
ax = plt.figure(figsize=(12, 6)).add_subplot(111)
for country in countries:
    subset = df_filtered.loc[df_filtered['location'] == country]
    ax.plot(subset['date'], subset['total_vaccinations'], label=country)
ax.set_title('Total Vaccinations Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Vaccinations')
ax.legend()
plt.show()


In [None]:
# Build a per-country snapshot for the map.
# Filtering on the single global max date keeps only the locations that happened
# to report on that exact day; instead, take each location's most recent row
# that actually carries a case count.
snapshot_cols = ['iso_code', 'location', 'date', 'total_cases', 'total_vaccinations']
reported_df = (
    df[snapshot_cols]
    .assign(date=lambda d: pd.to_datetime(d['date']))  # works whether or not 'date' was converted earlier
    .dropna(subset=['iso_code', 'total_cases'])        # only the columns the map really needs
)

# NOTE(review): aggregate rows (World, continents, income groups) appear to use
# 'OWID_'-prefixed codes rather than ISO-3 codes in this dataset - confirm
# against the dataset codebook. They cannot be drawn as countries, so drop them.
reported_df = reported_df[~reported_df['iso_code'].astype(str).str.startswith('OWID_')]

# Latest available row per location
latest_df = reported_df.sort_values('date').drop_duplicates(subset='location', keep='last')
latest_date = latest_df['date'].max()

# Select relevant columns for plotting. total_vaccinations is kept for reference
# but may be NaN: requiring it would remove countries from a map of *cases*.
map_df = latest_df[['iso_code', 'location', 'total_cases', 'total_vaccinations']]

# Plot choropleth map of total cases
fig = px.choropleth(
    map_df,
    locations="iso_code",
    color="total_cases",
    hover_name="location",
    color_continuous_scale=px.colors.sequential.Plasma,
    title=f"Total COVID-19 Cases by Country as of {latest_date.strftime('%Y-%m-%d')}"
)
fig.show()
