# COVID-19 Global Data Tracker 🌍📊
Tracking and Visualizing Global COVID-19 Cases, Deaths, and Vaccinations

In [None]:
# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Optional: shared look for every chart below -- seaborn's "whitegrid"
# theme plus a wider default figure size (width, height in inches).
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (12, 6)})

In [None]:
# Load dataset: the CSV is read via a relative path, so it must sit in the
# notebook's working directory. Preview the first five rows as a sanity check.
df = pd.read_csv("owid-covid-data.csv")
df.head(5)

## Data Exploration

In [None]:
# View dataset structure: display the column labels of the loaded frame so
# the names used in the cleaning step below can be checked against them.
df.columns

In [None]:
# Summary statistics: describe() with its default arguments, which for a
# mixed-dtype frame summarises the numeric columns (count, mean, std,
# min, quartiles, max).
df.describe()

In [None]:
# Count missing values per column and list the most incomplete columns first
(
    df.isna()
    .sum()
    .sort_values(ascending=False)
)

In [None]:
# List each distinct value of the 'location' column, in order of first appearance
pd.unique(df['location'])

## Data Cleaning

In [None]:
# Convert 'date' column to datetime so rows sort and plot chronologically
df['date'] = pd.to_datetime(df['date'])

# Select countries of interest
countries = ['Kenya', 'United States', 'India']

# Select important columns
columns_of_interest = ['date', 'location', 'total_cases', 'new_cases', 'total_deaths',
                       'new_deaths', 'total_vaccinations', 'people_vaccinated_per_hundred']

# Keep only the chosen countries and columns. Sorting by country and date
# guarantees the forward-fill below walks each country's rows in time order;
# sort_values also returns a new frame, so later column assignments never
# write back into `df`.
df_filtered = (
    df.loc[df['location'].isin(countries), columns_of_interest]
    .sort_values(['location', 'date'])
)

# Fill missing values.
# Cumulative series are running totals: a missing entry means "no new report
# that day", not "the total fell to zero". Filling those gaps with 0 would
# draw spurious drops to zero in every line chart and distort any ratio built
# from them, so carry the last reported value forward within each country.
cumulative_columns = ['total_cases', 'total_deaths', 'total_vaccinations',
                      'people_vaccinated_per_hundred']
df_filtered[cumulative_columns] = (
    df_filtered.groupby('location')[cumulative_columns].ffill()
)

# Whatever is still missing is either a daily count with no report or a
# cumulative value before the country's first report; 0 is correct for both.
df_filtered = df_filtered.fillna(0)
df_filtered.head()

## Exploratory Data Analysis (EDA)

In [None]:
# Total Cases Over Time: one line per country on a shared set of axes
fig, ax = plt.subplots()
for name in countries:
    country_rows = df_filtered.loc[df_filtered['location'] == name]
    ax.plot(country_rows['date'], country_rows['total_cases'], label=name)

ax.set_title("Total COVID-19 Cases Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Cases")
ax.legend()
plt.show()

In [None]:
# Total Deaths Over Time: one line per country on a shared set of axes
fig, ax = plt.subplots()
for name in countries:
    country_rows = df_filtered.loc[df_filtered['location'] == name]
    ax.plot(country_rows['date'], country_rows['total_deaths'], label=name)

ax.set_title("Total COVID-19 Deaths Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Deaths")
ax.legend()
plt.show()

In [None]:
# Death Rate Calculation: total deaths divided by total cases, row by row.
# Rows with no recorded cases get a rate of 0 rather than the NaN/inf that
# the raw division would produce.
has_cases = df_filtered['total_cases'] > 0
deaths_per_case = df_filtered['total_deaths'].div(df_filtered['total_cases'])
df_filtered['death_rate'] = deaths_per_case.where(has_cases, 0)

## Vaccination Progress

In [None]:
# Vaccination Plot: people vaccinated per hundred, one line per country
fig, ax = plt.subplots()
for name in countries:
    country_rows = df_filtered.loc[df_filtered['location'] == name]
    ax.plot(country_rows['date'], country_rows['people_vaccinated_per_hundred'], label=name)

ax.set_title("Vaccination Progress (% of population)")
ax.set_xlabel("Date")
ax.set_ylabel("People Vaccinated per 100")
ax.legend()
plt.show()

## Key Insights
1. The United States had the highest peak of daily new cases.
2. India's vaccination rate grew rapidly after May 2021.
3. Kenya showed slower case growth compared to the United States and India.
4. The death rate (total deaths divided by total cases) varied widely between countries, possibly due to differences in healthcare access and testing.
5. Global vaccination disparities highlight inequality in health infrastructure.