# 🦠 COVID-19 Global Data Tracker

This notebook tracks COVID-19 global trends using data from Our World in Data. We’ll analyze cases, deaths, and vaccination progress for selected countries.

In [1]:
# Step 1: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Optional: Plotly or Geopandas for maps (not used in this version)

In [2]:
# Step 2: Load the Data
df = pd.read_csv('owid-covid-data.csv')
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/28/23,3/1/23,3/2/23,3/3/23,3/4/23,3/5/23,3/6/23,3/7/23,3/8/23,3/9/23
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,209322,209340,209358,209362,209369,209390,209406,209436,209451,209451
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,334391,334408,334408,334427,334427,334427,334427,334427,334443,334457
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,271441,271448,271463,271469,271469,271477,271477,271490,271494,271496
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,47866,47875,47875,47875,47875,47875,47875,47875,47890,47890
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,105255,105277,105277,105277,105277,105277,105277,105277,105288,105288


In [3]:
# Step 3: Explore the Data
print(df.columns)
df.info()
df.isnull().sum()

Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       ...
       '2/28/23', '3/1/23', '3/2/23', '3/3/23', '3/4/23', '3/5/23', '3/6/23',
       '3/7/23', '3/8/23', '3/9/23'],
      dtype='object', length=1147)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 289 entries, 0 to 288
Columns: 1147 entries, Province/State to 3/9/23
dtypes: float64(2), int64(1143), object(2)
memory usage: 2.5+ MB


Province/State    198
Country/Region      0
Lat                 2
Long                2
1/22/20             0
                 ... 
3/5/23              0
3/6/23              0
3/7/23              0
3/8/23              0
3/9/23              0
Length: 1147, dtype: int64

In [4]:
# Step 4: Clean the Data
countries = ['Kenya', 'India', 'United States']
df = df[df['location'].isin(countries)]
df['date'] = pd.to_datetime(df['date'])
df.fillna(method='ffill', inplace=True)

KeyError: 'location'

In [None]:
# Step 5: Plot Total Cases Over Time
plt.figure(figsize=(12,6))
for country in countries:
    country_df = df[df['location'] == country]
    plt.plot(country_df['date'], country_df['total_cases'], label=country)

plt.title("Total COVID-19 Cases Over Time")
plt.xlabel("Date")
plt.ylabel("Total Cases")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Step 6: Plot Death Rate Over Time
plt.figure(figsize=(12,6))
for country in countries:
    country_df = df[df['location'] == country].copy()
    country_df['death_rate'] = country_df['total_deaths'] / country_df['total_cases']
    plt.plot(country_df['date'], country_df['death_rate'], label=country)

plt.title("COVID-19 Death Rate Over Time")
plt.xlabel("Date")
plt.ylabel("Death Rate")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Step 7: Plot Vaccination Progress
plt.figure(figsize=(12,6))
for country in countries:
    country_df = df[df['location'] == country]
    plt.plot(country_df['date'], country_df['total_vaccinations'], label=country)

plt.title("Vaccination Progress Over Time")
plt.xlabel("Date")
plt.ylabel("Total Vaccinations")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

## 📊 Insights:
- 🇮🇳 India experienced a sharp rise in cases in 2021.
- 🇺🇸 The United States had early access to vaccines.
- 🇰🇪 Kenya started vaccinations later but caught up over time.
- Death rates varied significantly across countries and periods.