# COVID-19 Global Data Tracker Project
# This notebook analyzes COVID-19 data for Kenya, USA, and China using the OWID dataset.


In [None]:
# Importing the necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Make the plots look nicer.
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and
# 3.8 removed the old 'seaborn' alias, so use whichever name this install has.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)
# Default size for every figure created afterwards (width, height in inches).
plt.rcParams['figure.figsize'] = (12, 6)


In [None]:
# Read the OWID COVID-19 CSV into a DataFrame.
# The file is expected to sit in the same folder as this notebook.
csv_path = 'owid-covid-data.csv'
df = pd.read_csv(csv_path)

# Show the first five rows as a quick sanity check
df.head(5)


In [None]:
# List every column name, then show the ten columns with the most missing values
print(df.columns)
missing_per_column = df.isna().sum()
missing_per_column.sort_values(ascending=False).head(10)


In [None]:
# Countries to compare; the names must match the dataset's 'location' column.
countries = ['Kenya', 'United States', 'China']

# Build the cleaned subset in one chain:
#   1. keep only rows for the selected countries,
#   2. drop rows missing either cumulative metric used in the analysis below,
#   3. convert 'date' from text to datetime so plots get a real time axis.
# assign() returns a new DataFrame, so df_countries is an independent frame
# rather than a slice of df; assigning into the slice directly would trigger
# pandas' SettingWithCopyWarning.
df_countries = (
    df[df['location'].isin(countries)]
    .dropna(subset=['total_cases', 'total_deaths'])
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Preview cleaned data
df_countries.head()


## Exploratory Data Analysis (EDA)
Analyze COVID-19 trends for Kenya, USA, and China.


In [None]:
# Cumulative confirmed cases: one line per selected country
fig, ax = plt.subplots(figsize=(12, 6))
for country in countries:
    rows = df_countries.loc[df_countries['location'] == country]
    ax.plot(rows['date'], rows['total_cases'], label=country)

ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Cumulative deaths: one line per selected country
fig, ax = plt.subplots(figsize=(12, 6))
for country in countries:
    rows = df_countries.loc[df_countries['location'] == country]
    ax.plot(rows['date'], rows['total_deaths'], label=country)

ax.set_title('Total COVID-19 Deaths Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Deaths')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Daily new cases per country; days with no reported value are drawn as zero
fig, ax = plt.subplots(figsize=(12, 6))
for country in countries:
    rows = df_countries.loc[df_countries['location'] == country]
    daily_cases = rows['new_cases'].fillna(0)
    ax.plot(rows['date'], daily_cases, label=country)

ax.set_title('Daily New COVID-19 Cases')
ax.set_xlabel('Date')
ax.set_ylabel('New Cases')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Add a death-rate column: cumulative deaths as a fraction of cumulative cases.
# Rows with zero recorded cases get NaN (not inf), so they show up as gaps
# instead of distorting the plot.
# assign() returns a new frame, so the column is never written into a
# filtered slice (which can trigger pandas' SettingWithCopyWarning).
reported_cases = df_countries['total_cases']
df_countries = df_countries.assign(
    death_rate=df_countries['total_deaths'] / reported_cases.where(reported_cases > 0)
)

# Plot death rate (a fraction between 0 and 1, not a percentage)
plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df_countries[df_countries['location'] == country]
    plt.plot(country_data['date'], country_data['death_rate'], label=country)

plt.title('COVID-19 Death Rate Over Time')
plt.xlabel('Date')
plt.ylabel('Death Rate')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Plot cumulative vaccinations over time
plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df_countries[df_countries['location'] == country]
    # 'people_vaccinated' was not part of the earlier dropna, so it can still
    # contain missing values. Matplotlib breaks a line at every NaN and does
    # not draw isolated points, so keep only the dates that have a value and
    # connect consecutive reports.
    reported = country_data.dropna(subset=['people_vaccinated'])
    plt.plot(reported['date'], reported['people_vaccinated'], label=country)

plt.title('Cumulative People Vaccinated')
plt.xlabel('Date')
plt.ylabel('People Vaccinated')
plt.legend()
plt.grid(True)
plt.show()


## Key Insights

- The USA had the highest number of total cases and vaccinations overall.
- China had early spikes but maintained low new case numbers later in the timeline.
- Kenya had a relatively low case count but slower vaccination progress.
- The death rate (total deaths divided by total cases) remained below 5% for all three countries, with some fluctuations.
