# In [1]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Optional: For interactive plots (map)
import plotly.express as px

# Set visualization style
sns.set(style='whitegrid')

# Load the dataset (update the path as needed).
# NOTE(review): the file name suggests the Our World in Data COVID-19 export;
# confirm the CSV is present in the working directory before running.
df = pd.read_csv("owid-covid-data.csv")

# Preview the data. A bare expression is only rendered when it is the last
# statement of a notebook cell (and never in a plain script), so print it
# explicitly to make sure the preview is actually shown.
print(df.head())

# Check basic info (DataFrame.info() writes its report to stdout itself)
df.info()

# See missing values: the 15 columns with the most null entries
print(df.isnull().sum().sort_values(ascending=False).head(15))

# Convert 'date' column to datetime so rows sort and plot chronologically
df['date'] = pd.to_datetime(df['date'])

# Countries of interest and the columns needed by the analysis below
countries = ['Kenya', 'United States', 'India']
columns = ['location', 'date', 'total_cases', 'new_cases', 'total_deaths', 'new_deaths', 'total_vaccinations']

# Build an independent, chronologically ordered frame. The explicit copy keeps
# later in-place edits and column assignments from targeting a slice of `df`.
df_filtered = (
    df.loc[df['location'].isin(countries), columns]
    .sort_values(['location', 'date'])
    .copy()
)

# Fill missing values for analysis.
# Cumulative series must never fall back to zero on a day without a report
# (that would draw saw-tooth drops in the plots), so carry the last known
# value forward within each country first.
cumulative_columns = ['total_cases', 'total_deaths', 'total_vaccinations']
daily_columns = ['new_cases', 'new_deaths']
df_filtered[cumulative_columns] = (
    df_filtered.groupby('location')[cumulative_columns].ffill()
)

# Whatever is still missing is either a day before the first report of a
# cumulative series or a day without a daily figure; treat both as zero.
# Only numeric columns are touched so 'location' and 'date' stay untouched.
numeric_columns = cumulative_columns + daily_columns
df_filtered[numeric_columns] = df_filtered[numeric_columns].fillna(0)

# Total cases over time: one line of cumulative cases per selected country
fig, ax = plt.subplots(figsize=(12, 6))
for name in countries:
    # Rows belonging to the current country only
    country_rows = df_filtered.loc[df_filtered['location'].eq(name)]
    ax.plot(country_rows['date'], country_rows['total_cases'], label=name)

ax.set_title("Total COVID-19 Cases Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Cases")
ax.legend()
fig.tight_layout()
plt.show()

# Death Rate (Total Deaths / Total Cases)
# Missing values were replaced by 0 earlier, so 'total_cases' can be 0.
# Dividing by it would yield inf (or an implicit NaN); mask those rows so the
# rate is explicitly undefined (NaN) and Matplotlib simply leaves a gap.
confirmed_cases = df_filtered['total_cases'].where(df_filtered['total_cases'] > 0)

# assign() returns a new frame, which avoids writing a column into a frame
# that pandas may still regard as a slice of `df` (SettingWithCopyWarning).
df_filtered = df_filtered.assign(
    death_rate=df_filtered['total_deaths'] / confirmed_cases
)

# Compare death rates: one line per selected country
plt.figure(figsize=(12, 6))
for country in countries:
    temp = df_filtered[df_filtered['location'] == country]
    plt.plot(temp['date'], temp['death_rate'], label=country)

plt.title("COVID-19 Death Rate Over Time")
plt.ylabel("Death Rate")
plt.xlabel("Date")
plt.legend()
plt.tight_layout()
plt.show()

# Cumulative vaccinations: one line of total doses per selected country
vacc_fig, vacc_ax = plt.subplots(figsize=(12, 6))
for name in countries:
    # Restrict the frame to the country currently being drawn
    is_country = df_filtered['location'] == name
    country_rows = df_filtered[is_country]
    vacc_ax.plot(
        country_rows['date'],
        country_rows['total_vaccinations'],
        label=name,
    )

vacc_ax.set_title("Vaccination Progress Over Time")
vacc_ax.set_ylabel("Total Vaccinations")
vacc_ax.set_xlabel("Date")
vacc_ax.legend()
vacc_fig.tight_layout()
plt.show()

# Prepare latest snapshot of total cases per country.
# Filtering on the single global maximum date would drop every country that
# did not report on that exact day, so take each country's own most recent
# row that actually carries a 'total_cases' value instead.
reported = df.dropna(subset=['iso_code', 'total_cases'])

# NOTE(review): per the OWID codebook, codes prefixed with 'OWID_' are
# OWID-defined entities (World, continents, income groups, ...) rather than
# ISO-3 countries. They cannot be placed on the map and the aggregate totals
# would stretch the colour scale, so leave them out. Confirm against the
# dataset version in use.
reported = reported[~reported['iso_code'].str.startswith('OWID_')]

# Last reported row per country code, in chronological order
latest_data = reported.sort_values('date').drop_duplicates('iso_code', keep='last')
map_data = latest_data[['iso_code', 'location', 'total_cases']].dropna()

# Plot map: countries are matched by their ISO-3 code and shaded by case count
fig = px.choropleth(map_data,
                    locations="iso_code",
                    color="total_cases",
                    hover_name="location",
                    color_continuous_scale="Reds",
                    title="Total COVID-19 Cases by Country (Latest)")
fig.show()

##  Key Insights

## 1. **India** experienced massive case spikes in 2021, yet has relatively low death rates.
## 2. **USA** leads in total vaccinations but had high early death rates before vaccines rolled out.
## 3. **Kenya**'s vaccination curve is gradual but consistent, reflecting phased rollout strategies.
## 4. Death rates tend to lag case spikes by 1–2 weeks, confirming trends seen globally.
## 5. Case surges align with variants and major waves (e.g., Delta, Omicron).

## ---

## This notebook combines code, visuals, and real-world data to explore the pandemic’s global impact and response.



# Output of the last run: ModuleNotFoundError: No module named 'pandas'