In [None]:
# Cell 1 - Markdown
# COVID-19 Data Analysis: Trends, Vaccination Impact, and Visual Insights
**Date:** 2025-06-04

This notebook explores the COVID-19 pandemic using a dataset containing daily new cases, deaths, and vaccination data for various countries.


In [None]:
# Cell 2 - Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set plotting aesthetics
# set_theme() is seaborn's preferred interface; sns.set() is only a legacy alias for it.
sns.set_theme(style='whitegrid')


In [None]:
# Cell 3 - Load and clean data
# Read the raw CSV. Header names are stripped of surrounding whitespace because
# every lookup below uses the exact column name.
df = pd.read_csv("Dataset.csv")
df.columns = df.columns.str.strip()

# Unparseable dates become NaT; non-numeric vaccination entries (e.g. 'unknown')
# become NaN. errors='coerce' already covers 'unknown', so no separate replace
# step is needed.
df['DATE'] = pd.to_datetime(df['DATE'], errors='coerce')
df['vaccinated'] = pd.to_numeric(df['vaccinated'], errors='coerce')

# Keep only rows that report both daily figures.
df = df.dropna(subset=['NEW Cases', 'NEW_DEATHS'])

# Running totals must accumulate in chronological order, not CSV row order.
# The cumulative sums are computed on a date-sorted view (mergesort is stable,
# so same-day rows keep their file order; NaT dates sort last) and are then
# aligned back onto df by index, which leaves df's own row order untouched.
by_country_chronological = df.sort_values('DATE', kind='mergesort').groupby('country')
df = df.assign(
    CUM_CASES=by_country_chronological['NEW Cases'].cumsum(),
    CUM_DEATHS=by_country_chronological['NEW_DEATHS'].cumsum(),
)
df.head()


In [None]:
# Cell 4 - Line plot: New Cases over time
# Rank countries by their summed 'NEW Cases' and keep the five largest.
# top_countries is reused by the vaccination plot in Cell 5.
cases_per_country = df.groupby('country')['NEW Cases'].sum()
top_countries = cases_per_country.sort_values(ascending=False).head(5).index

# One line per country, all drawn on the same axes.
fig, ax = plt.subplots(figsize=(14, 6))
for country in top_countries:
    country_rows = df[df['country'] == country]
    sns.lineplot(data=country_rows, x='DATE', y='NEW Cases', label=country, ax=ax)

ax.set_title("New COVID-19 Cases Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("New Cases")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Cell 5 - Line plot: Vaccinations over time
# Draws the 'vaccinated' column against DATE for the same countries selected
# in Cell 4 (top_countries must already be defined).
fig, ax = plt.subplots(figsize=(14, 6))
for country in top_countries:
    country_rows = df[df['country'] == country]
    sns.lineplot(data=country_rows, x='DATE', y='vaccinated', label=country, ax=ax)

ax.set_title("Vaccinations Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("People Vaccinated")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Cell 6 - Bar plot: Total Cases, Deaths, Vaccinations
# Per-country summary: summed daily cases and deaths, plus the largest
# 'vaccinated' value recorded for that country.
country_totals = df.groupby('country').agg({
    'NEW Cases': 'sum',
    'NEW_DEATHS': 'sum',
    'vaccinated': 'max'
})

# Keep the ten countries with the most cases; Cell 7 reuses agg_df.
agg_df = country_totals.sort_values(by='NEW Cases', ascending=False).head(10)

# One bar chart per column, laid out side by side in a single row.
bar_options = dict(
    kind='bar',
    figsize=(14, 6),
    subplots=True,
    layout=(1, 3),
    sharex=True,
    legend=False,
)
agg_df.plot(**bar_options)
plt.suptitle("Top 10 Countries by Cases, Deaths, and Vaccination")
plt.tight_layout()
plt.show()


In [None]:
# Cell 7 - Scatter plot: Vaccinations vs Cases
# One marker per country from agg_df (built in Cell 6), coloured by the
# country index.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=agg_df,
    x='vaccinated',
    y='NEW Cases',
    hue=agg_df.index,
    s=150,
    ax=ax,
)
ax.set_title("Vaccinated vs Total COVID-19 Cases")
ax.set_xlabel("People Vaccinated")
ax.set_ylabel("Total COVID-19 Cases")
fig.tight_layout()
plt.show()


In [None]:
# Cell 8 - Summary & Insights
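- Countries with higher vaccination counts often exhibit plateauing or declining new case trends. Note that the plots above use absolute numbers of people vaccinated rather than per-capita rates, and the pattern is a correlation, not evidence of causation.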
- Death trends usually follow case trends with a lag.
- Visualization reveals disparities in both case loads and vaccination access across countries.
