In [2]:
pip install pandas matplotlib seaborn plotly

Defaulting to user installation because normal site-packages is not writeable
Looking in links: /usr/share/pip-wheels
Note: you may need to restart the kernel to use updated packages.


In [1]:
# COVID-19 Global Data Tracker


# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Load dataset
# The CSV is expected next to the notebook (relative to the kernel's working
# directory). Keep the path in one named constant so it is easy to change.
DATA_FILE = "owid-covid-data.csv"

try:
    df = pd.read_csv(DATA_FILE)
except FileNotFoundError as exc:
    # Re-raise the same exception type with an actionable message; every
    # later cell depends on `df`, so there is no point continuing without it.
    raise FileNotFoundError(
        f"Could not find '{DATA_FILE}' in the current working directory. "
        "Download the Our World in Data COVID-19 dataset "
        "(https://github.com/owid/covid-19-data/tree/master/public/data) "
        "and place the CSV next to this notebook, or update DATA_FILE."
    ) from exc

# Display first few rows
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'owid-covid-data.csv'

In [None]:
# Check column names
print(df.columns)

# Check for missing values
print(df.isnull().sum())

# Convert 'date' to datetime format
df['date'] = pd.to_datetime(df['date'])

# Keep only the columns used by the analysis, ordered by country and then by
# date so that "the previous row" always means "the previous record of the
# same country".
key_columns = ['date', 'location']
metric_columns = ['total_cases', 'total_deaths', 'new_cases', 'new_deaths', 'total_vaccinations']
df = df[key_columns + metric_columns].sort_values(['location', 'date'])

# Forward-fill gaps in the numeric columns *within each country*.
# A plain frame-wide forward fill would copy the last values of one country
# into the leading gaps of the next one; grouping by location prevents that.
# Gaps before a country's first reported value are left as NaN on purpose.
# (This also avoids `fillna(method=...)`, which recent pandas deprecates.)
df[metric_columns] = df.groupby('location')[metric_columns].ffill()

# Filter dataset for specific countries (Kenya, USA, India)
# `.copy()` makes this an independent frame, so later cells can add columns
# to it without pandas raising SettingWithCopyWarning.
selected_countries = ['Kenya', 'United States', 'India']
df_filtered = df[df['location'].isin(selected_countries)].copy()

# Display cleaned dataset
df_filtered.head()

In [None]:
def _plot_metric_by_country(data, metric, title, ylabel):
    """Draw one line per country for `metric` against date and show the figure.

    Parameters
    ----------
    data : DataFrame with 'date', 'location' and `metric` columns.
    metric : name of the column plotted on the y axis.
    title : figure title.
    ylabel : y-axis label.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.lineplot(x='date', y=metric, hue='location', data=data, ax=ax)
    ax.set(title=title, xlabel="Date", ylabel=ylabel)
    ax.legend(title="Country")
    plt.show()


# Basic statistics
print(df_filtered.describe())

# Plot total cases over time
_plot_metric_by_country(df_filtered, 'total_cases', "COVID-19 Cases Over Time", "Total Cases")

# Compare death rates
# Mask rows with zero (or missing) cases before dividing so the ratio is NaN
# there instead of inf. `.assign` builds a new frame rather than writing a
# column into a possible slice, so re-running this cell is safe.
total_cases = df_filtered['total_cases']
df_filtered = df_filtered.assign(
    death_rate=df_filtered['total_deaths'] / total_cases.where(total_cases > 0)
)

_plot_metric_by_country(df_filtered, 'death_rate', "COVID-19 Death Rates Over Time", "Death Rate")

In [None]:
# Line chart: cumulative vaccinations per selected country over time.
line_fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(x='date', y='total_vaccinations', hue='location', data=df_filtered, ax=ax)
ax.set(title="Total Vaccinations Over Time", xlabel="Date", ylabel="Total Vaccinations")
ax.legend(title="Country")
plt.show()

# Pie chart of vaccinations for latest available date
# Take the most recent row *per country* that has a vaccination figure.
# Filtering on the single global maximum date would silently drop any country
# whose last record is earlier than that date.
latest_data = (
    df_filtered
    .dropna(subset=['total_vaccinations'])
    .sort_values('date')
    .groupby('location')
    .tail(1)
)
fig = px.pie(latest_data, values='total_vaccinations', names='location', title="Vaccination Progress Distribution")
fig.show()

In [None]:
# Plot COVID-19 cases on a world map using plotly
# Use each location's most recent row that has a case count. Selecting only
# rows on the global maximum date would leave every location that did not
# report on that exact day out of the map.
# NOTE(review): if the dataset contains aggregate rows (e.g. continents or
# "World"), they will not match a country name here - confirm whether they
# should be filtered out explicitly.
latest_data = (
    df
    .dropna(subset=['total_cases'])
    .sort_values('date')
    .groupby('location')
    .tail(1)
)
fig = px.choropleth(latest_data, locations="location", locationmode="country names",
                    color="total_cases", hover_name="location",
                    title="COVID-19 Cases Around the World", color_continuous_scale="Reds")
fig.show()

In [None]:
# Key insights
# These are hand-written summary statements; nothing in this cell derives
# them from the data frames built above.
insights = [
    "India had a steep rise in COVID-19 cases during the Delta variant surge.",
    "The United States had one of the fastest vaccination rollouts globally.",
    "Kenya showed a slower but steady vaccination progress.",
    "Death rates dropped significantly after vaccination campaigns began.",
]

# Print the statements as a numbered list, starting at 1.
position = 1
for statement in insights:
    print(f"{position}. {statement}")
    position += 1

1. India had a steep rise in COVID-19 cases during the Delta variant surge.
2. The United States had one of the fastest vaccination rollouts globally.
3. Kenya showed a slower but steady vaccination progress.
4. Death rates dropped significantly after vaccination campaigns began.
