In [1]:
import pandas as pd

# Replace with the actual path to the dataset or download from Kaggle
df = pd.read_csv("owid-covid-data.csv")  # Common filename for COVID datasets on Kaggle
print(df.columns)
df.head()
df.isnull().sum()


countries = ['Kenya', 'United States', 'India']
df = df[df['location'].isin(countries)]
df = df.dropna(subset=['date', 'total_cases', 'total_deaths'])
df['date'] = pd.to_datetime(df['date'])
numeric_cols = ['total_cases', 'total_deaths', 'new_cases', 'new_deaths', 'total_vaccinations']
df[numeric_cols] = df[numeric_cols].interpolate()


import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df[df['location'] == country]
    plt.plot(country_data['date'], country_data['total_cases'], label=country)
plt.title("Total COVID-19 Cases Over Time")
plt.xlabel("Date")
plt.ylabel("Total Cases")
plt.legend()
plt.show()


plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df[df['location'] == country]
    plt.plot(country_data['date'], country_data['total_deaths'], label=country)
plt.title("Total COVID-19 Deaths Over Time")
plt.xlabel("Date")
plt.ylabel("Total Deaths")
plt.legend()
plt.show()


plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df[df['location'] == country]
    plt.plot(country_data['date'], country_data['new_cases'], label=country)
plt.title("Daily New COVID-19 Cases")
plt.xlabel("Date")
plt.ylabel("New Cases")
plt.legend()
plt.show()


df['death_rate'] = df['total_deaths'] / df['total_cases']

plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df[df['location'] == country]
    plt.plot(country_data['date'], country_data['death_rate'], label=country)
plt.title("COVID-19 Death Rate Over Time")
plt.xlabel("Date")
plt.ylabel("Death Rate")
plt.legend()
plt.show()


plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df[df['location'] == country]
    plt.plot(country_data['date'], country_data['total_vaccinations'], label=country)
plt.title("Total Vaccinations Over Time")
plt.xlabel("Date")
plt.ylabel("Total Vaccinations")
plt.legend()
plt.show()


# Some datasets include 'people_fully_vaccinated' or 'people_vaccinated_per_hundred'
# Adjust column name accordingly
if 'people_vaccinated_per_hundred' in df.columns:
    plt.figure(figsize=(12, 6))
    for country in countries:
        country_data = df[df['location'] == country]
        plt.plot(country_data['date'], country_data['people_vaccinated_per_hundred'], label=country)
    plt.title("% of Population Vaccinated Over Time")
    plt.xlabel("Date")
    plt.ylabel("% Vaccinated")
    plt.legend()
    plt.show()


latest_date = df['date'].max()
latest_df = df[df['date'] == latest_date][['iso_code', 'location', 'total_cases']].dropna()
import plotly.express as px

fig = px.choropleth(latest_df,
                    locations='iso_code',
                    color='total_cases',
                    hover_name='location',
                    color_continuous_scale='Reds',
                    title='Total COVID-19 Cases by Country')
fig.show()


### Key Insights

1. The USA had the highest number of total cases and deaths throughout the pandemic.
2. India experienced sharp spikes in daily new cases around mid-2021.
3. Kenya’s vaccination rollout lagged behind compared to the USA and India.
4. Death rates decreased over time in all countries, likely due to improved treatment and vaccinations.
5. Total vaccinations closely followed case spikes, indicating policy response to surges.


SyntaxError: invalid character '’' (U+2019) (1281117157.py, line 109)