In [None]:
#  1. Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Optional plot styling: seaborn's "darkgrid" look, followed by a 12x6-inch
# default figure size. The size is set after the seaborn call, as before.
sns.set(style="darkgrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

In [None]:
# 2. Load the dataset
# The CSV is read from a path relative to the current working directory;
# the cell ends with a preview of the first rows.
csv_path = "owid-covid-data.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# 3. Exploring the data
# Report the column index, the (rows, columns) shape, and the ten columns
# with the highest count of missing values.
missing_per_column = df.isna().sum()
most_missing = missing_per_column.sort_values(ascending=False).head(10)

print("Columns:\n", df.columns)
print("\nShape:", df.shape)
print("\nMissing values:\n", most_missing)

In [None]:
# 4. Cleaning the data
# Convert the date column to datetime so it sorts and plots chronologically.
df['date'] = pd.to_datetime(df['date'])

# Keep only the countries under study. The explicit .copy() makes the result
# an independent frame, so the column assignment further down does not
# trigger SettingWithCopyWarning on a filtered slice.
countries = ['Kenya', 'India', 'United States', 'Brazil', 'Germany']
df = df[df['location'].isin(countries)].copy()

# Drop rows without dates or total_cases.
df = df.dropna(subset=['date', 'total_cases'])

# Order each country's rows chronologically so that interpolation walks
# along that country's own timeline.
df = df.sort_values(['location', 'date'])

# Fill gaps in the numeric series by linear interpolation, one country at a
# time. Interpolating the stacked frame directly would blend the tail of one
# country into the leading gap of the next (e.g. vaccinations before roll-out).
# With the default settings, NaNs before a country's first reported value are
# left as NaN, while trailing NaNs carry the last reported value forward.
cols_to_fill = ['total_deaths', 'new_cases', 'new_deaths', 'total_vaccinations']
df[cols_to_fill] = (
    df.groupby('location')[cols_to_fill]
      .transform(lambda series: series.interpolate())
)

In [None]:
# 5. EDA - Total Cases Over Time
# One line per selected country, drawn on a single 14x7-inch axes.
fig, ax = plt.subplots(figsize=(14, 7))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_cases'], label=name)
ax.set_title("Total COVID-19 Cases Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Cases")
ax.legend()
plt.show()

In [None]:
# 6. EDA - Total Deaths Over Time
# One line per selected country, drawn on a single 14x7-inch axes.
fig, ax = plt.subplots(figsize=(14, 7))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_deaths'], label=name)
ax.set_title("Total COVID-19 Deaths Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Deaths")
ax.legend()
plt.show()

In [None]:
# 7. Daily New Cases Comparison
# One line per selected country, drawn on a single 14x7-inch axes.
fig, ax = plt.subplots(figsize=(14, 7))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['new_cases'], label=name)
ax.set_title("Daily New COVID-19 Cases")
ax.set_xlabel("Date")
ax.set_ylabel("New Cases")
ax.legend()
plt.show()

In [None]:
# 8. Calculating Death Rate
# Death rate = cumulative deaths / cumulative cases.
# Rows whose cumulative case count is zero are masked to NaN before dividing:
# pandas float division by zero does not raise, it yields inf, and a single
# inf value would blow up the y-axis of the plot below.
confirmed_cases = df['total_cases'].where(df['total_cases'] > 0)
df['death_rate'] = df['total_deaths'] / confirmed_cases

# Plotting death rate: one line per selected country on a 14x7-inch axes.
fig, ax = plt.subplots(figsize=(14, 7))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['death_rate'], label=name)
ax.set_title("COVID-19 Death Rate Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Death Rate")
ax.legend()
plt.show()

In [None]:
# 9. Vaccination Progress
# One line per selected country, drawn on a single 14x7-inch axes.
fig, ax = plt.subplots(figsize=(14, 7))
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_vaccinations'], label=name)
ax.set_title("Cumulative Vaccinations Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Vaccinations")
ax.legend()
plt.show()

In [None]:
# 10. Optional Choropleth Map
# Getting the latest vaccination data.
# Use each country's own most recent row that has a vaccination figure.
# Filtering on the single global maximum date would silently drop every
# country that did not report on exactly that day.
with_vaccinations = df.dropna(subset=['total_vaccinations'])
latest = with_vaccinations.sort_values('date').groupby('location').tail(1)

# Keep only the columns the map needs; drop rows still missing any of them
# (for example a missing iso_code).
choropleth_df = latest[['iso_code', 'location', 'total_vaccinations']].dropna()

# Plotting with plotly: countries are matched by the iso_code column and
# coloured by their total_vaccinations value.
fig = px.choropleth(
    choropleth_df,
    locations='iso_code',
    color='total_vaccinations',
    hover_name='location',
    color_continuous_scale='Viridis',
    title='Total COVID-19 Vaccinations by Country',
)
fig.show()