## Step 3: Data Loading & Exploration
In this step, we'll load the COVID-19 dataset, check its structure, and explore the columns and rows.


In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


ModuleNotFoundError: No module named 'pandas'

In [None]:
# Read the COVID-19 CSV into a DataFrame; the relative path is resolved
# against the kernel's current working directory.
csv_path = 'owid-covid-data.csv'
df = pd.read_csv(csv_path)

# Show the top five rows (head's default count) as this cell's output
df.head(n=5)


In [None]:
# Check the column names: the bare expression is the cell's last statement,
# so the notebook renders the Index of column labels as the cell output.
df.columns


In [None]:
# Count the missing entries per column (isna is the alias of isnull);
# the resulting Series is displayed as the cell output.
missing_per_column = df.isna().sum()
missing_per_column


In [None]:
# Filter data for a specific country (the United States).
# Match on `iso_code` rather than `location`: 'USA' is a country code, and
# `iso_code` is the column the choropleth cell passes to plotly as country
# codes. `location` presumably holds full country names (e.g. 'United States'
# -- confirm against the data), so comparing it with 'USA' selects no rows.
# .copy() makes df_usa an independent frame that is safe to modify later.
df_usa = df[df['iso_code'] == 'USA'].copy()
df_usa.head()


## Step 4: Data Cleaning
In this step, we clean the dataset by filtering for countries of interest, handling missing values, and ensuring the date column is in datetime format.


In [None]:
# Countries to analyse. Each entry must match the spelling used in the
# `location` column exactly, because every later cell filters on it.
# The OWID data labels the United States as 'United States' ('USA' is its
# iso_code), so a 'USA' entry would silently produce empty plots.
countries_of_interest = ['Kenya', 'United States', 'India']

# .copy() detaches the subset from df, so the column assignments made in
# later cells do not operate on a slice (no SettingWithCopyWarning).
df_filtered = df[df['location'].isin(countries_of_interest)].copy()


In [None]:
# Convert the `date` column to datetime so the plots get a true time axis.
# assign() returns a new frame instead of writing a column into the filtered
# slice, which avoids pandas' SettingWithCopyWarning and keeps the cell
# idempotent on re-run.
df_filtered = df_filtered.assign(date=pd.to_datetime(df_filtered['date']))


In [None]:
# Keep only rows where both total_cases and total_deaths are present.
# Rebinding the name (instead of inplace=True) avoids mutating a filtered
# slice in place and gives the same result when the cell is re-run.
df_filtered = df_filtered.dropna(subset=['total_cases', 'total_deaths'])


## Step 5: Exploratory Data Analysis (EDA)
In this step, we will perform exploratory data analysis to uncover trends and patterns in the data.


In [None]:
# Line chart of total_cases against date, one labelled line per selected country
fig, ax = plt.subplots(figsize=(10, 6))
for name in countries_of_interest:
    rows = df_filtered.loc[df_filtered['location'] == name]
    ax.plot(rows['date'], rows['total_cases'], label=name)

ax.set_title("Total COVID-19 Cases Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Cases")
ax.legend()
plt.show()


In [None]:
# Line chart of total_deaths against date, one labelled line per selected country
fig, ax = plt.subplots(figsize=(10, 6))
location_column = df_filtered['location']
for name in countries_of_interest:
    subset = df_filtered[location_column == name]
    ax.plot(subset['date'], subset['total_deaths'], label=name)

ax.set(
    title="Total COVID-19 Deaths Over Time",
    xlabel="Date",
    ylabel="Total Deaths",
)
ax.legend()
plt.show()


In [None]:
# Daily new cases, derived as the row-to-row difference of total_cases,
# drawn as one line per selected country.
plt.figure(figsize=(10, 6))
for country in countries_of_interest:
    # Sort chronologically so diff() subtracts the previous date's total
    # rather than whatever row happens to precede it in file order.
    country_data = df_filtered[df_filtered['location'] == country].sort_values('date')
    # Keep the result in a local Series instead of adding a 'new_cases' column:
    # writing into the per-country slice raises SettingWithCopyWarning and the
    # column is discarded on the next iteration anyway. The first value is NaN
    # because there is no earlier row to subtract.
    # NOTE(review): rows dropped during cleaning leave date gaps, so a step may
    # span more than one day -- confirm this is acceptable.
    daily_new_cases = country_data['total_cases'].diff()
    plt.plot(country_data['date'], daily_new_cases, label=country)

plt.title("Daily New COVID-19 Cases Over Time")
plt.xlabel("Date")
plt.ylabel("New Cases")
plt.legend()
plt.show()


In [None]:
# Death rate in percent: total_deaths / total_cases * 100.
# Zero case counts are masked to NaN before dividing so the ratio can never
# become inf (x / 0), which would wreck the y-axis scaling. assign() adds the
# column on a new frame instead of writing into a filtered slice.
nonzero_cases = df_filtered['total_cases'].where(df_filtered['total_cases'] != 0)
df_filtered = df_filtered.assign(
    death_rate=df_filtered['total_deaths'] / nonzero_cases * 100
)

plt.figure(figsize=(10, 6))
for country in countries_of_interest:
    country_data = df_filtered[df_filtered['location'] == country]
    plt.plot(country_data['date'], country_data['death_rate'], label=country)

plt.title("Death Rate Over Time")
plt.xlabel("Date")
plt.ylabel("Death Rate (%)")
plt.legend()
plt.show()


## Step 6: Visualizing Vaccination Progress
In this step, we will analyze the vaccination rollout over time for the selected countries.


In [None]:
# Cumulative vaccinations over time, one line per selected country.
plt.figure(figsize=(10, 6))
for country in countries_of_interest:
    country_data = df_filtered[df_filtered['location'] == country]
    # total_vaccinations was not in the cleaning step's dropna subset, so it can
    # still contain NaN. Matplotlib breaks a line at every NaN, which leaves
    # sparsely reported series fragmented or invisible; plotting only the
    # reported rows connects consecutive known values instead.
    reported = country_data.dropna(subset=['total_vaccinations'])
    plt.plot(reported['date'], reported['total_vaccinations'], label=country)

plt.title("Cumulative Vaccinations Over Time")
plt.xlabel("Date")
plt.ylabel("Cumulative Vaccinations")
plt.legend()
plt.show()


In [None]:
# Vaccinations relative to population, in percent.
# assign() adds the column on a new frame instead of writing into a filtered
# slice (avoids SettingWithCopyWarning, safe to re-run).
# NOTE(review): total_vaccinations presumably counts doses, not people, so
# this ratio can exceed 100 and is not strictly "share of people vaccinated".
# If the dataset has a people-based column, prefer it -- confirm.
df_filtered = df_filtered.assign(
    vaccination_percentage=df_filtered['total_vaccinations'] / df_filtered['population'] * 100
)

plt.figure(figsize=(10, 6))
for country in countries_of_interest:
    country_data = df_filtered[df_filtered['location'] == country]
    # Plot only rows with a computed value: matplotlib breaks lines at NaN,
    # so unreported days would otherwise fragment or hide the curve.
    reported = country_data.dropna(subset=['vaccination_percentage'])
    plt.plot(reported['date'], reported['vaccination_percentage'], label=country)

plt.title("Vaccination Progress by Country")
plt.xlabel("Date")
plt.ylabel("Vaccinated (%)")
plt.legend()
plt.show()


## Step 7: Optional - Choropleth Map
In this optional step, we will create a choropleth map to visualize the latest total COVID-19 case counts for the selected countries.


In [None]:
# Take the most recent row of each country rather than the rows on the single
# global maximum date: after the missing-value cleanup the countries need not
# share the same last date, and the global-max filter would drop any country
# whose latest surviving row is earlier.
latest_data = (
    df_filtered
    .sort_values('date')
    .drop_duplicates(subset='location', keep='last')
)

# Colour each country (identified by its iso_code) by its latest total_cases.
fig = px.choropleth(
    latest_data,
    locations="iso_code",
    color="total_cases",
    hover_name="location",
    color_continuous_scale="Viridis",
)
fig.update_layout(title="COVID-19 Cases by Country")
fig.show()
