# Load and Explore the OWID COVID-19 Data

In [None]:
import pandas as pd

In [None]:
# Read the Our World in Data COVID-19 export from the working directory
# into a DataFrame.
df = pd.read_csv(filepath_or_buffer='owid-covid-data.csv')

# List the available column labels so later cells can pick the right ones.
print(df.columns)

In [None]:
# Preview first 5 rows. A notebook cell only echoes its final expression, so
# the preview is printed explicitly; a bare `df.head()` here would be
# evaluated and then silently discarded.
print(df.head())

# Check for missing values: number of null entries in each column. As the
# last expression of the cell, this result is displayed automatically.
df.isnull().sum()


# Clean the Data

In [None]:
# Restrict the analysis to the countries of interest. `.copy()` makes the
# subset an independent frame, so the column assignments below modify it
# directly instead of raising pandas' SettingWithCopyWarning.
countries = ['Kenya', 'United States', 'India']
df = df[df['location'].isin(countries)].copy()

# Convert date column to datetime
df['date'] = pd.to_datetime(df['date'])

# Drop rows with missing critical values
df = df.dropna(subset=['date', 'total_cases', 'total_deaths'])

# Order rows chronologically within each country so that a forward-fill
# always carries the previous day's value.
df = df.sort_values(['location', 'date'])

# Forward-fill the remaining gaps separately for each country. A plain
# frame-wide forward-fill would copy the last values of one country into the
# leading gaps of the next one; `fillna(method='ffill')` is also deprecated
# in recent pandas releases.
fill_cols = [col for col in df.columns if col != 'location']
filled = df.groupby('location')[fill_cols].ffill()
df[filled.columns] = filled


# Exploratory Data Analysis (EDA)

### 📈 Plot Total Cases Over Time

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One cumulative-case curve per selected country, all on the same axes.
plt.figure(figsize=(12, 6))
ax = plt.gca()
for name in countries:
    subset = df.loc[df['location'] == name]
    ax.plot(subset['date'], subset['total_cases'], label=name)

ax.set_title("Total COVID-19 Cases Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Cases")
ax.legend()
plt.show()


### 📈 Daily New Cases Comparison

In [None]:
# Compare the reported daily new cases of the selected countries.
plt.figure(figsize=(12, 6))
for name in countries:
    # Boolean mask selecting this country's rows.
    is_country = df['location'].eq(name)
    plt.plot(
        df.loc[is_country, 'date'],
        df.loc[is_country, 'new_cases'],
        label=name,
    )

plt.title("Daily New Cases")
plt.xlabel("Date")
plt.ylabel("New Cases")
plt.legend()
plt.show()


### 💀 Calculate & Plot Death Rate

In [None]:
# Death rate = cumulative deaths divided by cumulative cases (a fraction,
# not a percentage), stored as a new column.
df['death_rate'] = df['total_deaths'].div(df['total_cases'])

# Plot the resulting ratio over time, one line per country.
plt.figure(figsize=(12, 6))
ax = plt.gca()
for name in countries:
    rows = df.loc[df['location'] == name]
    ax.plot(rows['date'], rows['death_rate'], label=name)

ax.set_title("COVID-19 Death Rate Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Death Rate")
ax.legend()
plt.show()


## Analyze Vaccination Progress

In [None]:
# Cumulative vaccine doses administered, one line per selected country.
plt.figure(figsize=(12, 6))
for name in countries:
    in_country = df['location'] == name
    dates = df.loc[in_country, 'date']
    doses = df.loc[in_country, 'total_vaccinations']
    plt.plot(dates, doses, label=name)

plt.gca().set(
    title="Total Vaccinations Over Time",
    xlabel="Date",
    ylabel="Total Vaccinations",
)
plt.legend()
plt.show()


### % of Population Fully Vaccinated

In [None]:
# Column holding the number of fully vaccinated people per hundred
# inhabitants; plotted as one line per selected country.
metric = 'people_fully_vaccinated_per_hundred'

plt.figure(figsize=(12, 6))
ax = plt.gca()
for name in countries:
    rows = df[df['location'].eq(name)]
    ax.plot(rows['date'], rows[metric], label=name)

ax.set_title("Fully Vaccinated (% of Population)")
ax.set_xlabel("Date")
ax.set_ylabel("% Fully Vaccinated")
ax.legend()
plt.show()


## Choropleth Map with Plotly

In [None]:
import plotly.express as px

# Most recent date present anywhere in the data (kept for reference).
latest_date = df['date'].max()

# Use the latest available row for *each* country. Filtering on the single
# global maximum date would silently drop every country whose final record
# is earlier than that date.
latest_data = df.sort_values('date').groupby('location').tail(1)

# Colour each country (matched via its ISO code) by its cumulative case count.
fig = px.choropleth(latest_data,
                    locations="iso_code",
                    color="total_cases",
                    hover_name="location",
                    title="Total COVID-19 Cases by Country",
                    color_continuous_scale='Reds')
fig.show()


# Insights:
- The USA had the highest total cases but also rolled out vaccines rapidly.
- Kenya experienced slower vaccine uptake and had higher death rates early on.
- India had large case spikes in mid-2021, followed by strong vaccination growth.
