# In [None]:
# %% [markdown]
# # COVID-19 Data Analysis
# ## 1. Data Loading

# %%
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the CSV into a DataFrame, parsing the 'date' column as datetimes
df = pd.read_csv(
    "data/owid-covid-data.csv",
    parse_dates=["date"],
)
# Print the first rows as a quick check that the load worked
print(df.head())

# %% [markdown]
# ## 2. Data Cleaning

# %%
# Filter countries
countries = ["United States", "India", "Kenya", "Brazil", "Germany"]
# .copy() yields an independent frame, so the column assignments below do not
# write into a slice of the full dataset (avoids SettingWithCopyWarning).
df = df[df['location'].isin(countries)].copy()

# Handle missing values
# NOTE(review): both columns are treated as running totals (per the OWID
# codebook) -- confirm. A gap inside a series is therefore carried forward
# from the previous row of the same location instead of being reset to 0,
# which would draw artificial drops in the plots and put zeros in the
# death-rate denominator. Only gaps before a location's first reported value
# (nothing to carry forward) become 0.
df = df.sort_values(['location', 'date'])  # ffill needs chronological order
cumulative_cols = ['total_cases', 'people_vaccinated']
df[cumulative_cols] = df.groupby('location')[cumulative_cols].ffill().fillna(0)

# %% [markdown]
# ## 3. Exploratory Analysis

# %%
# Plot total cases
# One line per entry in `countries`, drawn on a single 12x6 figure.
plt.figure(figsize=(12, 6))
for country in countries:
    # Sort by date so each line is drawn in chronological order regardless of
    # the row order in df.
    country_data = df[df['location'] == country].sort_values('date')
    # The y-axis is labelled in millions, so scale the raw counts to match.
    plt.plot(country_data['date'], country_data['total_cases'] / 1e6, label=country)

plt.title("Total COVID-19 Cases (2020-2023)")
plt.xlabel("Date")
plt.ylabel("Cases (Millions)")
plt.legend()
plt.show()

# %%
# Death rate calculation
# Deaths as a percentage of cases, per row. Rows with zero cases have no
# defined rate: dividing by 0 would give inf, which max() would then report as
# the location's peak. Masking those denominators to NaN makes the rate NaN
# there, and max() skips NaN.
nonzero_cases = df['total_cases'].where(df['total_cases'] > 0)
df['death_rate'] = (df['total_deaths'] / nonzero_cases) * 100
# Highest death rate seen for each location, largest first
print(df.groupby('location')['death_rate'].max().sort_values(ascending=False))

# %% [markdown]
# ## 4. Advanced Visualizations (Plotly)

# %%
import plotly.express as px

# Interactive line chart of total cases over time, one coloured line per location
fig = px.line(
    data_frame=df,
    x='date',
    y='total_cases',
    color='location',
    title="COVID-19 Cases by Country",
)
fig.show()

# Choropleth map (latest data)
# keep='last' only selects the most recent row per location when the rows are
# in date order, so sort explicitly instead of relying on the file's order.
latest = df.sort_values('date').drop_duplicates(subset='location', keep='last')
# Regions are matched through the 'iso_code' column and shaded by total_cases.
fig = px.choropleth(latest, locations="iso_code", color="total_cases",
                    hover_name="location", title="Global Case Distribution")
fig.show()