# In [None]:
# COVID-19 Global Data Tracker Notebook

# 📦 Import Required Libraries
import pandas as pd
import seaborn as sns
import plotly.express as px

# matplotlib is required for the static charts in this notebook, so fail fast
# with installation guidance rather than a bare import traceback.
try:
    import matplotlib.pyplot as plt
except ImportError as exc:
    # Chain the original error explicitly so its details (which module or
    # dependency actually failed to import) stay attached to the friendlier
    # message below.
    raise ImportError(
        "matplotlib is not installed. Please install it using "
        "'pip install matplotlib' and rerun the notebook."
    ) from exc

# Enable inline plotting when running under IPython/Jupyter. This is
# best-effort: in a plain Python interpreter `get_ipython` is undefined
# (NameError) and execution simply continues without the magic.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except Exception:
    # Deliberately broad so that any failure to enable the inline backend is
    # non-fatal, but unlike a bare `except:` this still lets
    # KeyboardInterrupt and SystemExit propagate.
    pass

# 📁 Load the Cleaned Dataset (Kenya, USA, India)
# Read the cleaned dataset and run the `date` column through pd.to_datetime
# in one expression; the plots below use `date` as their x-axis.
df = pd.read_csv("../data/cleaned_covid_data.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)

# 📊 Exploratory Data Analysis (EDA)
## Total Cases Over Time
# One line per distinct `location`, drawn on a single Axes through
# matplotlib's object-oriented interface.
cases_fig, cases_ax = plt.subplots(figsize=(10, 6))
for country in df["location"].unique():
    subset = df.loc[df["location"] == country]
    cases_ax.plot(subset["date"], subset["total_cases"], label=country)
cases_ax.set_title("Total COVID-19 Cases Over Time")
cases_ax.set_xlabel("Date")
cases_ax.set_ylabel("Total Cases")
cases_ax.legend()
cases_fig.tight_layout()
plt.show()

## Daily New Cases Comparison
# Same layout as the other time-series charts: one labelled line per
# distinct `location`, plotting the `new_cases` column against `date`.
daily_fig, daily_ax = plt.subplots(figsize=(10, 6))
for country in df["location"].unique():
    subset = df.loc[df["location"] == country]
    daily_ax.plot(subset["date"], subset["new_cases"], label=country)
daily_ax.set_title("Daily New Cases")
daily_ax.set_xlabel("Date")
daily_ax.set_ylabel("New Cases")
daily_ax.legend()
daily_fig.tight_layout()
plt.show()

## Death Rate Over Time
# death_rate = total_deaths / total_cases, stored as a fraction (it is
# multiplied by 100 only for display). Rows whose case count is not positive
# are masked to NaN before dividing so the ratio can never become +/-inf;
# matplotlib leaves NaN points out of the drawn line.
reported_cases = df["total_cases"].where(df["total_cases"] > 0)
df["death_rate"] = df["total_deaths"] / reported_cases
plt.figure(figsize=(10, 6))
for country in df["location"].unique():
    subset = df[df["location"] == country]
    plt.plot(subset["date"], subset["death_rate"] * 100, label=country)
plt.title("Death Rate (%) Over Time")
plt.xlabel("Date")
plt.ylabel("Death Rate (%)")
plt.legend()
plt.tight_layout()
plt.show()

# 💉 Vaccination Progress
# Plot the `total_vaccinations` column against `date`, one labelled line per
# distinct `location`, using the object-oriented Axes interface.
vax_fig, vax_ax = plt.subplots(figsize=(10, 6))
for country in df["location"].unique():
    subset = df.loc[df["location"] == country]
    vax_ax.plot(subset["date"], subset["total_vaccinations"], label=country)
vax_ax.set_title("Vaccination Rollout Over Time")
vax_ax.set_xlabel("Date")
vax_ax.set_ylabel("Total Vaccinations")
vax_ax.legend()
vax_fig.tight_layout()
plt.show()

# 🗺️ Choropleth Map Using Plotly
# Load the snapshot file and render it as a choropleth coloured by the
# `total_cases` column (presumably one row per country, keyed by `iso_code`
# -- confirm against the data file).
choropleth_df = pd.read_csv("../data/choropleth_data.csv")
map_options = {
    "locations": "iso_code",
    "color": "total_cases",
    "hover_name": "location",
    "color_continuous_scale": "Reds",
    "title": "Global COVID-19 Cases Snapshot",
}
fig = px.choropleth(choropleth_df, **map_options)
fig.show()

# 📝 Insights
# 1. India consistently had higher total cases compared to Kenya and the USA.
# 2. The USA and India show strong early vaccination rollouts.
# 3. Kenya’s vaccination progress began later and grew gradually.
# 4. Death rates remained relatively low, but varied by country and wave.
# 5. The global snapshot shows uneven case distribution worldwide.

# ✅ End of Analysis
# Reached only if every statement above ran without raising.
print("Analysis complete.")
