# COVID-19 Data Analysis Report
**Author**: Thembelani Bukali  
**Date**: 2025-05-05  

This notebook analyzes global COVID-19 data including cases, deaths, vaccinations, and hospitalization metrics.

*For questions, contact: siphothagreat@gmail.com*

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from ipywidgets import interact, widgets
from datetime import datetime
from covid_data_processing_utils import load_processed_data

# Set style
# Apply matplotlib's 'ggplot' style sheet, then install seaborn's "husl" palette.
# NOTE(review): both calls presumably affect the default color cycle, so the
# order likely matters (the later call wins) — confirm before reordering.
plt.style.use('ggplot')
sns.set_palette("husl")

In [None]:
# Load processed data
# load_processed_data is imported from the project-local
# covid_data_processing_utils module and returns two objects.
# NOTE(review): presumably `df` is the full per-country time series (later
# cells read its 'location' and 'date' columns) and `latest` is a
# most-recent snapshot — confirm against the helper. `latest` is not used
# in the cells visible here.
df, latest = load_processed_data()

## Enter Your Country, Year, and Month
Enter your country and the month/year to see COVID-19 statistics for that period. If data is not available, an explanation will be shown.

In [None]:
import calendar

def _format_count(value):
    """Render a cumulative count with thousands separators, or "N/A" if missing."""
    # int(NaN) raises ValueError, so missing aggregates must be caught first.
    return "N/A" if pd.isna(value) else f"{int(value):,}"


def _format_average(value):
    """Render a mean to one decimal place, or "N/A" if missing."""
    return "N/A" if pd.isna(value) else f"{value:.1f}"


def show_monthly_stats(country, year, month):
    """Print COVID-19 summary statistics for one country and calendar month.

    Reads the notebook-level ``df``. Matching on the ``location`` column is
    case-insensitive and ignores surrounding whitespace in ``country``.
    Cumulative columns are summarised with their monthly maximum, ICU and
    hospital occupancy with their monthly mean. A metric with no reported
    values in the month is printed as "N/A" instead of raising.

    Args:
        country: Country name as typed by the user.
        year: Calendar year to report on.
        month: Calendar month number (1-12).
    """
    query = country.strip()
    period = f"{calendar.month_name[month]} {year}"

    # Filter for the selected country and month
    mask = (
        (df['location'].str.lower() == query.lower()) &
        (df['date'].dt.year == year) &
        (df['date'].dt.month == month)
    )
    month_data = df[mask]

    if month_data.empty:
        print(f"No data available for {query.title()} in {period}.")
        print("Possible reasons: The country name may be misspelled, or data for this period is missing.")
        print("Try another country or month.")
        return

    # Show summary statistics for the month
    summary = month_data.agg({
        'total_cases': 'max',
        'total_deaths': 'max',
        'people_fully_vaccinated': 'max',
        'icu_patients': 'mean',
        'hosp_patients': 'mean'
    })

    # Show the dataset's own spelling: str.title() mangles names such as
    # "Cote d'Ivoire" or "Bosnia and Herzegovina".
    display_name = month_data['location'].iloc[0]

    print(f"COVID-19 Stats for {display_name} in {period}:")
    print(f"  Total Cases: {_format_count(summary['total_cases'])}")
    print(f"  Total Deaths: {_format_count(summary['total_deaths'])}")
    print(f"  Fully Vaccinated: {_format_count(summary['people_fully_vaccinated'])}")
    print(f"  Avg ICU Patients: {_format_average(summary['icu_patients'])}")
    print(f"  Avg Hospitalized Patients: {_format_average(summary['hosp_patients'])}")

# Wire show_monthly_stats to a text box (country) and two sliders (year, month).
# NOTE(review): the first import cell already binds `widgets` via
# `from ipywidgets import interact, widgets`; this import rebinds the same
# name to the top-level ipywidgets package. Presumably redundant — confirm
# and consider calling the already-imported `interact` instead.
import ipywidgets as widgets
# The year range 2020-2025 is hard-coded here rather than derived from df.
# The trailing semicolon presumably keeps Jupyter from echoing the return value.
widgets.interact(
    show_monthly_stats,
    country=widgets.Text(value='Kenya', description='Country:'),
    year=widgets.IntSlider(value=2022, min=2020, max=2025, description='Year:'),
    month=widgets.IntSlider(value=1, min=1, max=12, description='Month:')
);

In [None]:
# Calculate key metrics
def calculate_metrics(df):
    """Add derived rate columns to ``df`` and return it.

    The frame is modified in place (and also returned, so the result can be
    reassigned). Columns added:

    * ``death_rate`` - total_deaths / total_cases
    * ``pct_vaccinated`` - people_vaccinated as a percentage of population
    * ``pct_fully_vaccinated`` - people_fully_vaccinated as a percentage
      of population
    * ``hosp_per_100k`` - hosp_patients per 100,000 population
    * ``icu_per_100k`` - icu_patients per 100,000 population

    Any row whose denominator (total_cases or population) is zero, negative
    or missing gets NaN for the affected metrics rather than ``inf``.

    Args:
        df: DataFrame with the columns total_cases, total_deaths,
            people_vaccinated, people_fully_vaccinated, hosp_patients,
            icu_patients and population.

    Returns:
        The same DataFrame object, with the derived columns added.
    """
    # Series.where keeps values where the condition holds and puts NaN
    # elsewhere, so every division below is guarded the same way.
    valid_cases = df['total_cases'].where(df['total_cases'] > 0)
    valid_population = df['population'].where(df['population'] > 0)
    population_100k = valid_population / 100000

    # Death rate with zero-division handling
    df['death_rate'] = df['total_deaths'] / valid_cases

    # Vaccination percentages
    df['pct_vaccinated'] = (df['people_vaccinated'] / valid_population) * 100
    df['pct_fully_vaccinated'] = (df['people_fully_vaccinated'] / valid_population) * 100

    # Hospitalization rates per 100k
    df['hosp_per_100k'] = df['hosp_patients'] / population_100k
    df['icu_per_100k'] = df['icu_patients'] / population_100k

    return df

# Add the derived rate columns to the notebook-level frame.
df = calculate_metrics(df)

# Fill missing values
# NOTE(review): these fills run after calculate_metrics, so hosp_per_100k and
# icu_per_100k were computed from the unfilled columns and keep their NaNs,
# while the raw columns below become 0 — confirm this asymmetry is intended.
# NOTE(review): show_monthly_stats averages these two columns from the global
# df each time it is called, so once this cell has run, days with no report
# count as 0 in those means — confirm that "missing" should be treated as zero.
df['icu_patients'] = df['icu_patients'].fillna(0)
df['hosp_patients'] = df['hosp_patients'].fillna(0)

In [None]:
def interactive_analysis(country="United States",
                        start_date=pd.to_datetime('2020-03-01'),
                        end_date=pd.to_datetime('2023-01-01'),
                        show_icu=True,
                        show_vaccinations=True):
    """Plot cumulative cases and deaths for one country over a date range.

    Reads the notebook-level ``df``. Optionally overlays ICU occupancy on the
    main axis and the fully-vaccinated percentage on a secondary axis fixed
    to 0-100.

    Args:
        country: Value to match exactly against ``df['location']``.
        start_date: Inclusive lower bound. Accepts anything ``pd.Timestamp``
            accepts (including ``datetime.date``); ``None`` means no lower
            bound.
        end_date: Inclusive upper bound, same rules as ``start_date``.
        show_icu: Whether to draw the ``icu_patients`` line.
        show_vaccinations: Whether to draw the ``pct_fully_vaccinated`` line.
    """
    # Filter data
    # Date-picker widgets hand over plain datetime.date objects (or None when
    # cleared); normalise to Timestamp so the comparison with the datetime
    # column is well defined.
    mask = df['location'] == country
    if start_date is not None:
        mask = mask & (df['date'] >= pd.Timestamp(start_date))
    if end_date is not None:
        mask = mask & (df['date'] <= pd.Timestamp(end_date))
    country_data = df[mask].set_index('date')

    # Create figure
    fig, ax = plt.subplots(figsize=(14, 8))

    # Plot cases/deaths
    ax.plot(country_data['total_cases'], label='Total Cases', color='blue')
    ax.plot(country_data['total_deaths'], label='Total Deaths', color='red')

    # Plot ICU if selected
    if show_icu and 'icu_patients' in country_data:
        ax.plot(country_data['icu_patients'],
                label='ICU Patients',
                color='purple',
                linestyle='--')

    # Plot vaccinations if selected
    # Guard on the column that is actually plotted.
    ax2 = None
    if show_vaccinations and 'pct_fully_vaccinated' in country_data:
        ax2 = ax.twinx()
        ax2.plot(country_data['pct_fully_vaccinated'],
                 label='% Fully Vaccinated',
                 color='green',
                 linestyle=':')
        ax2.set_ylabel('Vaccination %', color='green')
        ax2.tick_params(axis='y', labelcolor='green')
        ax2.set_ylim(0, 100)

    ax.set_title(f'COVID-19 in {country}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Count')

    # A legend only lists artists from its own axes, so merge in the
    # secondary-axis entries to make the vaccination line show up.
    handles, labels = ax.get_legend_handles_labels()
    if ax2 is not None:
        extra_handles, extra_labels = ax2.get_legend_handles_labels()
        handles = handles + extra_handles
        labels = labels + extra_labels
    ax.legend(handles, labels, loc='upper left')
    ax.grid(True)

    plt.tight_layout()
    plt.show()

# Create interactive widget
# Binds interactive_analysis to a country dropdown (populated from the unique
# values of df['location']), two date pickers and two checkboxes.
# NOTE(review): assumes 'United States' is one of df['location']'s values,
# otherwise the Dropdown default is presumably rejected — TODO confirm.
# NOTE(review): DatePicker presumably passes datetime.date values (or None
# when cleared) to the callback — confirm interactive_analysis's comparisons
# against df['date'] accept them.
# The trailing semicolon presumably keeps Jupyter from echoing the return value.
interact(interactive_analysis,
         country=widgets.Dropdown(options=df['location'].unique(), 
                                value='United States',
                                description='Country:'),
         start_date=widgets.DatePicker(value=pd.to_datetime('2020-03-01'),
                                     description='Start Date:'),
         end_date=widgets.DatePicker(value=pd.to_datetime('2023-01-01'),
                                   description='End Date:'),
         show_icu=widgets.Checkbox(value=True,
                                 description='Show ICU Data'),
         show_vaccinations=widgets.Checkbox(value=True,
                                          description='Show Vaccinations'));

## Choropleth Map: Vaccination Rates by Country
This map visualizes the percentage of fully vaccinated population by country.

In [None]:
import plotly.express as px

# Prepare data for the latest date
# Keeps only the rows whose date equals the single newest date found in df.
# NOTE(review): a country with no row, or a missing pct_fully_vaccinated, on
# that exact date will presumably appear blank on the map — confirm coverage,
# or consider using each country's most recent non-null value instead.
choropleth_data = df[df['date'] == df['date'].max()]

# Create choropleth map
# Countries are located via the 'iso_code' column and colored by
# 'pct_fully_vaccinated' (the column added by calculate_metrics).
# NOTE(review): assumes iso_code holds ISO-3 country codes, which is what
# plotly's default locationmode expects — TODO confirm.
fig = px.choropleth(choropleth_data,
                    locations='iso_code',
                    color='pct_fully_vaccinated',
                    hover_name='location',
                    title='Global Vaccination Rates',
                    color_continuous_scale='Viridis')
fig.show()

## Key Insights
- The USA has one of the highest vaccination rates globally, with over 70% of the population fully vaccinated.
- Some countries still have very low vaccination rates, highlighting disparities in vaccine distribution.
- Death rates are generally higher in countries with lower vaccination rates.

---

**Report prepared by Thembelani Bukali, 2025-05-05**  
*Contact: siphothagreat@gmail.com*