# Package Installation
First, let's install the required Python packages using pip.

In [5]:
%pip install pandas numpy matplotlib seaborn plotly

1107.44s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


Collecting plotly
Collecting plotly
  Downloading plotly-6.0.1-py3-none-any.whl.metadata (6.7 kB)
  Downloading plotly-6.0.1-py3-none-any.whl.metadata (6.7 kB)
Collecting narwhals>=1.15.1 (from plotly)
Collecting narwhals>=1.15.1 (from plotly)
  Downloading narwhals-1.38.1-py3-none-any.whl.metadata (9.3 kB)
  Downloading narwhals-1.38.1-py3-none-any.whl.metadata (9.3 kB)
Downloading plotly-6.0.1-py3-none-any.whl (14.8 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/14.8 MB[0m [31m?[0m eta [36m-:--:--[0mDownloading plotly-6.0.1-py3-none-any.whl (14.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.8/14.8 MB[0m [31m180.3 kB/s[0m eta [36m0:00:00[0m00:01[0m00:03[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.8/14.8 MB[0m [31m180.3 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading narwhals-1.38.1-py3-none-any.whl (338 kB)
[?25lDownloading narwhals-1.38.1-py3-none-any.whl (338 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━

# COVID-19 Global Data Analysis

This notebook presents a comprehensive analysis of global COVID-19 data, including cases, deaths, and vaccination progress.

In [6]:
import sys
sys.path.append('../src')
from data_loader import CovidDataLoader

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Initialize and load data
loader = CovidDataLoader()
df = loader.load_data()
df_clean = loader.clean_data()

# Fail fast, with a readable message, if loading/cleaning did not yield a usable frame.
# The analysis cells below filter df_clean on exactly these columns, so a missing one
# would otherwise surface later as an opaque KeyError far from the cause.
REQUIRED_COLUMNS = {'location', 'date', 'total_cases'}
assert df_clean is not None, "clean_data() returned None - did load_data() succeed?"
missing_columns = REQUIRED_COLUMNS - set(df_clean.columns)
assert not missing_columns, f"df_clean is missing expected columns: {sorted(missing_columns)}"
assert not df_clean.empty, "df_clean is empty - check that the download completed"

INFO:data_loader:Downloading data from web...
ERROR:data_loader:Error loading data: IncompleteRead(102694416 bytes read, 195015 more expected)


IncompleteRead: IncompleteRead(102694416 bytes read, 195015 more expected)

## Key Insights
Let's examine the latest statistics and trends across countries.

In [None]:
# Ask the loader for its summary. It is indexed below by 'date' and by
# 'highest_cases' -> 'country', so it is expected to behave like a nested mapping.
insights = loader.get_key_insights()
print(f"As of {insights['date']}, {insights['highest_cases']['country']} leads in total cases")

# Display all visualizations by calling the loader's three plotting methods in turn.
# The notebook echoes only the value of the last expression in a cell, so the order
# of these calls (and the final one being a bare expression) is kept deliberately.
# NOTE(review): presumably each method renders its own figure - confirm in data_loader.
loader.plot_total_cases()
loader.plot_new_cases_trend()
loader.plot_vaccination_progress()

## Growth Rate Analysis
Estimate the case doubling time for each country of interest over the first 90 days of its outbreak, assuming a constant daily growth rate across that window.

In [None]:
def calculate_doubling_time(country_name, start_date, end_date):
    """Estimate how many days it took cumulative cases to double in a date window.

    Reads the notebook-level ``df_clean`` frame and fits a constant daily growth
    rate between the first and last usable observation for ``country_name`` inside
    ``[start_date, end_date]`` (both bounds inclusive).

    Parameters
    ----------
    country_name : str
        Value matched against ``df_clean['location']``.
    start_date, end_date :
        Window bounds compared against ``df_clean['date']``.

    Returns
    -------
    float
        Doubling time in days; ``np.inf`` when cases did not grow over the window;
        ``np.nan`` when fewer than two usable observations exist or they do not
        span at least one day.
    """
    in_window = (
        (df_clean['location'] == country_name)
        & (df_clean['date'] >= start_date)
        & (df_clean['date'] <= end_date)
    )
    # Sort explicitly so "first" and "last" really are the earliest and latest rows,
    # and drop missing counts so they cannot poison the growth ratio.
    country_data = (
        df_clean.loc[in_window, ['date', 'total_cases']]
        .dropna(subset=['total_cases'])
        .sort_values('date')
    )
    # Growth from a zero baseline is undefined (division by zero), so the baseline
    # is the first observation with at least one case.
    country_data = country_data[country_data['total_cases'] > 0]
    if len(country_data) < 2:
        return np.nan

    cases = country_data['total_cases'].to_numpy(dtype=float)
    # Use elapsed calendar time, not the row count: n observations span n - 1
    # intervals, and gaps in reporting would otherwise be ignored.
    elapsed_days = (
        country_data['date'].iloc[-1] - country_data['date'].iloc[0]
    ) / pd.Timedelta(days=1)
    if elapsed_days <= 0:
        return np.nan

    growth_rate = (cases[-1] / cases[0]) ** (1 / elapsed_days) - 1
    if growth_rate <= 0:
        return np.inf
    return np.log(2) / np.log(1 + growth_rate)

# Calculate doubling time for each country.
# The 90-day window is anchored on the first date with a positive case count rather
# than on the country's first row: rows before the first reported case give the
# growth ratio a zero or missing baseline and shorten the real outbreak window.
for country in loader.countries_of_interest:
    country_data = df_clean[df_clean['location'] == country]
    # With datetime64 dates, .min() of an empty selection is NaT; the date comparisons
    # in calculate_doubling_time then match no rows and it returns NaN.
    start_date = country_data.loc[country_data['total_cases'] > 0, 'date'].min()
    end_date = start_date + pd.Timedelta(days=90)
    doubling_time = calculate_doubling_time(country, start_date, end_date)
    print(f"{country} had a case doubling time of {round(doubling_time, 1)} days in the first 90 days.")

## Correlation Analysis
Examine relationships between different COVID-19 metrics and socioeconomic indicators.

In [None]:
# Columns whose pairwise correlations are compared.
metric_columns = [
    'total_cases',
    'total_deaths',
    'total_vaccinations',
    'population',
    'gdp_per_capita',
]

# Take the loader's latest snapshot and keep only the metrics of interest.
latest_data = loader.get_latest_data()
corr_data = latest_data[metric_columns]

# Pairwise correlation between the selected metrics.
corr_matrix = corr_data.corr()

# Draw the annotated heatmap on an explicit figure/axes pair, centred on zero.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Matrix of COVID-19 Metrics', fontsize=16)
fig.tight_layout()
plt.show()