# COVID-19 Global Data Analysis
**Name**: [Your Name]  
**Cohort**: VII  
**Date**: [Submission Date]

## Notebook Setup

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
%matplotlib inline

print("✅ Libraries imported successfully!")

## 1. Data Loading

In [None]:
# Fetch the global confirmed-cases time series published by Johns Hopkins University
data_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
raw_df = pd.read_csv(data_url)

# Report success together with the (rows, columns) shape of the loaded table
print(
    "📊 Data loaded successfully!",
    f"Dataset shape: {raw_df.shape}",
    sep="\n",
)
# Trailing expression: shown as the cell output when run in a notebook
raw_df.head()

## 2. Data Processing

In [None]:
def clean_covid_data(df, countries=('Kenya', 'US', 'India')):
    """
    Filter a wide-format COVID-19 table to selected countries and reshape it
    into one row per country per date.

    Parameters:
        df: DataFrame with the identifier columns 'Province/State',
            'Country/Region', 'Lat' and 'Long'; every other column is
            treated as a date column holding case counts.
        countries: Country names to keep, matched exactly against
            'Country/Region'. A single name may be passed as a plain string.
            Defaults to Kenya, US and India (the filter uses the label 'US',
            not 'USA').

    Returns:
        DataFrame with columns 'Country/Region', 'Date' (datetime) and
        'Cases', where rows sharing a country and date (e.g. several
        provinces) are summed together.
    """
    # isin() rejects a bare string, so wrap a single name in a list
    if isinstance(countries, str):
        countries = [countries]
    wanted = list(countries)

    # Keep only the rows for the requested countries
    filtered = df[df['Country/Region'].isin(wanted)]

    # Melt date columns into rows: one row per (province, country, date)
    melted_df = filtered.melt(
        id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
        var_name='Date',
        value_name='Cases'
    )

    # Column headers are date strings; convert so rows sort chronologically
    melted_df['Date'] = pd.to_datetime(melted_df['Date'])

    # Collapse province-level rows into a single total per country and date
    processed = melted_df.groupby(['Country/Region', 'Date'])['Cases'].sum().reset_index()

    return processed

# Run the cleaning step on the raw table loaded in the Data Loading cell
processed_df = clean_covid_data(raw_df)
print("🧹 Data cleaned and processed!")
# Trailing expression: previews the first rows as the cell output in a notebook
processed_df.head()

## 3. Analysis & Visualization

In [None]:
# Plot cumulative cases
import os

# savefig() does not create missing directories, so make sure the output
# folder exists before writing the figure (otherwise FileNotFoundError).
os.makedirs('assets', exist_ok=True)

plt.figure(figsize=(12, 6))
# Draw one line per country on the shared axes
for country in processed_df['Country/Region'].unique():
    country_data = processed_df[processed_df['Country/Region'] == country]
    plt.plot(country_data['Date'], country_data['Cases'], label=country)

plt.title('COVID-19 Cumulative Cases by Country')
plt.xlabel('Date')
plt.ylabel('Total Confirmed Cases')
plt.legend()
plt.grid(True)
# Save before show(): with some backends the figure is cleared once shown
plt.savefig('assets/cumulative_cases.png')
plt.show()

## 4. Key Findings

1. **Case Trends**: 
   - USA showed the highest case counts throughout the period
   - Kenya had the slowest growth rate among the three countries

2. **Peak Periods**:
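   - All countries experienced peaks in March 2025 (**verify this date**: the JHU CSSE time series stopped updating in March 2023, so March 2025 lies outside the data; peaks should also be read from daily new cases, not from the cumulative curve plotted above)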
   - India showed the steepest growth curve

3. **Data Limitations**:
   - County-level data is not available for the US in the global time-series file used here
   - Early reporting discrepancies visible in Kenya's data

## 5. Export Results

In [None]:
# Write the per-country daily totals to CSV, omitting the row index
processed_df.to_csv('processed_covid_data.csv', index=False)

# Summarise the output files produced by this notebook
print(
    "✅ Analysis complete! Files saved:",
    "- processed_covid_data.csv",
    "- assets/cumulative_cases.png",
    sep="\n",
)