### 1. Imports

In [1]:
import os
import pandas as pd
from datetime import datetime


### 2. Reading Data Files

In [9]:
# Load the combined monthly summary CSV (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer="data/combine_monthly_summary.csv")

### 3. Explore the Columns

>Keep only the relevant columns for aggregation:

In [10]:
# Columns kept for the per-country aggregation.
relevant_columns = ['Country_Region', 'Confirmed', 'Deaths', 'Recovered', 'Active', 'Incidence_Rate', 'Case-Fatality_Ratio']
# .copy() makes df an independent frame rather than a column-subset of the
# loaded data, so later column assignments on df do not raise
# SettingWithCopyWarning.
df = df[relevant_columns].copy()

### 4. Handle Missing Values

In [11]:
# Treat missing absolute counts as zero; the ratio columns are left untouched.
count_cols = ['Confirmed', 'Deaths', 'Recovered', 'Active']
df[count_cols] = df[count_cols].fillna(0)

### 5. Group by country to aggregate totals per country

>Sum the absolute numbers (Confirmed, Deaths, Recovered, Active) per country. The ratios are not summed; they are recalculated from these totals (weighted by confirmed cases) in the next section.

In [12]:
# Per-country totals of the absolute count columns, with Country_Region
# restored as an ordinary column.
count_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active']
grouped = df.groupby(['Country_Region'])[count_columns].sum().reset_index()

### 6. Recalculate Incidence_Rate and Case-Fatality_Ratio

>calculate: Case-Fatality Ratio = (Total Deaths / Total Confirmed) × 100



In [13]:
# Case-fatality ratio (%) = total deaths / total confirmed * 100, per country.
# Zero denominators are masked with .where(), which substitutes NaN and keeps
# the column float64. replace(0, pd.NA) can instead turn the column into object
# dtype, so the result dtype would depend on whether any country has zero
# confirmed cases.
confirmed_nonzero = grouped['Confirmed'].where(grouped['Confirmed'] != 0)
grouped['Case_Fatality_Ratio'] = grouped['Deaths'] / confirmed_nonzero * 100


In [14]:
# Optional: average incidence rate from original df (weighted by Confirmed cases).
# Computed with vectorised sums instead of groupby.apply, which avoids the
# DataFrameGroupBy.apply deprecation warning about operating on grouping columns.
rate_times_weight = df['Incidence_Rate'] * df['Confirmed']
# Only rows that contribute to the numerator may contribute to the denominator;
# otherwise rows with a missing Incidence_Rate would drag the weighted mean down.
weight = df['Confirmed'].where(rate_times_weight.notna())

weighted_totals = (
    pd.DataFrame({
        'Country_Region': df['Country_Region'],
        'rate_times_weight': rate_times_weight,
        'weight': weight,
    })
    .groupby(['Country_Region'])[['rate_times_weight', 'weight']]
    .sum()
)

# A country whose total weight is zero has no defined weighted mean -> NaN.
total_weight = weighted_totals['weight'].where(weighted_totals['weight'] != 0)
incidence = (
    (weighted_totals['rate_times_weight'] / total_weight)
    .rename('Incidence_Rate')
    .reset_index()
)

# Merge it back. Dropping any existing Incidence_Rate first keeps the cell
# idempotent: re-running it would otherwise yield Incidence_Rate_x / _y columns.
grouped = grouped.drop(columns=['Incidence_Rate'], errors='ignore')
grouped = pd.merge(grouped, incidence, on=['Country_Region'], how='left')


  incidence = df.groupby(['Country_Region']).apply(


In [15]:
# Plain-text preview of the first five aggregated rows.
preview = grouped.head(n=5)
print(preview)

  Country_Region  Confirmed    Deaths  Recovered    Active  \
0    Afghanistan  4525451.0  187722.0   628985.0  203169.0   
1        Albania  6514581.0   86161.0   790006.0  181308.0   
2        Algeria  6346085.0  169508.0   886854.0  370672.0   
3        Andorra   866288.0    4386.0   106824.0    6595.0   
4         Angola  2106830.0   42861.0   207288.0   44945.0   

   Case_Fatality_Ratio  Incidence_Rate  
0             4.148139        4.132967  
1             1.322587        3.990435  
2             2.671064        3.305196  
3             0.506298       45.381234  
4             2.034383        0.216509  
