In [55]:
import pandas as pd
import dask.dataframe as dd
import numpy as np
from datetime import datetime

In [56]:
dtypes = {
    'aggregate': 'object',
    'cases': 'float64',
    'city': 'object',
    'population': 'float64',
    'deaths': 'float64',
    'country': 'object',
    'state': 'object',
    'date': 'object'
}

In [57]:
df = dd.read_csv("timeseries.csv",dtype=dtypes)

In [58]:
# Filter for US states only and state-level data
us_df = df[(df['country'] == 'United States') & (df['aggregate'] == 'state')]

In [59]:
# Define date range
start_date = '2020-01-01'
end_date = '2021-02-28'
mask = (us_df['date'] >= start_date) & (us_df['date'] <= end_date)
period_df = us_df[mask]

In [60]:
state_metrics = period_df.groupby('state').agg({
    'deaths': 'sum',
    'population': 'mean'
}).compute()

state_metrics['per_capita_mortality'] = state_metrics['deaths'] / state_metrics['population']
state_metrics['mortality_rank'] = state_metrics['per_capita_mortality'].rank(ascending=False)

print(state_metrics.sort_values('mortality_rank'))

                                 deaths  population  per_capita_mortality  \
state                                                                       
New Jersey                     861317.0   8882190.0              0.096971   
Connecticut                    277777.0   3565287.0              0.077912   
Massachusetts                  482980.0   6892503.0              0.070073   
New York                      1358824.0  19453561.0              0.069850   
Louisiana                      213399.0   4648794.0              0.045904   
Washington, D.C.                32225.0    705749.0              0.045661   
Rhode Island                    46435.0   1059361.0              0.043833   
Michigan                       419841.0   9986857.0              0.042039   
Pennsylvania                   378963.0  12801989.0              0.029602   
Maryland                       175443.0   6045680.0              0.029020   
Delaware                        25878.0    973764.0              0.026575   

In [61]:
period_df = period_df.compute()
period_df['date'] = pd.to_datetime(period_df['date'])
period_df['month_year'] = period_df['date'].dt.strftime('%Y-%m')
monthly_metrics = period_df.groupby(['state', 'month_year']).agg({
    'deaths': 'last',
    'cases': 'last'
}).reset_index()

# Calculate CFR (Case Fatality Rate)
monthly_metrics['cfr'] = monthly_metrics['deaths'] / monthly_metrics['cases']
cfr_matrix = monthly_metrics.pivot(index='state', columns='month_year', values='cfr')
print(cfr_matrix)

  df = reader(bio, **kwargs)


month_year                    2020-01  2020-02   2020-03   2020-04   2020-05  \
state                                                                          
Alabama                           NaN      NaN  0.013013  0.038325  0.035094   
Alaska                            NaN      NaN       NaN  0.025352  0.017094   
American Samoa                    NaN      NaN       NaN       NaN       NaN   
Arizona                           NaN      NaN       NaN  0.041841  0.045295   
Arkansas                          NaN      NaN  0.014184  0.018740  0.018825   
Connecticut                       NaN      NaN  0.018004  0.081480  0.093979   
Delaware                          NaN      NaN  0.031348  0.032108  0.038742   
Georgia                           NaN      NaN  0.030362  0.042822  0.046548   
Guam                              NaN      NaN  0.027027  0.034247  0.028902   
Idaho                             NaN      NaN  0.017143  0.030242  0.028722   
Iowa                              NaN   

In [62]:
cfr_changes = cfr_matrix.diff(axis=1)
total_change = cfr_changes.abs().sum(axis=1)
cfr_rankings = total_change.rank(ascending=True)
print(cfr_rankings.sort_values())

state
American Samoa                   1.0
West Virginia                    2.0
South Dakota                     3.0
Utah                             4.0
Wyoming                          5.0
Arkansas                         6.0
Delaware                         7.0
Montana                          8.0
Iowa                             9.0
Alaska                          10.0
North Dakota                    11.0
Tennessee                       12.0
Guam                            13.0
Minnesota                       14.0
Texas                           15.0
Idaho                           16.0
Virginia                        17.0
Georgia                         18.0
Oregon                          19.0
Washington, D.C.                20.0
Arizona                         21.0
Mississippi                     22.0
New Mexico                      23.0
Kansas                          24.0
Louisiana                       25.0
Puerto Rico                     26.0
Alabama                         