In [1]:
from datetime import datetime

import dask
import dask.dataframe as dd
import numpy as np
import pandas as pd

In [2]:
# Column dtypes are pinned explicitly instead of being left to inference.
column_dtypes = {
    # numeric measurements
    'population': 'float64',
    'cases': 'float64',
    'deaths': 'float64',
    'lat': 'float64',
    'long': 'float64',
    # text identifiers
    'aggregate': 'object',
    'city': 'object',
    'country': 'object',
    'county': 'object',
    'level': 'object',
    'state': 'object',
}
df = dd.read_csv('timeseries.csv', dtype=column_dtypes, low_memory=False)

# Parse the date column so it can be range-filtered below.
df['date'] = dd.to_datetime(df['date'])

# Keep only US rows reported at state level inside the study window
# (2020-01-01 through 2021-02-28, both ends inclusive).
is_us = df['country'] == 'United States'
is_state_level = df['level'] == 'state'
in_window = (df['date'] >= '2020-01-01') & (df['date'] <= '2021-02-28')
mask = is_us & is_state_level & in_window

us_states_df = df[mask]

In [3]:
# Ensure population, cases and deaths are all float on the filtered frame.
us_states_df = us_states_df.astype(
    {'population': float, 'cases': float, 'deaths': float}
)

# Build both per-state aggregations lazily, then evaluate them with a single
# dask.compute call so the shared upstream work (CSV read + filter) is done
# once rather than once per `.compute()`.
by_state = us_states_df.groupby('state')
state_deaths, state_avg_pop = dask.compute(
    by_state['deaths'].agg(['min', 'max']),
    by_state['population'].mean(),
)

# Deaths within the window are taken as the spread between the largest and
# smallest reported value per state.
# NOTE(review): this assumes `deaths` is a cumulative counter - confirm
# against the data source.
total_deaths = state_deaths['max'] - state_deaths['min']
per_capita_mortality = total_deaths / state_avg_pop

# Highest per-capita mortality first.
mortality_ranking = per_capita_mortality.sort_values(ascending=False)
print(mortality_ranking)

state
New Jersey                      0.001712
New York                        0.001280
Connecticut                     0.001216
Massachusetts                   0.001187
Rhode Island                    0.000903
Washington, D.C.                0.000791
Louisiana                       0.000706
Michigan                        0.000623
Illinois                        0.000553
Maryland                        0.000536
Pennsylvania                    0.000527
Delaware                        0.000520
Indiana                         0.000392
Mississippi                     0.000373
Colorado                        0.000295
New Hampshire                   0.000274
Georgia                         0.000269
Minnesota                       0.000253
Ohio                            0.000248
New Mexico                      0.000244
Arizona                         0.000231
Iowa                            0.000228
Virginia                        0.000217
Alabama                         0.000205
Washington

In [4]:
# Materialise the filtered dask frame as an in-memory pandas DataFrame.
us_states_pd = us_states_df.compute()

# Month-end maximum of cases and deaths per state.
# 'ME' is the month-end alias; the older 'M' spelling is deprecated since
# pandas 2.2 and emits a FutureWarning on every run.
monthly_data = (
    us_states_pd
    .set_index('date')
    .groupby('state')
    .resample('ME')
    .agg({'cases': 'max', 'deaths': 'max'})
)

# Calculate month-over-month changes within each state; the first month of
# every state has no predecessor and is therefore NaN.
monthly_changes = monthly_data.groupby('state').diff()

# Case-fatality rate (%) of each month's new deaths over new cases.
# A month with zero new cases would divide by zero and yield +/-inf, which
# would then swamp any downstream sum or ranking, so treat it as missing.
new_cases = monthly_changes['cases'].replace(0, np.nan)
monthly_cfr = monthly_changes['deaths'] / new_cases * 100

# Pivot to a months x states table.
monthly_cfr = monthly_cfr.unstack('state')

print("\nMonthly CFR for first 5 states (first 5 months):")
print(monthly_cfr.iloc[:5, :5])


Monthly CFR for first 5 states (first 5 months):
state        Alabama    Alaska  American Samoa   Arizona  Arkansas
date                                                              
2020-01-31       NaN       NaN             NaN       NaN       NaN
2020-02-29       NaN       NaN             NaN       NaN       NaN
2020-03-31       NaN       NaN             NaN       NaN       NaN
2020-04-30  4.252492       NaN             NaN       NaN  1.969528
2020-05-31  3.301930  0.884956             NaN  4.744466  1.889764


  monthly_data = us_states_pd.set_index('date').groupby('state').resample('M').agg({


In [5]:
# Month-to-month movement of each state's CFR (row-wise first difference).
cfr_changes = monthly_cfr.diff(periods=1)

# Aggregate absolute changes for ranking: sum the magnitude of every monthly
# swing per state, then order states from most to least volatile.
total_cfr_change = cfr_changes.abs().sum(axis=0)
cfr_change_ranking = total_cfr_change.sort_values(ascending=False)

print("\nTop 10 States by Total CFR Change:")
print(cfr_change_ranking.iloc[:10])


Top 10 States by Total CFR Change:
state
United States Virgin Islands    66.666667
Alaska                          40.884956
Rhode Island                    31.083942
New Jersey                      26.936583
Pennsylvania                    20.208528
New Hampshire                   11.869711
Connecticut                     11.571058
Delaware                        10.729251
Missouri                        10.614211
Michigan                        10.267054
dtype: float64
