In [19]:
import dask.dataframe as dd
import pandas as pd

In [20]:
data = dd.read_csv('timeseries.csv', dtype={'fips': 'object', 'population': 'float64', 'cases': 'float64'})
us_data = data[(data['country'] == 'United States') & (data['level'] == 'state')]

In [21]:
us_data['date'] = dd.to_datetime(us_data['date'])
filtered_data = us_data[(us_data['date'] >= '2020-01-01') & (us_data['date'] <= '2021-02-28')]
avg_population = filtered_data.groupby('state')['population'].mean().reset_index()
total_deaths = filtered_data.groupby('state')['deaths'].sum().reset_index()
mortality_per_capita = total_deaths.merge(avg_population, on='state')
mortality_per_capita['per_capita_mortality'] = mortality_per_capita['deaths'] / mortality_per_capita['population']
ranked_mortality = mortality_per_capita[['state', 'per_capita_mortality']].compute().sort_values(by='per_capita_mortality', ascending=False)
print(ranked_mortality)

                           state  per_capita_mortality
31                    New Jersey              0.096971
7                    Connecticut              0.077912
22                 Massachusetts              0.070073
33                      New York              0.069850
19                     Louisiana              0.045904
52              Washington, D.C.              0.045661
42                  Rhode Island              0.043833
23                      Michigan              0.042039
14                      Illinois              0.030218
40                  Pennsylvania              0.029602
21                      Maryland              0.029020
8                       Delaware              0.026575
25                   Mississippi              0.017606
6                       Colorado              0.016981
15                       Indiana              0.016166
10                       Georgia              0.014636
37                          Ohio              0.013175
30        

In [22]:
filtered_data['month'] = filtered_data['date'].dt.to_period('M').astype(str)
filtered_data['month'] = filtered_data['month'].astype('category')
monthly_deaths = filtered_data.groupby(['state', 'month'])['deaths'].sum().reset_index()
monthly_cases = filtered_data.groupby(['state', 'month'])['cases'].sum().reset_index()
cfr_data = monthly_deaths.merge(monthly_cases, on=['state', 'month'])
cfr_data['CFR'] = (cfr_data['deaths'] / cfr_data['cases']) * 100
cfr_data['month'] = cfr_data['month'].cat.as_known()
cfr_matrix = cfr_data.pivot_table(index='state', columns='month', values='CFR').compute()
print(cfr_matrix)

  self._meta = self.obj._meta.groupby(


month                         2020-01   2020-02   2020-03    2020-04  \
state                                                                  
Alabama                           NaN       NaN  0.532357   2.982860   
Alaska                            NaN       NaN  0.000000   2.679510   
American Samoa                    NaN       NaN       NaN        NaN   
Arizona                           0.0  0.000000  0.000000   2.973322   
Arkansas                          NaN       NaN  0.923077   1.873642   
California                        0.0  0.000000  2.016785   3.479974   
Colorado                          NaN       NaN  1.829409   4.369445   
Connecticut                       NaN       NaN  1.818444   6.425654   
Delaware                          NaN       NaN  1.334107   2.726529   
Florida                           NaN       NaN  0.853401   2.905490   
Georgia                           NaN       NaN  2.131739   3.873315   
Guam                              NaN       NaN  2.702703   3.61

In [23]:
cfr_changes = cfr_matrix.diff(axis=1).fillna(0)
cfr_ranking = cfr_changes.sum(axis=1).sort_values(ascending=False)
print(cfr_ranking)

state
New Jersey                      8.370963
Connecticut                     7.460852
Massachusetts                   7.431181
Pennsylvania                    6.498693
New Hampshire                   6.156320
Michigan                        6.115149
New York                        5.822191
Rhode Island                    5.504321
Illinois                        4.801969
Maryland                        4.421005
Minnesota                       3.999253
Washington                      3.942265
Washington, D.C.                3.893230
Ohio                            3.746252
New Mexico                      3.314971
Missouri                        3.311839
Colorado                        3.233000
West Virginia                   3.010184
Delaware                        2.975366
Indiana                         2.917780
Maine                           2.634839
California                      2.543001
Wisconsin                       2.390510
Mississippi                     2.225327
Oregon    