# Comparing Global Mortality Rates to US States
Here we compare COVID-19 statistics for US states to those for other countries.

In [15]:
import pandas as pd
pd.options.display.max_rows = 100
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [16]:
# Load the ECDC per-country COVID-19 case distribution.
# Each row is one country on one day; cases and deaths are daily counts,
# not running totals.
ecdc_url = 'https://opendata.ecdc.europa.eu/covid19/casedistribution/csv'
world = pd.read_csv(ecdc_url)
world.head()

Unnamed: 0,dateRep,day,month,year,cases,deaths,countriesAndTerritories,geoId,countryterritoryCode,popData2018,continentExp
0,05/05/2020,5,5,2020,190,5,Afghanistan,AF,AFG,37172386.0,Asia
1,04/05/2020,4,5,2020,235,13,Afghanistan,AF,AFG,37172386.0,Asia
2,03/05/2020,3,5,2020,134,4,Afghanistan,AF,AFG,37172386.0,Asia
3,02/05/2020,2,5,2020,164,4,Afghanistan,AF,AFG,37172386.0,Asia
4,01/05/2020,1,5,2020,222,4,Afghanistan,AF,AFG,37172386.0,Asia


In [17]:
# Collapse the daily ECDC rows into one row per country: total cases and
# total deaths, plus the 2018 population (the preview shows the same value
# repeated on every row of a country, so max simply picks it out).
#
# After the groupby the country name is the index, not a column, so it has to
# be renamed with rename_axis; a columns= mapping silently skips it.
# Aggregations are given as strings because passing the builtins sum/max to
# .agg emits a FutureWarning on recent pandas releases.
world = (
    world
    .groupby('countriesAndTerritories')
    .agg({'cases': 'sum', 'deaths': 'sum', 'popData2018': 'max'})
    .rename_axis('country')
    .rename(columns={'popData2018': 'population'})
)
world.head()

Unnamed: 0_level_0,cases,deaths,population
countriesAndTerritories,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,2894,90,37172386.0
Albania,803,31,2866376.0
Algeria,4648,465,42228429.0
Andorra,750,45,77006.0
Angola,35,2,30809762.0


In [18]:
# Load the New York Times state-level series.
# Unlike the ECDC data, cases and deaths here are running totals per state
# and date, not daily increments.
nyt_states_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv'
states = pd.read_csv(nyt_states_url)
states.head()

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [19]:
# The NYT counts are cumulative, so the largest value seen for each state is
# taken as its total to date.
states = states.groupby('state')[['cases', 'deaths']].max()

In [20]:
# State population and area figures, read from a CSV kept next to the notebook
# (source: https://worldpopulationreview.com/states/states-by-area/)
state_population_csv = 'state population and area.csv'
state_populations = pd.read_csv(state_population_csv)
state_populations.head()

Unnamed: 0,State,TotalArea,LandArea,WaterArea,Density,Pop
0,Alaska,665384,570641,94743,1.2863,734002
1,Texas,268596,261232,7365,112.8204,29472295
2,California,163696,155779,7916,256.3727,39937489
3,Montana,147040,145546,1494,7.4668,1086759
4,New Mexico,121590,121298,292,17.285,2096640


In [21]:
# Attach the population/area figures to each state's case and death totals.
# `states` carries the state name in its index, while the population table
# carries it in the 'State' column. This is an inner join (the pandas
# default), so names present in only one of the two tables are dropped.
states = pd.merge(
    states,
    state_populations,
    how='inner',
    left_index=True,
    right_on='State',
)
states.head()

Unnamed: 0,cases,deaths,State,TotalArea,LandArea,WaterArea,Density,Pop
29,8112,298,Alabama,52420,50645,1775,96.9221,4908621
0,368,7,Alaska,665384,570641,94743,1.2863,734002
5,8919,362,Arizona,113990,113594,396,64.955,7378494
28,3469,80,Arkansas,53179,52035,1143,58.403,3038999
2,56333,2297,California,163696,155779,7916,256.3727,39937489


In [22]:
# Keep only the columns the comparison needs (cases, deaths, population),
# name the population column the same way as in the world table, and index
# the frame by state name.
states = (
    states
    .drop(columns=['TotalArea', 'LandArea', 'WaterArea', 'Density'])
    .rename(columns={'Pop': 'population'})
    .set_index('State')
)
states.head()

Unnamed: 0_level_0,cases,deaths,population
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,8112,298,4908621
Alaska,368,7,734002
Arizona,8919,362,7378494
Arkansas,3469,80,3038999
California,56333,2297,39937489


In [23]:
# Preview the countries and the US states stacked into a single table.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
# and yields the same row-wise stacking.
pd.concat([world, states])

Unnamed: 0,cases,deaths,population
Afghanistan,2894,90,37172386.0
Albania,803,31,2866376.0
Algeria,4648,465,42228429.0
Andorra,750,45,77006.0
Angola,35,2,30809762.0
...,...,...,...
Virginia,19492,684,8626207.0
Washington,15673,846,7797095.0
West Virginia,1224,50,1778070.0
Wisconsin,8236,340,5851754.0


In [24]:
# Stack the per-country and per-state tables and derive comparison ratios.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
world_v_states = pd.concat([world, states])

# deaths per one million inhabitants
world_v_states['deathsPerM'] = 1E6 * world_v_states['deaths'] / world_v_states['population']
# deaths divided by reported cases (a fraction, not a percentage)
world_v_states['cfr'] = world_v_states['deaths'] / world_v_states['cases']
# reported cases as a percentage of the population
world_v_states['% infected'] = 100 * world_v_states['cases'] / world_v_states['population']

In [25]:
# Top 50 countries/states by deaths per million, highest first,
# restricted to populations of at least one million.
(
    world_v_states
    .loc[lambda df: df['population'] >= 1E6]
    .sort_values('deathsPerM', ascending=False)
    .head(50)
)

Unnamed: 0,cases,deaths,population,deathsPerM,cfr,% infected
New York,318984,19415,19440469.0,998.689898,0.060865,1.640825
New Jersey,128269,7910,8936574.0,885.126672,0.061667,1.435326
Connecticut,29973,2556,3563077.0,717.357497,0.085277,0.841211
Belgium,50267,7924,11422068.0,693.744775,0.157638,0.440087
Massachusetts,69087,4090,6976597.0,586.245701,0.059201,0.990268
Spain,218011,25428,46723749.0,544.220028,0.116636,0.466596
Italy,211938,29079,60431283.0,481.191174,0.137205,0.350709
United_Kingdom,190584,28734,66488991.0,432.161769,0.150768,0.28664
Louisiana,29673,1991,4645184.0,428.61596,0.067098,0.638791
Michigan,43928,4135,10045029.0,411.646397,0.094131,0.437311


In [26]:
# Top 50 countries/states by share of the population with a reported case
# ('% infected'), highest first, restricted to populations of at least one million.
(
    world_v_states
    .loc[lambda df: df['population'] >= 1E6]
    .sort_values('% infected', ascending=False)
    .head(50)
)

Unnamed: 0,cases,deaths,population,deathsPerM,cfr,% infected
New York,318984,19415,19440469.0,998.689898,0.060865,1.640825
New Jersey,128269,7910,8936574.0,885.126672,0.061667,1.435326
Massachusetts,69087,4090,6976597.0,586.245701,0.059201,0.990268
Rhode Island,9652,341,1056161.0,322.867442,0.035329,0.913876
Connecticut,29973,2556,3563077.0,717.357497,0.085277,0.841211
Louisiana,29673,1991,4645184.0,428.61596,0.067098,0.638791
Qatar,16191,12,2781677.0,4.313944,0.000741,0.582059
Illinois,63840,2673,12659682.0,211.142744,0.04187,0.504278
Spain,218011,25428,46723749.0,544.220028,0.116636,0.466596
Ireland,21722,1319,4853506.0,271.76231,0.060722,0.447553
