# Excess mortality in different countries during the COVID-19 pandemic

In [1]:
%matplotlib notebook

import numpy as np
import pandas as pd
import pylab as plt
import seaborn as sns
import matplotlib

from matplotlib.patches import Polygon

import matplotlib as mpl
mpl.rcParams['figure.max_open_warning'] = 0

In [2]:
from sklearn.linear_model import LinearRegression

def predict(X, maxyear=2019, target_year=2020):
    """Extrapolate the expected deaths for every period of ``target_year``.

    A linear regression is fitted to the rows of ``X`` whose year is at most
    ``maxyear``. Its predictors are the year itself (a linear trend) plus one
    indicator column per period of the year (a seasonal profile).

    Parameters
    ----------
    X : numpy.ndarray
        Numeric array whose columns 0, 1 and 2 are read as year, period
        number within the year (week or month) and death count. The training
        rows must form complete years listed in period order: indicator
        columns are assigned by row position, not by the value in column 1.
    maxyear : int, optional
        Last year that is used for fitting.
    target_year : int, optional
        Year for which the baseline is predicted.

    Returns
    -------
    numpy.ndarray
        ``m`` predicted values, one per period, where ``m`` is the largest
        period number among the training rows (or 50 when that number is 51).

    Raises
    ------
    ValueError
        If there are no training rows, or if their number is not a whole
        multiple of ``m``.
    """
    train = X[X[:, 0] <= maxyear]
    m = int(np.max(train[:, 1]))
    if m == 51:  # the first and the last weeks are absent for some countries
        m = 50

    # The seasonal indicators are built by stacking one identity matrix per
    # training year, which is only meaningful when every year is complete.
    n_years, leftover = divmod(train.shape[0], m)
    if leftover:
        raise ValueError(
            f'{train.shape[0]} training rows cannot be split into whole years '
            f'of {m} periods'
        )

    seasonal = np.tile(np.eye(m), (n_years, 1))
    design = np.concatenate((train[:, :1], seasonal), axis=1)
    reg = LinearRegression().fit(design, train[:, 2])

    # One row per period of the target year
    future = np.concatenate((np.ones((m, 1)) * target_year, np.eye(m)), axis=1)
    return reg.predict(future)

In [3]:
# Accumulates the results from all data sources loaded below.
# Maps a country name to a list [X, pred, diff]:
#   X    - array with columns (year, week or month, deaths)
#   pred - baseline returned by predict(X)
#   diff - observed deaths minus baseline for the 2020 rows of X

mega = {}

In [4]:
# Human mortality database (STMF file): weekly death counts

df = pd.read_csv('https://www.mortality.org/Public/STMF/Outputs/stmf.csv', header=1)

# The UK is published as three separate series that are combined into 'GBR' below
uk_parts = ['GBRTENW', 'GBR_NIR', 'GBR_SCO']

countries = np.unique(df['CountryCode'])
countries = [c for c in countries if c not in ['RUS'] + uk_parts]
countries = countries + ['GBR']

countries_dict = {'AUS2':'Australia','AUT':'Austria','BEL':'Belgium','BGR':'Bulgaria',
                  'CAN':'Canada','CHE':'Switzerland','CHL':'Chile','CZE':'Czech Republic',
                  'DEUTNP':'Germany','DNK':'Denmark','ESP':'Spain','EST':'Estonia','FIN':'Finland',
                  'FRATNP':'France','GBR':'United Kingdom','GRC':'Greece','HRV':'Croatia','HUN':'Hungary',
                  'ISL':'Iceland','ISR':'Israel','ITA':'Italy','KOR':'South Korea','LTU':'Lithuania',
                  'LUX':'Luxembourg','LVA':'Latvia','NLD':'Netherlands','NOR':'Norway',
                  'NZL_NP':'New Zealand','POL':'Poland','PRT':'Portugal','SVK':'Slovakia',
                  'SVN':'Slovenia','SWE':'Sweden','TWN':'Taiwan','USA':'United States'}


def hmd_series(codes):
    """Return the Year/Week/DTotal rows (both sexes, 2015 onwards) of the given HMD series."""
    keep = df['CountryCode'].isin(codes) & (df['Sex'] == 'b') & (df['Year'] >= 2015)
    return df.loc[keep, ['Year', 'Week', 'DTotal']]


for country in countries:
    print(countries_dict[country])

    if country != 'GBR':
        X = hmd_series([country]).values
    else:
        # Add up numbers for UK. The three series are matched on (Year, Week)
        # rather than on row position, and a week is kept only when all
        # three series report it, so no total is built from a partial sum.
        weekly = hmd_series(uk_parts).groupby(['Year', 'Week'])['DTotal'].agg(['sum', 'count'])
        weekly = weekly[weekly['count'] == len(uk_parts)]
        X = weekly['sum'].reset_index().values

    # kick out last two weeks
    # (presumably because the most recent counts are still incomplete)
    X = X[:-2, :]

    # one of the countries did not have data for early 2015
    if X[0, 1] > 1:
        X = X[X[:, 0] >= 2016, :]

    pred = predict(X)

    # Excess = observed 2020 deaths minus the baseline of the same week;
    # pred is indexed by week number - 1
    in_2020 = X[:, 0] == 2020
    diff = X[in_2020, 2] - pred[X[in_2020, 1].astype(int) - 1]

    mega[countries_dict[country]] = [X, pred, diff]

Australia
Austria
Belgium
Bulgaria
Canada
Switzerland
Chile
Czech Republic
Germany
Denmark
Spain
Estonia
Finland
France
Greece
Croatia
Hungary
Iceland
Israel
Italy
South Korea
Lithuania
Luxembourg
Latvia
Netherlands
Norway
New Zealand
Poland
Portugal
Slovakia
Slovenia
Sweden
Taiwan
United States
United Kingdom


In [5]:
# Financial Times

df = pd.read_csv('https://github.com/Financial-Times/coronavirus-excess-mortality-data/blob/master/data/ft_excess_deaths.csv?raw=true')

# Leave out the listed countries and everything that is already in mega
skipped = set(['Indonesia','Turkey','US','S Korea','UK'] + list(mega.keys()))
countries = [c for c in np.unique(df['country']) if c not in skipped]

for country in countries:
    print(country)

    # Rows whose region equals the country name (presumably the national totals)
    national = (df['country'] == country) & (df['region'] == country) & (df['year'] >= 2015)
    X = df.loc[national, ['year', 'month', 'week', 'deaths']].values

    # Brazil is broken, does not have week numbers: number its 2019 and 2020
    # rows consecutively, starting from 1
    if country == 'Brazil':
        for year in (2019, 2020):
            in_year = X[:, 0] == year
            X[in_year, 2] = np.arange(np.sum(in_year)) + 1

    # Use the week column when the first row has one, the month column otherwise
    has_weeks = not np.isnan(X[0, 2])
    if has_weeks:
        X = X[:, [0, 2, 3]]
        X = X[X[:, 1] <= 52, :]
    else:
        X = X[:, [0, 1, 3]]

    # Mexico does not have full 2019 data, so projecting based on 2015-18
    pred = predict(X, 2018) if country == 'Mexico' else predict(X)

    # Observed 2020 deaths minus the baseline of the same period
    in_2020 = X[:, 0] == 2020
    diff = X[in_2020, 2] - pred[X[in_2020, 1].astype(int) - 1]

    mega[country] = [X, pred, diff]

Brazil
Ecuador
Mexico
Peru
Russia
South Africa


In [6]:
# New York Times

df = pd.read_csv('https://github.com/nytimes/covid-19-data/blob/master/excess-deaths/deaths.csv?raw=true')

# Leave out the listed countries and everything that is already in mega
skipped = set(['India','Indonesia','Japan', 'Turkey',
               'Ireland','Thailand'] + list(mega.keys()))
countries = [c for c in np.unique(df['country']) if c not in skipped]

for country in countries:
    print(country)

    rows = df.loc[df['country'] == country, ['year', 'month', 'week', 'deaths']]
    X = rows.values.astype(float)

    # Use the week column when the first row has one, the month column otherwise
    period_column = 1 if np.isnan(X[0, 2]) else 2
    X = X[:, [0, period_column, 3]]

    # Manual update based on the graphic in NYT
    if country == 'Bolivia' and X[-1][1] == 8:
        X = np.concatenate((X, [[2020,9,7500]]))

    pred = predict(X)

    # Observed 2020 deaths minus the baseline of the same period
    in_2020 = X[:, 0] == 2020
    diff = X[in_2020, 2] - pred[X[in_2020, 1].astype(int) - 1]

    mega[country] = [X, pred, diff]

Bolivia
Colombia


In [7]:
# Population

df = pd.read_csv('https://github.com/datasets/population/blob/master/data/population.csv?raw=true')

# Countries that the population table lists under a different name
mm = {'South Korea':'Korea, Rep.',
      'Slovakia':'Slovak Republic',
      'Russia':'Russian Federation'}

# One entry per country, in the order of mega's keys. Taiwan gets a
# hard-coded figure; every other country uses the last row that the table
# holds for it.
pops = np.array(
    [
        23780000 if name == 'Taiwan'
        else df.loc[df['Country Name'] == mm.get(name, name), 'Value'].values[-1]
        for name in mega.keys()
    ],
    dtype=float,
)

In [8]:
# Daily reported numbers during the same time period

import datetime

df = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv?raw=true')

# Countries that this table lists under a different name
mm = {'Czech Republic':'Czechia'}

# Cumulative reported deaths at the end of each country's mortality series,
# in the order of mega's keys
official = np.zeros(len(mega.keys()))
for i, m in enumerate(mega.keys()):
    X, pred = mega[m][0], mega[m][1]
    last = int(X[-1][1])  # period number of the last row

    # A baseline with more than 12 entries means weekly data. This is the
    # same criterion the plotting code uses, and unlike `last > 12` it also
    # holds for a weekly series that ends at or before week 12.
    if pred.size > 12:
        # Sunday of week `last`.
        # NOTE(review): %W counts weeks from the first Monday of the year,
        # which is not ISO-8601 numbering - confirm that this matches the
        # week numbers used by the data sources.
        day = datetime.datetime.strptime(f'2020-W{last:02}-0', '%Y-W%W-%w')
        r = day.strftime('%Y-%m-%d')
    else:
        # First day of the month after `last`; December rolls over into
        # January of the following year instead of producing month 13
        r = f'{2020 + last // 12}-{last % 12 + 1:02}-01'

    on_date = (df['location'] == mm.get(m, m)) & (df['date'] == r)
    official[i] = df.loc[on_date, 'total_deaths'].values[0]

In [9]:
# Make plot

# Excess deaths as a share of the summed baseline; panels are ordered by it,
# largest first. The first 8 (weekly data) or 3 (monthly data) rows of 2020
# are not counted.
shares = []
for X, pred, diff in mega.values():
    first = 8 if pred.size > 12 else 3
    shares.append(np.sum(diff[first:]) / np.sum(pred))
ds = np.array(shares, dtype=float)
ind = np.argsort(ds)[::-1]

names_sorted = np.array(list(mega.keys()))[ind]
pops_sorted = pops[ind]
official_sorted = official[ind]

scale = 1.5
fig = plt.figure(figsize=(8*scale, 4.5*scale))

for i, country in enumerate(names_sorted):
    ax = plt.subplot(6, 8, i+1)

    X, pred, diff = mega[country]
    weekly = pred.size > 12
    in_2020 = X[:, 0] == 2020

    # Earlier years in grey, 2020 in red, baseline in black
    for year in np.arange(X[0, 0], 2020):
        in_year = X[:, 0] == year
        ax.plot(X[in_year, 1], X[in_year, 2], color='#aaaaaa', lw=1)
    ax.plot(X[in_2020, 1], X[in_2020, 2], color='r', lw=1.5, clip_on=False)
    ax.plot(np.arange(pred.size)+1, pred, color='k', lw=1)

    # Corner annotations
    beg = 8 if weekly else 3
    d = np.sum(diff[beg:])
    ax.text(.02, .02, f'{np.round(d/100)*100:,.0f}', transform=ax.transAxes)
    ax.text(.98, .02, f'{d/np.sum(pred)*100:.0f}%', transform=ax.transAxes, ha='right')
    ax.text(.98, 1, f'{d/pops_sorted[i]*100000:.0f}', transform=ax.transAxes, ha='right', va='top')
    if d > 0:
        ax.text(.02, 1, f'{d/official_sorted[i]:.1f}', transform=ax.transAxes, va='top')

    # Shaded area between the 2020 curve and the baseline: forward along the
    # observed values, back along the baseline
    periods = X[in_2020, 1][beg:]
    observed = X[in_2020, 2][beg:]
    expected = pred[X[in_2020, 1].astype(int)-1][beg:]
    outline_x = np.concatenate((periods, periods[::-1]))
    outline_y = np.concatenate((observed, expected[::-1]))
    outline = np.concatenate((outline_x[:, np.newaxis], outline_y[:, np.newaxis]), axis=1)
    ax.add_patch(Polygon(outline, facecolor='r', edgecolor='r', alpha=.4, zorder=5))

    ax.set_ylim([0, np.max(X[:, -1])])
    ax.set_xlim(1, 52 if weekly else 12)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(country, fontsize=10)

sns.despine()
plt.tight_layout()

fig.text(.99,.01, 'Dmitry Kobak\n@hippopedoid', size=8, ha='right')

caption = ('Data from the Human Mortality Database, the Financial Times, and the New York Times.\n'
           'Excess mortality computed relative to the baseline extrapolated from 2015–19.\n\n'
           'Bottom left: excess mortality. Bottom right: as a % of usual yearly deaths.\n'
           'Top right: per 100,000 population. Top left: ratio to the daily reported deaths.\n\n'
           f'Last update: {datetime.date.today():%b %d, %Y}')
fig.text(.45, .02, caption, fontsize=8)

plt.savefig('img/all-countries.png', dpi=200)

<IPython.core.display.Javascript object>