# Analysis of the misalignment between reported non-COVID deaths and those estimated from previous years

In [1]:
import altair as alt
import pandas as pd

from pathlib import Path

In [2]:
# Directory holding the CSV inputs read by the cells below
INPUT_PATH = Path('./data')

# Lift Altair's row-count limit so the full data frames can be passed to the charts
alt.data_transformers.disable_max_rows()

## Italian Regions

<img src="imgs/italy_regions-wanderingitaly.com.png" width="400px">

## Cumulative COVID-19 Daily Deaths per Million People

In [3]:
# Cumulative COVID-19 daily deaths ("cum" = cumulative, "dd" = daily deaths)
cum_covid_csv = INPUT_PATH / 'cumulative_covid_daily_deaths_per_million_ppl.csv'
cum_covid_dd = pd.read_csv(str(cum_covid_csv), parse_dates=[0])  # column 0 is parsed as dates

In [4]:
# Summarise columns, dtypes and non-null counts of the cumulative frame
cum_covid_dd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5960 entries, 0 to 5959
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   date                5960 non-null   datetime64[ns]
 1   region              5960 non-null   object        
 2   deaths              5960 non-null   int64         
 3   ppl                 5960 non-null   int64         
 4   deaths_per_million  5960 non-null   float64       
dtypes: datetime64[ns](1), float64(1), int64(2), object(1)
memory usage: 232.9+ KB


In [5]:
# Preview the last 10 rows of the cumulative frame
cum_covid_dd.tail(10)

Unnamed: 0,date,region,deaths,ppl,deaths_per_million
5950,2020-12-08,Veneto,4374,4879133,896.47
5951,2020-12-09,Veneto,4403,4879133,902.41
5952,2020-12-10,Veneto,4551,4879133,932.75
5953,2020-12-11,Veneto,4659,4879133,954.88
5954,2020-12-12,Veneto,4769,4879133,977.43
5955,2020-12-13,Veneto,4801,4879133,983.99
5956,2020-12-14,Veneto,4827,4879133,989.32
5957,2020-12-15,Veneto,4992,4879133,1023.13
5958,2020-12-16,Veneto,5069,4879133,1038.91
5959,2020-12-17,Veneto,5161,4879133,1057.77


In [6]:
# Legend-bound multi-selection on `region`: click a legend entry to highlight it,
# Shift+click further entries to select multiple regions
selection = alt.selection_multi(fields=['region'], bind='legend')

# Encoding channels, defined up front to keep the chart expression short
cum_x = alt.X('date:T', axis=alt.Axis(title=None))
cum_y = alt.Y('deaths_per_million:Q', axis=alt.Axis(title='Cumulative deaths per million people'))
# Palette reference: https://vega.github.io/vega/docs/schemes/
cum_color = alt.Color(
    'region:N',
    scale=alt.Scale(scheme='category20'),
    legend=alt.Legend(title='Region')
)
# Selected regions are drawn fully opaque, all the others are faded
cum_opacity = alt.condition(selection, alt.value(1), alt.value(0.2))

line = (
    alt.Chart(cum_covid_dd)
    .mark_line()
    .encode(x=cum_x, y=cum_y, color=cum_color, opacity=cum_opacity, size=alt.value(4))
    .add_selection(selection)
    .properties(
        title='Cumulative COVID-19 daily deaths during 2020',
        width=600,
        height=400
    )
    .configure_axis(labelFontSize=12, titleFontSize=14)
    .configure_legend(titleFontSize=14, labelFontSize=12, symbolStrokeWidth=8)
    .configure_title(fontSize=22)
)

line

## COVID-19 Daily Deaths per Million People

In [7]:
# Incremental (per-day) COVID-19 deaths ("inc" = incremental, "dd" = daily deaths)
inc_covid_csv = INPUT_PATH / 'covid_daily_deaths_per_million_ppl.csv'
inc_covid_dd = pd.read_csv(str(inc_covid_csv), parse_dates=[0])  # column 0 is parsed as dates

In [8]:
# Rename `covid_deaths_2020` to `deaths`, the column name the charts and previews below rely on.
# Re-assignment is used instead of `inplace=True`, following the usual pandas advice
# to avoid in-place mutation of a frame shared across notebook cells.
inc_covid_dd = inc_covid_dd.rename(columns={'covid_deaths_2020': 'deaths'})

In [9]:
# Summarise columns, dtypes and non-null counts of the incremental frame
inc_covid_dd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5960 entries, 0 to 5959
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   date                5960 non-null   datetime64[ns]
 1   region              5960 non-null   object        
 2   deaths              5960 non-null   int64         
 3   ppl                 5960 non-null   int64         
 4   deaths_per_million  5960 non-null   float64       
dtypes: datetime64[ns](1), float64(1), int64(2), object(1)
memory usage: 232.9+ KB


In [10]:
# Preview the last 10 rows of the incremental frame
inc_covid_dd.tail(10)

Unnamed: 0,date,region,deaths,ppl,deaths_per_million
5950,2020-12-08,Veneto,113,4879133,23.16
5951,2020-12-09,Veneto,29,4879133,5.94
5952,2020-12-10,Veneto,148,4879133,30.33
5953,2020-12-11,Veneto,108,4879133,22.14
5954,2020-12-12,Veneto,110,4879133,22.54
5955,2020-12-13,Veneto,32,4879133,6.56
5956,2020-12-14,Veneto,26,4879133,5.33
5957,2020-12-15,Veneto,165,4879133,33.82
5958,2020-12-16,Veneto,77,4879133,15.78
5959,2020-12-17,Veneto,92,4879133,18.86


In [11]:
# Legend-bound multi-selection on `region` (Shift+click legend entries to select several)
selection = alt.selection_multi(fields=['region'], bind='legend')

# Encoding channels, defined up front to keep the chart expression short
inc_x = alt.X('date:T', axis=alt.Axis(title=None))
# The y axis is fixed to the 0-110 domain; values outside it are clamped
inc_y = alt.Y(
    'deaths_per_million:Q',
    scale=alt.Scale(domain=(0, 110), clamp=True),
    axis=alt.Axis(title='Daily deaths per million people')
)
# Palette reference: https://vega.github.io/vega/docs/schemes/
inc_color = alt.Color(
    'region:N',
    scale=alt.Scale(scheme='category20'),
    legend=alt.Legend(title='Region')
)
# Unselected regions are made almost invisible
inc_opacity = alt.condition(selection, alt.value(1), alt.value(0.01))

line = (
    alt.Chart(inc_covid_dd)
    .mark_line()
    .encode(x=inc_x, y=inc_y, color=inc_color, opacity=inc_opacity, size=alt.value(3))
    .add_selection(selection)
    .properties(
        title='COVID-19 daily deaths during 2020',
        width=600,
        height=400
    )
    .configure_axis(labelFontSize=12, titleFontSize=14)
    .configure_legend(titleFontSize=14, labelFontSize=12, symbolStrokeWidth=8)
    .configure_title(fontSize=22)
)

line

## Historical Monthly Deaths Comparison

In [13]:
# Monthly deaths used for the historical comparison ("hist_comp" = historical comparison, "md" = monthly deaths)
hist_comp_csv = INPUT_PATH / 'covid_monthly_deaths_per_million_ppl.csv'
hist_comp_md = pd.read_csv(str(hist_comp_csv))

In [14]:
# Display the frame; the rendered output shows only its first and last rows
hist_comp_md

Unnamed: 0,month,region,deaths,year,death_type,ppl,deaths_per_million
0,2,Abruzzo,0.0,2020,covid,1293941,0.00
1,3,Abruzzo,115.0,2020,covid,1293941,88.88
2,4,Abruzzo,205.0,2020,covid,1293941,158.43
3,5,Abruzzo,85.0,2020,covid,1293941,65.69
4,6,Abruzzo,59.0,2020,covid,1293941,45.60
...,...,...,...,...,...,...,...
1211,5,Veneto,3930.0,2019,total,4884590,804.57
1212,6,Veneto,3920.0,2019,total,4884590,802.52
1213,7,Veneto,3827.0,2019,total,4884590,783.48
1214,8,Veneto,3770.0,2019,total,4884590,771.82


In [15]:
# Region whose monthly deaths are plotted
region = 'Lombardia'

# Row filters: drop the 'total' series and keep only the chosen region
mask0 = hist_comp_md.death_type != 'total'
mask1 = hist_comp_md.region == region

# The legend of this chart shows `death_type`, so the legend-bound selection has to
# project onto that field. A selection on `region` never attaches to the legend, and
# since a single region is plotted every bar would always count as selected.
selection = alt.selection_multi(fields=['death_type'], bind='legend')

# Fixed colours for the two plotted death types
domain = ['non-covid', 'covid']
range_ = ['#1f77b4', '#ff7f0e']

bar = alt.Chart(hist_comp_md.loc[mask0 & mask1]).mark_bar(
    cornerRadiusTopLeft=3,
    cornerRadiusTopRight=3,
    opacity=0.5
).encode(
    x=alt.X('month:O', axis=alt.Axis(title=None)),
    y=alt.Y(
        'deaths_per_million:Q',
        axis=alt.Axis(title='Monthly deaths per million people'),
    ),
    color=alt.Color(
        'death_type:N',
        scale=alt.Scale(domain=domain, range=range_),
        legend=alt.Legend(title='Death type')
    ),
    # Death types picked in the legend stay opaque, the others almost disappear
    opacity=alt.condition(selection, alt.value(1), alt.value(0.01)),
    tooltip=[
        alt.Tooltip('region', title='Region'),
        alt.Tooltip('death_type', title='Death type'),
        alt.Tooltip('deaths_per_million', title='Deaths per million')
    ]
).configure_axis(
    labelFontSize=12,
    titleFontSize=14,
).configure_legend(
    titleFontSize=14,
    labelFontSize=12,
    symbolStrokeWidth=8
).configure_title(
    fontSize=22
).add_selection(
    selection
).properties(
    title='COVID-19 monthly deaths during 2020',
    width=600,
    height=400
)

bar

In [71]:
# NOTE(review): `datum` is not used in this cell; the import is kept in case other
# cells rely on it. Consider moving it to the import cell at the top of the notebook.
from altair import datum

# Region whose monthly deaths are plotted
region = 'Lombardia'

# Fixed colours for the two death types shown as bars
domain = ['non-covid', 'covid']
range_ = ['#1f77b4', '#ff7f0e']


# Base chart: shared x/y encodings, restricted to the chosen region
base = alt.Chart(hist_comp_md).encode(
    x=alt.X('month:O', axis=alt.Axis(title=None, labelAngle=0)),
    y=alt.Y(
        'deaths_per_million:Q',
        axis=alt.Axis(title='Monthly deaths per million people'),
    )
).transform_filter(
    # region == <chosen region>
    alt.FieldEqualPredicate(field='region', equal=region)
)


# Bar-plot (with legend selection on the death type)
selection = alt.selection_multi(fields=['death_type'], bind='legend')
bar = base.mark_bar(
    cornerRadiusTopLeft=3,
    cornerRadiusTopRight=3,
    opacity=0.5
).encode(
    color=alt.Color(
        'death_type:N',
        scale=alt.Scale(domain=domain, range=range_),
        legend=alt.Legend(title='Death type')
    ),
    # Death types picked in the legend stay opaque, the others almost disappear
    opacity=alt.condition(selection, alt.value(1), alt.value(0.01)),
    tooltip=[
        alt.Tooltip('region', title='Region'),
        alt.Tooltip('death_type', title='Death type'),
        alt.Tooltip('deaths_per_million', title='Deaths per million')
    ]
).transform_filter(
    # death_type != 'total'
    alt.FieldOneOfPredicate(field='death_type', oneOf=['covid', 'non-covid'])
).add_selection(
    selection
)


# Line-plot: one black line per year of 'total' deaths before 2020.
# No selection is attached here: `year` is only a `detail` channel, so it has no
# legend to bind to, and no encoding of the lines depends on a selection.
line = base.mark_line(color='black').encode(
    detail='year:N'
).transform_filter(
    # death_type == 'total'
    alt.FieldEqualPredicate(field='death_type', equal='total')
).transform_filter(
    # year < 2020
    alt.FieldLTPredicate(field='year', lt=2020)
)


# Complete plot: bars with the historical lines layered on top
fig = alt.layer(
    bar,
    line
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=22
).properties(
    title='COVID-19 monthly deaths during 2020',
    width=600,
    height=400
)

fig