In [15]:
import pandas as pd
from pathlib import Path
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib widget
# Notebook convenience: `some_dir.ls()` returns the directory's entries as a list.
Path.ls = lambda folder: [entry for entry in folder.iterdir()]

# ECDC Data

In [2]:
# Folder holding the downloaded ECDC spreadsheets; show what is available.
p2 = Path('ECDC')
p2.ls()

[PosixPath('ECDC/COVID-19-geographic-disbtribution-worldwide-2020-03-20.xlsx'),
 PosixPath('ECDC/COVID-19-geographic-disbtribution-worldwide-2020-03-13.xls'),
 PosixPath('ECDC/COVID-19-geographic-disbtribution-worldwide-2020-03-16.xls'),
 PosixPath('ECDC/COVID-19-geographic-disbtribution-worldwide-2020-03-17.xlsx'),
 PosixPath('ECDC/COVID-19-geographic-disbtribution-worldwide-2020-03-15.xls'),
 PosixPath('ECDC/COVID-19-geographic-disbtribution-worldwide-2020-03-14.xls'),
 PosixPath('ECDC/COVID-19-geographic-disbtribution-worldwide-2020-03-18.xls')]

In [3]:
# Directory listing order is arbitrary, so taking element [0] does not
# reliably give the most recent export. The file names embed an ISO date
# (YYYY-MM-DD) after a common prefix, so the greatest name is the newest file.
latest_export = max(p2.glob('*.xls*'), key=lambda f: f.name)
ecdc_df = pd.read_excel(latest_export)
ecdc_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5841 entries, 0 to 5840
Data columns (total 8 columns):
DateRep                      5841 non-null datetime64[ns]
Day                          5841 non-null int64
Month                        5841 non-null int64
Year                         5841 non-null int64
Cases                        5841 non-null int64
Deaths                       5841 non-null int64
Countries and territories    5841 non-null object
GeoId                        5835 non-null object
dtypes: datetime64[ns](1), int64(5), object(2)
memory usage: 365.2+ KB


In [4]:
# Peek at the first rows to check the column names and the row ordering.
ecdc_df.head()

Unnamed: 0,DateRep,Day,Month,Year,Cases,Deaths,Countries and territories,GeoId
0,2020-03-20,20,3,2020,0,0,Afghanistan,AF
1,2020-03-19,19,3,2020,0,0,Afghanistan,AF
2,2020-03-18,18,3,2020,1,0,Afghanistan,AF
3,2020-03-17,17,3,2020,5,0,Afghanistan,AF
4,2020-03-16,16,3,2020,6,0,Afghanistan,AF


In [5]:
def add_cumsum(df, country, normalize=1):
    """Return one country's daily and cumulative counts, indexed by report date.

    The rows whose 'Countries and territories' value equals ``country`` are
    kept and sorted by ascending 'DateRep'. The daily 'Cases' and 'Deaths'
    columns are exposed as 'NewConfCases' and 'NewDeaths', and their running
    totals are added as 'ConfCases' and 'Deaths'.

    ``normalize`` is accepted but not used by this function.
    The input frame is left unmodified.
    """
    is_country = df['Countries and territories'] == country
    daily = (
        df.loc[is_country, ['DateRep', 'Cases', 'Deaths']]
          .rename(columns={'Cases': 'NewConfCases', 'Deaths': 'NewDeaths'})
          .sort_values(by='DateRep')
          .set_index('DateRep')
    )
    # Running totals are computed on the date-sorted rows.
    daily['ConfCases'] = daily['NewConfCases'].cumsum()
    daily['Deaths'] = daily['NewDeaths'].cumsum()
    return daily

In [6]:
# One daily/cumulative table per country. The number next to each country is
# the same figure that appears in the bed-count `limits` labels of later cells.
ecdc_fr, ecdc_it, ecdc_sp, ecdc_ge, ecdc_us, ecdc_ca = (
    add_cumsum(ecdc_df, country_name)
    for country_name in (
        'France',                    # 5.98
        'Italy',                     # 3.18
        'Spain',                     # 2.97
        'Germany',                   # 8.00
        'United_States_of_America',  # 2.77
        'Canada',                    # 2.52
    )
)
ecdc_it.tail()

Unnamed: 0_level_0,NewConfCases,NewDeaths,ConfCases,Deaths
DateRep,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-03-16,6230,370,23980,1811
2020-03-17,4000,347,27980,2158
2020-03-18,3526,347,31506,2505
2020-03-19,4207,473,35713,2978
2020-03-20,5322,429,41035,3407


In [7]:
def tri_plot(serie, title = None, ax = None, bar_plot=True, bar_offset = None, lab=None, color=None, alpha=1, growth=False, log=False):
    """Draw a series as a line, optionally with bars of its step-to-step change.

    Parameters
    ----------
    serie : pandas.Series
        Values to plot against their own index (the notebook passes
        cumulative counts indexed by date).
    title : str, optional
        Axes title. Applied whenever it is given, whether the axes were
        created here or supplied by the caller.
    ax : matplotlib Axes, optional
        Axes to draw on. A new 10x7 figure is created when omitted.
    bar_plot : bool
        When true, also draw ``serie.diff()`` as bars.
    bar_offset : optional
        Only its truthiness is used: a truthy value selects narrow bars
        (width 0.3) instead of the default 0.8. The bars are not shifted.
    lab : str, optional
        Legend label of the line.
    color, alpha :
        Forwarded to both the line and the bars.
    growth : bool
        Accepted for compatibility; no growth-rate curve is drawn.
    log : bool
        When true, switch the y axis to a logarithmic scale.
    """
    # Test identity, not truthiness: a caller-supplied axes object must be
    # used whatever its boolean value is.
    if ax is None:
        _, ax = plt.subplots(figsize=(10,7))
    if title is not None:
        ax.set_title(title)

    b_width = 0.3 if bar_offset else 0.8
    ax.plot(serie.index, serie, label=lab, color=color, marker='.', alpha=alpha)
    if bar_plot:
        ax.bar(serie.index, serie.diff(), width=b_width, color=color, alpha=alpha)

    if log:
        ax.set_yscale('log')

In [8]:
def plot_country_comparison(data, bar_plot=True, start=None, title=None, 
                            dates = None, limits=None, log = False, alpha=None, figsize=None):
    """Build a plot comparing the progression of multiple countries, with various options.

    Parameters
    ----------
    data : dict
        Maps a legend label to a ``[series, color, alpha]`` triple. The first
        entry's series provides the x ticks and the right x-limit.
    bar_plot : bool
        Forwarded to ``tri_plot``.
    start : optional
        Left x-limit. It is compared against each series index, so it must be
        comparable with it (the notebook passes ISO date strings). Only points
        strictly after ``start`` are considered for the lower y-limit. With
        ``None`` the whole series is used and the left x-limit is left as is.
    title : str, optional
        Left-aligned axes title.
    dates : dict, optional
        Maps an x position to ``[label, color]``; each entry is drawn as a
        dotted vertical line with an annotation at the top of the axes.
    limits : dict, optional
        Maps a y value to ``[label, color]``; each entry is drawn as a thin
        horizontal line with an annotation at the left of the plot.
    log : bool
        Use a logarithmic y axis and add a "double every 3 days" reference
        curve starting from the first series' minimum.
    alpha : float, optional
        Fallback opacity for entries of ``data`` whose own alpha is ``None``.
    figsize : tuple, optional
        Figure size; defaults to (15, 15).
    """

    def _from_start(serie, inclusive):
        # Restrict a series to the plotted window; without `start` the whole
        # series is in view.
        if start is None:
            return serie
        return serie[serie.index >= start] if inclusive else serie[serie.index > start]

    ## Build the plot frame
    fig, ax = plt.subplots(figsize=(15,15) if figsize is None else figsize)
    offset = 0.15
    if title is not None:
        ax.set_title(title, fontsize=14, loc='left', pad=10)

    #### Draw the different plots
    # Keep the function-level alpha under its own name: the per-series alpha
    # unpacked below must not overwrite it.
    default_alpha = alpha
    ymin = 1000  # the lower y-limit never starts above this value
    for lbl, (serie, color, serie_alpha) in data.items():
        if serie_alpha is None:
            serie_alpha = default_alpha
        tri_plot(serie, ax=ax, bar_plot=bar_plot, bar_offset=-offset, lab=lbl,
                 color=color, log=log, alpha=serie_alpha)
        tymin = _from_start(serie, inclusive=False).min()
        if tymin < ymin:
            ymin = tymin

    ## Configure axis labels
    ax.set_ylabel('Count' if not log else 'Count (log)')
    ax.set_xlabel('Dates')
    s = list(data.values())[0][0]
    ax.set_xticks(s.index)
    ax.set_xticklabels([str(d)[:10] for d in s.index], rotation=90)
    ax.set_xlim(start, str(s.index.max()))
    ax.set_ylim(ymin+10, ax.get_ylim()[-1])

    if log:
        ## Add comparison line for exponential growth
        s2 = _from_start(s, inclusive=True)
        ymin2 = s2.min()
        n = len(s2)
        y = [ymin2 * 2**(i/3) for i in np.arange(1, 1+n, 1)]
        ax.plot(s2.index, y, linestyle='-.', color='Cyan', label='Growth: Double cases every 3 days')

    ## Add date markers
    if dates is not None:
        for d, marker in dates.items():
            lbl, col = marker[0], marker[1]
            ax.axvline(x=d, linestyle=':', color=col, linewidth=1)
            y = ax.get_ylim()[1]
            ax.annotate(lbl, xy=(d, y), color=col, fontsize=12, rotation=45)

    ## Add Amount limits
    if limits is not None:
        # Anchor the labels at the left of the plotted window.
        label_x = start if start is not None else s.index.min()
        for y, marker in limits.items():
            lbl, col = marker[0], marker[1]
            ax.axhline(y=y, color=col, linewidth=.4)
            ax.annotate(lbl, xy=(label_x, y), color=col, fontsize=12)

    ax.legend(fontsize=14)
    fig.tight_layout()

## Compare confirmed cases with government decisions

In [16]:
start = '2020-02-22'

# One row per plotted country: (legend label, per-country frame, line color).
# Commented rows are countries currently left out of the figure.
data = {label: [frame['ConfCases'].shift(0), color, 1]
        for label, frame, color in [
            ('Italy', ecdc_it, 'b'),
            ('France', ecdc_fr, 'r'),
            #('Spain', ecdc_sp, 'g'),
            #('Germany', ecdc_ge, 'c'),
            ('USA', ecdc_us, 'm'),
            ('Canada', ecdc_ca, 'k'),
        ]}

# Event date -> [annotation text, marker color (same color as the country's line)].
dates = {
    '2020-03-04': ['Italy closes Schools', 'b'],
    '2020-03-08': ['Italy lockdown', 'b'],
    '2020-03-14': ['France closes Schools', 'r'],
    '2020-03-15': ['France lockdown', 'r'],
    '2020-03-16': ['Canada closes Shops', 'k'],
}

plot_country_comparison(
    data,
    bar_plot=False,
    start=start,
    title='COVID19 Confirmed cases and events',
    dates=dates,
    limits=None,
    log=False,
    alpha=None,
    figsize=(15,10),
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Compare confirmed cases with hospital bed counts per country (log scale)

In [17]:
start = '2020-02-22'

# Legend label -> [cumulative confirmed cases, line color, opacity].
data = {'Italy':[ecdc_it['ConfCases'].shift(0),'b',1],
        'France':[ecdc_fr['ConfCases'].shift(0),'r',1],
        #'Spain':[ecdc_sp['ConfCases'].shift(0),'g',1],
        #'Germany':[ecdc_ge['ConfCases'].shift(0),'c',1],
        'USA':[ecdc_us['ConfCases'].shift(0),'m',1],
        'Canada':[ecdc_ca['ConfCases'].shift(0),'k',1]}

# Event markers use the color of the country they belong to
# (France is drawn in red, so its markers are red as well).
dates = {'2020-03-04':['Italy closes Schools', 'b'],
         '2020-03-08':['Italy lockdown', 'b'],
         '2020-03-14':['France closes Schools', 'r'],
         '2020-03-15':['France lockdown', 'r']}

# Horizontal reference lines: y value -> [annotation text, color].
# NOTE(review): each value is a per-1K rate multiplied by 1000 and is drawn
# against absolute case counts -- confirm this is the intended comparison.
limits = {3.18*1000 : ['Italy beds (3.18 per 1Kha - OCDE 2017)', 'b'],
          5.98*1000 : ['France beds (5.98 per 1Kha - OCDE 2017)', 'r'],
          #2.97*1000 : ['Spain beds (2.97 per 1Kha - OCDE 2017)', 'g'],
          #8.00*1000 : ['Germany beds (8.00 per 1Kha - OCDE 2017)', 'c'],
          2.77*1000 : ['USA beds (2.77 per 1Kha - OCDE 2016)', 'm'],
          2.52*1000 : ['Canada beds (2.52 per 1Kha - OCDE 2017)', 'k']}

plot_country_comparison(data, bar_plot=False, start=start, title='COVID19 Confirmed cases and bed counts\n(on logarithmic scale)', 
                        dates = dates, limits=limits, log = True, alpha=None, figsize=(15,15))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Compare deaths with the dates when the number of cases exceeded bed counts

In [18]:
start = '2020-02-24'

# One row per plotted country: (legend label, per-country frame, line color).
# Commented rows are countries currently left out of the figure.
data = {label: [frame['Deaths'].shift(0), color, 1]
        for label, frame, color in [
            ('Italy', ecdc_it, 'b'),
            ('France', ecdc_fr, 'r'),
            ('Spain', ecdc_sp, 'g'),
            #('Germany', ecdc_ge, 'c'),
            #('USA', ecdc_us, 'm'),
            #('Canada', ecdc_ca, 'k'),
        ]}

# Date -> [annotation text, marker color (same color as the country's line)].
dates = {
    '2020-03-05': ['Italy cases\nexceeds beds ratio', 'b'],
    '2020-03-13': ['Spain cases\nexceeds beds ratio', 'g'],
    '2020-03-16': ['France cases\nexceeds beds ratio', 'r'],
    #'2020-03-14': ['USA cases\nexceeds beds ratio', 'm'],
}

plot_country_comparison(
    data,
    bar_plot=False,
    start=start,
    title='COVID19 deaths and chronology',
    dates=dates,
    log=False,
    alpha=None,
    figsize=(15,10),
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Compare Italy and France with an offset of 8 days (confirmed cases)

In [19]:
start = '2020-02-24'

# France is moved 8 rows earlier so that its curve overlays Italy's.
# NOTE(review): shift(-8) moves values by 8 rows, which equals 8 days only if
# the French series has exactly one row per day -- confirm there are no gaps.
data = {'Italy':[ecdc_it['ConfCases'].shift(0),'b',1],
        'France':[ecdc_fr['ConfCases'].shift(-8),'r',1]}

# Event markers use the color of the country they belong to
# (France is drawn in red, so its marker is red as well).
# NOTE(review): the France lockdown marker is at 2020-03-09 here, while the
# unshifted figure earlier in the notebook places it at 2020-03-15 -- confirm
# which date the 8-day shift was applied to.
dates = {'2020-03-04':['Italy closes Schools', 'b'],
         '2020-03-08':['Italy lockdown', 'b'],
         '2020-03-09':['France lockdown', 'r']}

# Horizontal reference lines: y value -> [annotation text, color].
limits = {3.18*1000 : ['Italy beds (3.18 per 1Kha - OCDE 2017)', 'b'],
          5.98*1000 : ['France beds (5.98 per 1Kha - OCDE 2017)', 'r']}

plot_country_comparison(data, bar_plot=False, start=start, title='France aligned to Italy (8 days offset)\nConfirmed cases', 
                        dates = dates, limits=limits, log = False, alpha=None, figsize=(15,10))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [20]:
start = '2020-02-24'

# France is moved 8 rows earlier so that its curve overlays Italy's.
# NOTE(review): shift(-8) moves values by 8 rows, which equals 8 days only if
# the French series has exactly one row per day -- confirm there are no gaps.
data = {'Italy':[ecdc_it['Deaths'].shift(0),'b',1],
        'France':[ecdc_fr['Deaths'].shift(-8),'r',1]}

# Event markers use the color of the country they belong to
# (France is drawn in red, so its marker is red as well).
dates = {'2020-03-04':['Italy closes Schools', 'b'],
         '2020-03-08':['Italy lockdown', 'b'],
         '2020-03-09':['France lockdown', 'r']}

plot_country_comparison(data, bar_plot=False, start=start, title='France aligned to Italy (8 days offset)\nDeaths', 
                        dates = dates, limits=None, log = False, alpha=None, figsize=(15,10))

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …