# COVID-19, Hospital Capacity Analysis By Country

Given where COVID-19 is today, how much capacity is left in our hospitals?  How long before they are overwhelmed?

In [1]:
%matplotlib inline

import os
from glob import glob

import datetime as dt

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import linear_model

from ipywidgets import interact

from IPython.core.pylabtools import figsize

In [2]:
# Notebook-wide display configuration.  Statement order is kept as in the
# original cell: the figure size is set first, then the seaborn defaults are
# applied, and only afterwards are the individual font sizes overridden.
figsize(15, 9)

sns.set()

pd.set_option('display.max_columns', 50)

# Font sizes (in points) shared by every figure in the notebook.
SMALL_SIZE = 14
MEDIUM_SIZE = 16
BIGGER_SIZE = 18

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

In [3]:
# Relative directory holding the static CSV inputs read by the cells below
# (hospital_beds.csv and population.csv).
data_dir = 'data'

In [4]:
# Canonical country key -> the alternative spellings that should be folded
# into it.  Spellings not listed here are handled by the caller's fallback.
_COUNTRY_ALIASES = (
    ('United_States', ('US', 'United States of America')),
    ('Czech_Republic', ('Czechia',)),
    ('United_Kingdom', ('UK', 'United Kingdom of Great Britain and Northern Ireland')),
    ('Hong_Kong', ('Hong Kong SAR',)),
    ('Russia', ('Russian Federation',)),
    ('China', ('Mainland China',)),
    ('South_Korea', ('Korea, South', 'Republic of Korea')),
)

# Flat lookup table: alternative spelling -> canonical country key.
country_map = {
    alias: canonical
    for canonical, aliases in _COUNTRY_ALIASES
    for alias in aliases
}

## Severity Rates

https://www.worldometers.info/coronavirus/coronavirus-symptoms/#mild

In [5]:
# Fractions of active cases assumed to be severe and critical respectively;
# each is multiplied by the active-case count when the COVID-19 data is loaded.
P_SEVERE = 0.138
P_CRITICAL = 0.0188

## Hospital Bed Statistics by Country

https://en.wikipedia.org/wiki/List_of_countries_by_hospital_beds

In [6]:
def get_line(line):
    """Split ``line`` on commas and return the whitespace-stripped fields.

    This is a plain ``str.split`` on ``','``: quoted fields are not treated
    specially, and an empty string yields a single empty field.
    """
    fields = []
    for raw_field in line.split(','):
        fields.append(raw_field.strip())
    return fields

def to_num(x):
    """Convert ``x`` to ``float``, returning ``None`` when it is not numeric.

    Only conversion failures are treated as "not a number" (empty strings,
    text, ``None``, out-of-range values).  Anything else, such as
    ``KeyboardInterrupt``, propagates instead of being swallowed.
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        return None

# Parse the hospital-bed CSV by hand: the first line supplies the column
# names and every following line becomes one record keyed by those names.
beds_file = os.path.join(data_dir, 'hospital_beds.csv')
with open(beds_file, 'r') as infile:
    headers = get_line(next(infile))
    rows = [dict(zip(headers, get_line(line))) for line in infile]

beds_df = pd.DataFrame(rows)

# Every cell was read as text; convert the numeric columns.  Cells that
# `to_num` cannot convert come back as None.
float_cols = ['2013', '2014', '2015', '2016', '2017', 'Occupancy', 'ICU',]
for f in float_cols:
    try:
        beds_df[f] = beds_df[f].map(to_num)
    except Exception as e:
        # Name the offending column before re-raising.
        print(f)
        raise e

# Take the most recent yearly figure that is present, checking 2017 first
# and falling back year by year down to 2013.
survey_years = ['2017', '2016', '2015', '2014', '2013']
latest_beds = beds_df[survey_years[0]]
for year in survey_years[1:]:
    latest_beds = latest_beds.combine_first(beds_df[year])
beds_df['beds_p_1k'] = latest_beds

beds_df['ICU_p_100k'] = beds_df['ICU']

# Keep only the columns the rest of the notebook uses.
beds_df = beds_df[['Country', 'beds_p_1k', 'ICU_p_100k', 'Occupancy']].copy()

beds_df.head()

Unnamed: 0,Country,beds_p_1k,ICU_p_100k,Occupancy
0,Japan,13.05,7.3,75.5
1,South_Korea,12.27,10.6,
2,Russia,8.05,8.3,
3,Germany,8.0,29.2,79.8
4,Austria,7.37,21.8,73.8


## Population By Country

https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)

In [7]:
# Load the population table and reduce it to a country key plus a numeric
# population (the source column is text with thousands separators).
pop_df = (
    pd.read_csv(os.path.join(data_dir, 'population.csv'))
    .assign(
        Country=lambda df: df['Country or area'],
        population=lambda df: (
            df['Population(1 July 2019)']
            .map(lambda x: x.replace(',', ''))
            .astype('float64')
        ),
    )
    [['Country', 'population']]
    .copy()
)
pop_df.head()

Unnamed: 0,Country,population
0,China,1433784000.0
1,India,1366418000.0
2,United_States,329064900.0
3,Indonesia,270625600.0
4,Pakistan,216565300.0


## Merge Static Data Sets

In [8]:
# Join bed statistics with population and turn the per-capita rates into
# absolute counts.  Free capacity is the share of beds not covered by the
# reported occupancy percentage; the same occupancy figure is applied to ICU
# beds.  Countries without a computable free-ICU figure are dropped.
merged_static = (
    beds_df
    .merge(pop_df, on='Country', how='inner')
    .assign(
        beds=lambda df: (df['beds_p_1k'] / 1000.0) * df['population'],
        available_beds=lambda df: df['beds'] * (1.0 - df['Occupancy'] / 100.0),
        icu=lambda df: (df['ICU_p_100k'] / 100_000.0) * df['population'],
        available_icu=lambda df: df['icu'] * (1.0 - df['Occupancy'] / 100.0),
    )
    .loc[lambda df: df['available_icu'].notnull()]
)
merged_static.head()

Unnamed: 0,Country,beds_p_1k,ICU_p_100k,Occupancy,population,beds,available_beds,icu,available_icu
0,Japan,13.05,7.3,75.5,126860301.0,1655527.0,405604.097372,9260.801973,2268.896483
3,Germany,8.0,29.2,79.8,83517045.0,668136.4,134963.54472,24386.97714,4926.169382
4,Austria,7.37,21.8,73.8,8955102.0,65999.1,17291.764656,1952.212236,511.479606
5,Hungary,7.02,13.8,65.5,9684679.0,67986.45,23455.32407,1336.485702,461.087567
6,Czech_Republic,6.63,11.6,70.1,10689209.0,70869.46,21189.967245,1239.948244,370.744525


In [9]:
# Spot-check the derived capacity figures for a few countries.
merged_static.loc[merged_static['Country'].isin(['Ireland', 'Italy', 'United_States'])]

Unnamed: 0,Country,beds_p_1k,ICU_p_100k,Occupancy,population,beds,available_beds,icu,available_icu
20,Italy,3.18,12.5,78.9,60550075.0,192549.2385,40627.889323,7568.759375,1597.008228
23,Ireland,2.96,6.5,94.9,4882495.0,14452.1852,737.061445,317.362175,16.185471
25,United_States,2.77,34.7,64.0,329064917.0,911509.82009,328143.535232,114185.526199,41106.789432


## COVID-19 Data

https://github.com/CSSEGISandData/COVID-19

In [10]:
covid_time_series_confirmed_path = \
    './COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'

covid_time_series_recovered_path = \
    './COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv'

covid_time_series_death_path = \
    './COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv'


def _load_time_series(path, value_name):
    """Read one wide time-series CSV and return it in long form.

    The file is expected to have ``Province/State``, ``Country/Region``,
    ``Lat`` and ``Long`` columns followed by one column per date.  The result
    has the columns ``State``, ``Country``, ``Date`` (still text) and
    ``value_name``.
    """
    return (
        pd.read_csv(path)
        .drop(['Lat', 'Long'], axis=1)
        .rename(columns={'Province/State': 'State', 'Country/Region': 'Country'})
        .melt(['State', 'Country'], var_name='Date', value_name=value_name)
    )


confirmed_df = _load_time_series(covid_time_series_confirmed_path, 'Confirmed')
recovered_df = _load_time_series(covid_time_series_recovered_path, 'Recovered')
deaths_df = _load_time_series(covid_time_series_death_path, 'Deaths')

MERGE_COLS = ['State', 'Country', 'Date']
VALUE_COLS = ['Confirmed', 'Recovered', 'Deaths']

# Combine the three measures and keep only rows where all of them are present.
# The explicit copy makes the frame independent, so the column assignments
# below do not trigger SettingWithCopyWarning.
covid_df = (
    confirmed_df
    .merge(recovered_df, on=MERGE_COLS)
    .merge(deaths_df, on=MERGE_COLS)
    .dropna(subset=VALUE_COLS)
    .copy()
)

# Normalise country names: use the explicit alias table first, otherwise
# strip the name and replace spaces with underscores.
covid_df['Country'] = \
    covid_df['Country'].map(lambda x: country_map.get(x, x.strip().replace(' ', '_')))

covid_df['Date'] = pd.to_datetime(covid_df['Date'])

# Collapse provinces/states into one row per country and day.  Only the count
# columns are summed, so the text `State` column never reaches `sum()`.
covid_df = covid_df.groupby(['Country', 'Date'])[VALUE_COLS].sum().reset_index()

# Derive the estimates from the country totals.  Truncating `Critical` to an
# integer per province before summing would lose up to one case per province.
covid_df['Active'] = covid_df['Confirmed'] - covid_df['Deaths'] - covid_df['Recovered']
covid_df['Severe'] = covid_df['Active'] * P_SEVERE
covid_df['Critical'] = (covid_df['Active'] * P_CRITICAL).astype('int64')

covid_df[covid_df['Country'] == 'Ireland'].tail()

Unnamed: 0,Country,Date,Confirmed,Recovered,Deaths,Active,Severe,Critical
4675,Ireland,2020-03-17,223,5,2,216,29.808,4
4676,Ireland,2020-03-18,292,5,2,285,39.33,5
4677,Ireland,2020-03-19,557,5,3,549,75.762,10
4678,Ireland,2020-03-20,683,5,3,675,93.15,12
4679,Ireland,2020-03-21,785,5,3,777,107.226,14


## Putting it together

In [11]:
# Alphabetical list of the countries present in `merged_static`; these are
# the choices offered to `show`.
countries = sorted(merged_static['Country'].drop_duplicates())

print(len(countries))

# Line width passed to every series plotted by `show`.
LINEWIDTH = 6

def show(country, log, forecast, title):
    """Plot a country's estimated critical cases against its free ICU beds.

    Reads the module-level frames ``covid_df`` (daily figures per country)
    and ``merged_static`` (bed capacity per country) and draws one figure.

    Parameters
    ----------
    country : str
        Value of the ``Country`` column to plot.
    log : bool
        Use a logarithmic y axis.
    forecast : bool
        Extrapolate the most recent ``N_DAYS`` days of critical cases with a
        log-linear fit and draw the projected points.
    title : bool
        Show the estimated number of days until critical cases reach the
        free ICU beds as the plot title.  The estimate is computed even when
        ``forecast`` is False; no title is drawn when no estimate exists
        (no growth, no free-bed figure, or too few cases to fit).

    Raises
    ------
    ValueError
        If ``covid_df`` contains no rows for ``country``.
    """
    N_DAYS = 7  # number of most recent days used to fit the trend

    country_data_df = \
        covid_df[covid_df['Country'] == country]\
            .groupby(['Country', 'Date'])\
            .sum()\
            .reset_index()

    if country_data_df.empty:
        raise ValueError('No COVID-19 data found for {!r}'.format(country))

    first_date = country_data_df['Date'].iloc[0]

    last_week = country_data_df.tail(N_DAYS)
    n_obs = len(last_week)  # equals N_DAYS unless the history is shorter
    first_date_last_week = last_week['Date'].iloc[0]

    available_icu = \
        merged_static.loc[merged_static['Country'] == country, 'available_icu'].iloc[0]

    # Fit log(critical) ~ day index whenever the forecast or the title needs
    # it.  The fit requires strictly positive counts (log(0) is -inf).
    model = None
    if forecast or title:
        critical = last_week['Critical'].astype('float64')
        if n_obs >= 2 and (critical > 0).all():
            model = linear_model.LinearRegression()
            model.fit([[i] for i in range(n_obs)], np.log(critical))
        else:
            print('Too few critical cases in the last {} days to fit a trend for {}'.format(
                N_DAYS, country))

    # Default horizon used when no crossing point can be estimated.
    n_days_predict = 4 * N_DAYS
    days_to_capacity = None
    if model is not None:
        slope = model.coef_[0]
        # A crossing point only exists for a growing trend and a positive
        # number of free beds.
        if pd.notnull(available_icu) and available_icu > 0 and slope > 0:
            crossing = int((np.log(available_icu) - model.intercept_) / slope) + 2
            days_to_capacity = max(crossing - n_obs, 0)
            # Always project at least one day, even if capacity is already exceeded.
            n_days_predict = max(crossing, n_obs + 1)

    if forecast and model is not None:
        next_week_indices = list(range(n_obs, n_days_predict))
        next_week = np.exp(model.predict([[i] for i in next_week_indices]))
    else:
        # Single placeholder row for the day after the last observation.
        next_week_indices = [n_obs]
        next_week = None

    predict_df = pd.DataFrame()
    predict_df['Date'] = pd.to_datetime([
        first_date_last_week + dt.timedelta(days=i) for i in next_week_indices
    ])
    predict_df['Country'] = country
    predict_df['Forecast'] = next_week

    concat_df = \
        country_data_df\
            .merge(predict_df, on=['Date', 'Country'], how='outer')\
            .assign(available_icu=available_icu)\
            .reset_index(drop=True)

    concat_df['Date'] = concat_df['Date'].dt.date
    concat_df = concat_df.set_index('Date')

    _, ax = plt.subplots()
    concat_df['Critical'].plot(ax=ax, logy=log, lw=LINEWIDTH, style='r-', use_index=True)

    # Tick only on Mondays, labelled month-day.
    positions = [p for p in concat_df.index if p.weekday() == 0]
    labels = [l.strftime('%m-%d') for l in positions]

    concat_df['available_icu'].plot(
        ax=ax, logy=log, lw=LINEWIDTH, style='k--', xticks=[], use_index=True)

    if next_week is not None:
        concat_df['Forecast'].plot(ax=ax, logy=log, lw=LINEWIDTH, use_index=True, style='ro')

    ax.set_xticks(positions)
    ax.set_xticklabels(labels)

    ax.annotate(
        'Free ICU Beds = {}'.format(int(available_icu)),
        (first_date, available_icu),
        fontsize=18,
        color='darkslategray',
        xytext=(10, -20),
        textcoords='offset points'
    )
    if title and days_to_capacity is not None:
        ax.set_title(
            'Approximately {} days til hospitals exceed capacity'.format(days_to_capacity),
            fontsize=BIGGER_SIZE,
        )

    ax.set_ylabel('Critical Cases')
    plt.show()

# Wire `show` to interactive controls: `country` is chosen from `countries`
# and the three flags start as False.
interact(show, country=countries, log=False, forecast=False, title=False)

20


interactive(children=(Dropdown(description='country', options=('Austria', 'Belgium', 'Czech_Republic', 'Estoni…

<function __main__.show(country, log, forecast, title)>