# C33/C34 Beatmungsgeräte
> Manuel Vazquez  
> Christian Spiller  
> Michael Wild

## Imports

In [4]:
import pandas as pd
import requests
from zipfile import ZipFile
import os, os.path as op
import tqdm.notebook as tqdm

## Load Data

In [73]:
# Make sure the local download cache folder exists. `exist_ok=True` replaces
# the separate "check, then create" steps, which could race with another
# process creating the folder in between.
os.makedirs('_data', exist_ok=True)
    
def download_if_not_exists(fname, url, timeout=60):
    '''Return the path of `fname` in the '_data/' folder, downloading it first if needed.

    Parameters
    ----------
    fname: str
      File name (relative to the '_data/' folder) under which the download is cached.
    url: str
      Address to fetch the file from when it is not cached yet.
    timeout: float or tuple, optional
      Timeout in seconds handed to `requests.get`, so that a stalled server
      cannot block the notebook forever.

    Returns
    -------
    str
      The path `'_data/' + fname`.

    Raises
    ------
    requests.HTTPError
      If the server answers with an error status.
    '''
    fpath = op.join('_data', fname)
    if op.isfile(fpath):
        # already cached, nothing to download
        return fpath

    r = requests.get(url, allow_redirects=True, timeout=timeout)
    r.raise_for_status()

    # Make sure the target folder exists even if the setup cell was not run.
    os.makedirs(op.dirname(fpath), exist_ok=True)
    # Write to a temporary name first and rename afterwards: an interrupted
    # write must not leave a truncated file that later counts as "cached".
    tmp_path = fpath + '.part'
    with open(tmp_path, 'wb') as f:
        f.write(r.content)
    os.replace(tmp_path, fpath)
    return fpath

### Ventilator Data

In [63]:
# Get the ventilator occupancy time series: one CSV per entry in `cantons`,
# downloaded into '_data/' on first use and stacked into a single frame.
cantons = ['Kanton_' + k for k in 'AG AI AR BE BL BS FR GE GL GR JU LU NE NW OW SG SH SO SZ TG TI UR VD ZG ZH'.split()] + ['FL']
frames = []
for c in tqdm.tqdm_notebook(cantons):
    fname = f'COVID19_Fallzahlen_{c}_total.csv'
    fpath = download_if_not_exists(fname, 'https://raw.githubusercontent.com/openZH/covid_19/master/fallzahlen_kanton_total_csv_v2/' + fname)
    # NOTE(review): combining columns via a nested `parse_dates` list is
    # deprecated in recent pandas releases - confirm against the installed
    # version before upgrading.
    df = pd.read_csv(fpath, parse_dates=[['date', 'time']])
    frames.append(df)
# Concatenate once instead of inside the loop, which re-copied the growing
# frame on every iteration.
data = pd.concat(frames)

data.set_index(['abbreviation_canton_and_fl', 'date_time'], inplace=True)

  0%|          | 0/26 [00:00<?, ?it/s]

### Incidence Data

In [74]:
# Fetch the covid19.admin.ch data archive (cached in '_data/') and read the
# case table by vaccination status straight out of the zip file.
fname = 'sources-csv.zip'
fpath = download_if_not_exists(
    fname,
    'https://www.covid19.admin.ch/api/data/20211112-b5cqnmme/downloads/' + fname,
)
with ZipFile(fpath) as zf, zf.open('data/COVID19Cases_vaccpersons_AKL10_w.csv') as f:
    df = pd.read_csv(f)

## Cleanup and Pre-Processing of Data

## Model

#### Total occupancy at time $T$ is the sum of the occupancies by cohort (by age-group $i$ and vaccination status $j$)
\begin{equation}
    \Large V(T) = \sum_{i=1}^{N_i} \sum_{j=1}^{N_j} V_{ij}(T)
\end{equation}

#### The occupancies by cohort $V_{ij}$ are the cumulative sum of incremental changes
\begin{equation}
    \Large V_{ij}(T) = \sum_{t=0}^T \Delta V_{ij}(t)
\end{equation}

#### The incremental changes $\Delta V_{ij}$ are the balance between entries $E_{ij}$ and exits $A_{ij}$
\begin{equation}
    \Large \Delta V_{ij}(t) = E_{ij}(t) - A_{ij}(t)
\end{equation}

#### The entries $E_{ij}$ at time $t$ are modelled as a fraction $\alpha_{ij}$ of the incidence number $I_{ij}$ at time $t - \tau_E$
\begin{equation}
    \Large E_{ij}(t) = \alpha_{ij} I_{ij}(t-\tau_E)
\end{equation}

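#### The exits $A_{ij}$ at time $t$ are modelled as the entries delayed by $\tau_A - \tau_E$, i.e. as the same fraction $\alpha_{ij}$ of the incidence number $I_{ij}$ at time $t - \tau_A$ (both offsets $\tau_E$ and $\tau_A$ are measured from the time of infection)
\begin{equation}
    \Large A_{ij}(t) = E_{ij}(t - (\tau_A - \tau_E)) = \alpha_{ij} I_{ij}(t - \tau_A)
\end{equation}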

#### Putting things together, the model reads as
\begin{equation}
    \Large V(T) = \sum_{i=1}^{N_i} \sum_{j=1}^{N_j} \sum_{t=0}^T \alpha_{ij} \left(I_{ij}(t - \tau_E) - I_{ij}(t - \tau_A)\right)
\end{equation}

#### The parameters of the model are the _conversion factors_ $\alpha_{ij}$ and the time offsets $\tau_E$ and $\tau_A$.

In [72]:
def apply_model(inc, alpha, tau_e, tau_a):
    '''Model for the estimation of ventilator occupancy given incidence time series

    For every cohort (age-group, vaccination status) the weekly change is
    ``alpha * (inc(t - tau_e) - inc(t - tau_a))``; the occupancy is the
    cumulative sum of these changes, summed over all cohorts.

    Parameters
    ----------
    inc: pd.Series or pd.DataFrame
      Incidence numbers with a multi-level index. Levels must be:
          * level=0: int, week numbers, named 't'
          * level=1: str, age-group, named 'age_group'
          * level=2: str, vaccination status, named 'vacc_status'
      Weeks before the first entry of a cohort are treated as zero incidence.
    alpha: pd.Series or mapping
      Model parameters representing the _conversion factors_ from the incidence
      numbers to the number of patients requiring ventilation. Must support the
      lookup ``alpha[age_group, vacc_status]`` (e.g. a Series with a two-level
      index, or a dict keyed by tuples) and contain a value in the range [0, 1]
      for each combination of age-group and vaccination status present in `inc`.
    tau_e: int
      Model parameter describing the number of weeks that pass between
      infection and a patient being attached to a ventilator.
    tau_a: int
      Model parameter describing the number of weeks that pass between
      infection and a patient either no longer requiring the ventilator
      or being deceased.

    Returns
    -------
    pd.Series or pd.DataFrame
      A time-series (indexed by 't', same type as `inc`) predicting the
      ventilator occupancy.
    '''
    cohort_levels = ['age_group', 'vacc_status']

    # the group-wise shift works on row order, so make sure the weeks of each
    # cohort are in ascending order (this returns a copy, `inc` is not modified)
    inc = inc.sort_index()

    # build grouping by age and vaccination status
    inc_grp = inc.groupby(level=cohort_levels)
    # shift the groups and calculate the difference; weeks before the start of
    # the series count as zero incidence. Without the fill value the shifted
    # series start with NaN, which drops the entries of the first `tau_a` weeks
    # while their exits are still subtracted later on.
    delta = inc_grp.shift(tau_e, fill_value=0) - inc_grp.shift(tau_a, fill_value=0)

    # for each group, multiply the difference with the conversion factor:
    # look the factor up once per cohort and spread it over the matching rows
    cohorts = list(zip(delta.index.get_level_values('age_group'),
                       delta.index.get_level_values('vacc_status')))
    factor_of = {cohort: alpha[cohort] for cohort in set(cohorts)}
    factors = pd.Series([factor_of[cohort] for cohort in cohorts], index=delta.index)
    delta = delta.mul(factors, axis=0)

    # for each group calculate the cumulative sum
    vij = delta.groupby(level=cohort_levels).cumsum()
    # calculate total occupancy by summing over the groups
    v = vij.groupby(level='t').sum()
    return v
    