# Exploratory Data Analysis - Epidemic

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from datetime import date, timedelta
import math
import pickle

from imblearn.over_sampling import SMOTE
from collections import Counter

In [2]:
# Loading epidemic datasets

## Cases and testing
cases_malaysia = pd.read_csv('dataset/epidemic/cases_malaysia.csv')
cases_state = pd.read_csv('dataset/epidemic/cases_state.csv')
tests_malaysia = pd.read_csv('dataset/epidemic/tests_malaysia.csv') 
tests_state = pd.read_csv('dataset/epidemic/tests_state.csv')

## Deaths
deaths_malaysia = pd.read_csv('dataset/epidemic/deaths_malaysia.csv')
deaths_state = pd.read_csv('dataset/epidemic/deaths_state.csv')

## Clustering
clusters = pd.read_csv('dataset/epidemic/clusters.csv')

## Healthcare
hospital = pd.read_csv('dataset/epidemic/hospital.csv')
icu = pd.read_csv('dataset/epidemic/icu.csv')
pkrc = pd.read_csv('dataset/epidemic/pkrc.csv') # PUSAT KUARANTINE DAN RAWATAN COVID, QUARANTINE AND TREATMENT CENTERS

# Planning

Overall data wrangling pipeline

1. Handle duplicate
    - checkDuplicate
2. Handle missing value
    - checkNaN
    - checkStateQuantity
3. Handle outlier
    - checkOutlier
4. Handle dates (and segment date into smaller pieces)
	- dateReformat (date format checks)
	- Segment date into year>month>week>day
99. Lastly, custom checking

Overall EDA
1. Merge?
2. Agg?
99. Plot useful graphs
    - multidimensional views

In [3]:
# Basic info
def basicInfo(df):
    print("Shape: " + str(df.shape))
    return df.tail(3)
    
# Handle duplicate
def checkDuplicate(df):
    df[df.duplicated()]
    
# Handle missing value
def checkNaN(df):
    null_rows = df.isna().any(axis=1)
    rows = null_rows.shape[0]  
    # print(rows, "rows with missing values")
    
    if rows > 0: 
        print(df.isna().sum(), "\n")
        return null_rows

In [4]:
# Handle outlier
def check_outlier(df):
    numeric_columns = df.describe().columns.copy()
    num_of_columns = len(numeric_columns)
    
    for i in range(math.ceil(num_of_columns/2)):
        c = 2*i
        
        plt.figure(figsize=(20,2))
        plt.subplot(1, 2, 1)
        plot_boxplot(df[numeric_columns[c]], xlabel=numeric_columns[c])
        plt.subplot(1, 2, 2)
        try:
            plot_boxplot(df[numeric_columns[c+1]], xlabel=numeric_columns[c+1])
        except IndexError:
            plt.xticks([]), plt.yticks([])
            plt.axis("off")
        plt.show()
    
def plot_boxplot(series, title='', xlabel=''):
    bp = sns.boxplot(x=series)
    bp.set(title=title,
           xlabel=xlabel)
    return bp

In [5]:
# Handle dates

# # wrangle relevant date columns
# df.date = pd.to_datetime(df.date, errors='coerce').dt.date
# df.date_positive = pd.to_datetime(df.date_positive, errors='coerce').dt.date
# assert len(df[df.date.isnull()]) == len(df[df.date_positive.isnull()]) == 0
# df.date_dose2 = pd.to_datetime(df.date_dose2, errors='coerce').dt.date

# Cases and Testing

## cases_malaysia.csv

In [6]:
basicInfo(cases_malaysia)

Shape: (620, 19)


Unnamed: 0,date,cases_new,cases_import,cases_recovered,cases_active,cases_cluster,cases_pvax,cases_fvax,cases_child,cases_adolescent,cases_adult,cases_elderly,cluster_import,cluster_religious,cluster_community,cluster_highRisk,cluster_education,cluster_detentionCentre,cluster_workplace
617,2021-10-03,9066,12,14454,150146,388,1119,4941,1700,764,5348,1087,0.0,1.0,224.0,17.0,1.0,5.0,140.0
618,2021-10-04,8075,6,15456,142746,731,1093,4282,1353,641,5050,948,0.0,5.0,73.0,25.0,8.0,7.0,613.0
619,2021-10-05,8817,14,15615,135945,619,1060,5105,1386,686,5624,1007,0.0,0.0,199.0,30.0,10.0,3.0,377.0


In [7]:
cases_malaysia[cases_malaysia.duplicated()]

Unnamed: 0,date,cases_new,cases_import,cases_recovered,cases_active,cases_cluster,cases_pvax,cases_fvax,cases_child,cases_adolescent,cases_adult,cases_elderly,cluster_import,cluster_religious,cluster_community,cluster_highRisk,cluster_education,cluster_detentionCentre,cluster_workplace


In [8]:
cases_malaysia[checkNaN(cases_malaysia)]

date                         0
cases_new                    0
cases_import                 0
cases_recovered              0
cases_active                 0
cases_cluster                0
cases_pvax                   0
cases_fvax                   0
cases_child                  0
cases_adolescent             0
cases_adult                  0
cases_elderly                0
cluster_import             342
cluster_religious          342
cluster_community          342
cluster_highRisk           342
cluster_education          342
cluster_detentionCentre    342
cluster_workplace          342
dtype: int64 



Unnamed: 0,date,cases_new,cases_import,cases_recovered,cases_active,cases_cluster,cases_pvax,cases_fvax,cases_child,cases_adolescent,cases_adult,cases_elderly,cluster_import,cluster_religious,cluster_community,cluster_highRisk,cluster_education,cluster_detentionCentre,cluster_workplace
0,2020-01-25,4,4,0,4,0,0,0,0,0,1,0,,,,,,,
1,2020-01-26,0,0,0,4,0,0,0,0,0,0,0,,,,,,,
2,2020-01-27,0,0,0,4,0,0,0,0,0,0,0,,,,,,,
3,2020-01-28,0,0,0,4,0,0,0,0,0,0,0,,,,,,,
4,2020-01-29,3,3,0,7,0,0,0,1,0,2,0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337,2020-12-27,1196,5,997,20198,508,0,0,112,45,876,91,,,,,,,
338,2020-12-28,1594,3,1181,20603,743,0,0,129,51,1300,88,,,,,,,
339,2020-12-29,1925,10,1123,21402,1098,0,0,127,42,1544,79,,,,,,,
340,2020-12-30,1870,2,745,22519,920,0,0,141,62,1304,115,,,,,,,


In [9]:
# Outlier not applicable since it's not human errors
# check_outlier(cases_malaysia)

### Observation

1. There are total of 342 rows with NaN value, all on the cluster data and starting from 2020-01-25 to 2020-12-31. 
2. Therefore, suggesting spliting dataset into 2
    - First dataset, drop all null cluster rows, retain other fields
    - Second datset, drop all clusters fields, retain other fields
3. Besides, the cluster here is not the same as clusters.csv

In [10]:
# First dataset
cases_malaysia_cluster = cases_malaysia.dropna()
basicInfo(cases_malaysia_cluster)

Shape: (278, 19)


Unnamed: 0,date,cases_new,cases_import,cases_recovered,cases_active,cases_cluster,cases_pvax,cases_fvax,cases_child,cases_adolescent,cases_adult,cases_elderly,cluster_import,cluster_religious,cluster_community,cluster_highRisk,cluster_education,cluster_detentionCentre,cluster_workplace
617,2021-10-03,9066,12,14454,150146,388,1119,4941,1700,764,5348,1087,0.0,1.0,224.0,17.0,1.0,5.0,140.0
618,2021-10-04,8075,6,15456,142746,731,1093,4282,1353,641,5050,948,0.0,5.0,73.0,25.0,8.0,7.0,613.0
619,2021-10-05,8817,14,15615,135945,619,1060,5105,1386,686,5624,1007,0.0,0.0,199.0,30.0,10.0,3.0,377.0


In [11]:
cases_malaysia = cases_malaysia[["date","cases_new","cases_import",\
                                 "cases_recovered","cases_active","cases_cluster",\
                                 "cases_pvax","cases_fvax","cases_child",\
                                 "cases_adolescent","cases_adult","cases_elderly"]]
basicInfo(cases_malaysia)

Shape: (620, 12)


Unnamed: 0,date,cases_new,cases_import,cases_recovered,cases_active,cases_cluster,cases_pvax,cases_fvax,cases_child,cases_adolescent,cases_adult,cases_elderly
617,2021-10-03,9066,12,14454,150146,388,1119,4941,1700,764,5348,1087
618,2021-10-04,8075,6,15456,142746,731,1093,4282,1353,641,5050,948
619,2021-10-05,8817,14,15615,135945,619,1060,5105,1386,686,5624,1007


## cases_state.csv

In [12]:
basicInfo(cases_state)

Shape: (9920, 13)


Unnamed: 0,date,state,cases_new,cases_import,cases_recovered,cases_active,cases_cluster,cases_pvax,cases_fvax,cases_child,cases_adolescent,cases_adult,cases_elderly
9917,2021-10-05,W.P. Kuala Lumpur,217,0,305,3471,0,15,146,39,13,144,21
9918,2021-10-05,W.P. Labuan,3,0,9,35,1,0,3,0,0,3,0
9919,2021-10-05,W.P. Putrajaya,23,0,33,413,0,3,16,4,2,17,0


In [13]:
cases_state[cases_state.duplicated()]

Unnamed: 0,date,state,cases_new,cases_import,cases_recovered,cases_active,cases_cluster,cases_pvax,cases_fvax,cases_child,cases_adolescent,cases_adult,cases_elderly


In [14]:
cases_state[checkNaN(cases_state)]

date                0
state               0
cases_new           0
cases_import        0
cases_recovered     0
cases_active        0
cases_cluster       0
cases_pvax          0
cases_fvax          0
cases_child         0
cases_adolescent    0
cases_adult         0
cases_elderly       0
dtype: int64 



Unnamed: 0,date,state,cases_new,cases_import,cases_recovered,cases_active,cases_cluster,cases_pvax,cases_fvax,cases_child,cases_adolescent,cases_adult,cases_elderly


In [15]:
# Check the quantity counts of each state (all of them have to tally)
cases_state.state.value_counts()

Pulau Pinang         620
Selangor             620
Pahang               620
Melaka               620
Perlis               620
W.P. Kuala Lumpur    620
Sabah                620
Perak                620
Johor                620
W.P. Putrajaya       620
Negeri Sembilan      620
W.P. Labuan          620
Kedah                620
Kelantan             620
Terengganu           620
Sarawak              620
Name: state, dtype: int64

In [16]:
# Outlier not applicable since it's not human errors
# check_outlier(cases_state)

### Possible customChecking for cases_malaysia and cases_state
- ageCat check categories (some of the cases have unverifiable age)
- ageCat is divided into cases_child, cases_adolescent, cases_adult, cases_elderly, and the rest is unverified

## tests_malaysia.csv

In [17]:
basicInfo(tests_malaysia)

Shape: (618, 3)


Unnamed: 0,date,rtk-ag,pcr
615,2021-09-30,96673,43082
616,2021-10-01,100825,37382
617,2021-10-02,67827,34369


In [18]:
tests_malaysia[tests_malaysia.duplicated()]

Unnamed: 0,date,rtk-ag,pcr


In [19]:
tests_malaysia[checkNaN(tests_malaysia)]

date      0
rtk-ag    0
pcr       0
dtype: int64 



Unnamed: 0,date,rtk-ag,pcr


## tests_state.csv

In [20]:
basicInfo(tests_state)

Shape: (1504, 4)


Unnamed: 0,date,state,rtk-ag,pcr
1501,2021-10-02,W.P. Kuala Lumpur,5805,3133
1502,2021-10-02,W.P. Labuan,344,174
1503,2021-10-02,W.P. Putrajaya,141,305


In [21]:
tests_state[tests_state.duplicated()]

Unnamed: 0,date,state,rtk-ag,pcr


In [22]:
tests_state[checkNaN(tests_state)]

date      0
state     0
rtk-ag    0
pcr       0
dtype: int64 



Unnamed: 0,date,state,rtk-ag,pcr


## deaths_malaysia.csv

In [23]:
basicInfo(deaths_malaysia)

Shape: (568, 8)


Unnamed: 0,date,deaths_new,deaths_bid,deaths_new_dod,deaths_bid_dod,deaths_pvax,deaths_fvax,deaths_tat
565,2021-10-03,118,27,44,3,6,22,3
566,2021-10-04,76,15,19,0,5,6,2
567,2021-10-05,117,16,3,0,0,2,3


In [24]:
deaths_malaysia[deaths_malaysia.duplicated()]

Unnamed: 0,date,deaths_new,deaths_bid,deaths_new_dod,deaths_bid_dod,deaths_pvax,deaths_fvax,deaths_tat


In [25]:
deaths_malaysia[checkNaN(deaths_malaysia)]

date              0
deaths_new        0
deaths_bid        0
deaths_new_dod    0
deaths_bid_dod    0
deaths_pvax       0
deaths_fvax       0
deaths_tat        0
dtype: int64 



Unnamed: 0,date,deaths_new,deaths_bid,deaths_new_dod,deaths_bid_dod,deaths_pvax,deaths_fvax,deaths_tat


## deaths_state.csv

In [26]:
basicInfo(deaths_state)

Shape: (9088, 9)


Unnamed: 0,date,state,deaths_new,deaths_bid,deaths_new_dod,deaths_bid_dod,deaths_pvax,deaths_fvax,deaths_tat
9085,2021-10-05,W.P. Kuala Lumpur,4,1,0,0,0,0,2
9086,2021-10-05,W.P. Labuan,0,0,0,0,0,0,0
9087,2021-10-05,W.P. Putrajaya,0,0,0,0,0,0,0


In [27]:
deaths_state[deaths_state.duplicated()]

Unnamed: 0,date,state,deaths_new,deaths_bid,deaths_new_dod,deaths_bid_dod,deaths_pvax,deaths_fvax,deaths_tat


In [28]:
deaths_state[checkNaN(deaths_state)]

date              0
state             0
deaths_new        0
deaths_bid        0
deaths_new_dod    0
deaths_bid_dod    0
deaths_pvax       0
deaths_fvax       0
deaths_tat        0
dtype: int64 



Unnamed: 0,date,state,deaths_new,deaths_bid,deaths_new_dod,deaths_bid_dod,deaths_pvax,deaths_fvax,deaths_tat


# <span style="color:blue">Multidimensional Data Views</span>

Datetime manipulation
https://www.w3schools.com/python/python_datetime.asp
or
https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

In [29]:
# add a 'month' column by using 'date'
def add_month(df):
    df['month'] = df['date'].str[:-3]
    return df

# group data by months and count
def count_by_months(df):
    count_df = df.groupby(['month']).count()
    count_df.reset_index(inplace = True)
    return count_df

# group data by months and get mean
def avg_by_months(df):
    avg_df = df.groupby(['month']).mean()
    avg_df.reset_index(inplace = True)
    return avg_df

### Pickle the Datasets

Pickle all the cleaned data frames for later use.

The cleaned dataset has the information below.

| Dataset         | Start date | End date   | Number of rows |
| --------------- | ---------- | ---------- | -------------- |
| cases_malaysia  | 2020-01-25 | 2021-10-05 | 620            |
| cases_malaysia_cluster | 2021-01-01 | 2021-10-05 | 278     |
| cases_state     | 2020-01-25 | 2021-10-05 | 9920           |
| clusters        | 2020-03-01 | 2021-10-05 | 5507           |
| tests_malaysia  | 2020-01-24 | 2021-10-02 | 618            |
| tests_state     | 2021-07-01 | 2021-10-02 | 1504           |
| deaths_malaysia | 2020-03-17 | 2021-10-05 | 568            |
| deaths_state    | 2020-03-17 | 2021-10-05 | 9088           |
| hospital        | 2020-03-24 | 2021-09-11 |            |
| icu             | 2020-03-24 | 2021-09-11 |            |
| pkrc            | 2020-03-28 | 2021-09-11 |            |

In [30]:
# pickle.dump(datasetName, open('pickle_files/datasetName.pkl', 'wb'))
pickle.dump(cases_malaysia, open('pickle_files/cases_malaysia.pkl', 'wb'))
pickle.dump(cases_malaysia_cluster, open('pickle_files/cases_malaysia_cluster.pkl', 'wb'))
pickle.dump(cases_state, open('pickle_files/cases_state.pkl', 'wb'))
pickle.dump(tests_malaysia, open('pickle_files/tests_malaysia.pkl', 'wb'))
pickle.dump(tests_state, open('pickle_files/tests_state.pkl', 'wb'))
pickle.dump(deaths_malaysia, open('pickle_files/deaths_malaysia.pkl', 'wb'))
pickle.dump(deaths_state, open('pickle_files/deaths_state.pkl', 'wb'))

# Documentation for epidemic datasets

## File naming convention

| Filename | Naming convention | Update frequency |
| :--- | :---: | :---: |
| cases_malaysia.csv | Static name | Daily by 2359 (for T-0) |
| cases_state.csv | Static name | Daily by 2359 (for T-0) |
| deaths_malaysia.csv | Static name | Daily by 2359 (for T-0) |
| deaths_state.csv | Static name | Daily by 2359 (for T-0) |
| clusters.csv | Static name | Daily by 2359 (for T-1) |
| pkrc.csv| Static name |  Daily by 2359 (for T-0) |
| hospital.csv | Static name |  Daily by 2359 (for T-0) |
| icu.csv | Static name |  Daily by 2359 (for T-0) |
| tests_malaysia.csv | Static name | At least twice weekly |
| tests_state.csv | Static name | At least twice weekly |

## Variables and Methodology

### Cases and Testing

1) `date`: yyyy-mm-dd format; data correct as of 1200hrs on that date<br>
2) `state`: name of state (present in state file, but not country file)<br>
3) `cases_new`: cases reported in the 24h since the last report<br>
4) `cases_import`: imported cases reported in the 24h since the last report<br>
5) `cases_active`: Covid+ individuals who have not recovered or died<br>
6) `cases_recovered` recovered cases reported in the 24h since the last report<br>
7) `cases_cluster`: number of cases attributable to clusters; the difference between `cases_new` and the sum of cases attributable to clusters is the number of sporadic cases<br>
8) `cluster_x`: cases attributable to clusters under category `x`; possible values for `x` are import, religious, community, highRisk, education, detentionCentre, and workplace<br>
9) `cases_agecat`: cases falling into one of 4 age categories, i.e. child (0-11), adolescent (12-17), adult (18-59), elderly (60+); note that the sum of cases by age may not equal the total cases for that day, as some cases are registered without ages or with unverifiable age data<br> 
10) `cases_pvax`: number of partially-vaccinated individuals who tested positive for Covid (perfect subset of `cases_new`), where "partially vaccinated" is defined as receiving at least 1 dose of a 2-dose vaccine at least 1 day prior to testing positive, or receiving the Cansino vaccine between 1-27 days before testing positive<br>
11) `cases_fvax`: number of fully-vaccinated who tested positive for Covid (perfect subset of `cases_new`), where "fully vaccinated" is defined as receiving the 2nd dose of a 2-dose vaccine at least 14 days prior to testing positive, or receiving the Cansino vaccine at least 28 days before testing positive<br>
12) `rtk-ag`: number of tests done using Antigen Rapid Test Kits (RTK-Ag)<br>
13) `pcr`: number of tests done using Real-time Reverse Transcription Polymerase Chain Reaction (RT-PCR) technology<br>

### Deaths

1) `date`: yyyy-mm-dd format; data correct as of 1200hrs on that date<br>
2) `state`: name of state (present in state file, but not country file)<br>
3) `deaths_new`: deaths due to COVID-19 based on **date reported to public**<br>
4) `deaths_bid`: deaths due to COVID-19 which were brought-in dead based on **date reported to public** (perfect subset of `deaths_new`)<br>
5) `deaths_new_dod`: deaths due to COVID-19 based on **date of death**<br>
6) `deaths_bid_dod`: deaths due to COVID-19 which were brought-in dead based on **date of death** (perfect subset of `deaths_new_dod`)<br>
7) `deaths_pvax`: number of partially-vaccinated individuals who died due to COVID-19 based on **date of death** (perfect subset of `deaths_new_dod`), where "partially vaccinated" is defined as receiving at least 1 dose of a 2-dose vaccine at least 1 day prior to testing positive, or receiving the Cansino vaccine between 1-27 days before testing positive.<br>
8) `deaths_fvax`: number of fully-vaccinated who died due to COVID-19 based on **date of death** (perfect subset of `deaths_new_dod`), where "fully vaccinated" is defined as receiving the 2nd dose of a 2-dose vaccine at least 14 days prior to testing positive, or receiving the Cansino vaccine at least 28 days before testing positive.<br>
9) `deaths_tat`: median days between date of death and date of report for all deaths reported on the day<br>

### Cluster analysis

1) `cluster`: unique textual identifier of cluster; nomenclature does not necessarily signify address<br>
2) `state` and `district`: geographical epicentre of cluster, if localised; inter-district and inter-state clusters are possible and present in the dataset<br>
3) `date_announced`: date of declaration as cluster<br>
4) `date_last_onset`: most recent date of onset of symptoms for individuals within the cluster. note that this is distinct from the date on which said individual was tested, and the date on which their test result was received; consequently, today's date may not necessarily be present in this column.<br>
5) `category`: classification as per variable `cluster_x` above<br>
6) `status`: active or ended<br>
7) `cases_new`: number of new cases detected within cluster in the 24h since the last report<br>
8) `cases_total`: total number of cases traced to cluster<br>
9) `cases_active`: active cases within cluster<br>
10) `tests`: number of tests carried out on individuals within the cluster; denominator for computing a cluster's current positivity rate<br>
11) `icu`: number of individuals within the cluster currently under intensive care<br>
12) `deaths`: number of individuals within the cluster who passed away due to COVID-19<br>
13) `recovered`: number of individuals within the cluster who tested positive for and subsequently recovered from COVID-19<br>


### Healthcare 

_The datasets below have been constructed to provide 3 kinds of insight. First, the inflow and outflow of patients from quarantine centres, hospitals, and intensive care is, without any further scaling or context, critical to monitor - especially when clear divergences between infections and healthcare outcomes start to be observed (e.g. due to vaccination). Second, comparing against available capacity (number of beds, intensive care units, ventilators) allows for understanding of the strain exerted by the epidemic on the healthcare system. Third, the inclusion of datapoints on non-Covid patients demonstrates the interactions between the epidemic and broader health outcomes._

### PKRC (COVID-19 Quarantine and Treatment Centre)

1) `date`: yyyy-mm-dd format; data correct as of 2359hrs on that date<br>
2) `state`: name of state; note that (unlike with other datasets), it is not necessary that there be an observation for every state on every date. for instance, there are no PKRCs in W.P. Kuala Lumpur and W.P Putrajaya.<br>
3) `beds`: total PKRC beds (with related medical infrastructure)<br>
4) `admitted_x`: number of individuals in category `x` admitted to PKRCs, where `x` can be suspected/probable, COVID-19 positive, or non-COVID<br>
5) `discharged_x`: number of individuals in category `x` discharged from PKRCs<br>
6) `pkrc_x`: total number of individuals in category `x` in PKRCs; this is a stock variable altered by flows from admissions and discharges<br>


### Hospital

1) `date`: yyyy-mm-dd format; data correct as of 2359hrs on that date
2) `state`: name of state, with similar qualification on exhaustiveness of date-state combos as PKRC data
3) `beds`: total hospital beds (with related medical infrastructure)
3) `beds_covid`: total beds dedicated for COVID-19
4) `beds_noncrit`: total hospital beds for non-critical care
5) `admitted_x`: number of individuals in category `x` admitted to hospitals, where `x` can be suspected/probable, COVID-19 positive, or non-COVID
6) `discharged_x`: number of individuals in category `x` discharged from hospitals
7) `hosp_x`: total number of individuals in category `x` in hospitals; this is a stock variable altered by flows from admissions and discharges


### ICU

1) `date`: yyyy-mm-dd format; data correct as of 2359hrs on that date<br>
2) `state`: name of state, with similar qualification on exhaustiveness of date-state combos as PKRC data<br>
3) `beds_icu`: total gazetted ICU beds<br>
4) `beds_icu_rep`: total beds aside from (3) which are temporarily or permanently designated to be under the care of Anaesthesiology & Critical Care departments<br>
5) `beds_icu_total`: total critical care beds available (with related medical infrastructure)<br>
6) `beds_icu_covid`: total critical care beds dedicated for COVID-19<br>
7) `vent`: total available ventilators<br>
8) `vent_port`: total available portable ventilators<br>
9) `icu_x`: total number of individuals in category `x` under intensive care, where `x` can be  suspected/probable, COVID-19 positive, or non-COVID; this is a stock variable<br>
10) `vent_x`: total number of individuals in category `x` on mechanical ventilation, where `x` can be suspected/probable, COVID-19 positive, or non-COVID; this is a stock variable<br>