In [None]:
## Libraries 
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
## Overview of the dataset: its dimensions, then how many distinct countries
## it covers and which ones.
## NOTE(review): `data` is not created in any of the visible cells; it must
## already be loaded in the kernel before this cell runs.

## A single print per group; sep='\n' puts each item on its own line and the
## trailing '\n' item reproduces the blank spacer lines.
print('Number of rows and columns', data.shape, '\n', sep='\n')

print('Number and list of countries',
      data['slider_country'].nunique(),
      data['slider_country'].unique(),
      '\n',
      sep='\n')


In [None]:
## Keep adults only: rows with a known age of 18 or more.
## Rows with missing age and rows for children are counted first, then dropped.
## This cell reassigns `data`, so every later cell sees the filtered frame.

age_values = data['age']
age_is_known = age_values.notnull()

print('Number of observations with missing age data')
print(data.loc[~age_is_known].shape)

print('Number of observations for children')
print(data.loc[age_is_known & (age_values < 18)].shape)

## Masks above were built on the unfiltered frame; apply the adult filter and
## renumber the rows from 0.
data = data.loc[age_is_known & (age_values >= 18)].copy().reset_index(drop=True)

print('Number of observations after removing those younger than 18 years')
print(len(data))


In [None]:
## Keep only patients admitted on or after 1 March 2021 (2021-03-01).
## Rows with a missing admission date are dropped as well.
## This cell reassigns `data`.

admission_cutoff = pd.to_datetime('2021-03-01')
admit_date = data['date_admit_TIME']
admit_is_known = admit_date.notnull()

print('Observations with admission before March 2021')
print(data.loc[(admit_date < admission_cutoff) & admit_is_known].shape)
print('\n')

print('Creating dataset with patients admitted on or after March 2021')
data = data.loc[(admit_date >= admission_cutoff) & admit_is_known].copy().reset_index(drop=True)

print(data.shape)


In [None]:
## Keep only patients with usable vaccination information: the value of
## 'vaccination_ever' must be present and must not be 'Unknown'.
## This cell reassigns `data`.

vacc_status = data['vaccination_ever']

print('Variable vaccination_ever')
print(vacc_status.value_counts())
print('\n')

has_vacc_info = vacc_status.notnull() & (vacc_status != 'Unknown')
data = data.loc[has_vacc_info].copy().reset_index(drop=True)

print(data.shape)

In [None]:
## Country x vaccination status contingency table (counts of observations).

results_table = pd.crosstab(index=data['slider_country'],
                            columns=data['vaccination_ever'])


In [None]:
## Country x ICU admission contingency table (counts of observations).

ICU_distribution = pd.crosstab(index=data['slider_country'],
                               columns=data['slider_icu_ever'])


In [None]:
## Distribution of observations by SARS-CoV-2 variant-defined epidemiological period.
## Each '_period' variable is coded as 0 and 1.
## For every period: the counts per value, then the same counts divided by the
## total number of rows in `data`.

total_observations = data.shape[0]

for period_column in ('Delta_period', 'Omicron_period', 'Alpha_period'):
    period_counts = data[period_column].value_counts()
    print(period_counts)
    print(period_counts / total_observations)
    print('\n')


In [None]:
## Vaccination variable: overall counts, then counts broken down by
## calendar year of admission.

print('Variable vaccination_ever')
vaccination_counts = data['vaccination_ever'].value_counts()
print(vaccination_counts)
print('\n\n')

vaccination_by_year = pd.crosstab(index=data['vaccination_ever'],
                                  columns=data['calendar.year.admit'])
print(vaccination_by_year)
print('\n\n')

In [None]:
## Distribution of vaccine types for the first dose, leaving out rows whose
## type is recorded as 'Unknown vaccine'.

first_dose_type = data['type_dose_1']
print(first_dose_type[first_dose_type != 'Unknown vaccine'].value_counts())


In [None]:
## Age comparison: median, 25th and 75th percentile of age (printed in that
## order), first for vaccinated ('Yes') and then for unvaccinated ('No') patients.

for vacc_value in ('Yes', 'No'):
    group_ages = data.loc[data['vaccination_ever'] == vacc_value, 'age']
    print(np.percentile(group_ages, [50, 25, 75]))

In [None]:
## Country-specific age table.
## For every country with at least 100 observations, one row holding the age
## distribution of vaccinated and unvaccinated patients, each formatted as
## 'median (25th percentile - 75th percentile)' with values truncated to integers.
##
## Rows are collected in a list and the frame is built once at the end, rather
## than concatenating onto the frame inside the loop; the result therefore has a
## regular 0..n-1 index instead of every row carrying index label 0.

min_obs_per_country = 100
country_age_rows = []

for country in sorted(data['slider_country'].unique()):
    temp_data = data.loc[data['slider_country'] == country, : ]

    if temp_data.shape[0] < min_obs_per_country:
        continue

    row = {'country' : country}
    for column_label, vacc_value in (('Vaccinated', 'Yes'), ('Unvaccinated', 'No')):
        group_ages = temp_data.loc[temp_data['vaccination_ever'] == vacc_value, 'age']

        if group_ages.empty:
            ## A country can pass the size threshold and still have nobody in
            ## one of the two groups; percentiles of an empty selection cannot
            ## be converted to integers, so the entry is recorded as missing.
            row[column_label] = np.nan
        else:
            q50, q25, q75 = np.percentile(group_ages, [50, 25, 75])
            row[column_label] = '{} ({} - {})'.format(int(q50), int(q25), int(q75))

    country_age_rows.append(row)

## Explicit column list keeps the three columns even when no country qualifies.
Data_country_age = pd.DataFrame(country_age_rows,
                                columns=['country', 'Vaccinated', 'Unvaccinated'])

In [None]:
## Frequency of at least one symptom.
## list_symptoms_overall is a list that contains all variables that represent
## symptoms in the database (it is defined outside the visible cells).

print(len(list_symptoms_overall))

## Two per-row counters over the symptom columns:
##   nb_symptom_info - number of symptom variables with non-missing information
##   nb_symptom_pos  - row-wise sum of the symptom variables
##                     (assumes they are coded 0/1, so this counts the 1s - TODO confirm)
symptom_values = data[list_symptoms_overall]
data['nb_symptom_info'] = symptom_values.notna().sum(axis=1)
data['nb_symptom_pos'] = symptom_values.sum(axis=1)

check_list_s = list_symptoms_overall + ['nb_symptom_info', 'nb_symptom_pos']

## Visual check: show the symptom columns next to the two new counters.
data[check_list_s]

In [None]:
## Per-row counters over the comorbidity variables in list_comorb_overall
## (a list defined outside the visible cells):
##   nb_common_comorb_info - number of comorbidity variables with non-missing information
##   nb_common_comorb_pos  - row-wise sum of the comorbidity variables
##                           (assumes they are coded 0/1, so this counts the 1s - TODO confirm)

print(len(list_comorb_overall))

comorb_values = data[list_comorb_overall]
data['nb_common_comorb_info'] = comorb_values.notna().sum(axis=1)
data['nb_common_comorb_pos'] = comorb_values.sum(axis=1)

check_list_c = list_comorb_overall + ['nb_common_comorb_info', 'nb_common_comorb_pos']

## Visual check that the new variables contain the right information.
data[check_list_c]

In [None]:
## Frequency of non-missing comorbidity information (by vaccination status).
## Prints the overall number of rows with at least one non-missing comorbidity
## variable; then, for vaccinated and for unvaccinated patients, the group size
## followed by the number of rows in the group with such information.

has_comorb_info = data['nb_common_comorb_info'] > 0

print(len(data.loc[has_comorb_info]))
print('\n')

for vacc_value in ('Yes', 'No'):
    in_group = data['vaccination_ever'] == vacc_value
    print(len(data.loc[in_group]))
    print(len(data.loc[in_group & has_comorb_info]))
    print('\n')

In [None]:
## Number of comorbidities present (by vaccination status): median, 25th and
## 75th percentile of nb_common_comorb_pos, restricted to rows with at least
## one non-missing comorbidity variable. Vaccinated first, then unvaccinated.

comorb_info_available = data['nb_common_comorb_info'] > 0

for vacc_value in ('Yes', 'No'):
    selected_rows = (data['vaccination_ever'] == vacc_value) & comorb_info_available
    print(np.percentile(data.loc[selected_rows, 'nb_common_comorb_pos'], [50, 25, 75]))

In [None]:
## Indicator for the presence of 3 or more comorbidities.
## The indicator is only set for rows with at least 10 non-missing comorbidity
## variables; all other rows stay NaN.
## NOTE(review): the reason for the threshold of 10 is not visible here - confirm.

enough_comorb_info = data['nb_common_comorb_info'] >= 10
fewer_than_three = data['nb_common_comorb_pos'] < 3
three_or_more = data['nb_common_comorb_pos'] >= 3

data['three_comorb'] = np.nan
data.loc[enough_comorb_info & fewer_than_three, 'three_comorb'] = 0
data.loc[enough_comorb_info & three_or_more, 'three_comorb'] = 1

In [None]:
## Frequency of three or more comorbidities: overall, then for vaccinated
## patients, then for unvaccinated patients.

print(data['three_comorb'].value_counts())
print('\n')

for vacc_value in ('Yes', 'No'):
    in_group = data['vaccination_ever'] == vacc_value
    print(data.loc[in_group, 'three_comorb'].value_counts())
    print('\n')


In [None]:
## Frequency of three or more comorbidities by vaccination status and age category.
## Output order: age_cat 0 (vaccinated, unvaccinated), a wider gap, then
## age_cat 1 (vaccinated, unvaccinated).
## NOTE(review): 'age_cat' is not created in the visible cells - confirm its coding.

for age_category in (0, 1):
    for vacc_value in ('Yes', 'No'):
        in_stratum = (data['vaccination_ever'] == vacc_value) & (data['age_cat'] == age_category)
        print(data.loc[in_stratum, 'three_comorb'].value_counts())
        print('\n')

    if age_category == 0:
        ## Extra spacing between the two age categories.
        print('\n\n')


In [None]:
## Frequency of clinical outcome ('outcome_28_num') by vaccination status:
## vaccinated patients first, then unvaccinated patients.

for vacc_value in ('Yes', 'No'):
    group_outcomes = data.loc[data['vaccination_ever'] == vacc_value, 'outcome_28_num']
    print(group_outcomes.value_counts())
    print('\n')

In [None]:
## Frequency of outcome by vaccination status and age category.
## The four masks combine age_cat (0 -> 'young', 1 -> 'old', following the mask
## names) with vaccination status ('Yes' -> vacc, 'No' -> unvacc).
## NOTE(review): the age cut-off behind 'age_cat' is not visible here - confirm.

is_young = data['age_cat'] == 0
is_old = data['age_cat'] == 1
is_vaccinated = data['vaccination_ever'] == 'Yes'
is_unvaccinated = data['vaccination_ever'] == 'No'

mask_young_vacc = is_young & is_vaccinated
mask_young_unvacc = is_young & is_unvaccinated

mask_old_vacc = is_old & is_vaccinated
mask_old_unvacc = is_old & is_unvaccinated

## Print each stratum under the name of its mask.
labelled_masks = (
    ('mask_young_vacc', mask_young_vacc),
    ('mask_young_unvacc', mask_young_unvacc),
    ('mask_old_vacc', mask_old_vacc),
    ('mask_old_unvacc', mask_old_unvacc),
)

for mask_label, stratum_mask in labelled_masks:
    print(mask_label)
    print(data.loc[stratum_mask, 'outcome_28_num'].value_counts())
    print('\n')

In [None]:
## Country-specific frequency of clinical outcome: one row per country, one
## column per value of 'outcome_28_num'. The last line displays the table.
Data_outcome_country = pd.DataFrame(
    pd.crosstab(index=data['slider_country'], columns=data['outcome_28_num'])
)
Data_outcome_country
