## Identifying a Proteomic Signature of Severe COVID-19
### Cohort Overview
#### C.V. Cosgriff, MD, MPH

### 0- Environment

In [1]:
import pandas as pd
import numpy as np

from tableone import TableOne

### 1- Features and Outcomes

Load MESSI proteomics dataset.

In [2]:
# read the MESSI COVID proteomics table into the working dataframe
messi_csv = '../data/MESSI-COVID-Proteomics_final-dat.csv'
df = pd.read_csv(messi_csv)

Generate the outcome variables: ordinal severity at each time point, binary severity indicators, and 90-day mortality.

In [3]:
# ordinal scores at or below this value are flagged in the binary severity columns
SEVERE_ORDINAL_MAX = 3


def severity_flag(ordinal, cutoff=SEVERE_ORDINAL_MAX):
    """Flag ordinal scores at or below `cutoff`, keeping missing scores missing.

    Comparing an ordered categorical against a scalar yields False wherever
    the score is missing, which would silently file patients without a
    recorded score under the "not flagged" group. The comparison result is
    therefore masked so that those rows come back as NaN instead.

    Parameters
    ----------
    ordinal : pandas.Series
        Ordered-categorical ordinal scores (may contain missing values).
    cutoff : scalar, default SEVERE_ORDINAL_MAX
        Largest score that is still flagged True.

    Returns
    -------
    pandas.Series
        True/False where a score is present, NaN where it is missing. When
        no score is missing the result is the plain comparison result.
    """
    flagged = ordinal <= cutoff
    return flagged.where(ordinal.notna())


# ordinal severity: one ordered categorical per assessment time point
nihord_d00 = pd.Categorical(df.ordinal_enrollment, ordered=True)
nihord_d07 = pd.Categorical(df.ordinal_d7, ordered=True)
nihord_d14 = pd.Categorical(df.ordinal_d14, ordered=True)
nihord_d28 = pd.Categorical(df.ordinal_d28, ordered=True)

df = df.assign(nihord_d00=nihord_d00, nihord_d07=nihord_d07,
               nihord_d14=nihord_d14, nihord_d28=nihord_d28)

# binary severity: True when the ordinal score is <= SEVERE_ORDINAL_MAX,
# NaN (not False) when the score was not recorded at that time point
mod_sev_d0 = severity_flag(df.nihord_d00)
mod_sev_d7 = severity_flag(df.nihord_d07)
mod_sev_d14 = severity_flag(df.nihord_d14)
mod_sev_d28 = severity_flag(df.nihord_d28)

df = df.assign(mod_sev_d0=mod_sev_d0, mod_sev_d7=mod_sev_d7,
               mod_sev_d14=mod_sev_d14, mod_sev_d28=mod_sev_d28)

# mortality: 1 = 'Dead', 0 = 'Alive'; 'Unknown' (and any value absent from
# the map) becomes NaN
mortality_map = {'Alive' : 0, 'Dead' : 1, 'Unknown' : np.nan}
mort90_cons = df.mortality_90d.map(mortality_map)
df = df.assign(mort90_cons=mort90_cons)

Process demographic features.

In [4]:
# sex indicator: 1 where sex is 'Male', 0 for every other value
# (a missing sex also compares unequal to 'Male' and is therefore coded 0)
male_sex = df['sex'].eq('Male').astype(int)
df = df.assign(male_sex=male_sex)

# show how many participants fall in each self-reported race category
print(df['race'].value_counts())

# binary race indicator: 1 where race is 'Black', 0 otherwise
# NOTE(review): the original comment calls Black/AA the reference level
# "given size", yet the indicator is 1 for Black -- confirm intended coding
race_BAA = df['race'].eq('Black')
df = df.assign(race_BAA=race_BAA.astype(int))

# co-morbidity indicator: 1 when the sum of the four history flags
# (mhcad, mhhtn, mhdm, mhcri) is positive, else 0
# NOTE(review): a missing flag makes the sum NaN, which is not > 0, so such
# rows are coded 0 -- confirm this is acceptable
cmdz = df['mhcad'].add(df['mhhtn']).add(df['mhdm']).add(df['mhcri'])
mhcmdz = cmdz.gt(0).astype(int)
df = df.assign(mhcmdz=mhcmdz)

Black             112
White              46
Asian               7
Other               1
Pacific Island      1
Name: race, dtype: int64


### 2- Table One

In [5]:
# features shown in Table 1 (including the grouping column) and their
# 'prettier' display labels
columns = ['age', 'sex', 'race', 'ethnicity', 'mhcmdz', 'early_steroids',
           'ordinal_enrollment', 'apacheIII', 'vasoactives_yn', 'mortality_90d',
           'mod_sev_d0']

# keys must match entries of `columns` exactly, otherwise the label is not
# applied (the age column is named 'age', not 'age_calculated')
labels = {'age' : 'Age', 'sex' : 'Male Sex', 'race' : 'Race',
          'ethnicity' : 'Hispanic', 'ordinal_enrollment' : 'WHO Ordinal Enrollment',
          'mhcmdz' : 'Cardiometabolic Co-morbidity', 'apacheIII' : 'APACHE III',
          'early_steroids' : 'Steroids Before First Blood Draw', 'mortality_90d' : '90-day Mortality',
          'vasoactives_yn' : 'Vasopressors'}

# demarcate the categorical variables; the remaining columns are treated as numerical
categorical = ['sex', 'race', 'ethnicity', 'mhcmdz', 'early_steroids', 'vasoactives_yn',
               'ordinal_enrollment', 'mortality_90d']

# delineate the grouping variable
groupby = ['mod_sev_d0']

# readable group names for the enrollment severity flag
remap = {True : 'Severe', False : 'Non-severe'}

# relabel the grouping column on a copy so `df` itself keeps the original flag
df_temp = df.assign(mod_sev_d0=df.mod_sev_d0.map(remap))

# generate and display the table, stratified by enrollment severity, without p-values
cohort_overview = TableOne(df_temp.loc[:, columns], columns=columns, categorical=categorical,
                           rename=labels, groupby=groupby, pval=False)
display(cohort_overview)

  df['percent'] = df['freq'].div(df.freq.sum(level=0),


Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by mod_sev_d0,Grouped by mod_sev_d0,Grouped by mod_sev_d0,Grouped by mod_sev_d0
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,Non-severe,Severe
n,,,167,76,91
"age, mean (SD)",,0.0,58.9 (14.6),57.1 (15.6),60.4 (13.6)
"Male Sex, n (%)",Female,0.0,76 (45.5),36 (47.4),40 (44.0)
"Male Sex, n (%)",Male,,91 (54.5),40 (52.6),51 (56.0)
"Race, n (%)",Asian,0.0,7 (4.2),2 (2.6),5 (5.5)
"Race, n (%)",Black,,112 (67.1),56 (73.7),56 (61.5)
"Race, n (%)",Other,,1 (0.6),,1 (1.1)
"Race, n (%)",Pacific Island,,1 (0.6),,1 (1.1)
"Race, n (%)",White,,46 (27.5),18 (23.7),28 (30.8)
"Hispanic, n (%)",Hispanic,0.0,12 (7.2),5 (6.6),7 (7.7)


### 3- Output Updated Dataset for Pathway Analysis

In [6]:
# save the dataframe, including the columns derived in this notebook, for the
# pathway analysis
# NOTE(review): `index` is left at its default, so the row index is written
# as an unnamed first column -- confirm the downstream reader expects that
processed_csv = '../data/MESSI-COVID-Proteomics_final-dat_processed.csv'
df.to_csv(processed_csv)