In [299]:
import pandas as pd
import numpy as np
from tableone import TableOne

In [300]:
df = pd.read_csv('../../data/merged_processed_df_team6.csv')
df

Unnamed: 0,subject_id,hadm_id,stay_id,gender,dod,admittime,dischtime,los_hospital,admission_age,white_race,...,mortality_in,mortality_90,disposition,white_race.1,los_icu_survivors,los_icu_expired,anchor_age,charlson_comorbidity_index,discharge_location,SOFA
0,16124682,24817331,35315105,M,2183-11-15,2183-10-19 16:38:00,2183-11-15 18:38:00,27,72,White,...,1,1,Death or hospice,White,,27.17,72,6,DIED,5
1,16277979,21413780,30514767,M,,2127-02-09 20:58:00,2127-02-19 20:04:00,10,69,White,...,0,0,Skilled nursing facility,White,3.79,,68,4,SKILLED NURSING FACILITY,6
2,17172316,26428234,39442549,M,2144-02-08,2143-12-26 21:09:00,2144-02-08 11:30:00,44,54,White,...,1,1,Death or hospice,White,,15.04,54,1,DIED,1
3,12388732,26592796,35565911,M,,2185-12-08 13:13:00,2186-01-01 14:00:00,24,68,White,...,0,0,Rehab,White,11.88,,68,9,REHAB,4
4,11402910,23375177,36422142,F,2129-09-12,2129-08-28 17:57:00,2129-09-12 14:10:00,15,60,White,...,1,1,Death or hospice,White,,11.17,60,5,DIED,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5108,18642064,25378793,33175269,M,,2167-11-17 12:19:00,2167-11-30 12:57:00,13,68,Non-white,...,0,0,Home or equivalent,Non-white,7.38,,64,4,HOME,7
5109,14539351,21502552,34379501,F,,2132-01-12 10:36:00,2132-01-24 14:50:00,12,65,White,...,0,0,Rehab,White,7.92,,65,5,REHAB,6
5110,19893454,28167991,37119620,F,,2163-04-14 00:00:00,2163-04-30 11:57:00,16,42,White,...,0,0,Home or equivalent,White,4.63,,42,1,HOME,8
5111,16164098,23348948,37406379,F,2138-06-04,2138-05-21 07:50:00,2138-06-04 15:55:00,14,82,Non-white,...,1,1,Death or hospice,Non-white,,8.29,82,5,DIED,11


In [301]:
df.shape

(5113, 35)

In [302]:
df.columns

Index(['subject_id', 'hadm_id', 'stay_id', 'gender', 'dod', 'admittime',
       'dischtime', 'los_hospital', 'admission_age', 'white_race',
       'hospital_expire_flag', 'hospstay_seq', 'first_hosp_stay', 'icu_intime',
       'icu_outtime', 'los_icu', 'icustay_seq', 'first_icu_stay', 'race_group',
       'sex_female', 'los_icu_1', 'language', 'eng_prof', 'insurance',
       'private_insurance', 'mortality_in', 'mortality_90', 'disposition',
       'white_race.1', 'los_icu_survivors', 'los_icu_expired', 'anchor_age',
       'charlson_comorbidity_index', 'discharge_location', 'SOFA'],
      dtype='object')

In [303]:
df['SOFA'].value_counts()

SOFA
7     499
8     496
5     493
6     473
9     471
4     442
10    393
11    342
12    258
3     236
1     208
2     198
13    197
14    132
15     97
16     70
0      30
17     29
18     25
19     12
20      7
21      3
23      1
22      1
Name: count, dtype: int64

In [304]:
df['charlson_comorbidity_index'].value_counts()

charlson_comorbidity_index
5     705
4     689
6     649
3     593
7     533
8     408
2     394
1     309
9     286
0     238
10    164
11     75
12     40
13     10
14      9
15      7
19      2
16      1
17      1
Name: count, dtype: int64

In [305]:
df['mortality_in'].value_counts()

mortality_in
0    3761
1    1352
Name: count, dtype: int64

In [306]:
df['mortality_90'].value_counts()

mortality_90
0    3347
1    1766
Name: count, dtype: int64

In [307]:
df['los_icu'].median()

9.0

In [308]:
# list of columns to be included in Table 1
columns_to_include = ['gender', 'admission_age','white_race','race_group',
       'eng_prof', 'private_insurance',
       'disposition',
       'SOFA', 'charlson_comorbidity_index', 'los_icu',
       'los_icu_survivors', 'los_icu_expired', 
       'mortality_90', 'mortality_in'    
       ]

In [309]:
df['race_group'].value_counts()

race_group
White       3479
Other        937
Black        408
Hispanic     177
Asian        112
Name: count, dtype: int64

In [310]:
df['white_race'].value_counts()

white_race
White        3479
Non-white     408
Name: count, dtype: int64

In [314]:
# list of columns containing categorical variables
categorical = ['gender', 
       'race_group',
       'eng_prof', 'private_insurance', 
       'disposition',
       # 'mechanical_pressor',
       'mortality_90', 'mortality_in']

In [315]:
# categorical variable for stratification
groupby = ['white_race']

In [316]:
# list of non-normal variables
nonnormal = ['admission_age', 'SOFA', 'charlson_comorbidity_index', 'los_icu',
              'los_icu_survivors', 'los_icu_expired', ]

In [317]:
# dictionary of alternative labels:
labels={'disposition': 'discharge disposition',
       'eng_prof': 'english proficiency', 'first_code': 'first ICD code',
       'los_icu': 'length of stay in ICU', 
        'los_icu_survivors': 'length of stay in ICU for survivors',
        'los_icu_expired': 'length of stay in ICU for expired',
       'mortality_90': 'mortality in 90 days',
       'mortality_in': 'mortality in hospital'}

In [318]:
# Create an instance of TableOne with the input arguments
mytable = TableOne(df, columns=columns_to_include, categorical=categorical, groupby=groupby, nonnormal=nonnormal, rename=labels, pval=False)


  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,


In [319]:
# Display the table using the tabulate method. 
print(mytable.tabulate(tablefmt = "fancy_grid"))

╒═════════════════════════════════════════════════════╤══════════════════════════════╤═══════════╤══════════════════╤══════════════════╤══════════════════╕
│                                                     │                              │ Missing   │ Overall          │ Non-white        │ White            │
╞═════════════════════════════════════════════════════╪══════════════════════════════╪═══════════╪══════════════════╪══════════════════╪══════════════════╡
│ n                                                   │                              │           │ 5113             │ 408              │ 3479             │
├─────────────────────────────────────────────────────┼──────────────────────────────┼───────────┼──────────────────┼──────────────────┼──────────────────┤
│ gender, n (%)                                       │ F                            │ 0         │ 2051 (40.1)      │ 221 (54.2)       │ 1381 (39.7)      │
├─────────────────────────────────────────────────────┼─────────

In [320]:
mytable.to_excel('../../results/table1_by_race.xlsx')

In [321]:
mytable.to_html('../../results/table1_by_race.html')