# Demo for the 2018 BHI & BSN Data Challenge

In [1]:
# Import libraries
from tableone import TableOne
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import psycopg2
import getpass
%matplotlib inline

In [2]:
# Connection settings for the PostgreSQL database queried in this notebook.
# Nothing is opened here; the connection itself is made in the next cell.
dbname = 'mimic'
host = 'localhost'
user = 'postgres'
# Comma-separated schema list that is substituted into `SET search_path`.
schema = 'public, mimiciii'

In [3]:
# Connect to the database. The password is requested interactively through
# getpass so that it is never stored in the notebook source.
con = psycopg2.connect(dbname=dbname, user=user, host=host,
                       password=getpass.getpass(prompt='Password:'))
# Set the schema search path on this connection. `schema` is the constant
# defined in the settings cell, so it is formatted into the statement directly.
cur = con.cursor()
cur.execute('SET search_path to {}'.format(schema))

Password:········


In [4]:
# Load the study cohort into a pandas DataFrame.
#
# The CTE keeps icustay_detail rows with hospstay_seq = 1, icustay_seq = 1 and
# age >= 16, and picks up first_careunit from icustays. The outer SELECT then
# left-joins the OASIS columns on icustay_id, so rows without a score are kept.
#
# Requires the icustay_detail view from:
# https://github.com/MIT-LCP/mimic-code/tree/master/concepts/demographics
# And the OASIS score from:
# https://github.com/MIT-LCP/mimic-code/tree/master/concepts/severityscores
#
# NOTE(review): the Table 1 output further down reports an age SD above 50
# years, which suggests extreme age values in the source view - confirm
# whether ages need capping before analysis.
query = """
WITH first_icu AS (
    SELECT i.subject_id, i.hadm_id, i.icustay_id, i.gender, i.admittime admittime_hospital, 
      i.dischtime dischtime_hospital, i.los_hospital, i.age, i.admission_type, 
      i.hospital_expire_flag, i.intime intime_icu, i.outtime outtime_icu, i.los_icu, 
      s.first_careunit
    FROM icustay_detail i
    LEFT JOIN icustays s
    ON i.icustay_id = s.icustay_id
    WHERE i.hospstay_seq = 1
      AND i.icustay_seq = 1
      AND i.age >= 16
)
SELECT f.*, o.icustay_expire_flag, o.oasis, o.oasis_prob
FROM first_icu f
LEFT JOIN oasis o
ON f.icustay_id = o.icustay_id;
"""

# Execute on the open connection; the result set becomes `data`.
data = pd.read_sql_query(sql=query, con=con)

# Display the first few rows of the data

In [5]:
# List the column labels of the DataFrame returned by the query.
data.columns

Index(['subject_id', 'hadm_id', 'icustay_id', 'gender', 'admittime_hospital',
       'dischtime_hospital', 'los_hospital', 'age', 'admission_type',
       'hospital_expire_flag', 'intime_icu', 'outtime_icu', 'los_icu',
       'first_careunit', 'icustay_expire_flag', 'oasis', 'oasis_prob'],
      dtype='object')

In [None]:
# Preview the first rows of the query result.
data.head()

# Add day of week to DataFrame

In [None]:
# Derive the weekday name for each hospital/ICU timestamp column.
# Series.dt.weekday_name was deprecated in pandas 0.23 and removed in 1.0;
# Series.dt.day_name() is the supported replacement and yields English day
# names (e.g. 'Monday') by default.
data['admitday_hospital'] = data.admittime_hospital.dt.day_name()
data['dischday_hospital'] = data.dischtime_hospital.dt.day_name()
data['inday_icu'] = data.intime_icu.dt.day_name()
data['outday_icu'] = data.outtime_icu.dt.day_name()
# Preview the frame with the four new weekday columns appended.
data.head()

# Table 1 summary statistics

In [8]:
# List the columns again; the recorded output now includes the four weekday
# columns (admitday_hospital, dischday_hospital, inday_icu, outday_icu).
data.columns

Index(['subject_id', 'hadm_id', 'icustay_id', 'gender', 'admittime_hospital',
       'dischtime_hospital', 'los_hospital', 'age', 'admission_type',
       'hospital_expire_flag', 'intime_icu', 'outtime_icu', 'los_icu',
       'first_careunit', 'icustay_expire_flag', 'oasis', 'oasis_prob',
       'admitday_hospital', 'dischday_hospital', 'inday_icu', 'outday_icu'],
      dtype='object')

In [11]:
# Variables to summarise in Table 1.
columns = [
    'gender',
    'los_hospital',
    'age',
    'admission_type',
    'hospital_expire_flag',
    'los_icu',
    'icustay_expire_flag',
    'oasis',
    'oasis_prob',
    'first_careunit',
]

# Members of `columns` that are passed to TableOne as categorical.
categorical = [
    'gender',
    'admission_type',
    'hospital_expire_flag',
    'icustay_expire_flag',
    'first_careunit',
]

# Stratify by the weekday derived from intime_icu; p-values are switched off.
groupby = 'inday_icu'
pval = False

t = TableOne(
    data,
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    pval=pval,
)
# Display the resulting summary table as the cell output.
t.tableone

Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by inday_icu,Grouped by inday_icu,Grouped by inday_icu,Grouped by inday_icu,Grouped by inday_icu,Grouped by inday_icu,Grouped by inday_icu,Grouped by inday_icu
Unnamed: 0_level_1,Unnamed: 1_level_1,Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,isnull
variable,level,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
n,,6263,6097,4235,3960,5876,6141,5985,
admission_type,ELECTIVE,1016 (16.22),1265 (20.75),162 (3.83),101 (2.55),999 (17.0),1292 (21.04),1243 (20.77),0.0
admission_type,EMERGENCY,5118 (81.72),4687 (76.87),3852 (90.96),3681 (92.95),4746 (80.77),4704 (76.6),4600 (76.86),
admission_type,URGENT,129 (2.06),145 (2.38),221 (5.22),178 (4.49),131 (2.23),145 (2.36),142 (2.37),
age,,74.56 (53.22),73.13 (51.37),73.92 (58.58),75.26 (60.66),75.51 (55.46),75.48 (55.22),74.16 (53.88),0.0
first_careunit,CCU,838 (13.38),918 (15.06),695 (16.41),621 (15.68),850 (14.47),919 (14.96),851 (14.22),0.0
first_careunit,CSRU,1416 (22.61),1632 (26.77),237 (5.6),194 (4.9),1282 (21.82),1575 (25.65),1268 (21.19),
first_careunit,MICU,2139 (34.15),1940 (31.82),1765 (41.68),1706 (43.08),2020 (34.38),2019 (32.88),2020 (33.75),
first_careunit,SICU,1044 (16.67),865 (14.19),743 (17.54),743 (18.76),996 (16.95),933 (15.19),1038 (17.34),
first_careunit,TSICU,826 (13.19),742 (12.17),795 (18.77),696 (17.58),728 (12.39),695 (11.32),808 (13.5),


In [10]:
# List the columns once more.
# NOTE(review): the recorded output shows the columns Index now carries
# name='variable', unlike the earlier listings - presumably a side effect of
# building the TableOne on `data`; confirm.
data.columns

Index(['subject_id', 'hadm_id', 'icustay_id', 'gender', 'admittime_hospital',
       'dischtime_hospital', 'los_hospital', 'age', 'admission_type',
       'hospital_expire_flag', 'intime_icu', 'outtime_icu', 'los_icu',
       'first_careunit', 'icustay_expire_flag', 'oasis', 'oasis_prob',
       'admitday_hospital', 'dischday_hospital', 'inday_icu', 'outday_icu'],
      dtype='object', name='variable')