<a href="https://colab.research.google.com/github/Jarvis-BITS/midodrine-mimic-iv/blob/main/notebook/01_cohort_data_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import libraries

In [1]:
from google.cloud import bigquery
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(rc={'figure.figsize':(16,10)}, font_scale=1.3)
import sys
import warnings
warnings.filterwarnings('ignore')

Authenticate with GCP account having MIMIC-IV dataset

In [2]:
from google.colab import auth

auth.authenticate_user()
print('Authenticated')

Authenticated


In [3]:
%load_ext google.colab.data_table

In [4]:
%load_ext google.cloud.bigquery

In [5]:
project_id = "mit-midodrine" #@param {type:"string"} #Enter your project ID from GCP bigquey here
bqclient = bigquery.Client(project=project_id)

# BigQuery data extraction of patient details in ICU (1st day)

In [6]:
population_string = """
WITH patient_details AS (
  SELECT
  ie.subject_id, ie.hadm_id, ie.stay_id
  , pat.gender, ie.ethnicity
  , ie.admittime, ie.dischtime
  , DATETIME_DIFF(ie.admittime, DATETIME(pat.anchor_year, 1, 1, 0, 0, 0), YEAR) + pat.anchor_age AS age
  , adm.deathtime
  , ie.hospital_expire_flag

-- icu level factors
  , ie.icu_intime, ie.icu_outtime
  , ie.first_icu_stay
  , ie.los_icu, ie.los_hospital
  FROM `physionet-data.mimic_derived.icustay_detail` ie
  INNER JOIN `physionet-data.mimic_core.admissions` adm
  ON ie.hadm_id = adm.hadm_id
  INNER JOIN `physionet-data.mimic_core.patients` pat
  ON ie.subject_id = pat.subject_id

-- midodrine cohort
  )
  , midodrine AS (
    SELECT hadm_id, starttime AS drug_starttime
    FROM `physionet-data.mimic_hosp.pharmacy`
    WHERE lower(medication) like 'midodrine'
  )
  , midodrine_exclude AS (
    SELECT stay_id,
    CASE WHEN LOGICAL_AND(drug_starttime < DATE_TRUNC(intime - interval '1' day, DAY) OR drug_starttime > outtime) THEN 1 
    ELSE 0 END AS midodrine_exclude
    FROM `physionet-data.mimic_icu.icustays` 
    LEFT JOIN midodrine USING (hadm_id)
    GROUP BY stay_id
  )
  , midodrine_include AS (
    SELECT stay_id,
    CASE WHEN LOGICAL_AND(drug_starttime is NULL)
    OR LOGICAL_OR(drug_starttime BETWEEN DATE_TRUNC(intime - interval '1' day, DAY) AND outtime) THEN 1
    ELSE 0 END AS midodrine_include
    FROM `physionet-data.mimic_icu.icustays`  
    LEFT JOIN midodrine USING (hadm_id)
    GROUP BY stay_id
  )
  , midodrine_first AS (
  SELECT stay_id, min(drug_starttime) AS drug_starttime 
  FROM `physionet-data.mimic_icu.icustays` LEFT JOIN midodrine mi USING (hadm_id)
  WHERE drug_starttime BETWEEN DATE_TRUNC(intime - interval '1' day, DAY) AND outtime
  GROUP BY stay_id
  )
 , midodrine_dose AS (
  SELECT ic.stay_id, AVG(doses_per_24_hrs) AS avg_doses_per_24_hrs
  FROM `physionet-data.mimic_hosp.pharmacy` ph
  INNER JOIN `physionet-data.mimic_icu.icustays`ic
  ON ph.hadm_id = ic.hadm_id
  GROUP BY ic.stay_id
  )

 , sepsis3 AS (
   SELECT stay_id, sepsis3
   FROM `physionet-data.mimic_derived.sepsis3`
 )

  , adm_weekday AS (
    SELECT stay_id,
      EXTRACT(DAYOFWEEK FROM intime) AS adm_weekday
    FROM `physionet-data.mimic_icu.icustays`
  )

  , population AS (
    SELECT *  
    FROM (SELECT DISTINCT stay_id, first_careunit, FROM `physionet-data.mimic_icu.icustays`) a
    LEFT JOIN patient_details USING (stay_id)
    LEFT JOIN midodrine_first USING (stay_id)
    LEFT JOIN midodrine_exclude USING (stay_id)
    LEFT JOIN midodrine_include USING (stay_id)
    LEFT JOIN midodrine_dose USING (stay_id)
    LEFT JOIN sepsis3 USING (stay_id)
    LEFT JOIN adm_weekday USING (stay_id)
    )
    SELECT * FROM population
"""

In [7]:
vasopressor_id = """
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.epinephrine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.dobutamine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.dopamine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.norepinephrine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM `physionet-data.mimic_derived.phenylephrine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.vasopressin`
"""

In [8]:
basic_string = """
WITH sapsii AS (
  SELECT sa.stay_id
    , AVG(sa.sapsii) AS sapsii
  FROM `physionet-data.mimic_derived.sapsii` sa
  LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
  WHERE
    (sa.starttime BETWEEN DATE_TRUNC(ic.intime - interval '1' day, DAY) AND ic.outtime)
    AND (sa.endtime < ic.outtime)
  GROUP BY stay_id
)
, charlson AS(
  SELECT ic.stay_id
    , AVG(chr.charlson_comorbidity_index) AS charlson_index
  FROM `physionet-data.mimic_derived.charlson` chr
  LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (subject_id)
  GROUP BY ic.stay_id
)
, basic AS (
  SELECT *
  FROM (SELECT DISTINCT stay_id FROM `physionet-data.mimic_icu.icustays`) a
  LEFT JOIN sapsii USING (stay_id)
  LEFT JOIN charlson USING (stay_id)
)
SELECT * FROM basic
"""

In [9]:
comorbidities_string = """
WITH charlson AS(
  SELECT subject_id, hadm_id
  , renal_diseASe AS Renal
  , severe_liver_diseASe AS severe_liver
  , mild_liver_diseASe AS mild_liver
  , chronic_pulmonary_diseASe AS CPD_or_COPD
  , malignant_cancer AS Maligancy
  FROM `physionet-data.mimic_derived.charlson` char
)
, diag AS
(
  SELECT 
      hadm_id
      , CASE WHEN icd_version = 9 THEN icd_code ELSE NULL END AS icd9_code
      , CASE WHEN icd_version = 10 THEN icd_code ELSE NULL END AS icd10_code
  FROM `physionet-data.mimic_hosp.diagnoses_icd` diag
)
, addition AS(
  SELECT ad.hadm_id
  -- CAD
  , MAX(CASE WHEN
      icd9_code LIKE '414%'
      OR
      SUBSTR(icd10_code, 1, 4) = 'I251'
      THEN 1 
      ELSE 0 END) AS CAD
  -- AFIB
  , MAX(CASE WHEN
      icd9_code LIKE '4273%'
      OR
      SUBSTR(icd10_code, 1, 3) = 'I48'
      THEN 1 
      ELSE 0 END) AS AFIB
  FROM `physionet-data.mimic_core.admissions` ad
  LEFT JOIN diag
  ON ad.hadm_id = diag.hadm_id
  GROUP BY ad.hadm_id
)
, comorbidities AS(
  SELECT ic.stay_id
    , AVG(charlson.CPD_or_COPD) AS CPD_or_COPD_rate
    , AVG(charlson.Maligancy) AS Maligancy_rate
    , AVG(charlson.mild_liver) AS mild_liver_rate
    , AVG(charlson.severe_liver) AS severe_liver_rate
    , AVG(charlson.Renal) AS Renal_rate
    , AVG(addition.CAD) AS CAD_rate
    , AVG(addition.AFIB) AS AFIB_rate
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN charlson USING (hadm_id)
  LEFT JOIN addition USING (hadm_id)
  GROUP BY ic.stay_id
)
SELECT * FROM comorbidities
"""

In [10]:
weight_string = """
SELECT
  ie.subject_id
  , ie.stay_id
  , AVG(CASE WHEN weight_type = 'admit' THEN ce.weight ELSE NULL END) AS weight
FROM `physionet-data.mimic_icu.icustays` ie
  -- admission weight
LEFT JOIN `physionet-data.mimic_derived.weight_durations` ce
    ON ie.stay_id = ce.stay_id
    -- we filter to weights documented during or before the 1st day
    AND ce.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
GROUP BY ie.subject_id, ie.stay_id
"""

In [11]:
vital_sign_string = """
-- This query pivots vital signs and aggregates them
-- for the first 24 hours of a patient's stay.
SELECT
ie.subject_id
, ie.stay_id
, AVG(heart_rate) AS heart_rate_mean
, AVG(sbp) AS sbp_mean
, AVG(dbp) AS dbp_mean
, AVG(mbp) AS mbp_mean
, AVG(resp_rate) AS resp_rate_mean
, AVG(temperature) AS temperature_mean
, AVG(spo2) AS spo2_mean
, AVG(glucose) AS glucose_mean
FROM `physionet-data.mimic_icu.icustays` ie
LEFT JOIN `physionet-data.mimic_derived.vitalsign` ce
    ON ie.stay_id = ce.stay_id
    AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
    AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
GROUP BY ie.subject_id, ie.stay_id;
"""

In [12]:
lab_tests_string = """
WITH blood AS (
  SELECT ic.stay_id
    , AVG(hemoglobin) AS hemoglobin
    , AVG(platelet) AS platelet
    , AVG(wbc) AS wbc
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN `physionet-data.mimic_derived.complete_blood_count` cbc 
    ON ic.hadm_id = cbc.hadm_id
    AND cbc.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
    AND cbc.charttime <= DATETIME_ADD(ic.intime, INTERVAL '1' DAY) 
  GROUP BY ic.stay_id
)
, chem AS (
  SELECT ic.stay_id
    , AVG(sodium) AS sodium
    , AVG(potassium) AS potassium
    , AVG(bicarbonate) AS bicarbonate
    , AVG(chloride) AS chloride
    , AVG(bun) AS bun
    , AVG(creatinine) AS creatinine
    FROM `physionet-data.mimic_icu.icustays` ic
    LEFT JOIN `physionet-data.mimic_derived.chemistry` chem
    ON ic.hadm_id = chem.hadm_id
    AND chem.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
    AND chem.charttime <= DATETIME_ADD(ic.intime, INTERVAL '1' DAY) 
    GROUP BY ic.stay_id
)
, bg AS (
  SELECT ic.stay_id
     , AVG(lactate) AS lactate
     , AVG(ph) AS ph
     , AVG(po2) AS po2
     , AVG(pco2) AS pco2
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN `physionet-data.mimic_derived.bg` bg
    ON ic.hadm_id = bg.hadm_id
    AND bg.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
    AND bg.charttime <= DATETIME_ADD(ic.intime, INTERVAL '1' DAY) 
  GROUP BY ic.stay_id
  
)
, logical_tested_index AS (
  SELECT ic.stay_id
    , AVG(CASE WHEN ntprobnp is not null THEN 1 ELSE 0 END) AS bnp
    , AVG(CASE WHEN troponin_t is not null THEN 1 ELSE 0 END)AS troponin
    , AVG(CASE WHEN ck_mb is not null THEN 1 ELSE 0 END) AS creatinine_kinase
  FROM `physionet-data.mimic_icu.icustays` ic 
  LEFT JOIN `physionet-data.mimic_derived.cardiac_marker` cm 
  ON ic.hadm_id = cm.hadm_id
  AND cm.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
  AND cm.charttime <= DATETIME_ADD(ic.intime, INTERVAL '1' DAY) 
  GROUP BY ic.stay_id
)
, lab_tests AS (
  SELECT ic.stay_id
    , blood.hemoglobin, blood.platelet, blood.wbc
    , chem.sodium, chem.potassium, chem.bicarbonate, chem.chloride, chem.bun, chem.creatinine
    , bg.lactate, bg.ph, bg.po2, bg.pco2
    , lti.bnp, lti.troponin, lti.creatinine_kinase
  
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN blood USING (stay_id)
  LEFT JOIN chem USING (stay_id)
  LEFT JOIN bg USING (stay_id)
  LEFT JOIN logical_tested_index lti USING (stay_id)
)
SELECT * FROM lab_tests
"""

In [13]:
interventions_string = """
-- sedative extract
WITH sedative AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ip.itemid in (221668,221744,225972,225942,222168) THEN 1 ELSE 0 END) AS sedative_use
    FROM `physionet-data.mimic_icu.icustays` ic
    LEFT JOIN `physionet-data.mimic_icu.inputevents` ip USING (stay_id)
    WHERE ip.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)

-- vasopressor
, vasopressin AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.vasopressin`) THEN 1 ELSE 0 END) AS vasopressin_use
    FROM `physionet-data.mimic_derived.vasopressin` iv
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iv.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, dobutamine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.dobutamine`) THEN 1 ELSE 0 END) AS dobutamine_use
    FROM `physionet-data.mimic_derived.dobutamine` idb
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE idb.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, dopamine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.dopamine`) THEN 1 ELSE 0 END) AS dopamine_use
    FROM `physionet-data.mimic_derived.dopamine` idp
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE idp.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, epinephrine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.epinephrine`) THEN 1 ELSE 0 END) AS epinephrine_use
    FROM `physionet-data.mimic_derived.epinephrine` iep
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iep.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, milrinone AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.milrinone`) THEN 1 ELSE 0 END) AS milrinone_use
    FROM `physionet-data.mimic_derived.milrinone` iml
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iml.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, norepinephrine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.norepinephrine`) THEN 1 ELSE 0 END) AS norepinephrine_use
    FROM `physionet-data.mimic_derived.norepinephrine` inrp
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE inrp.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, phenylephrine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.phenylephrine`) THEN 1 ELSE 0 END) AS phenylephrine_use
    FROM `physionet-data.mimic_derived.phenylephrine` iphe
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iphe.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)

-- ventilation
, ventilation AS (
  SELECT ic.stay_id
    , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.ventilation`) THEN 1 ELSE 0 END) AS ventilation_use
    FROM `physionet-data.mimic_icu.icustays` ic
    LEFT JOIN `physionet-data.mimic_derived.ventilation` ivn USING (stay_id)
    WHERE ivn.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)

, interventions as (
    SELECT *
    FROM (SELECT DISTINCT stay_id FROM `physionet-data.mimic_icu.icustays`) a
    LEFT JOIN sedative USING (stay_id)
    LEFT JOIN vasopressin USING (stay_id)
    LEFT JOIN ventilation USING (stay_id)
    LEFT JOIN dobutamine USING (stay_id)
    LEFT JOIN dopamine USING (stay_id)
    LEFT JOIN epinephrine USING (stay_id)
    LEFT JOIN milrinone USING (stay_id)
    LEFT JOIN norepinephrine USING (stay_id)
    LEFT JOIN phenylephrine USING (stay_id)
)

SELECT * FROM interventions;
"""

In [14]:
mortality_string = """
SELECT ic.stay_id,
MAX(CASE WHEN pa.dod is not null THEN 1 ELSE 0 END) AS mortality,
MAX(CASE WHEN pa.dod <= DATETIME_ADD(ic.intime , INTERVAL '28' DAY) AND pa.dod >= ic.intime THEN 1 ELSE 0 END) AS mort_28_day
FROM `physionet-data.mimic_icu.icustays` ic
LEFT JOIN `physionet-data.mimic_core.patients` pa
ON ic.subject_id = pa.subject_id
GROUP BY ic.stay_id
"""

# Building cohort table

## Extract population info

In [15]:
population_data = (
    bqclient.query(population_string)
    .result()
    .to_dataframe()
)

In [16]:
population_data



Unnamed: 0,stay_id,first_careunit,subject_id,hadm_id,gender,ethnicity,admittime,dischtime,age,deathtime,...,icu_outtime,first_icu_stay,los_icu,los_hospital,drug_starttime,midodrine_exclude,midodrine_include,avg_doses_per_24_hrs,sepsis3,adm_weekday
0,34547665,Neuro Stepdown,12776735,20817525,M,OTHER,2200-07-11 22:46:00,2200-07-19 12:00:00,80,NaT,...,2200-07-13 16:44:40,True,1.67,8,NaT,0,1,2.121951,True,7
1,39289362,Neuro Stepdown,16256226,20013290,F,OTHER,2150-12-20 03:00:00,2150-12-21 14:50:00,49,NaT,...,2150-12-21 14:58:40,True,0.92,1,NaT,0,1,1.384615,,1
2,32563675,Neuro Stepdown,12974563,29618057,F,WHITE,2138-11-13 01:07:00,2138-11-15 15:53:00,72,NaT,...,2138-11-15 16:25:19,True,1.71,2,NaT,0,1,1.434783,,5
3,34947848,Neuro Stepdown,14609218,20606189,F,WHITE,2174-06-28 20:40:00,2174-07-05 16:45:00,69,NaT,...,2174-07-05 17:01:32,True,6.83,7,NaT,0,1,1.315789,,3
4,37445058,Neuro Stepdown,12687112,26132667,M,BLACK/AFRICAN AMERICAN,2162-05-31 15:36:00,2162-06-04 10:16:00,63,NaT,...,2162-06-04 10:16:13,True,3.67,4,NaT,0,1,1.666667,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76535,39980385,Medical/Surgical Intensive Care Unit (MICU/SICU),11392990,21253505,M,WHITE,2158-08-10 04:50:00,2158-08-13 17:45:00,73,NaT,...,2158-08-11 21:04:48,True,1.63,3,NaT,0,1,1.592593,True,5
76536,39985296,Medical/Surgical Intensive Care Unit (MICU/SICU),19632565,29295929,F,WHITE,2179-04-26 14:22:00,2179-05-02 16:00:00,59,2179-05-02 16:00:00,...,2179-05-02 21:34:48,True,6.25,6,NaT,0,1,1.782609,,2
76537,39987031,Medical/Surgical Intensive Care Unit (MICU/SICU),12762280,23194856,F,WHITE,2177-12-07 21:34:00,2177-12-16 17:43:00,82,NaT,...,2177-12-15 23:25:26,True,2.04,9,NaT,0,1,2.035714,,7
76538,39989040,Medical/Surgical Intensive Care Unit (MICU/SICU),18311244,22780979,M,WHITE,2187-04-04 23:57:00,2187-04-08 15:45:00,58,NaT,...,2187-04-05 15:28:10,True,0.54,4,NaT,0,1,1.711111,True,5


In [17]:
population_data.info()    

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76540 entries, 0 to 76539
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   stay_id               76540 non-null  int64         
 1   first_careunit        76540 non-null  object        
 2   subject_id            76540 non-null  int64         
 3   hadm_id               76540 non-null  int64         
 4   gender                76540 non-null  object        
 5   ethnicity             76540 non-null  object        
 6   admittime             76540 non-null  datetime64[ns]
 7   dischtime             76540 non-null  datetime64[ns]
 8   age                   76540 non-null  int64         
 9   deathtime             8813 non-null   datetime64[ns]
 10  hospital_expire_flag  76540 non-null  int64         
 11  icu_intime            76540 non-null  datetime64[ns]
 12  icu_outtime           76540 non-null  datetime64[ns]
 13  first_icu_stay  

## Building the cohort table (consisting of only ICU patients)
1. Include adults (>= 18 years' old)
2. Include the first ICU admission of each patient
3. Include patients whose ICU care is 'MICU' or 'SICU'
4. Exclude those who accepted midodrine more than one day before icu care or after icu care (have been done before)

Note: midodrine_include is just opposite of midodrine_exclude so use either of the two for this

Filtering on age

In [138]:
cohort_1 = population_data.loc[population_data['age'] >= 18]
cohort_1.shape

(76540, 22)

Filtering on first ICU admission

In [139]:
cohort_2 = cohort_1.loc[cohort_1['first_icu_stay']  == True]
cohort_2.shape

(69211, 22)

Filtering out if death/discharge before 24hrs of intime

In [140]:
# 24 hrs = 86400s ;(24x60x60)
cohort_3 = cohort_2.loc[((cohort_2['deathtime'] - cohort_2['icu_intime']).dt.total_seconds() > 86400) | (cohort_2['deathtime'].isnull())] # Passed away after 24hrs or alive
cohort_3 = cohort_3.loc[((cohort_3['icu_outtime'] - cohort_3['icu_intime']).dt.total_seconds() > 86400)] # Discharged in 24hrs 
cohort_3 = cohort_3.loc[((cohort_3['dischtime'] - cohort_3['icu_intime']).dt.total_seconds() > 86400)] # Discharged in 24hrs 
cohort_3.shape

(54187, 22)

 Filtering in only MICU & SICU first care units

In [141]:
cohort_4 = cohort_3.loc[cohort_3['first_careunit'].str.contains("micu|sicu", case=False)]
cohort_4.shape

(36115, 22)

Filtering on sepsis 

In [142]:
cohort_5 = cohort_4.loc[cohort_4['sepsis3']  == True]
cohort_5.shape

(19915, 22)

Filtering on vasopressor treatments

In [85]:
vaso_id = (
    bqclient.query(vasopressor_id)
    .result()
    .to_dataframe()   
)

In [143]:
cohort_6 = pd.merge(cohort_5, vaso_id, on='stay_id', how='inner')
cohort_6.shape

(8834, 22)

Filtering on midodrine ***perscribed*** for first time

In [144]:
cohort_table = cohort_6.loc[cohort_6['midodrine_include'] == 1]
cohort_table.shape

(8674, 22)

## Extract the weight

In [None]:
weight_data = (
    bqclient.query(weight_string)
    .result()
    .to_dataframe()
)

In [None]:
weight_data



Unnamed: 0,subject_id,stay_id,weight
0,16256226,39289362,54.0
1,12974563,32563675,90.7
2,14609218,34947848,53.9
3,12687112,37445058,119.1
4,18190935,30056748,44.0
...,...,...,...
76535,15359196,38721510,79.0
76536,11780821,30062692,79.0
76537,19083332,31908710,79.0
76538,18724450,32159208,79.0


## Extract the basic score (charlson, sapsii)

In [None]:
basic_data = (
    bqclient.query(basic_string)
    .result()
    .to_dataframe()
)

In [None]:
basic_data



Unnamed: 0,stay_id,sapsii,charlson_index
0,34338479,8.0,0.000000
1,37186192,61.0,7.666667
2,37535351,10.0,2.000000
3,31860020,8.0,2.000000
4,38476338,16.0,5.600000
...,...,...,...
76535,30852106,57.0,6.461538
76536,33069153,57.0,11.222222
76537,35524095,57.0,6.000000
76538,39130941,57.0,2.840000


## Extract the mortality

In [None]:
mortality_data = (
    bqclient.query(mortality_string)
    .result()
    .to_dataframe()
)

In [None]:
mortality_data



Unnamed: 0,stay_id,mortality,mort_28_day
0,34547665,0,0
1,39289362,0,0
2,32563675,0,0
3,34947848,0,0
4,37445058,0,0
...,...,...,...
76535,39977793,1,1
76536,39980385,1,0
76537,39985296,1,1
76538,39989040,1,0


## Extract the Comorbidities

In [None]:
comorbidities_data = (
    bqclient.query(comorbidities_string)
    .result()
    .to_dataframe()
)

In [None]:
comorbidities_data.describe()

Unnamed: 0,stay_id,CPD_or_COPD_rate,Maligancy_rate,mild_liver_rate,severe_liver_rate,Renal_rate,CAD_rate,AFIB_rate
count,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0
mean,34994800.0,0.259211,0.131761,0.119036,0.055344,0.218853,0.293977,0.294617
std,2888755.0,0.438204,0.338233,0.323833,0.228651,0.413471,0.455585,0.455873
min,30000150.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32492320.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,34996600.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,37492220.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
max,39999810.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Extract Vital Signs

In [None]:
vital_sign_data = (
    bqclient.query(vital_sign_string)
    .result()
    .to_dataframe()
)

In [None]:
vital_sign_data



Unnamed: 0,subject_id,stay_id,heart_rate_mean,sbp_mean,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean
0,14035721,37115658,72.421053,118.210526,85.894737,96.315789,16.210526,36.780000,96.315789,87.750000
1,10935535,35516372,49.882353,143.470588,73.823529,99.823529,15.647059,36.866000,99.235294,83.000000
2,17402466,34034688,62.066667,136.980769,65.269231,88.021739,19.125000,36.490000,99.250000,118.000000
3,11495356,31532891,96.777778,121.562500,73.250000,92.266667,15.333333,36.944000,97.375000,125.666667
4,18623405,38071544,70.111111,153.640000,68.760000,92.200000,18.038462,36.905714,96.370370,107.600000
...,...,...,...,...,...,...,...,...,...,...
76535,18130948,38431990,76.666667,132.500000,65.916667,79.869565,24.314815,37.301429,95.458333,110.000000
76536,14262378,31586918,83.400000,75.750000,44.000000,51.625000,24.500000,34.440000,92.800000,110.000000
76537,16916209,39712235,69.450000,121.611111,60.444444,74.555556,19.150000,36.893333,95.150000,110.000000
76538,11797072,34548396,76.666667,116.263158,47.789474,66.947368,17.809524,36.963333,98.333333,110.000000


## Extract Lab Tests Results

In [None]:
lab_tests_data = (
    bqclient.query(lab_tests_string)
    .result()
    .to_dataframe()
)

In [None]:
lab_tests_data



Unnamed: 0,stay_id,hemoglobin,platelet,wbc,sodium,potassium,bicarbonate,chloride,bun,creatinine,lactate,ph,po2,pco2,bnp,troponin,creatinine_kinase
0,37072655,12.600000,237.000000,10.30,140.000000,3.900000,34.000000,100.000000,12.000000,0.500000,,,,,0.0,0.00,0.00
1,37431964,8.850000,123.500000,13.70,145.000000,3.850000,16.500000,115.000000,24.500000,0.950000,2.20,7.390,160.0,29.0,0.0,0.00,0.00
2,30092544,11.600000,162.000000,17.10,146.000000,3.900000,30.500000,105.000000,28.000000,0.400000,0.90,7.410,149.0,53.0,0.0,0.00,0.00
3,37262027,10.100000,194.000000,11.90,140.000000,3.700000,13.000000,108.000000,15.000000,0.600000,,,,,0.0,0.00,0.00
4,35718808,11.700000,195.000000,3.60,143.000000,4.300000,33.000000,100.000000,37.000000,3.700000,,,,,0.0,1.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76535,35079726,11.566667,232.333333,8.40,140.666667,3.800000,24.666667,105.666667,20.000000,1.300000,,,,,0.0,0.75,0.75
76536,37101836,9.833333,167.333333,2.30,137.333333,3.766667,24.666667,103.666667,12.333333,0.433333,,,,,0.0,0.00,0.00
76537,37249644,10.800000,211.000000,5.10,117.000000,4.033333,24.666667,82.500000,7.000000,0.550000,1.45,7.440,40.5,43.5,0.0,0.00,0.00
76538,37395726,9.750000,330.000000,8.70,137.666667,3.900000,24.666667,100.666667,28.000000,2.100000,1.75,7.370,97.5,49.5,0.0,1.00,1.00


## Extract interventions

In [None]:
interventions_data = (
    bqclient.query(interventions_string)
    .result()
    .to_dataframe()
)

In [None]:
interventions_data['vasopressor_use_sum'] = interventions_data['vasopressin_use'] + interventions_data['dobutamine_use'] + interventions_data['dopamine_use'] + interventions_data['epinephrine_use'] + interventions_data['milrinone_use'] + interventions_data['norepinephrine_use'] + interventions_data['phenylephrine_use']
interventions_data['vasopressor_use'] = (interventions_data['vasopressor_use_sum'] != 0).astype(np.int32)

In [None]:
interventions_data



Unnamed: 0,stay_id,sedative_use,vasopressin_use,ventilation_use,dobutamine_use,dopamine_use,epinephrine_use,milrinone_use,norepinephrine_use,phenylephrine_use,vasopressor_use_sum,vasopressor_use
0,34547665,0.0,,,,,,,,,,1
1,39289362,0.0,,,,,,,,,,1
2,32563675,0.0,,1.0,,,,,,,,1
3,34947848,0.0,,,,,,,,,,1
4,37445058,0.0,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...
76535,39980385,0.0,,1.0,,,,,,,,1
76536,39985296,0.0,,1.0,,,,,,,,1
76537,39987031,0.0,,1.0,,,,,,,,1
76538,39989040,0.0,,1.0,,,,,,,,1


## Divide the Midodrine Group

In [145]:
cohort_mid = cohort_table.loc[pd.notnull(cohort_table['drug_starttime'])]
cohort_mid



Unnamed: 0,stay_id,first_careunit,subject_id,hadm_id,gender,ethnicity,admittime,dischtime,age,deathtime,...,icu_outtime,first_icu_stay,los_icu,los_hospital,drug_starttime,midodrine_exclude,midodrine_include,avg_doses_per_24_hrs,sepsis3,adm_weekday
7,36014877,Trauma SICU (TSICU),11861017,27162817,M,WHITE,2189-03-15 15:47:00,2189-05-06 17:00:00,87,NaT,...,2189-05-01 01:33:42,True,46.42,52,2189-04-17 16:00:00,0,1,1.595745,True,1
8,32272859,Trauma SICU (TSICU),11259141,25963696,F,WHITE,2184-07-05 18:16:00,2184-08-08 15:59:00,60,NaT,...,2184-08-03 18:51:35,True,24.00,34,2184-07-23 14:00:00,0,1,1.633333,True,7
12,37039117,Medical Intensive Care Unit (MICU),10184327,21396430,M,WHITE,2138-10-20 18:55:00,2138-11-01 03:00:00,89,2138-11-01 03:00:00,...,2138-11-01 04:40:00,True,11.33,12,2138-10-22 07:00:00,0,1,1.183333,True,2
19,34023828,Neuro Surgical Intensive Care Unit (Neuro SICU),18010960,21782431,M,UNKNOWN,2167-04-26 14:05:00,2167-05-18 13:40:00,59,NaT,...,2167-05-15 14:29:56,True,18.96,22,2167-05-10 22:00:00,0,1,1.589041,True,1
32,32309766,Neuro Surgical Intensive Care Unit (Neuro SICU),15703353,29272306,F,WHITE,2181-05-17 22:14:00,2181-06-03 16:30:00,59,NaT,...,2181-05-26 23:37:40,True,8.83,17,2181-05-22 16:00:00,0,1,1.517241,True,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8783,37823328,Medical/Surgical Intensive Care Unit (MICU/SICU),10481056,21921090,M,UNKNOWN,2122-12-19 09:16:00,2122-12-31 14:32:00,63,NaT,...,2122-12-21 18:11:46,True,2.38,12,2122-12-20 23:00:00,0,1,1.239437,True,7
8786,37888531,Medical/Surgical Intensive Care Unit (MICU/SICU),15885972,29782542,F,WHITE,2172-07-30 17:16:00,2172-08-13 18:25:00,61,NaT,...,2172-08-10 01:46:14,True,5.29,14,2172-08-08 14:00:00,0,1,1.571429,True,3
8807,38743934,Medical/Surgical Intensive Care Unit (MICU/SICU),19894790,27933693,M,WHITE,2140-08-25 17:58:00,2140-09-13 18:00:00,84,2140-09-13 18:00:00,...,2140-09-11 16:02:05,True,16.88,19,2140-09-01 14:00:00,0,1,1.947368,True,5
8826,39446578,Medical/Surgical Intensive Care Unit (MICU/SICU),14781720,23795457,F,WHITE,2189-01-07 21:03:00,2189-01-26 15:11:00,92,NaT,...,2189-01-26 15:11:42,True,4.08,19,2189-01-26 14:00:00,0,1,1.409091,True,5


In [None]:
mid_weights = weight_data[weight_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_weights.describe()

Unnamed: 0,subject_id,stay_id,weight
count,810.0,810.0,781.0
mean,15065040.0,34925710.0,84.899872
std,2836112.0,2896935.0,26.25702
min,10008920.0,30037710.0,32.9
25%,12663370.0,32362880.0,67.7
50%,14969780.0,34902890.0,80.6
75%,17468260.0,37361490.0,98.4
max,19999300.0,39999230.0,230.0


In [None]:
mid_vital_signs = vital_sign_data[vital_sign_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_vital_signs.describe()

Unnamed: 0,subject_id,stay_id,heart_rate_mean,sbp_mean,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean
count,810.0,810.0,809.0,804.0,804.0,809.0,809.0,803.0,808.0,804.0
mean,15065040.0,34925710.0,86.253055,108.043426,58.635796,72.455089,19.293953,36.765713,96.797348,854.736196
std,2836112.0,2896935.0,16.469304,14.936526,9.772158,10.295015,4.388801,0.517885,2.314189,10566.023438
min,10008920.0,30037710.0,44.178571,74.875,26.0,44.818182,9.538462,33.59,67.0,65.666667
25%,12663370.0,32362880.0,74.888889,98.201601,52.184103,65.78125,16.291667,36.523857,95.575594,103.375
50%,14969780.0,34902890.0,85.541667,104.747098,58.143434,71.04,18.6,36.7344,97.0,123.9
75%,17468260.0,37361490.0,97.96875,115.375769,64.126157,77.592593,21.74,36.985,98.424451,157.6375
max,19999300.0,39999230.0,144.923077,195.0,97.533333,117.666667,48.0,39.95,100.0,200072.0


In [None]:
mid_labs = lab_tests_data[lab_tests_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_labs.describe()

Unnamed: 0,stay_id,hemoglobin,platelet,wbc,sodium,potassium,bicarbonate,chloride,bun,creatinine,lactate,ph,po2,pco2,bnp,troponin,creatinine_kinase
count,810.0,802.0,802.0,802.0,804.0,804.0,804.0,804.0,805.0,804.0,554.0,605.0,605.0,605.0,810.0,810.0,810.0
mean,34925710.0,9.71562,163.667983,13.084344,136.672788,4.267396,21.207323,101.459864,40.943542,2.498143,2.817277,7.35747,102.332191,39.680469,0.032757,0.223416,0.259979
std,2896935.0,1.936203,104.936119,7.598789,6.230019,0.692554,4.888621,7.466089,30.604515,2.227576,2.232144,0.07949,61.74662,9.508758,0.16873,0.411767,0.427631
min,30037710.0,5.0,13.25,0.575,107.333333,2.5,8.0,69.5,3.0,0.2,0.4,7.128,19.0,18.0,0.0,0.0,0.0
25%,32362880.0,8.133333,84.0,7.975,133.2375,3.766667,18.0,97.333333,18.0,0.9,1.50625,7.31,55.5,33.555556,0.0,0.0,0.0
50%,34902890.0,9.425,143.55,11.358333,137.0,4.166667,21.333333,102.0,32.0,1.8,2.1,7.36,87.714286,38.5,0.0,0.0,0.0
75%,37361490.0,10.9,221.9,16.61875,140.5,4.7,24.0,106.0,55.25,3.45,3.31875,7.4125,130.25,44.4,0.0,0.0,0.666667
max,39999230.0,16.25,807.0,67.9,158.2,6.78,42.666667,132.0,207.333333,16.05,20.428571,7.57,482.125,76.75,1.0,1.0,1.0


In [None]:
mid_comorbidities = comorbidities_data[comorbidities_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_comorbidities.describe()

Unnamed: 0,stay_id,CPD_or_COPD_rate,Maligancy_rate,mild_liver_rate,severe_liver_rate,Renal_rate,CAD_rate,AFIB_rate
count,810.0,810.0,810.0,810.0,810.0,810.0,810.0,810.0
mean,34925710.0,0.230864,0.162963,0.495062,0.416049,0.306173,0.162963,0.312346
std,2896935.0,0.421646,0.36956,0.500285,0.493206,0.461187,0.36956,0.463736
min,30037710.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32362880.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,34902890.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,37361490.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0
max,39999230.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
mid_basic = basic_data[basic_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_basic.describe()

Unnamed: 0,stay_id,sapsii,charlson_index
count,810.0,762.0,810.0
mean,34925710.0,44.695538,6.29054
std,2896935.0,13.951454,2.607063
min,30037710.0,11.0,0.0
25%,32362880.0,35.0,4.5
50%,34902890.0,44.0,6.0
75%,37361490.0,54.0,8.0
max,39999230.0,95.0,17.0


In [None]:
mid_mortality = mortality_data[mortality_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_mortality.describe()

Unnamed: 0,stay_id,mortality,mort_28_day
count,810.0,810.0,810.0
mean,34925710.0,0.403704,0.290123
std,2896935.0,0.490943,0.4541
min,30037710.0,0.0,0.0
25%,32362880.0,0.0,0.0
50%,34902890.0,0.0,0.0
75%,37361490.0,1.0,1.0
max,39999230.0,1.0,1.0


In [None]:
mid_interventions = interventions_data[interventions_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_interventions.describe()

Unnamed: 0,stay_id,sedative_use,vasopressin_use,ventilation_use,dobutamine_use,dopamine_use,epinephrine_use,milrinone_use,norepinephrine_use,phenylephrine_use,vasopressor_use_sum,vasopressor_use
count,810.0,807.0,213.0,699.0,16.0,42.0,41.0,10.0,492.0,276.0,0.0,810.0
mean,34925710.0,0.589839,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
std,2896935.0,0.492168,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
min,30037710.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
25%,32362880.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
50%,34902890.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
75%,37361490.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
max,39999230.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0


## Divide the Non-Midodrine Group

In [146]:
cohort_nomid = cohort_table.loc[pd.isnull(cohort_table['drug_starttime'])]
cohort_nomid



Unnamed: 0,stay_id,first_careunit,subject_id,hadm_id,gender,ethnicity,admittime,dischtime,age,deathtime,...,icu_outtime,first_icu_stay,los_icu,los_hospital,drug_starttime,midodrine_exclude,midodrine_include,avg_doses_per_24_hrs,sepsis3,adm_weekday
0,38859960,Trauma SICU (TSICU),14470386,20124738,M,WHITE,2138-04-14 03:27:00,2138-05-08 15:31:00,44,NaT,...,2138-05-05 20:10:57,True,21.71,24,NaT,0,1,1.884892,True,2
1,30992197,Trauma SICU (TSICU),10670236,20938672,M,WHITE,2185-07-22 22:25:00,2185-08-28 16:40:00,32,NaT,...,2185-08-10 21:14:55,True,18.96,37,NaT,0,1,2.329787,True,6
2,31382786,Trauma SICU (TSICU),11975614,29023602,M,ASIAN,2181-06-06 17:18:00,2181-07-14 17:15:00,30,NaT,...,2181-06-29 22:27:28,True,23.21,38,NaT,0,1,1.966667,True,4
3,31617347,Trauma SICU (TSICU),17873103,27750553,M,WHITE,2169-06-29 04:57:00,2169-07-17 15:10:00,36,NaT,...,2169-07-11 17:39:27,True,12.46,18,NaT,0,1,2.571429,True,5
4,33521917,Trauma SICU (TSICU),11312502,25289892,F,BLACK/AFRICAN AMERICAN,2202-11-02 14:26:00,2202-12-03 16:55:00,62,NaT,...,2202-11-29 18:34:19,True,27.08,31,NaT,0,1,1.800000,True,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8828,39690469,Medical/Surgical Intensive Care Unit (MICU/SICU),18098524,21320524,F,WHITE,2120-10-30 16:54:00,2120-11-06 13:28:00,89,NaT,...,2120-11-02 18:50:45,True,2.88,7,NaT,0,1,1.636364,True,4
8829,39696826,Medical/Surgical Intensive Care Unit (MICU/SICU),10070932,24727163,F,WHITE,2146-05-11 18:07:00,2146-05-16 18:45:00,37,NaT,...,2146-05-13 09:54:29,True,1.54,5,NaT,0,1,1.613636,True,4
8830,39801252,Medical/Surgical Intensive Care Unit (MICU/SICU),16749537,24096764,M,WHITE,2127-09-09 22:13:00,2127-09-20 15:30:00,76,NaT,...,2127-09-11 18:51:49,True,1.75,11,NaT,0,1,1.423077,True,4
8831,39838873,Medical/Surgical Intensive Care Unit (MICU/SICU),15554479,20342520,M,ASIAN,2183-03-26 07:58:00,2183-03-30 16:19:00,49,NaT,...,2183-03-27 21:53:30,True,1.46,4,NaT,0,1,1.533333,True,4


In [None]:
nomid_weights = weight_data[weight_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_weights.describe()

Unnamed: 0,subject_id,stay_id,weight
count,34490.0,34490.0,33839.0
mean,14998120.0,34985270.0,80.556065
std,2888636.0,2892301.0,26.13854
min,10000030.0,30000150.0,1.0
25%,12491250.0,32454070.0,64.7
50%,15011060.0,34995290.0,77.0
75%,17505770.0,37491200.0,92.0
max,19999990.0,39999380.0,1120.0


In [None]:
nomid_vital_signs = vital_sign_data[vital_sign_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_vital_signs.describe()

Unnamed: 0,subject_id,stay_id,heart_rate_mean,sbp_mean,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean
count,34490.0,34490.0,34432.0,34349.0,34348.0,34418.0,34426.0,34183.0,34406.0,33522.0
mean,14998120.0,34985270.0,86.12402,120.722963,65.137353,79.786458,19.282335,36.880976,96.683751,245.965824
std,2888636.0,2892301.0,16.359629,17.023988,11.397151,11.546087,3.923651,0.543993,2.641833,7038.938604
min,10000030.0,30000150.0,29.807692,34.166667,17.0,24.071429,6.0,30.4,26.6,34.0
25%,12491250.0,32454070.0,74.057276,108.2,57.237395,71.625,16.5,36.63,95.541667,107.411111
50%,15011060.0,34995290.0,84.961538,119.115385,64.133333,78.653846,18.653846,36.856,96.9375,129.0
75%,17505770.0,37491200.0,96.963602,131.84,72.1163,86.9,21.444444,37.138333,98.3125,159.8
max,19999990.0,39999380.0,167.0,206.388889,131.0,151.529412,44.333333,40.104118,100.0,999999.0


In [None]:
nomid_labs = lab_tests_data[lab_tests_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_labs.describe()

Unnamed: 0,stay_id,hemoglobin,platelet,wbc,sodium,potassium,bicarbonate,chloride,bun,creatinine,lactate,ph,po2,pco2,bnp,troponin,creatinine_kinase
count,34490.0,33282.0,33289.0,33291.0,33488.0,33492.0,33468.0,33482.0,33479.0,33491.0,15287.0,18200.0,18201.0,18201.0,34490.0,34490.0,34490.0
mean,34985270.0,10.923399,212.665082,11.967207,138.556372,4.12112,23.095083,104.280951,24.002442,1.31948,2.309138,7.365645,119.222602,41.687248,0.029624,0.168826,0.25642
std,2892301.0,2.092942,108.247465,10.713998,4.907926,0.5801,4.388546,6.032193,20.534474,1.484395,2.051446,0.083457,74.604468,10.639249,0.157372,0.367397,0.428437
min,30000150.0,3.5,6.0,0.1,104.222222,1.6,3.0,67.0,1.0,0.1,0.2,6.72,14.0,10.4,0.0,0.0,0.0
25%,32454070.0,9.366667,145.0,7.5,136.0,3.733333,20.5,101.0,12.0,0.7,1.2,7.32,65.0,35.0,0.0,0.0,0.0
50%,34995290.0,10.9,199.0,10.4,139.0,4.05,23.0,104.5,17.5,0.9,1.7,7.373333,100.0,40.0,0.0,0.0,0.0
75%,37491200.0,12.4,261.0,14.15,141.0,4.4,25.5,108.0,28.0,1.3,2.6,7.42,156.0,46.0,0.0,0.0,0.666667
max,39999380.0,20.94,2436.0,474.7,179.0,9.6,49.0,152.25,228.0,31.95,26.66,7.69,587.0,228.5,1.0,1.0,1.0


In [None]:
nomid_comorbidities = comorbidities_data[comorbidities_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_comorbidities.describe()

Unnamed: 0,stay_id,CPD_or_COPD_rate,Maligancy_rate,mild_liver_rate,severe_liver_rate,Renal_rate,CAD_rate,AFIB_rate
count,34490.0,34490.0,34490.0,34490.0,34490.0,34490.0,34490.0,34490.0
mean,34985270.0,0.23372,0.161264,0.115802,0.04755,0.161264,0.158162,0.216005
std,2892301.0,0.423202,0.36778,0.319992,0.212815,0.36778,0.364898,0.411523
min,30000150.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32454070.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,34995290.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,37491200.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,39999380.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
nomid_basic = basic_data[basic_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_basic.describe()

Unnamed: 0,stay_id,sapsii,charlson_index
count,34490.0,25921.0,34490.0
mean,34985270.0,34.683153,4.996305
std,2892301.0,14.199745,2.953193
min,30000150.0,0.0,0.0
25%,32454070.0,25.0,3.0
50%,34995290.0,33.0,5.0
75%,37491200.0,43.0,7.0
max,39999380.0,114.0,19.0


In [None]:
nomid_mortality = mortality_data[mortality_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_mortality.describe()

Unnamed: 0,stay_id,mortality,mort_28_day
count,34490.0,34490.0,34490.0
mean,34985270.0,0.167121,0.107654
std,2892301.0,0.373089,0.309948
min,30000150.0,0.0,0.0
25%,32454070.0,0.0,0.0
50%,34995290.0,0.0,0.0
75%,37491200.0,0.0,0.0
max,39999380.0,1.0,1.0


In [None]:
nomid_interventions = interventions_data[interventions_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_interventions.describe()

Unnamed: 0,stay_id,sedative_use,vasopressin_use,ventilation_use,dobutamine_use,dopamine_use,epinephrine_use,milrinone_use,norepinephrine_use,phenylephrine_use,vasopressor_use_sum,vasopressor_use
count,34490.0,34306.0,1831.0,24813.0,222.0,579.0,544.0,91.0,5565.0,4157.0,0.0,34490.0
mean,34985270.0,0.388008,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
std,2892301.0,0.487303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
min,30000150.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
25%,32454070.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
50%,34995290.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
75%,37491200.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
max,39999380.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0


## Creating the combined feature cohort table

In [None]:
# define the treatment group flag
cohort_mid['midodrine'] = 1
cohort_nomid['midodrine'] = 0

# define if the patient is in MICU
cohort_mid['MICU'] = cohort_mid['first_careunit'].str.contains('MICU', case=False).astype(np.int32)
cohort_nomid['MICU'] = cohort_nomid['first_careunit'].str.contains('MICU').astype(np.int32)

# define if the patient is female
cohort_mid['Gender'] = (cohort_mid['gender']=='F').astype(np.int32)
cohort_nomid['Gender'] = (cohort_nomid['gender']=='F').astype(np.int32)

# select the needed feature
# midodrine injection group
mid_core = cohort_mid.loc[:, ['stay_id', 'hadm_id', 'subject_id', 'age', 'Gender','ethnicity', 'midodrine', 'avg_doses_per_24_hrs','MICU', 'icu_intime', 'adm_weekday']]
mid_merge_1 = pd.merge(mid_core, mid_weights, on='stay_id', how='left')
mid_merge_2 = pd.merge(mid_merge_1, mid_basic, on='stay_id', how='left')
mid_merge_3 = pd.merge(mid_merge_2, mid_comorbidities, on='stay_id', how='left')
mid_merge_4 = pd.merge(mid_merge_3, mid_labs, on='stay_id', how='left')
mid_merge_5 = pd.merge(mid_merge_4, mid_vital_signs, on='stay_id', how='left')
mid_merge_6 = pd.merge(mid_merge_5, mid_interventions.loc[:, ['stay_id', 'sedative_use', 'ventilation_use', 'vasopressor_use']], on='stay_id', how='left')
mid_group = pd.merge(mid_merge_6, mid_mortality, on='stay_id', how='left')

# no-midodrine injection group
nomid_core = cohort_nomid.loc[:, ['stay_id', 'hadm_id', 'subject_id', 'age', 'Gender', 'ethnicity', 'midodrine', 'avg_doses_per_24_hrs','MICU', 'icu_intime', 'adm_weekday']]
nomid_merge_1 = pd.merge(nomid_core, nomid_weights, on='stay_id', how='left')
nomid_merge_2 = pd.merge(nomid_merge_1, nomid_basic, on='stay_id', how='left')
nomid_merge_3 = pd.merge(nomid_merge_2, nomid_comorbidities, on='stay_id', how='left')
nomid_merge_4 = pd.merge(nomid_merge_3, nomid_labs, on='stay_id', how='left')
nomid_merge_5 = pd.merge(nomid_merge_4, nomid_vital_signs, on='stay_id', how='left')
nomid_merge_6 = pd.merge(nomid_merge_5, nomid_interventions.loc[:, ['stay_id', 'sedative_use', 'ventilation_use', 'vasopressor_use']], on='stay_id', how='left')
nomid_group = pd.merge(nomid_merge_6, nomid_mortality, on='stay_id', how='left')

# # concat the dataframe
table_cohort = pd.concat((mid_group, nomid_group), axis=0)

table_cohort['admit_hour'] = table_cohort['icu_intime'].dt.hour
table_cohort = table_cohort.drop(columns=['subject_id','subject_id_y','icu_intime'])
table_cohort.rename(columns = {'subject_id_x':'subject_id'}, inplace=True)

table_cohort



Unnamed: 0,stay_id,hadm_id,subject_id,age,Gender,ethnicity,midodrine,avg_doses_per_24_hrs,MICU,adm_weekday,...,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean,sedative_use,ventilation_use,vasopressor_use,mortality,mort_28_day,admit_hour
0,36014877,27162817,11861017,87,0,WHITE,1,1.595745,0,1,...,17.600000,36.416667,100.000000,247.375000,1.0,1.0,1,0,0,15
1,32272859,25963696,11259141,60,1,WHITE,1,1.633333,0,7,...,17.333333,36.898333,97.259259,120.200000,1.0,1.0,1,0,0,18
2,34023828,21782431,18010960,59,0,UNKNOWN,1,1.589041,0,1,...,15.392857,36.786667,96.629630,98.000000,1.0,1.0,1,0,0,15
3,37798199,29764966,10926537,42,1,BLACK/AFRICAN AMERICAN,1,1.568627,0,7,...,17.240000,37.142857,99.791667,95.000000,1.0,1.0,1,0,0,17
4,32309766,29272306,15703353,59,1,WHITE,1,1.517241,0,6,...,21.740741,37.372000,98.178571,136.800000,1.0,1.0,1,0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34485,39955170,24679700,12799579,55,1,WHITE,0,1.464286,1,7,...,24.148148,37.325000,94.115385,148.333333,0.0,1.0,1,0,0,19
34486,39977793,20968809,13886051,66,1,WHITE,0,1.407407,1,3,...,14.400000,36.646364,97.205882,96.333333,0.0,1.0,1,1,1,19
34487,39980385,21253505,11392990,73,0,WHITE,0,1.592593,1,5,...,19.605263,36.835000,93.605263,203.200000,0.0,1.0,1,1,0,6
34488,39985296,29295929,19632565,59,1,WHITE,0,1.782609,1,2,...,25.692308,36.603333,96.296296,173.400000,0.0,1.0,1,1,1,15


In [None]:
table_cohort.describe()



Unnamed: 0,stay_id,hadm_id,subject_id,age,Gender,midodrine,avg_doses_per_24_hrs,MICU,adm_weekday,weight,...,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean,sedative_use,ventilation_use,vasopressor_use,mortality,mort_28_day,admit_hour
count,35300.0,35300.0,35300.0,35300.0,35300.0,35300.0,35184.0,35300.0,35300.0,34620.0,...,35235.0,34986.0,35214.0,34326.0,35113.0,25512.0,35300.0,35300.0,35300.0,35300.0
mean,34983900.0,24976130.0,14999660.0,62.705694,0.46983,0.022946,1.622051,0.534334,3.992663,80.654058,...,19.282602,36.87833,96.686357,260.224735,0.392647,1.0,1.0,0.17255,0.111841,12.855581
std,2892380.0,2889905.0,2887420.0,18.424232,0.499096,0.149734,0.357446,0.498827,1.995172,26.148795,...,3.934879,0.543675,2.634797,7141.871173,0.488346,0.0,0.0,0.377863,0.315176,7.540701
min,30000150.0,20000350.0,10000030.0,18.0,0.0,0.0,0.0,0.0,1.0,1.0,...,6.0,30.4,26.6,34.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,32451690.0,22473890.0,12493610.0,51.0,0.0,0.0,1.4,0.0,2.0,64.7,...,16.5,36.626278,95.542857,107.333333,0.0,1.0,1.0,0.0,0.0,5.0
50%,34994330.0,24942000.0,15010410.0,64.0,0.0,0.0,1.575,1.0,4.0,77.1,...,18.653846,36.853333,96.941176,129.0,0.0,1.0,1.0,0.0,0.0,15.0
75%,37489200.0,27474730.0,17505400.0,77.0,1.0,0.0,1.794872,1.0,6.0,92.3,...,21.454545,37.135714,98.315789,159.75,1.0,1.0,1.0,0.0,0.0,19.0
max,39999380.0,29999620.0,19999990.0,102.0,1.0,1.0,9.25,1.0,7.0,1120.0,...,48.0,40.104118,100.0,999999.0,1.0,1.0,1.0,1.0,1.0,23.0


In [None]:
table_cohort.info() #Only lactate is below the 50% mark (17650)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35300 entries, 0 to 34489
Data columns (total 50 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   stay_id               35300 non-null  int64  
 1   hadm_id               35300 non-null  int64  
 2   subject_id            35300 non-null  int64  
 3   age                   35300 non-null  int64  
 4   ethnicity             35300 non-null  object 
 5   midodrine             35300 non-null  int64  
 6   avg_doses_per_24_hrs  35184 non-null  float64
 7   MICU                  35300 non-null  int32  
 8   adm_weekday           35300 non-null  int64  
 9   weight                34620 non-null  float64
 10  sapsii                26683 non-null  float64
 11  charlson_index        35300 non-null  float64
 12  CPD_or_COPD_rate      35300 non-null  float64
 13  Maligancy_rate        35300 non-null  float64
 14  mild_liver_rate       35300 non-null  float64
 15  severe_liver_rate  

# Save csv to drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
table_cohort.to_csv('/content/drive/MyDrive/table_cohort.csv', index=False)