<a href="https://colab.research.google.com/github/Jarvis-BITS/midodrine-mimic-iv/blob/main/notebook/01_cohort_data_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import libraries

In [1]:
from google.cloud import bigquery
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(rc={'figure.figsize':(16,10)}, font_scale=1.3)
import sys
import warnings
warnings.filterwarnings('ignore')

Authenticate with GCP account having MIMIC-IV dataset

In [2]:
from google.colab import auth

auth.authenticate_user()
print('Authenticated')

Authenticated


In [3]:
%load_ext google.colab.data_table

In [4]:
%load_ext google.cloud.bigquery

In [5]:
project_id = "mit-midodrine" #@param {type:"string"} #Enter your project ID from GCP bigquey here
bqclient = bigquery.Client(project=project_id)

# BigQuery data extraction of patient details in ICU (1st day)

In [129]:
population_string = """
WITH patient_details AS (
  SELECT
  ie.subject_id, ie.hadm_id, ie.stay_id
  , pat.gender, ie.ethnicity
  , ie.admittime, ie.dischtime
  , DATETIME_DIFF(ie.admittime, DATETIME(pat.anchor_year, 1, 1, 0, 0, 0), YEAR) + pat.anchor_age AS age
  , adm.deathtime
  , ie.hospital_expire_flag

-- icu level factors
  , ie.icu_intime, ie.icu_outtime
  , ie.first_icu_stay
  , ie.los_icu, ie.los_hospital
  FROM `physionet-data.mimic_derived.icustay_detail` ie
  INNER JOIN `physionet-data.mimic_core.admissions` adm
  ON ie.hadm_id = adm.hadm_id
  INNER JOIN `physionet-data.mimic_core.patients` pat
  ON ie.subject_id = pat.subject_id

-- midodrine cohort
  )
  , midodrine AS (
    SELECT hadm_id, pharmacy_id
    , starttime AS drug_starttime
    , stoptime AS drug_stoptime
    FROM `physionet-data.mimic_hosp.prescriptions`
    WHERE lower(drug) LIKE '%midodrine%'
  )
  , midodrine_exclude AS (
    SELECT stay_id,
    CASE WHEN LOGICAL_AND(drug_starttime < DATE_TRUNC(intime - interval '1' day, DAY) OR drug_starttime > outtime) THEN 1 
    ELSE 0 END AS midodrine_exclude
    FROM `physionet-data.mimic_icu.icustays` 
    LEFT JOIN midodrine USING (hadm_id)
    GROUP BY stay_id
  )
  , midodrine_include AS (
    SELECT stay_id,
    CASE WHEN LOGICAL_AND(drug_starttime is NULL)
    OR LOGICAL_OR(drug_starttime BETWEEN DATE_TRUNC(intime - interval '1' day, DAY) AND outtime) THEN 1
    ELSE 0 END AS midodrine_include
    FROM `physionet-data.mimic_icu.icustays`  
    LEFT JOIN midodrine USING (hadm_id)
    GROUP BY stay_id
  )
  , midodrine_first AS (
  SELECT stay_id, min(drug_starttime) AS drug_starttime 
  FROM `physionet-data.mimic_icu.icustays` LEFT JOIN midodrine mi USING (hadm_id)
  WHERE drug_starttime BETWEEN DATE_TRUNC(intime - interval '1' day, DAY) AND outtime
  GROUP BY stay_id
  )
 , midodrine_dose AS (
  SELECT ic.stay_id, AVG(per.doses_per_24_hrs) as avg_doses_per_24_hrs
  FROM `physionet-data.mimic_hosp.prescriptions` per
  INNER JOIN `physionet-data.mimic_icu.icustays`ic
  ON per.hadm_id = ic.hadm_id
  GROUP BY ic.stay_id 
  )

 , sepsis3 AS (
   SELECT stay_id, sepsis3
   FROM `physionet-data.mimic_derived.sepsis3`
 )

  , adm_weekday AS (
    SELECT stay_id,
      EXTRACT(DAYOFWEEK FROM intime) AS adm_weekday
    FROM `physionet-data.mimic_icu.icustays`
  )

  , population AS (
    SELECT *  
    FROM (SELECT DISTINCT stay_id, first_careunit, FROM `physionet-data.mimic_icu.icustays`) a
    LEFT JOIN patient_details USING (stay_id)
    LEFT JOIN midodrine_first USING (stay_id)
    LEFT JOIN midodrine_exclude USING (stay_id)
    LEFT JOIN midodrine_include USING (stay_id)
    LEFT JOIN midodrine_dose USING (stay_id)
    LEFT JOIN sepsis3 USING (stay_id)
    LEFT JOIN adm_weekday USING (stay_id)
    )
    SELECT * FROM population
"""

In [65]:
vasopressor_id = """
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.epinephrine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.dobutamine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.dopamine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.norepinephrine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM `physionet-data.mimic_derived.phenylephrine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.vasopressin`
"""

In [90]:
basic_string = """
WITH sapsii AS (
  SELECT stay_id, sapsii
  FROM `physionet-data.mimic_derived.sapsii`
  )
  
  , sirs AS(
  SELECT stay_id, sirs
  FROM `physionet-data.mimic_derived.sirs`
  )

  , sofa AS(
  SELECT stay_id, 
  CEILING(AVG(sofa_24hours)) as sofa_avg_24hrs
  FROM `physionet-data.mimic_derived.sofa`
  GROUP BY stay_id
  )

  , basic AS (
  SELECT *
  FROM (SELECT DISTINCT stay_id FROM `physionet-data.mimic_icu.icustays`)
  LEFT JOIN sapsii USING (stay_id)
  LEFT JOIN sirs USING (stay_id)
  LEFT JOIN sofa USING (stay_id)
  )
SELECT * FROM basic
"""

In [103]:
comorbidities_string = """
WITH charlson AS(
  SELECT subject_id, hadm_id
  , charlson_comorbidity_index
  , renal_diseASe AS Renal
  , severe_liver_diseASe AS severe_liver
  , mild_liver_diseASe AS mild_liver
  , chronic_pulmonary_diseASe AS CPD_or_COPD
  , malignant_cancer AS Maligancy
  , aids AS aids
  FROM `physionet-data.mimic_derived.charlson` char
)
, diag AS
(
  SELECT 
      hadm_id
      , CASE WHEN icd_version = 9 THEN icd_code ELSE NULL END AS icd9_code
      , CASE WHEN icd_version = 10 THEN icd_code ELSE NULL END AS icd10_code
  FROM `physionet-data.mimic_hosp.diagnoses_icd` diag
)
, addition AS(
  SELECT ad.hadm_id
  -- CAD
  , MAX(CASE WHEN
      icd9_code LIKE '414%'
      OR
      SUBSTR(icd10_code, 1, 4) = 'I251'
      THEN 1 
      ELSE 0 END) AS CAD
  -- AFIB
  , MAX(CASE WHEN
      icd9_code LIKE '4273%'
      OR
      SUBSTR(icd10_code, 1, 3) = 'I48'
      THEN 1 
      ELSE 0 END) AS AFIB
  FROM `physionet-data.mimic_core.admissions` ad
  LEFT JOIN diag
  ON ad.hadm_id = diag.hadm_id
  GROUP BY ad.hadm_id
)
, comorbidities AS(
  SELECT ic.stay_id
    , CEILING(AVG(charlson.charlson_comorbidity_index)) AS charlson_index
    , CEILING(AVG(charlson.CPD_or_COPD)) AS CPD_or_COPD
    , CEILING(AVG(charlson.Maligancy)) AS Maligancy
    , CEILING(AVG(charlson.mild_liver)) AS mild_liver
    , CEILING(AVG(charlson.severe_liver)) AS severe_liver
    , CEILING(AVG(charlson.Renal)) AS Renal
    , CEILING(AVG(charlson.aids)) AS aids
    , CEILING(AVG(addition.CAD)) AS CAD_rate
    , CEILING(AVG(addition.AFIB)) AS AFIB_rate
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN charlson USING (hadm_id)
  LEFT JOIN addition USING (hadm_id)
  GROUP BY ic.stay_id
)
SELECT * FROM comorbidities
"""

In [68]:
weight_string = """
SELECT  ie.stay_id
, AVG(CASE WHEN weight_type = 'admit' THEN ce.weight ELSE NULL END) AS weight
FROM `physionet-data.mimic_icu.icustays` ie
  -- admission weight
LEFT JOIN `physionet-data.mimic_derived.weight_durations` ce
    ON ie.stay_id = ce.stay_id
    -- we filter to weights documented during or before the 1st day
    AND ce.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
GROUP BY ie.subject_id, ie.stay_id
"""

In [69]:
vital_sign_string = """
-- This query pivots vital signs and aggregates them
-- for the first 24 hours of a patient's stay.
SELECT ie.stay_id
, AVG(heart_rate) AS heart_rate_mean
, AVG(sbp) AS sbp_mean
, AVG(dbp) AS dbp_mean
, AVG(mbp) AS mbp_mean
, AVG(resp_rate) AS resp_rate_mean
, AVG(temperature) AS temperature_mean
, AVG(spo2) AS spo2_mean
, AVG(glucose) AS glucose_mean
FROM `physionet-data.mimic_icu.icustays` ie
LEFT JOIN `physionet-data.mimic_derived.vitalsign` ce
    ON ie.stay_id = ce.stay_id
    AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
    AND ce.charttime <= DATETIME_ADD(ie.outtime, INTERVAL '6' HOUR)
GROUP BY ie.stay_id
"""

In [70]:
lab_tests_string = """
WITH blood AS (
  SELECT ic.stay_id
    , AVG(hemoglobin) AS hemoglobin
    , AVG(platelet) AS platelet
    , AVG(wbc) AS wbc
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN `physionet-data.mimic_derived.complete_blood_count` cbc 
    ON ic.hadm_id = cbc.hadm_id
    AND cbc.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
    AND cbc.charttime <= DATETIME_ADD(ic.outtime, INTERVAL '6' HOUR) 
  GROUP BY ic.stay_id
)
, chem AS (
  SELECT ic.stay_id
    , AVG(sodium) AS sodium
    , AVG(potassium) AS potassium
    , AVG(bicarbonate) AS bicarbonate
    , AVG(chloride) AS chloride
    , AVG(bun) AS bun
    , AVG(creatinine) AS creatinine
    FROM `physionet-data.mimic_icu.icustays` ic
    LEFT JOIN `physionet-data.mimic_derived.chemistry` chem
    ON ic.hadm_id = chem.hadm_id
    AND chem.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
    AND chem.charttime <= DATETIME_ADD(ic.outtime, INTERVAL '6' HOUR) 
    GROUP BY ic.stay_id
)
, bg AS (
  SELECT ic.stay_id
     , AVG(lactate) AS lactate
     , AVG(ph) AS ph
     , AVG(po2) AS po2
     , AVG(pco2) AS pco2
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN `physionet-data.mimic_derived.bg` bg
    ON ic.hadm_id = bg.hadm_id
    AND bg.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
    AND bg.charttime <= DATETIME_ADD(ic.outtime, INTERVAL '6' HOUR)
  GROUP BY ic.stay_id
  
)
, logical_tested_index AS (
  SELECT ic.stay_id
    , AVG(CASE WHEN ntprobnp is not null THEN 1 ELSE 0 END) AS bnp
    , AVG(CASE WHEN troponin_t is not null THEN 1 ELSE 0 END)AS troponin
    , AVG(CASE WHEN ck_mb is not null THEN 1 ELSE 0 END) AS creatinine_kinase
  FROM `physionet-data.mimic_icu.icustays` ic 
  LEFT JOIN `physionet-data.mimic_derived.cardiac_marker` cm 
  ON ic.hadm_id = cm.hadm_id
  AND cm.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
  AND cm.charttime <= DATETIME_ADD(ic.outtime, INTERVAL '6' HOUR) 
  GROUP BY ic.stay_id
)
, lab_tests AS (
  SELECT ic.stay_id
    , blood.hemoglobin, blood.platelet, blood.wbc
    , chem.sodium, chem.potassium, chem.bicarbonate, chem.chloride, chem.bun, chem.creatinine
    , bg.lactate, bg.ph, bg.po2, bg.pco2
    , lti.bnp, lti.troponin, lti.creatinine_kinase
  
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN blood USING (stay_id)
  LEFT JOIN chem USING (stay_id)
  LEFT JOIN bg USING (stay_id)
  LEFT JOIN logical_tested_index lti USING (stay_id)
)
SELECT * FROM lab_tests
"""

In [71]:
interventions_string = """
-- sedative extract
WITH sedative AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ip.itemid in (221668,221744,225972,225942,222168) THEN 1 ELSE 0 END) AS sedative_use
    FROM `physionet-data.mimic_icu.icustays` ic
    INNER JOIN `physionet-data.mimic_icu.inputevents` ip USING (stay_id)
    WHERE ip.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)

-- vasopressor
, vasopressin AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.vasopressin`) THEN 1 ELSE 0 END) AS vasopressin_use
    FROM `physionet-data.mimic_derived.vasopressin` iv
    INNER JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iv.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, dobutamine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.dobutamine`) THEN 1 ELSE 0 END) AS dobutamine_use
    FROM `physionet-data.mimic_derived.dobutamine` idb
    INNER JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE idb.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, dopamine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.dopamine`) THEN 1 ELSE 0 END) AS dopamine_use
    FROM `physionet-data.mimic_derived.dopamine` idp
    INNER JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE idp.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, epinephrine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.epinephrine`) THEN 1 ELSE 0 END) AS epinephrine_use
    FROM `physionet-data.mimic_derived.epinephrine` iep
    INNER JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iep.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, milrinone AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.milrinone`) THEN 1 ELSE 0 END) AS milrinone_use
    FROM `physionet-data.mimic_derived.milrinone` iml
    INNER JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iml.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, norepinephrine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.norepinephrine`) THEN 1 ELSE 0 END) AS norepinephrine_use
    FROM `physionet-data.mimic_derived.norepinephrine` inrp
    INNER JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE inrp.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, phenylephrine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.phenylephrine`) THEN 1 ELSE 0 END) AS phenylephrine_use
    FROM `physionet-data.mimic_derived.phenylephrine` iphe
    INNER JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iphe.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)

-- ventilation
, ventilation AS (
  SELECT ic.stay_id
    , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.ventilation`) THEN 1 ELSE 0 END) AS ventilation_use
    FROM `physionet-data.mimic_icu.icustays` ic
    INNER JOIN `physionet-data.mimic_derived.ventilation` ivn USING (stay_id)
    WHERE ivn.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)

, interventions as (
    SELECT *
    FROM (SELECT DISTINCT stay_id FROM `physionet-data.mimic_icu.icustays`) a
    LEFT JOIN sedative USING (stay_id)
    LEFT JOIN vasopressin USING (stay_id)
    LEFT JOIN ventilation USING (stay_id)
    LEFT JOIN dobutamine USING (stay_id)
    LEFT JOIN dopamine USING (stay_id)
    LEFT JOIN epinephrine USING (stay_id)
    LEFT JOIN milrinone USING (stay_id)
    LEFT JOIN norepinephrine USING (stay_id)
    LEFT JOIN phenylephrine USING (stay_id)
)

SELECT * FROM interventions;
"""

In [72]:
mortality_string = """
SELECT ic.stay_id,
MAX(CASE WHEN pa.dod <= DATETIME_ADD(ic.intime , INTERVAL '28' DAY) AND pa.dod >= ic.intime THEN 1 ELSE 0 END) AS mort_28_day
FROM `physionet-data.mimic_icu.icustays` ic
LEFT JOIN `physionet-data.mimic_core.patients` pa
ON ic.subject_id = pa.subject_id
GROUP BY ic.stay_id
"""

# Building cohort table

## Extract population info

In [130]:
population_data = (
    bqclient.query(population_string)
    .result()
    .to_dataframe()
)

In [131]:
population_data



Unnamed: 0,stay_id,first_careunit,subject_id,hadm_id,gender,ethnicity,admittime,dischtime,age,deathtime,...,icu_outtime,first_icu_stay,los_icu,los_hospital,drug_starttime,midodrine_exclude,midodrine_include,avg_doses_per_24_hrs,sepsis3,adm_weekday
0,34547665,Neuro Stepdown,12776735,20817525,M,OTHER,2200-07-11 22:46:00,2200-07-19 12:00:00,80,NaT,...,2200-07-13 16:44:40,True,1.67,8,NaT,0,1,2.120000,True,7
1,39289362,Neuro Stepdown,16256226,20013290,F,OTHER,2150-12-20 03:00:00,2150-12-21 14:50:00,49,NaT,...,2150-12-21 14:58:40,True,0.92,1,NaT,0,1,1.428571,,1
2,32563675,Neuro Stepdown,12974563,29618057,F,WHITE,2138-11-13 01:07:00,2138-11-15 15:53:00,72,NaT,...,2138-11-15 16:25:19,True,1.71,2,NaT,0,1,1.434783,,5
3,34947848,Neuro Stepdown,14609218,20606189,F,WHITE,2174-06-28 20:40:00,2174-07-05 16:45:00,69,NaT,...,2174-07-05 17:01:32,True,6.83,7,NaT,0,1,1.428571,,3
4,37445058,Neuro Stepdown,12687112,26132667,M,BLACK/AFRICAN AMERICAN,2162-05-31 15:36:00,2162-06-04 10:16:00,63,NaT,...,2162-06-04 10:16:13,True,3.67,4,NaT,0,1,1.666667,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76535,39980385,Medical/Surgical Intensive Care Unit (MICU/SICU),11392990,21253505,M,WHITE,2158-08-10 04:50:00,2158-08-13 17:45:00,73,NaT,...,2158-08-11 21:04:48,True,1.63,3,NaT,0,1,1.592593,True,5
76536,39985296,Medical/Surgical Intensive Care Unit (MICU/SICU),19632565,29295929,F,WHITE,2179-04-26 14:22:00,2179-05-02 16:00:00,59,2179-05-02 16:00:00,...,2179-05-02 21:34:48,True,6.25,6,NaT,0,1,1.666667,,2
76537,39987031,Medical/Surgical Intensive Care Unit (MICU/SICU),12762280,23194856,F,WHITE,2177-12-07 21:34:00,2177-12-16 17:43:00,82,NaT,...,2177-12-15 23:25:26,True,2.04,9,NaT,0,1,2.035714,,7
76538,39989040,Medical/Surgical Intensive Care Unit (MICU/SICU),18311244,22780979,M,WHITE,2187-04-04 23:57:00,2187-04-08 15:45:00,58,NaT,...,2187-04-05 15:28:10,True,0.54,4,NaT,0,1,1.727273,True,5


In [132]:
population_data.info()    

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76540 entries, 0 to 76539
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   stay_id               76540 non-null  int64         
 1   first_careunit        76540 non-null  object        
 2   subject_id            76540 non-null  int64         
 3   hadm_id               76540 non-null  int64         
 4   gender                76540 non-null  object        
 5   ethnicity             76540 non-null  object        
 6   admittime             76540 non-null  datetime64[ns]
 7   dischtime             76540 non-null  datetime64[ns]
 8   age                   76540 non-null  int64         
 9   deathtime             8813 non-null   datetime64[ns]
 10  hospital_expire_flag  76540 non-null  int64         
 11  icu_intime            76540 non-null  datetime64[ns]
 12  icu_outtime           76540 non-null  datetime64[ns]
 13  first_icu_stay  

## Building the cohort table (consisting of only ICU patients)
1. Include adults (>= 18 years' old)
2. Include the first ICU admission of each patient
3. Include patients whose ICU care is 'MICU' or 'SICU'
4. Exclude those who accepted midodrine more than one day before icu care or after icu care (have been done before)

Note: midodrine_include is just opposite of midodrine_exclude so use either of the two for this

Filtering on age

In [133]:
cohort_1 = population_data.loc[population_data['age'] >= 18]
cohort_1.shape

(76540, 22)

Filtering on first ICU admission

In [134]:
cohort_2 = cohort_1.loc[cohort_1['first_icu_stay']  == True]
cohort_2.shape

(69211, 22)

Filtering out if death/discharge before 24hrs of intime

In [135]:
# 24 hrs = 86400s ;(24x60x60)
cohort_3 = cohort_2.loc[((cohort_2['deathtime'] - cohort_2['icu_intime']).dt.total_seconds() > 86400) | (cohort_2['deathtime'].isnull())] # Passed away after 24hrs or alive
cohort_3 = cohort_3.loc[((cohort_3['icu_outtime'] - cohort_3['icu_intime']).dt.total_seconds() > 86400)] # Discharged in 24hrs 
cohort_3 = cohort_3.loc[((cohort_3['dischtime'] - cohort_3['icu_intime']).dt.total_seconds() > 86400)] # Discharged in 24hrs 
cohort_3.shape

(54187, 22)

 Filtering in only MICU & SICU first care units

In [136]:
cohort_4 = cohort_3.loc[cohort_3['first_careunit'].str.contains("micu|sicu", case=False)]
cohort_4.shape

(36115, 22)

Filtering on sepsis 

In [137]:
cohort_5 = cohort_4.loc[cohort_4['sepsis3']  == True]
cohort_5.shape

(19915, 22)

Filtering on vasopressor treatments

In [138]:
vaso_id = (
    bqclient.query(vasopressor_id)
    .result()
    .to_dataframe()   
)

In [139]:
cohort_6 = pd.merge(cohort_5, vaso_id, on='stay_id', how='inner')
cohort_6.shape

(8834, 22)

Filtering on midodrine ***perscribed*** for first time

In [140]:
cohort_table = cohort_6.loc[cohort_6['midodrine_include'] == 1]
cohort_table.shape

(8674, 22)

## Extract the weight

In [141]:
weight_data = (
    bqclient.query(weight_string)
    .result()
    .to_dataframe()
)

In [142]:
weight_data



Unnamed: 0,stay_id,weight
0,39289362,54.0
1,32563675,90.7
2,34947848,53.9
3,37445058,119.1
4,30056748,44.0
...,...,...
76535,38721510,79.0
76536,30062692,79.0
76537,31908710,79.0
76538,32159208,79.0


## Extract the basic score (charlson, sapsii)

In [91]:
basic_data = (
    bqclient.query(basic_string)
    .result()
    .to_dataframe()
)

In [99]:
basic_data.describe()

Unnamed: 0,stay_id,sapsii,sirs,sofa_avg_24hrs
count,76540.0,76540.0,76540.0,76519.0
mean,34994800.0,34.803906,2.485367,3.77897
std,2888755.0,13.911267,0.964767,2.681072
min,30000150.0,0.0,0.0,0.0
25%,32492320.0,25.0,2.0,2.0
50%,34996600.0,33.0,3.0,3.0
75%,37492220.0,42.0,3.0,5.0
max,39999810.0,114.0,4.0,19.0


## Extract the mortality

In [88]:
mortality_data = (
    bqclient.query(mortality_string)
    .result()
    .to_dataframe()
)

In [89]:
mortality_data



Unnamed: 0,stay_id,mort_28_day
0,34547665,0
1,39289362,0
2,32563675,0
3,34947848,0
4,37445058,0
...,...,...
76535,39867006,1
76536,39876847,1
76537,39977793,1
76538,39985296,1


## Extract the Comorbidities

In [104]:
comorbidities_data = (
    bqclient.query(comorbidities_string)
    .result()
    .to_dataframe()
)

In [105]:
comorbidities_data.describe()

Unnamed: 0,stay_id,charlson_index,CPD_or_COPD,Maligancy,mild_liver,severe_liver,Renal,aids,CAD_rate,AFIB_rate
count,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0
mean,34994800.0,5.621936,0.259211,0.131761,0.119036,0.055344,0.218853,0.00729,0.293977,0.294617
std,2888755.0,3.027045,0.438204,0.338233,0.323833,0.228651,0.413471,0.085072,0.455585,0.455873
min,30000150.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32492320.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,34996600.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,37492220.0,8.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
max,39999810.0,20.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Extract Vital Signs

In [97]:
vital_sign_data = (
    bqclient.query(vital_sign_string)
    .result()
    .to_dataframe()
)

In [100]:
vital_sign_data.describe()

Unnamed: 0,stay_id,heart_rate_mean,sbp_mean,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean
count,76540.0,76516.0,76385.0,76384.0,76433.0,76469.0,75796.0,76425.0,74653.0
mean,34994800.0,84.508135,119.74036,64.132143,79.081547,19.285836,36.831628,96.529878,206.352611
std,2888755.0,14.450425,16.161016,10.977594,10.955955,3.560719,0.446769,2.3841,4389.862308
min,30000150.0,26.45,34.166667,17.0,24.071429,5.0,26.67,26.6,11.0
25%,32492320.0,74.320385,108.313433,56.563039,71.5,16.767442,36.625663,95.580645,111.0
50%,34996600.0,83.575758,118.052632,63.080323,77.871795,18.841463,36.82069,96.78125,128.337662
75%,37492220.0,93.882353,129.822222,70.769546,85.662587,21.348,37.048333,97.89881,154.375
max,39999810.0,182.0,215.857143,190.0,198.0,48.0,40.055,100.0,999999.0


## Extract Lab Tests Results

In [101]:
lab_tests_data = (
    bqclient.query(lab_tests_string)
    .result()
    .to_dataframe()
)

In [102]:
lab_tests_data.describe()

Unnamed: 0,stay_id,hemoglobin,platelet,wbc,sodium,potassium,bicarbonate,chloride,bun,creatinine,lactate,ph,po2,pco2,bnp,troponin,creatinine_kinase
count,76540.0,74266.0,74274.0,74266.0,74530.0,74544.0,74492.0,74530.0,74515.0,74529.0,41640.0,46625.0,46631.0,46627.0,76540.0,76540.0,76540.0
mean,34994800.0,10.397542,207.755678,11.457127,138.522511,4.137333,24.071149,103.622209,25.881182,1.401199,2.143649,7.377911,133.070779,42.12413,0.034679,0.211247,0.277394
std,2888755.0,1.945643,107.110567,8.352902,4.392457,0.493912,4.272149,5.67109,20.479055,1.458355,1.758683,0.072448,81.360539,10.160781,0.166785,0.397493,0.435354
min,30000150.0,2.4,5.0,0.1,97.0,1.4,3.0,66.0,1.0,0.1,0.05,6.49,13.0,0.0,0.0,0.0,0.0
25%,32492320.0,8.9,138.475,7.65,136.053363,3.8,21.7,100.5,13.0,0.7,1.233333,7.343333,71.318452,36.285714,0.0,0.0,0.0
50%,34996600.0,10.183333,191.083333,10.3,138.666667,4.08,24.0,104.0,19.0,0.933333,1.7,7.383333,111.333333,40.571429,0.0,0.0,0.0
75%,37492220.0,11.7,256.0,13.633333,141.0,4.4,26.272727,107.0,31.666667,1.425,2.4,7.422105,181.0,45.571429,0.0,0.0,0.833333
max,39999810.0,20.94,2133.0,474.7,179.0,10.0,49.0,145.0,274.285714,31.95,26.66,7.69,2155.5,228.5,1.0,1.0,1.0


## Extract interventions

In [110]:
interventions_data = (
    bqclient.query(interventions_string)
    .result()
    .to_dataframe()
)

In [111]:
interventions_data



Unnamed: 0,stay_id,sedative_use,vasopressin_use,ventilation_use,dobutamine_use,dopamine_use,epinephrine_use,milrinone_use,norepinephrine_use,phenylephrine_use
0,34547665,0.0,,,,,,,,
1,39289362,0.0,,,,,,,,
2,32563675,0.0,,1.0,,,,,,
3,34947848,0.0,,,,,,,,
4,37445058,0.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
76535,39980385,0.0,,1.0,,,,,,
76536,39985296,0.0,,1.0,,,,,,
76537,39987031,0.0,,1.0,,,,,,
76538,39989040,0.0,,1.0,,,,,,


## Divide the Midodrine Group

In [143]:
cohort_mid = cohort_table.loc[pd.notnull(cohort_table['drug_starttime'])]
cohort_mid



Unnamed: 0,stay_id,first_careunit,subject_id,hadm_id,gender,ethnicity,admittime,dischtime,age,deathtime,...,icu_outtime,first_icu_stay,los_icu,los_hospital,drug_starttime,midodrine_exclude,midodrine_include,avg_doses_per_24_hrs,sepsis3,adm_weekday
7,36014877,Trauma SICU (TSICU),11861017,27162817,M,WHITE,2189-03-15 15:47:00,2189-05-06 17:00:00,87,NaT,...,2189-05-01 01:33:42,True,46.42,52,2189-04-17 16:00:00,0,1,1.605166,True,1
8,32272859,Trauma SICU (TSICU),11259141,25963696,F,WHITE,2184-07-05 18:16:00,2184-08-08 15:59:00,60,NaT,...,2184-08-03 18:51:35,True,24.00,34,2184-07-23 14:00:00,0,1,1.594406,True,7
12,37039117,Medical Intensive Care Unit (MICU),10184327,21396430,M,WHITE,2138-10-20 18:55:00,2138-11-01 03:00:00,89,2138-11-01 03:00:00,...,2138-11-01 04:40:00,True,11.33,12,2138-10-22 07:00:00,0,1,1.087500,True,2
19,34023828,Neuro Surgical Intensive Care Unit (Neuro SICU),18010960,21782431,M,UNKNOWN,2167-04-26 14:05:00,2167-05-18 13:40:00,59,NaT,...,2167-05-15 14:29:56,True,18.96,22,2167-05-10 22:00:00,0,1,1.719512,True,1
32,32309766,Neuro Surgical Intensive Care Unit (Neuro SICU),15703353,29272306,F,WHITE,2181-05-17 22:14:00,2181-06-03 16:30:00,59,NaT,...,2181-05-26 23:37:40,True,8.83,17,2181-05-22 16:00:00,0,1,1.523810,True,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8783,37823328,Medical/Surgical Intensive Care Unit (MICU/SICU),10481056,21921090,M,UNKNOWN,2122-12-19 09:16:00,2122-12-31 14:32:00,63,NaT,...,2122-12-21 18:11:46,True,2.38,12,2122-12-20 23:00:00,0,1,1.288889,True,7
8786,37888531,Medical/Surgical Intensive Care Unit (MICU/SICU),15885972,29782542,F,WHITE,2172-07-30 17:16:00,2172-08-13 18:25:00,61,NaT,...,2172-08-10 01:46:14,True,5.29,14,2172-08-08 14:00:00,0,1,1.553571,True,3
8807,38743934,Medical/Surgical Intensive Care Unit (MICU/SICU),19894790,27933693,M,WHITE,2140-08-25 17:58:00,2140-09-13 18:00:00,84,2140-09-13 18:00:00,...,2140-09-11 16:02:05,True,16.88,19,2140-09-01 14:00:00,0,1,1.868421,True,5
8826,39446578,Medical/Surgical Intensive Care Unit (MICU/SICU),14781720,23795457,F,WHITE,2189-01-07 21:03:00,2189-01-26 15:11:00,92,NaT,...,2189-01-26 15:11:42,True,4.08,19,2189-01-26 14:00:00,0,1,1.365385,True,5


In [113]:
mid_weights = weight_data[weight_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_weights.describe()

Unnamed: 0,stay_id,weight
count,854.0,826.0
mean,35015810.0,85.078571
std,2870589.0,26.483402
min,30045620.0,26.0
25%,32476020.0,67.425
50%,35103080.0,80.0
75%,37314370.0,100.0
max,39999230.0,230.0


In [114]:
mid_vital_signs = vital_sign_data[vital_sign_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_vital_signs.describe()

Unnamed: 0,stay_id,heart_rate_mean,sbp_mean,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean
count,854.0,854.0,854.0,854.0,854.0,854.0,854.0,854.0,854.0
mean,35015810.0,85.813348,106.483085,56.543872,70.861992,19.761355,36.820497,96.882892,382.403325
std,2870589.0,13.044908,10.98211,7.317192,7.137724,3.350113,0.360417,1.889708,3349.840697
min,30045620.0,50.216292,75.368421,37.336478,53.266204,11.809859,35.54,68.1,60.75
25%,32476020.0,76.5,99.153737,51.591331,66.192315,17.512934,36.612242,96.004359,114.806838
50%,35103080.0,85.59462,105.304535,55.822646,70.589829,19.501607,36.805864,97.059169,135.038462
75%,37314370.0,95.261773,112.370821,61.119795,74.833494,21.946596,37.024713,98.047679,162.165613
max,39999230.0,124.914634,163.078886,91.552204,111.5625,30.106383,38.13,99.816901,62595.625


In [115]:
mid_labs = lab_tests_data[lab_tests_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_labs.describe()

Unnamed: 0,stay_id,hemoglobin,platelet,wbc,sodium,potassium,bicarbonate,chloride,bun,creatinine,lactate,ph,po2,pco2,bnp,troponin,creatinine_kinase
count,854.0,853.0,853.0,853.0,853.0,853.0,853.0,853.0,853.0,853.0,756.0,785.0,785.0,785.0,854.0,854.0,854.0
mean,35015810.0,8.983416,165.425185,12.851495,138.15786,4.13443,22.853861,101.890779,39.092911,2.361351,2.422832,7.366701,92.143187,41.734631,0.05651,0.406807,0.396568
std,2870589.0,1.300046,107.783595,6.118957,4.56724,0.455456,4.400645,5.880274,24.375165,1.795841,1.623659,0.062554,39.927184,8.40245,0.199479,0.472469,0.459613
min,30045620.0,6.685714,12.8,0.733333,121.625,3.06,10.25,83.4,3.8,0.106667,0.4,7.122857,21.0,18.818182,0.0,0.0,0.0
25%,32476020.0,8.023077,77.0,8.788889,135.333333,3.833333,20.0,97.807947,21.454545,1.019643,1.448214,7.328846,61.5,36.0,0.0,0.0,0.0
50%,35103080.0,8.72,141.0,11.671429,138.0,4.1,22.75,101.75,34.3,1.91,2.0,7.37,91.3125,40.85,0.0,0.0,0.0
75%,37314370.0,9.662857,224.384615,16.085714,141.144928,4.382464,25.619048,105.473684,50.368421,3.157143,2.843023,7.41,113.214286,46.213483,0.0,1.0,1.0
max,39999230.0,15.175,605.074074,53.944444,154.454545,6.22,41.684211,121.857143,198.142857,12.375,15.717391,7.57,327.0,81.254902,1.0,1.0,1.0


In [116]:
mid_comorbidities = comorbidities_data[comorbidities_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_comorbidities.describe()

Unnamed: 0,stay_id,charlson_index,CPD_or_COPD,Maligancy,mild_liver,severe_liver,Renal,aids,CAD_rate,AFIB_rate
count,854.0,854.0,854.0,854.0,854.0,854.0,854.0,854.0,854.0,854.0
mean,35015810.0,7.224824,0.264637,0.156909,0.428571,0.348946,0.411007,0.017564,0.240047,0.384075
std,2870589.0,2.859925,0.441398,0.363928,0.495162,0.476916,0.492305,0.131439,0.427362,0.486661
min,30045620.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32476020.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,35103080.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,37314370.0,9.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0
max,39999230.0,18.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [117]:
mid_basic = basic_data[basic_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_basic.describe()

Unnamed: 0,stay_id,sapsii,sirs,sofa_avg_24hrs
count,854.0,854.0,854.0,854.0
mean,35015810.0,46.139344,2.779859,8.093677
std,2870589.0,13.81168,0.89849,3.399625
min,30045620.0,6.0,0.0,2.0
25%,32476020.0,36.0,2.0,5.0
50%,35103080.0,45.0,3.0,8.0
75%,37314370.0,55.0,3.0,11.0
max,39999230.0,95.0,4.0,19.0


In [118]:
mid_mortality = mortality_data[mortality_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_mortality.describe()

Unnamed: 0,stay_id,mort_28_day
count,854.0,854.0
mean,35015810.0,0.322014
std,2870589.0,0.467522
min,30045620.0,0.0
25%,32476020.0,0.0
50%,35103080.0,0.0
75%,37314370.0,1.0
max,39999230.0,1.0


In [119]:
mid_interventions = interventions_data[interventions_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_interventions.describe()

Unnamed: 0,stay_id,sedative_use,vasopressin_use,ventilation_use,dobutamine_use,dopamine_use,epinephrine_use,milrinone_use,norepinephrine_use,phenylephrine_use
count,854.0,854.0,316.0,797.0,30.0,57.0,62.0,11.0,762.0,355.0
mean,35015810.0,0.693208,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,2870589.0,0.461432,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,30045620.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,32476020.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
50%,35103080.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
75%,37314370.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,39999230.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Divide the Non-Midodrine Group

In [144]:
cohort_nomid = cohort_table.loc[pd.isnull(cohort_table['drug_starttime'])]
cohort_nomid



Unnamed: 0,stay_id,first_careunit,subject_id,hadm_id,gender,ethnicity,admittime,dischtime,age,deathtime,...,icu_outtime,first_icu_stay,los_icu,los_hospital,drug_starttime,midodrine_exclude,midodrine_include,avg_doses_per_24_hrs,sepsis3,adm_weekday
0,38859960,Trauma SICU (TSICU),14470386,20124738,M,WHITE,2138-04-14 03:27:00,2138-05-08 15:31:00,44,NaT,...,2138-05-05 20:10:57,True,21.71,24,NaT,0,1,1.965909,True,2
1,30992197,Trauma SICU (TSICU),10670236,20938672,M,WHITE,2185-07-22 22:25:00,2185-08-28 16:40:00,32,NaT,...,2185-08-10 21:14:55,True,18.96,37,NaT,0,1,2.272727,True,6
2,31382786,Trauma SICU (TSICU),11975614,29023602,M,ASIAN,2181-06-06 17:18:00,2181-07-14 17:15:00,30,NaT,...,2181-06-29 22:27:28,True,23.21,38,NaT,0,1,2.088757,True,4
3,31617347,Trauma SICU (TSICU),17873103,27750553,M,WHITE,2169-06-29 04:57:00,2169-07-17 15:10:00,36,NaT,...,2169-07-11 17:39:27,True,12.46,18,NaT,0,1,2.415385,True,5
4,33521917,Trauma SICU (TSICU),11312502,25289892,F,BLACK/AFRICAN AMERICAN,2202-11-02 14:26:00,2202-12-03 16:55:00,62,NaT,...,2202-11-29 18:34:19,True,27.08,31,NaT,0,1,1.819149,True,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8828,39690469,Medical/Surgical Intensive Care Unit (MICU/SICU),18098524,21320524,F,WHITE,2120-10-30 16:54:00,2120-11-06 13:28:00,89,NaT,...,2120-11-02 18:50:45,True,2.88,7,NaT,0,1,1.644444,True,4
8829,39696826,Medical/Surgical Intensive Care Unit (MICU/SICU),10070932,24727163,F,WHITE,2146-05-11 18:07:00,2146-05-16 18:45:00,37,NaT,...,2146-05-13 09:54:29,True,1.54,5,NaT,0,1,1.571429,True,4
8830,39801252,Medical/Surgical Intensive Care Unit (MICU/SICU),16749537,24096764,M,WHITE,2127-09-09 22:13:00,2127-09-20 15:30:00,76,NaT,...,2127-09-11 18:51:49,True,1.75,11,NaT,0,1,1.389831,True,4
8831,39838873,Medical/Surgical Intensive Care Unit (MICU/SICU),15554479,20342520,M,ASIAN,2183-03-26 07:58:00,2183-03-30 16:19:00,49,NaT,...,2183-03-27 21:53:30,True,1.46,4,NaT,0,1,1.550000,True,4


In [121]:
nomid_weights = weight_data[weight_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_weights.describe()

Unnamed: 0,stay_id,weight
count,7820.0,7672.0
mean,34973580.0,82.243365
std,2899115.0,26.848527
min,30000480.0,1.0
25%,32415820.0,65.1
50%,34976440.0,78.0
75%,37512760.0,94.2
max,39998010.0,833.0


In [122]:
nomid_vital_signs = vital_sign_data[vital_sign_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_vital_signs.describe()

Unnamed: 0,stay_id,heart_rate_mean,sbp_mean,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean
count,7820.0,7820.0,7813.0,7813.0,7820.0,7820.0,7799.0,7820.0,7814.0
mean,34973580.0,88.764671,115.360835,61.291565,76.598925,20.499938,36.966497,96.620675,201.991644
std,2899115.0,13.88974,13.667315,8.661196,8.958495,3.634133,0.514373,2.173646,1858.582365
min,30000480.0,43.378378,69.227273,22.5,44.255319,9.428571,32.1,54.44,67.5
25%,32415820.0,78.994027,105.681416,55.49345,70.365909,17.897784,36.694514,95.733439,115.370635
50%,34976440.0,88.284507,113.74692,60.820225,75.818927,20.262829,36.956441,96.923848,134.486842
75%,37512760.0,98.096238,123.697674,66.864017,81.992383,22.857318,37.276329,97.991685,164.0
max,39998010.0,149.310345,185.16,108.661905,122.440758,38.057692,39.674783,100.0,111218.444444


In [123]:
nomid_labs = lab_tests_data[lab_tests_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_labs.describe()

Unnamed: 0,stay_id,hemoglobin,platelet,wbc,sodium,potassium,bicarbonate,chloride,bun,creatinine,lactate,ph,po2,pco2,bnp,troponin,creatinine_kinase
count,7820.0,7810.0,7810.0,7811.0,7813.0,7813.0,7813.0,7813.0,7812.0,7813.0,6803.0,7089.0,7090.0,7089.0,7820.0,7820.0,7820.0
mean,34973580.0,9.650691,205.192168,13.348212,139.235791,4.101537,23.183047,104.709961,31.644373,1.579823,2.345184,7.362567,103.922294,41.979913,0.062875,0.371712,0.441128
std,2899115.0,1.596356,116.810296,8.430888,4.693769,0.467765,4.748376,5.957325,22.295429,1.402702,1.967436,0.068742,48.254903,9.232226,0.213645,0.463495,0.470214
min,30000480.0,4.0,8.421053,0.1,114.433333,2.6,4.0,80.444444,1.2,0.1,0.3,6.9825,17.0,16.375,0.0,0.0,0.0
25%,32415820.0,8.442857,123.0,8.720192,136.444444,3.792308,20.25,101.0,15.771422,0.733333,1.270714,7.324545,71.75,36.0,0.0,0.0,0.0
50%,34976440.0,9.408333,187.66092,11.86,139.166667,4.02,23.088235,104.794118,25.0,1.1,1.772917,7.369091,98.588123,40.636364,0.0,0.0,0.0
75%,37512760.0,10.569167,265.5,15.94,142.0,4.32,26.0,108.3125,41.36875,1.895652,2.633333,7.41,127.983333,46.2,0.0,1.0,1.0
max,39998010.0,18.12,1156.818182,208.966667,167.5,7.6,44.75,139.769231,187.2,16.8,23.553333,7.59,442.0,95.571429,1.0,1.0,1.0


In [124]:
nomid_comorbidities = comorbidities_data[comorbidities_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_comorbidities.describe()

Unnamed: 0,stay_id,charlson_index,CPD_or_COPD,Maligancy,mild_liver,severe_liver,Renal,aids,CAD_rate,AFIB_rate
count,7820.0,7820.0,7820.0,7820.0,7820.0,7820.0,7820.0,7820.0,7820.0,7820.0
mean,34973580.0,6.032737,0.288619,0.175448,0.169949,0.074425,0.227494,0.01087,0.212404,0.314322
std,2899115.0,3.031143,0.453149,0.380374,0.375612,0.262477,0.419241,0.103696,0.409035,0.464275
min,30000480.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32415820.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,34976440.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,37512760.0,8.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,39998010.0,20.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [125]:
nomid_basic = basic_data[basic_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_basic.describe()

Unnamed: 0,stay_id,sapsii,sirs,sofa_avg_24hrs
count,7820.0,7820.0,7820.0,7820.0
mean,34973580.0,44.343734,3.013299,6.145269
std,2899115.0,14.741209,0.844065,2.923656
min,30000480.0,6.0,0.0,1.0
25%,32415820.0,34.0,3.0,4.0
50%,34976440.0,43.0,3.0,6.0
75%,37512760.0,53.0,4.0,8.0
max,39998010.0,114.0,4.0,19.0


In [126]:
nomid_mortality = mortality_data[mortality_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_mortality.describe()

Unnamed: 0,stay_id,mort_28_day
count,7820.0,7820.0
mean,34973580.0,0.249872
std,2899115.0,0.432967
min,30000480.0,0.0
25%,32415820.0,0.0
50%,34976440.0,0.0
75%,37512760.0,0.0
max,39998010.0,1.0


In [127]:
nomid_interventions = interventions_data[interventions_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_interventions.describe()

Unnamed: 0,stay_id,sedative_use,vasopressin_use,ventilation_use,dobutamine_use,dopamine_use,epinephrine_use,milrinone_use,norepinephrine_use,phenylephrine_use
count,7820.0,7820.0,1833.0,7247.0,244.0,493.0,403.0,89.0,6116.0,3509.0
mean,34973580.0,0.73734,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,2899115.0,0.440107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,30000480.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,32415820.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
50%,34976440.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
75%,37512760.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,39998010.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Creating the combined feature cohort table

In [149]:
# define the treatment group flag
cohort_mid['midodrine'] = 1
cohort_nomid['midodrine'] = 0

# define if the patient is in MICU
cohort_mid['MICU'] = cohort_mid['first_careunit'].str.contains('MICU', case=False).astype(np.int32)
cohort_nomid['MICU'] = cohort_nomid['first_careunit'].str.contains('MICU').astype(np.int32)

# define if the patient is female
cohort_mid['Gender'] = (cohort_mid['gender']=='F').astype(np.int32)
cohort_nomid['Gender'] = (cohort_nomid['gender']=='F').astype(np.int32)

# select the needed feature
# midodrine injection group
mid_core = cohort_mid.loc[:, ['stay_id', 'hadm_id', 'subject_id', 'age', 'Gender','ethnicity', 'midodrine', 'avg_doses_per_24_hrs','MICU', 'icu_intime', 'adm_weekday']]
mid_merge_1 = pd.merge(mid_core, mid_weights, on='stay_id', how='left')
mid_merge_2 = pd.merge(mid_merge_1, mid_basic, on='stay_id', how='left')
mid_merge_3 = pd.merge(mid_merge_2, mid_comorbidities, on='stay_id', how='left')
mid_merge_4 = pd.merge(mid_merge_3, mid_labs, on='stay_id', how='left')
mid_merge_5 = pd.merge(mid_merge_4, mid_vital_signs, on='stay_id', how='left')
mid_merge_6 = pd.merge(mid_merge_5, mid_interventions.loc[:, ['stay_id', 'sedative_use', 'ventilation_use']], on='stay_id', how='left')
mid_group = pd.merge(mid_merge_6, mid_mortality, on='stay_id', how='left')

# no-midodrine injection group
nomid_core = cohort_nomid.loc[:, ['stay_id', 'hadm_id', 'subject_id', 'age', 'Gender', 'ethnicity', 'midodrine', 'avg_doses_per_24_hrs','MICU', 'icu_intime', 'adm_weekday']]
nomid_merge_1 = pd.merge(nomid_core, nomid_weights, on='stay_id', how='left')
nomid_merge_2 = pd.merge(nomid_merge_1, nomid_basic, on='stay_id', how='left')
nomid_merge_3 = pd.merge(nomid_merge_2, nomid_comorbidities, on='stay_id', how='left')
nomid_merge_4 = pd.merge(nomid_merge_3, nomid_labs, on='stay_id', how='left')
nomid_merge_5 = pd.merge(nomid_merge_4, nomid_vital_signs, on='stay_id', how='left')
nomid_merge_6 = pd.merge(nomid_merge_5, nomid_interventions.loc[:, ['stay_id', 'sedative_use', 'ventilation_use']], on='stay_id', how='left')
nomid_group = pd.merge(nomid_merge_6, nomid_mortality, on='stay_id', how='left')

# # concat the dataframe
table_cohort = pd.concat((mid_group, nomid_group), axis=0)

table_cohort['admit_hour'] = table_cohort['icu_intime'].dt.hour

table_cohort



Unnamed: 0,stay_id,hadm_id,subject_id,age,Gender,ethnicity,midodrine,avg_doses_per_24_hrs,MICU,icu_intime,...,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean,sedative_use,ventilation_use,mort_28_day,admit_hour
0,36014877,27162817,11861017,87,0,WHITE,1,1.605166,0,2189-03-15 15:48:40,...,64.691426,79.174745,22.680266,37.116512,99.571549,179.721983,1.0,1.0,0,15
1,32272859,25963696,11259141,60,1,WHITE,1,1.594406,0,2184-07-10 18:39:19,...,71.675633,81.397152,21.641196,37.124694,97.346535,124.652542,1.0,1.0,0,18
2,37039117,21396430,10184327,89,0,WHITE,1,1.087500,1,2138-10-20 20:25:00,...,61.787234,74.797872,19.133197,37.195846,97.771028,193.470588,1.0,1.0,1,20
3,34023828,21782431,18010960,59,0,UNKNOWN,1,1.719512,0,2167-04-26 15:52:00,...,64.720982,84.488839,17.865471,37.258167,96.552809,117.967033,1.0,1.0,0,15
4,32309766,29272306,15703353,59,1,WHITE,1,1.523810,0,2181-05-18 03:10:39,...,57.630303,75.487654,20.508824,37.435862,97.309942,188.555556,1.0,1.0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7815,39690469,21320524,18098524,89,1,WHITE,0,1.644444,1,2120-10-30 21:30:12,...,52.400000,63.692308,27.923077,36.654118,96.353846,100.250000,0.0,1.0,0,21
7816,39696826,24727163,10070932,37,1,WHITE,0,1.571429,1,2146-05-11 20:27:00,...,59.596154,68.250000,20.684211,38.076923,97.342105,111.500000,0.0,,0,20
7817,39801252,24096764,16749537,76,0,WHITE,0,1.389831,1,2127-09-10 00:31:00,...,56.677966,73.906780,16.704545,36.961538,97.558140,179.181818,1.0,1.0,0,0
7818,39838873,20342520,15554479,49,0,ASIAN,0,1.550000,1,2183-03-26 10:18:00,...,72.911765,80.352941,24.054054,36.185556,97.702703,124.000000,0.0,,0,10


In [150]:
table_cohort.describe()



Unnamed: 0,stay_id,hadm_id,subject_id,age,Gender,midodrine,avg_doses_per_24_hrs,MICU,adm_weekday,weight,...,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean,sedative_use,ventilation_use,mort_28_day,admit_hour
count,8674.0,8674.0,8674.0,8674.0,8674.0,8674.0,8674.0,8674.0,8674.0,8498.0,...,8667.0,8674.0,8674.0,8653.0,8674.0,8668.0,8674.0,8044.0,8674.0,8674.0
mean,34977740.0,25026630.0,15006730.0,65.522712,0.44789,0.098455,1.554209,0.62843,3.988241,82.518946,...,60.823753,76.034094,20.42722,36.952088,96.646492,219.766399,0.732995,1.0,0.256975,12.822343
std,2896182.0,2876491.0,2865990.0,15.85119,0.497306,0.297946,0.264804,0.483252,2.000196,26.824864,...,8.654263,8.96017,3.613687,0.503157,2.14868,2054.569034,0.44242,0.0,0.436991,7.462557
min,30000480.0,20001300.0,10001880.0,18.0,0.0,0.0,0.806452,0.0,1.0,1.0,...,22.5,44.255319,9.428571,32.1,54.44,60.75,0.0,1.0,0.0,0.0
25%,32425150.0,22571220.0,12558710.0,56.0,0.0,0.0,1.373737,0.0,2.0,65.4,...,54.918197,69.82307,17.830975,36.682549,95.757837,115.303814,0.0,1.0,0.0,6.0
50%,34991440.0,25048380.0,15013990.0,67.0,0.0,0.0,1.525,1.0,4.0,78.0,...,60.337778,75.179484,20.171008,36.938,96.9375,134.521047,1.0,1.0,0.0,15.0
75%,37490990.0,27527990.0,17460520.0,77.0,1.0,0.0,1.7,1.0,6.0,95.0,...,66.224186,81.389472,22.74613,37.2525,98.0,163.864704,1.0,1.0,1.0,19.0
max,39999230.0,29999100.0,19999840.0,102.0,1.0,1.0,4.58,1.0,7.0,833.0,...,108.661905,122.440758,38.057692,39.674783,100.0,111218.444444,1.0,1.0,1.0,23.0


In [151]:
table_cohort.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8674 entries, 0 to 7819
Data columns (total 52 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   stay_id               8674 non-null   int64         
 1   hadm_id               8674 non-null   int64         
 2   subject_id            8674 non-null   int64         
 3   age                   8674 non-null   int64         
 4   Gender                8674 non-null   int32         
 5   ethnicity             8674 non-null   object        
 6   midodrine             8674 non-null   int64         
 7   avg_doses_per_24_hrs  8674 non-null   float64       
 8   MICU                  8674 non-null   int32         
 9   icu_intime            8674 non-null   datetime64[ns]
 10  adm_weekday           8674 non-null   int64         
 11  weight                8498 non-null   float64       
 12  sapsii                8674 non-null   int64         
 13  sirs              

# Save csv to drive

In [152]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [153]:
table_cohort.to_csv('/content/drive/MyDrive/table_cohort.csv', index=False)