<a href="https://colab.research.google.com/github/Jarvis-BITS/midodrine-mimic-iv/blob/main/notebook/01_2_cohort_extraction_add_prescriptions_midodrine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import libraries

In [1]:
from google.cloud import bigquery
import numpy as np
import pandas as pd
import seaborn as sns
sns.set(rc={'figure.figsize':(16,10)}, font_scale=1.3)
import sys
import warnings
warnings.filterwarnings('ignore')

Authenticate with GCP account having MIMIC-IV dataset

In [2]:
from google.colab import auth

auth.authenticate_user()
print('Authenticated')

Authenticated


In [3]:
%load_ext google.colab.data_table

In [4]:
%load_ext google.cloud.bigquery

In [5]:
project_id = "my-project-eicu" #@param {type:"string"} #Enter your project ID from GCP bigquey here
bqclient = bigquery.Client(project=project_id)

# BigQuery data extraction of patient details in ICU (1st day)

In [19]:
population_string = """
WITH patient_details AS (
  SELECT
  ie.subject_id, ie.hadm_id, ie.stay_id
  , pat.gender, ie.ethnicity
  , ie.admittime, ie.dischtime
  , DATETIME_DIFF(ie.admittime, DATETIME(pat.anchor_year, 1, 1, 0, 0, 0), YEAR) + pat.anchor_age AS age
  , adm.deathtime
  , ie.hospital_expire_flag

-- icu level factors
  , ie.icu_intime, ie.icu_outtime
  , ie.first_icu_stay
  , ie.los_icu, ie.los_hospital
  FROM `physionet-data.mimic_derived.icustay_detail` ie
  INNER JOIN `physionet-data.mimic_core.admissions` adm
  ON ie.hadm_id = adm.hadm_id
  INNER JOIN `physionet-data.mimic_core.patients` pat
  ON ie.subject_id = pat.subject_id

-- midodrine cohort
  )
  , midodrine AS (
    SELECT subject_id, starttime AS drug_starttime
    FROM `physionet-data.mimic_hosp.prescriptions`
    WHERE lower(drug) like 'midodrine'
  )
  , midodrine_exclude AS (
    SELECT stay_id,
    CASE WHEN LOGICAL_AND(drug_starttime < DATE_TRUNC(intime - interval '1' day, DAY) OR drug_starttime > outtime) THEN 1 
    ELSE 0 END AS midodrine_exclude
    FROM `physionet-data.mimic_icu.icustays` 
    LEFT JOIN midodrine USING (subject_id)
    GROUP BY stay_id
  )
  , midodrine_include AS (
    SELECT stay_id,
    CASE WHEN LOGICAL_AND(drug_starttime is NULL)
    OR LOGICAL_OR(drug_starttime BETWEEN DATE_TRUNC(intime - interval '1' day, DAY) AND outtime) THEN 1
    ELSE 0 END AS midodrine_include
    FROM `physionet-data.mimic_icu.icustays`  
    LEFT JOIN midodrine USING (subject_id)
    GROUP BY stay_id
  )
  , midodrine_first AS (
  SELECT stay_id, min(drug_starttime) AS drug_starttime 
  FROM `physionet-data.mimic_icu.icustays` LEFT JOIN midodrine mi USING (subject_id)
  WHERE drug_starttime BETWEEN DATE_TRUNC(intime - interval '1' day, DAY) AND outtime
  GROUP BY stay_id
  )
 , midodrine_dose AS (
  SELECT ic.stay_id, AVG(doses_per_24_hrs) AS avg_doses_per_24_hrs
  FROM `physionet-data.mimic_hosp.pharmacy` ph
  INNER JOIN `physionet-data.mimic_icu.icustays`ic
  ON ph.hadm_id = ic.hadm_id
  GROUP BY ic.stay_id
  )

 , sepsis3 AS (
   SELECT stay_id, sepsis3
   FROM `physionet-data.mimic_derived.sepsis3`
 )

  , adm_weekday AS (
    SELECT stay_id,
      EXTRACT(DAYOFWEEK FROM intime) AS adm_weekday
    FROM `physionet-data.mimic_icu.icustays`
  )

  , population AS (
    SELECT *  
    FROM (SELECT DISTINCT stay_id, first_careunit, FROM `physionet-data.mimic_icu.icustays`) a
    LEFT JOIN patient_details USING (stay_id)
    LEFT JOIN midodrine_first USING (stay_id)
    LEFT JOIN midodrine_exclude USING (stay_id)
    LEFT JOIN midodrine_include USING (stay_id)
    LEFT JOIN midodrine_dose USING (stay_id)
    LEFT JOIN sepsis3 USING (stay_id)
    LEFT JOIN adm_weekday USING (stay_id)
    )
    SELECT * FROM population
"""

In [7]:
vasopressor_id = """
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.epinephrine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.dobutamine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.dopamine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.norepinephrine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM `physionet-data.mimic_derived.phenylephrine`
UNION DISTINCT
SELECT DISTINCT stay_id FROM  `physionet-data.mimic_derived.vasopressin`
"""

In [8]:
basic_string = """
WITH sapsii AS (
  SELECT sa.stay_id
    , AVG(sa.sapsii) AS sapsii
  FROM `physionet-data.mimic_derived.sapsii` sa
  LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
  WHERE
    (sa.starttime BETWEEN DATE_TRUNC(ic.intime - interval '1' day, DAY) AND ic.outtime)
    AND (sa.endtime < ic.outtime)
  GROUP BY stay_id
)
, charlson AS(
  SELECT ic.stay_id
    , AVG(chr.charlson_comorbidity_index) AS charlson_index
  FROM `physionet-data.mimic_derived.charlson` chr
  LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (subject_id)
  GROUP BY ic.stay_id
)
, basic AS (
  SELECT *
  FROM (SELECT DISTINCT stay_id FROM `physionet-data.mimic_icu.icustays`) a
  LEFT JOIN sapsii USING (stay_id)
  LEFT JOIN charlson USING (stay_id)
)
SELECT * FROM basic
"""

In [9]:
comorbidities_string = """
WITH charlson AS(
  SELECT subject_id, hadm_id
  , renal_diseASe AS Renal
  , severe_liver_diseASe AS severe_liver
  , mild_liver_diseASe AS mild_liver
  , chronic_pulmonary_diseASe AS CPD_or_COPD
  , malignant_cancer AS Maligancy
  FROM `physionet-data.mimic_derived.charlson` char
)
, diag AS
(
  SELECT 
      hadm_id
      , CASE WHEN icd_version = 9 THEN icd_code ELSE NULL END AS icd9_code
      , CASE WHEN icd_version = 10 THEN icd_code ELSE NULL END AS icd10_code
  FROM `physionet-data.mimic_hosp.diagnoses_icd` diag
)
, addition AS(
  SELECT ad.hadm_id
  -- CAD
  , MAX(CASE WHEN
      icd9_code LIKE '414%'
      OR
      SUBSTR(icd10_code, 1, 4) = 'I251'
      THEN 1 
      ELSE 0 END) AS CAD
  -- AFIB
  , MAX(CASE WHEN
      icd9_code LIKE '4273%'
      OR
      SUBSTR(icd10_code, 1, 3) = 'I48'
      THEN 1 
      ELSE 0 END) AS AFIB
  FROM `physionet-data.mimic_core.admissions` ad
  LEFT JOIN diag
  ON ad.hadm_id = diag.hadm_id
  GROUP BY ad.hadm_id
)
, comorbidities AS(
  SELECT ic.stay_id
    , AVG(charlson.CPD_or_COPD) AS CPD_or_COPD_rate
    , AVG(charlson.Maligancy) AS Maligancy_rate
    , AVG(charlson.mild_liver) AS mild_liver_rate
    , AVG(charlson.severe_liver) AS severe_liver_rate
    , AVG(charlson.Renal) AS Renal_rate
    , AVG(addition.CAD) AS CAD_rate
    , AVG(addition.AFIB) AS AFIB_rate
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN charlson USING (hadm_id)
  LEFT JOIN addition USING (hadm_id)
  GROUP BY ic.stay_id
)
SELECT * FROM comorbidities
"""

In [10]:
weight_string = """
SELECT
  ie.subject_id
  , ie.stay_id
  , AVG(CASE WHEN weight_type = 'admit' THEN ce.weight ELSE NULL END) AS weight
FROM `physionet-data.mimic_icu.icustays` ie
  -- admission weight
LEFT JOIN `physionet-data.mimic_derived.weight_durations` ce
    ON ie.stay_id = ce.stay_id
    -- we filter to weights documented during or before the 1st day
    AND ce.starttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
GROUP BY ie.subject_id, ie.stay_id
"""

In [11]:
vital_sign_string = """
-- This query pivots vital signs and aggregates them
-- for the first 24 hours of a patient's stay.
SELECT
ie.subject_id
, ie.stay_id
, AVG(heart_rate) AS heart_rate_mean
, AVG(sbp) AS sbp_mean
, AVG(dbp) AS dbp_mean
, AVG(mbp) AS mbp_mean
, AVG(resp_rate) AS resp_rate_mean
, AVG(temperature) AS temperature_mean
, AVG(spo2) AS spo2_mean
, AVG(glucose) AS glucose_mean
FROM `physionet-data.mimic_icu.icustays` ie
LEFT JOIN `physionet-data.mimic_derived.vitalsign` ce
    ON ie.stay_id = ce.stay_id
    AND ce.charttime >= DATETIME_SUB(ie.intime, INTERVAL '6' HOUR)
    AND ce.charttime <= DATETIME_ADD(ie.intime, INTERVAL '1' DAY)
GROUP BY ie.subject_id, ie.stay_id;
"""

In [12]:
lab_tests_string = """
WITH blood AS (
  SELECT ic.stay_id
    , AVG(hemoglobin) AS hemoglobin
    , AVG(platelet) AS platelet
    , AVG(wbc) AS wbc
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN `physionet-data.mimic_derived.complete_blood_count` cbc 
    ON ic.hadm_id = cbc.hadm_id
    AND cbc.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
    AND cbc.charttime <= DATETIME_ADD(ic.intime, INTERVAL '1' DAY) 
  GROUP BY ic.stay_id
)
, chem AS (
  SELECT ic.stay_id
    , AVG(sodium) AS sodium
    , AVG(potassium) AS potassium
    , AVG(bicarbonate) AS bicarbonate
    , AVG(chloride) AS chloride
    , AVG(bun) AS bun
    , AVG(creatinine) AS creatinine
    FROM `physionet-data.mimic_icu.icustays` ic
    LEFT JOIN `physionet-data.mimic_derived.chemistry` chem
    ON ic.hadm_id = chem.hadm_id
    AND chem.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
    AND chem.charttime <= DATETIME_ADD(ic.intime, INTERVAL '1' DAY) 
    GROUP BY ic.stay_id
)
, bg AS (
  SELECT ic.stay_id
     , AVG(lactate) AS lactate
     , AVG(ph) AS ph
     , AVG(po2) AS po2
     , AVG(pco2) AS pco2
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN `physionet-data.mimic_derived.bg` bg
    ON ic.hadm_id = bg.hadm_id
    AND bg.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
    AND bg.charttime <= DATETIME_ADD(ic.intime, INTERVAL '1' DAY) 
  GROUP BY ic.stay_id
  
)
, logical_tested_index AS (
  SELECT ic.stay_id
    , AVG(CASE WHEN ntprobnp is not null THEN 1 ELSE 0 END) AS bnp
    , AVG(CASE WHEN troponin_t is not null THEN 1 ELSE 0 END)AS troponin
    , AVG(CASE WHEN ck_mb is not null THEN 1 ELSE 0 END) AS creatinine_kinase
  FROM `physionet-data.mimic_icu.icustays` ic 
  LEFT JOIN `physionet-data.mimic_derived.cardiac_marker` cm 
  ON ic.hadm_id = cm.hadm_id
  AND cm.charttime >= DATETIME_SUB(ic.intime, INTERVAL '6' HOUR) 
  AND cm.charttime <= DATETIME_ADD(ic.intime, INTERVAL '1' DAY) 
  GROUP BY ic.stay_id
)
, lab_tests AS (
  SELECT ic.stay_id
    , blood.hemoglobin, blood.platelet, blood.wbc
    , chem.sodium, chem.potassium, chem.bicarbonate, chem.chloride, chem.bun, chem.creatinine
    , bg.lactate, bg.ph, bg.po2, bg.pco2
    , lti.bnp, lti.troponin, lti.creatinine_kinase
  
  FROM `physionet-data.mimic_icu.icustays` ic
  LEFT JOIN blood USING (stay_id)
  LEFT JOIN chem USING (stay_id)
  LEFT JOIN bg USING (stay_id)
  LEFT JOIN logical_tested_index lti USING (stay_id)
)
SELECT * FROM lab_tests
"""

In [13]:
interventions_string = """
-- sedative extract
WITH sedative AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ip.itemid in (221668,221744,225972,225942,222168) THEN 1 ELSE 0 END) AS sedative_use
    FROM `physionet-data.mimic_icu.icustays` ic
    LEFT JOIN `physionet-data.mimic_icu.inputevents` ip USING (stay_id)
    WHERE ip.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)

-- vasopressor
, vasopressin AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.vasopressin`) THEN 1 ELSE 0 END) AS vasopressin_use
    FROM `physionet-data.mimic_derived.vasopressin` iv
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iv.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, dobutamine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.dobutamine`) THEN 1 ELSE 0 END) AS dobutamine_use
    FROM `physionet-data.mimic_derived.dobutamine` idb
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE idb.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, dopamine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.dopamine`) THEN 1 ELSE 0 END) AS dopamine_use
    FROM `physionet-data.mimic_derived.dopamine` idp
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE idp.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, epinephrine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.epinephrine`) THEN 1 ELSE 0 END) AS epinephrine_use
    FROM `physionet-data.mimic_derived.epinephrine` iep
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iep.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, milrinone AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.milrinone`) THEN 1 ELSE 0 END) AS milrinone_use
    FROM `physionet-data.mimic_derived.milrinone` iml
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iml.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, norepinephrine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.norepinephrine`) THEN 1 ELSE 0 END) AS norepinephrine_use
    FROM `physionet-data.mimic_derived.norepinephrine` inrp
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE inrp.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)
, phenylephrine AS (
    SELECT ic.stay_id
      , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.phenylephrine`) THEN 1 ELSE 0 END) AS phenylephrine_use
    FROM `physionet-data.mimic_derived.phenylephrine` iphe
    LEFT JOIN `physionet-data.mimic_icu.icustays` ic USING (stay_id)
    WHERE iphe.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)

-- ventilation
, ventilation AS (
  SELECT ic.stay_id
    , MAX(CASE WHEN ic.stay_id in (SELECT stay_id FROM `physionet-data.mimic_derived.ventilation`) THEN 1 ELSE 0 END) AS ventilation_use
    FROM `physionet-data.mimic_icu.icustays` ic
    LEFT JOIN `physionet-data.mimic_derived.ventilation` ivn USING (stay_id)
    WHERE ivn.starttime BETWEEN DATE_TRUNC(ic.intime-interval'1'day, DAY) AND ic.outtime
    GROUP BY ic.stay_id
)

, interventions as (
    SELECT *
    FROM (SELECT DISTINCT stay_id FROM `physionet-data.mimic_icu.icustays`) a
    LEFT JOIN sedative USING (stay_id)
    LEFT JOIN vasopressin USING (stay_id)
    LEFT JOIN ventilation USING (stay_id)
    LEFT JOIN dobutamine USING (stay_id)
    LEFT JOIN dopamine USING (stay_id)
    LEFT JOIN epinephrine USING (stay_id)
    LEFT JOIN milrinone USING (stay_id)
    LEFT JOIN norepinephrine USING (stay_id)
    LEFT JOIN phenylephrine USING (stay_id)
)

SELECT * FROM interventions;
"""

In [14]:
mortality_string = """
SELECT ic.stay_id,
MAX(CASE WHEN pa.dod is not null THEN 1 ELSE 0 END) AS mortality,
MAX(CASE WHEN pa.dod <= DATETIME_ADD(ic.intime , INTERVAL '28' DAY) AND pa.dod >= ic.intime THEN 1 ELSE 0 END) AS mort_28_day
FROM `physionet-data.mimic_icu.icustays` ic
LEFT JOIN `physionet-data.mimic_core.patients` pa
ON ic.subject_id = pa.subject_id
GROUP BY ic.stay_id
"""

# Building cohort table

## Extract population info

In [20]:
population_data = (
    bqclient.query(population_string)
    .result()
    .to_dataframe()
)

In [22]:
population_data



Unnamed: 0,stay_id,first_careunit,subject_id,hadm_id,gender,ethnicity,admittime,dischtime,age,deathtime,...,icu_outtime,first_icu_stay,los_icu,los_hospital,drug_starttime,midodrine_exclude,midodrine_include,avg_doses_per_24_hrs,sepsis3,adm_weekday
0,34547665,Neuro Stepdown,12776735,20817525,M,OTHER,2200-07-11 22:46:00,2200-07-19 12:00:00,80,NaT,...,2200-07-13 16:44:40,True,1.67,8,NaT,0,1,2.121951,True,7
1,39289362,Neuro Stepdown,16256226,20013290,F,OTHER,2150-12-20 03:00:00,2150-12-21 14:50:00,49,NaT,...,2150-12-21 14:58:40,True,0.92,1,NaT,0,1,1.384615,,1
2,32563675,Neuro Stepdown,12974563,29618057,F,WHITE,2138-11-13 01:07:00,2138-11-15 15:53:00,72,NaT,...,2138-11-15 16:25:19,True,1.71,2,NaT,0,1,1.434783,,5
3,34947848,Neuro Stepdown,14609218,20606189,F,WHITE,2174-06-28 20:40:00,2174-07-05 16:45:00,69,NaT,...,2174-07-05 17:01:32,True,6.83,7,NaT,0,1,1.315789,,3
4,37445058,Neuro Stepdown,12687112,26132667,M,BLACK/AFRICAN AMERICAN,2162-05-31 15:36:00,2162-06-04 10:16:00,63,NaT,...,2162-06-04 10:16:13,True,3.67,4,NaT,0,1,1.666667,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76535,39980385,Medical/Surgical Intensive Care Unit (MICU/SICU),11392990,21253505,M,WHITE,2158-08-10 04:50:00,2158-08-13 17:45:00,73,NaT,...,2158-08-11 21:04:48,True,1.63,3,NaT,0,1,1.592593,True,5
76536,39985296,Medical/Surgical Intensive Care Unit (MICU/SICU),19632565,29295929,F,WHITE,2179-04-26 14:22:00,2179-05-02 16:00:00,59,2179-05-02 16:00:00,...,2179-05-02 21:34:48,True,6.25,6,NaT,0,1,1.782609,,2
76537,39987031,Medical/Surgical Intensive Care Unit (MICU/SICU),12762280,23194856,F,WHITE,2177-12-07 21:34:00,2177-12-16 17:43:00,82,NaT,...,2177-12-15 23:25:26,True,2.04,9,NaT,0,1,2.035714,,7
76538,39989040,Medical/Surgical Intensive Care Unit (MICU/SICU),18311244,22780979,M,WHITE,2187-04-04 23:57:00,2187-04-08 15:45:00,58,NaT,...,2187-04-05 15:28:10,True,0.54,4,NaT,0,1,1.711111,True,5


In [21]:
population_data.info()    

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76540 entries, 0 to 76539
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   stay_id               76540 non-null  int64         
 1   first_careunit        76540 non-null  object        
 2   subject_id            76540 non-null  int64         
 3   hadm_id               76540 non-null  int64         
 4   gender                76540 non-null  object        
 5   ethnicity             76540 non-null  object        
 6   admittime             76540 non-null  datetime64[ns]
 7   dischtime             76540 non-null  datetime64[ns]
 8   age                   76540 non-null  int64         
 9   deathtime             8813 non-null   datetime64[ns]
 10  hospital_expire_flag  76540 non-null  int64         
 11  icu_intime            76540 non-null  datetime64[ns]
 12  icu_outtime           76540 non-null  datetime64[ns]
 13  first_icu_stay  

## Building the cohort table (consisting of only ICU patients)
1. Include adults (>= 18 years' old)
2. Include the first ICU admission of each patient
3. Include patients whose ICU care is 'MICU' or 'SICU'
4. Exclude those who accepted midodrine more than one day before icu care or after icu care (have been done before)

Note: midodrine_include is just opposite of midodrine_exclude so use either of the two for this

Filtering on age

In [23]:
cohort_1 = population_data.loc[population_data['age'] >= 18]
cohort_1.shape

(76540, 22)

Filtering on first ICU admission

In [24]:
cohort_2 = cohort_1.loc[cohort_1['first_icu_stay']  == True]
cohort_2.shape

(69211, 22)

Filtering out if death/discharge before 24hrs of intime

In [25]:
# 24 hrs = 86400s ;(24x60x60)
cohort_3 = cohort_2.loc[((cohort_2['deathtime'] - cohort_2['icu_intime']).dt.total_seconds() > 86400) | (cohort_2['deathtime'].isnull())] # Passed away after 24hrs or alive
cohort_3 = cohort_3.loc[((cohort_3['icu_outtime'] - cohort_3['icu_intime']).dt.total_seconds() > 86400)] # Discharged in 24hrs 
cohort_3 = cohort_3.loc[((cohort_3['dischtime'] - cohort_3['icu_intime']).dt.total_seconds() > 86400)] # Discharged in 24hrs 
cohort_3.shape

(54187, 22)

 Filtering in only MICU & SICU first care units

In [26]:
cohort_4 = cohort_3.loc[cohort_3['first_careunit'].str.contains("micu|sicu", case=False)]
cohort_4.shape

(36115, 22)

Filtering on sepsis 

In [27]:
cohort_5 = cohort_4.loc[cohort_4['sepsis3']  == True]
cohort_5.shape

(19915, 22)

Filtering on vasopressor treatments

In [28]:
vaso_id = (
    bqclient.query(vasopressor_id)
    .result()
    .to_dataframe()   
)

In [29]:
cohort_6 = pd.merge(cohort_5, vaso_id, on='stay_id', how='inner')
cohort_6.shape

(8834, 22)

Filtering on midodrine ***perscribed*** for first time

In [30]:
cohort_table = cohort_6.loc[cohort_6['midodrine_include'] == 1]
cohort_table.shape

(8287, 22)

## Extract the weight

In [31]:
weight_data = (
    bqclient.query(weight_string)
    .result()
    .to_dataframe()
)

In [32]:
weight_data



Unnamed: 0,subject_id,stay_id,weight
0,16256226,39289362,54.0
1,12974563,32563675,90.7
2,14609218,34947848,53.9
3,12687112,37445058,119.1
4,18190935,30056748,44.0
...,...,...,...
76535,15359196,38721510,79.0
76536,11780821,30062692,79.0
76537,19083332,31908710,79.0
76538,18724450,32159208,79.0


## Extract the basic score (charlson, sapsii)

In [33]:
basic_data = (
    bqclient.query(basic_string)
    .result()
    .to_dataframe()
)

In [34]:
basic_data



Unnamed: 0,stay_id,sapsii,charlson_index
0,34338479,8.0,0.000000
1,37186192,61.0,7.666667
2,37535351,10.0,2.000000
3,31860020,8.0,2.000000
4,38476338,16.0,5.600000
...,...,...,...
76535,30852106,57.0,6.461538
76536,33069153,57.0,11.222222
76537,35524095,57.0,6.000000
76538,39130941,57.0,2.840000


## Extract the mortality

In [35]:
mortality_data = (
    bqclient.query(mortality_string)
    .result()
    .to_dataframe()
)

In [36]:
mortality_data



Unnamed: 0,stay_id,mortality,mort_28_day
0,34547665,0,0
1,39289362,0,0
2,32563675,0,0
3,34947848,0,0
4,37445058,0,0
...,...,...,...
76535,39977793,1,1
76536,39980385,1,0
76537,39985296,1,1
76538,39989040,1,0


## Extract the Comorbidities

In [37]:
comorbidities_data = (
    bqclient.query(comorbidities_string)
    .result()
    .to_dataframe()
)

In [38]:
comorbidities_data.describe()

Unnamed: 0,stay_id,CPD_or_COPD_rate,Maligancy_rate,mild_liver_rate,severe_liver_rate,Renal_rate,CAD_rate,AFIB_rate
count,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0,76540.0
mean,34994800.0,0.259211,0.131761,0.119036,0.055344,0.218853,0.293977,0.294617
std,2888755.0,0.438204,0.338233,0.323833,0.228651,0.413471,0.455585,0.455873
min,30000150.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32492320.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,34996600.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,37492220.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
max,39999810.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Extract Vital Signs

In [39]:
vital_sign_data = (
    bqclient.query(vital_sign_string)
    .result()
    .to_dataframe()
)

In [40]:
vital_sign_data



Unnamed: 0,subject_id,stay_id,heart_rate_mean,sbp_mean,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean
0,12776735,34547665,64.142857,125.000000,64.615385,81.083333,17.916667,37.666250,94.538462,113.000000
1,14477428,37365042,73.000000,90.590909,55.590909,66.590909,16.777778,36.796667,95.304348,109.500000
2,18336985,39529590,73.538462,129.166667,79.583333,94.041667,20.884615,37.008571,97.769231,93.800000
3,11775129,31117124,82.458333,146.043478,55.913043,81.739130,19.666667,37.247143,97.666667,162.833333
4,17375900,34942552,89.625000,179.000000,102.285714,123.480000,14.125000,36.851667,99.083333,180.857143
...,...,...,...,...,...,...,...,...,...,...
76535,11048381,39228917,97.391304,128.086957,57.391304,73.086957,30.304348,37.657143,99.826087,110.000000
76536,18951962,37816217,98.565217,122.600000,76.600000,90.200000,18.695652,37.020000,93.217391,110.000000
76537,19366315,34120553,75.533333,117.307692,61.846154,75.346154,18.433333,37.481667,96.266667,110.000000
76538,14468223,39170646,87.666667,110.450000,68.150000,77.900000,14.285714,36.888000,98.238095,110.000000


## Extract Lab Tests Results

In [41]:
lab_tests_data = (
    bqclient.query(lab_tests_string)
    .result()
    .to_dataframe()
)

In [42]:
lab_tests_data



Unnamed: 0,stay_id,hemoglobin,platelet,wbc,sodium,potassium,bicarbonate,chloride,bun,creatinine,lactate,ph,po2,pco2,bnp,troponin,creatinine_kinase
0,37072655,12.600000,237.000000,10.30,140.000000,3.900000,34.000000,100.000000,12.000000,0.500000,,,,,0.0,0.00,0.00
1,37431964,8.850000,123.500000,13.70,145.000000,3.850000,16.500000,115.000000,24.500000,0.950000,2.20,7.390,160.0,29.0,0.0,0.00,0.00
2,30092544,11.600000,162.000000,17.10,146.000000,3.900000,30.500000,105.000000,28.000000,0.400000,0.90,7.410,149.0,53.0,0.0,0.00,0.00
3,37262027,10.100000,194.000000,11.90,140.000000,3.700000,13.000000,108.000000,15.000000,0.600000,,,,,0.0,0.00,0.00
4,35718808,11.700000,195.000000,3.60,143.000000,4.300000,33.000000,100.000000,37.000000,3.700000,,,,,0.0,1.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76535,35079726,11.566667,232.333333,8.40,140.666667,3.800000,24.666667,105.666667,20.000000,1.300000,,,,,0.0,0.75,0.75
76536,37101836,9.833333,167.333333,2.30,137.333333,3.766667,24.666667,103.666667,12.333333,0.433333,,,,,0.0,0.00,0.00
76537,37249644,10.800000,211.000000,5.10,117.000000,4.033333,24.666667,82.500000,7.000000,0.550000,1.45,7.440,40.5,43.5,0.0,0.00,0.00
76538,37395726,9.750000,330.000000,8.70,137.666667,3.900000,24.666667,100.666667,28.000000,2.100000,1.75,7.370,97.5,49.5,0.0,1.00,1.00


## Extract interventions

In [43]:
interventions_data = (
    bqclient.query(interventions_string)
    .result()
    .to_dataframe()
)

In [44]:
interventions_data['vasopressor_use_sum'] = interventions_data['vasopressin_use'] + interventions_data['dobutamine_use'] + interventions_data['dopamine_use'] + interventions_data['epinephrine_use'] + interventions_data['milrinone_use'] + interventions_data['norepinephrine_use'] + interventions_data['phenylephrine_use']
interventions_data['vasopressor_use'] = (interventions_data['vasopressor_use_sum'] != 0).astype(np.int32)

In [45]:
interventions_data



Unnamed: 0,stay_id,sedative_use,vasopressin_use,ventilation_use,dobutamine_use,dopamine_use,epinephrine_use,milrinone_use,norepinephrine_use,phenylephrine_use,vasopressor_use_sum,vasopressor_use
0,34547665,0.0,,,,,,,,,,1
1,39289362,0.0,,,,,,,,,,1
2,32563675,0.0,,1.0,,,,,,,,1
3,34947848,0.0,,,,,,,,,,1
4,37445058,0.0,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...
76535,39980385,0.0,,1.0,,,,,,,,1
76536,39985296,0.0,,1.0,,,,,,,,1
76537,39987031,0.0,,1.0,,,,,,,,1
76538,39989040,0.0,,1.0,,,,,,,,1


## Divide the Midodrine Group

In [46]:
cohort_mid = cohort_table.loc[pd.notnull(cohort_table['drug_starttime'])]
cohort_mid



Unnamed: 0,stay_id,first_careunit,subject_id,hadm_id,gender,ethnicity,admittime,dischtime,age,deathtime,...,icu_outtime,first_icu_stay,los_icu,los_hospital,drug_starttime,midodrine_exclude,midodrine_include,avg_doses_per_24_hrs,sepsis3,adm_weekday
7,36014877,Trauma SICU (TSICU),11861017,27162817,M,WHITE,2189-03-15 15:47:00,2189-05-06 17:00:00,87,NaT,...,2189-05-01 01:33:42,True,46.42,52,2189-04-17 16:00:00,0,1,1.595745,True,1
8,32272859,Trauma SICU (TSICU),11259141,25963696,F,WHITE,2184-07-05 18:16:00,2184-08-08 15:59:00,60,NaT,...,2184-08-03 18:51:35,True,24.00,34,2184-07-23 14:00:00,0,1,1.633333,True,7
12,37039117,Medical Intensive Care Unit (MICU),10184327,21396430,M,WHITE,2138-10-20 18:55:00,2138-11-01 03:00:00,89,2138-11-01 03:00:00,...,2138-11-01 04:40:00,True,11.33,12,2138-10-22 07:00:00,0,1,1.183333,True,2
19,34023828,Neuro Surgical Intensive Care Unit (Neuro SICU),18010960,21782431,M,UNKNOWN,2167-04-26 14:05:00,2167-05-18 13:40:00,59,NaT,...,2167-05-15 14:29:56,True,18.96,22,2167-05-10 22:00:00,0,1,1.589041,True,1
32,32309766,Neuro Surgical Intensive Care Unit (Neuro SICU),15703353,29272306,F,WHITE,2181-05-17 22:14:00,2181-06-03 16:30:00,59,NaT,...,2181-05-26 23:37:40,True,8.83,17,2181-05-22 16:00:00,0,1,1.517241,True,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8783,37823328,Medical/Surgical Intensive Care Unit (MICU/SICU),10481056,21921090,M,UNKNOWN,2122-12-19 09:16:00,2122-12-31 14:32:00,63,NaT,...,2122-12-21 18:11:46,True,2.38,12,2122-12-20 23:00:00,0,1,1.239437,True,7
8786,37888531,Medical/Surgical Intensive Care Unit (MICU/SICU),15885972,29782542,F,WHITE,2172-07-30 17:16:00,2172-08-13 18:25:00,61,NaT,...,2172-08-10 01:46:14,True,5.29,14,2172-08-08 14:00:00,0,1,1.571429,True,3
8807,38743934,Medical/Surgical Intensive Care Unit (MICU/SICU),19894790,27933693,M,WHITE,2140-08-25 17:58:00,2140-09-13 18:00:00,84,2140-09-13 18:00:00,...,2140-09-11 16:02:05,True,16.88,19,2140-09-01 14:00:00,0,1,1.947368,True,5
8826,39446578,Medical/Surgical Intensive Care Unit (MICU/SICU),14781720,23795457,F,WHITE,2189-01-07 21:03:00,2189-01-26 15:11:00,92,NaT,...,2189-01-26 15:11:42,True,4.08,19,2189-01-26 14:00:00,0,1,1.409091,True,5


In [47]:
mid_weights = weight_data[weight_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_weights.describe()

Unnamed: 0,subject_id,stay_id,weight
count,852.0,852.0,824.0
mean,14989290.0,35007810.0,85.084587
std,2839541.0,2869179.0,26.513893
min,10011430.0,30045620.0,26.0
25%,12487980.0,32470890.0,67.375
50%,14958110.0,35086020.0,80.0
75%,17341900.0,37299660.0,100.0
max,19997370.0,39999230.0,230.0


In [48]:
mid_vital_signs = vital_sign_data[vital_sign_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_vital_signs.describe()

Unnamed: 0,subject_id,stay_id,heart_rate_mean,sbp_mean,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean
count,852.0,852.0,852.0,844.0,844.0,852.0,852.0,847.0,852.0,850.0
mean,14989290.0,35007810.0,87.53003,105.41851,57.321968,70.811911,19.791464,36.810667,96.944438,453.06047
std,2839541.0,2869179.0,16.307855,12.490594,8.46405,8.585295,4.258057,0.535516,2.243283,6661.081269
min,10011430.0,30045620.0,47.913043,65.4,26.0,42.513889,9.916667,33.4,68.1,55.4
25%,12487980.0,32470890.0,75.631341,97.52625,51.630198,65.138393,16.725379,36.549375,95.758312,103.875
50%,14958110.0,35086020.0,86.550833,103.198994,56.729167,69.850446,19.081319,36.776667,97.173163,126.071429
75%,17341900.0,37299660.0,99.58625,110.71125,62.680478,75.594767,22.284074,37.065,98.501866,161.964286
max,19997370.0,39999230.0,144.923077,167.5,91.833333,117.52381,37.115385,39.95,100.0,166766.0


In [49]:
mid_labs = lab_tests_data[lab_tests_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_labs.describe()

Unnamed: 0,stay_id,hemoglobin,platelet,wbc,sodium,potassium,bicarbonate,chloride,bun,creatinine,lactate,ph,po2,pco2,bnp,troponin,creatinine_kinase
count,852.0,847.0,847.0,846.0,848.0,848.0,848.0,848.0,848.0,847.0,648.0,704.0,704.0,704.0,852.0,852.0,852.0
mean,35007810.0,9.566479,173.894831,13.551092,136.927049,4.313107,21.22656,101.215854,42.056153,2.700486,2.773682,7.349256,98.391974,41.126151,0.043642,0.288537,0.308255
std,2869179.0,1.856812,114.66621,7.583195,5.734996,0.710898,4.907904,7.167546,29.806943,2.214974,2.143897,0.077652,58.860844,9.766116,0.188031,0.445053,0.446722
min,30045620.0,5.0,13.25,0.575,107.333333,2.675,8.666667,74.0,2.333333,0.2,0.4,7.0775,21.0,20.0,0.0,0.0,0.0
25%,32470890.0,8.1,88.25,8.3,133.5,3.8,18.0,96.625,20.666667,1.05,1.5,7.3,52.1875,34.8125,0.0,0.0,0.0
50%,35086020.0,9.3,147.0,11.916667,137.0,4.2,21.333333,101.0,34.0,2.0,2.0875,7.356667,84.833333,40.0,0.0,0.0,0.0
75%,37299660.0,10.68,233.25,17.491667,140.5,4.75,24.25,106.0,55.0,3.775,3.3,7.4,123.85,45.5,0.0,1.0,1.0
max,39999230.0,16.25,1144.5,65.628571,158.2,6.82,42.666667,132.0,207.333333,16.05,20.428571,7.565,482.125,86.333333,1.0,1.0,1.0


In [50]:
mid_comorbidities = comorbidities_data[comorbidities_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_comorbidities.describe()

Unnamed: 0,stay_id,CPD_or_COPD_rate,Maligancy_rate,mild_liver_rate,severe_liver_rate,Renal_rate,CAD_rate,AFIB_rate
count,852.0,852.0,852.0,852.0,852.0,852.0,852.0,852.0
mean,35007810.0,0.264085,0.156103,0.429577,0.349765,0.411972,0.24061,0.382629
std,2869179.0,0.441103,0.363166,0.495307,0.477176,0.492479,0.427705,0.486314
min,30045620.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32470890.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,35086020.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,37299660.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0
max,39999230.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [51]:
mid_basic = basic_data[basic_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_basic.describe()

Unnamed: 0,stay_id,sapsii,charlson_index
count,852.0,852.0,852.0
mean,35007810.0,46.146714,6.594235
std,2869179.0,13.826296,2.404203
min,30045620.0,6.0,0.0
25%,32470890.0,36.0,5.0
50%,35086020.0,45.0,6.5
75%,37299660.0,55.0,8.25
max,39999230.0,95.0,15.0


In [52]:
mid_mortality = mortality_data[mortality_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_mortality.describe()

Unnamed: 0,stay_id,mortality,mort_28_day
count,852.0,852.0,852.0
mean,35007810.0,0.460094,0.321596
std,2869179.0,0.498698,0.467363
min,30045620.0,0.0,0.0
25%,32470890.0,0.0,0.0
50%,35086020.0,0.0,0.0
75%,37299660.0,1.0,1.0
max,39999230.0,1.0,1.0


In [53]:
mid_interventions = interventions_data[interventions_data['stay_id'].isin(cohort_mid['stay_id'].values)]
mid_interventions.describe()

Unnamed: 0,stay_id,sedative_use,vasopressin_use,ventilation_use,dobutamine_use,dopamine_use,epinephrine_use,milrinone_use,norepinephrine_use,phenylephrine_use,vasopressor_use_sum,vasopressor_use
count,852.0,852.0,315.0,795.0,30.0,57.0,62.0,11.0,761.0,354.0,0.0,852.0
mean,35007810.0,0.693662,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
std,2869179.0,0.461243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
min,30045620.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
25%,32470890.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
50%,35086020.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
75%,37299660.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
max,39999230.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0


## Divide the Non-Midodrine Group

In [54]:
cohort_nomid = cohort_table.loc[pd.isnull(cohort_table['drug_starttime'])]
cohort_nomid



Unnamed: 0,stay_id,first_careunit,subject_id,hadm_id,gender,ethnicity,admittime,dischtime,age,deathtime,...,icu_outtime,first_icu_stay,los_icu,los_hospital,drug_starttime,midodrine_exclude,midodrine_include,avg_doses_per_24_hrs,sepsis3,adm_weekday
0,38859960,Trauma SICU (TSICU),14470386,20124738,M,WHITE,2138-04-14 03:27:00,2138-05-08 15:31:00,44,NaT,...,2138-05-05 20:10:57,True,21.71,24,NaT,0,1,1.884892,True,2
1,30992197,Trauma SICU (TSICU),10670236,20938672,M,WHITE,2185-07-22 22:25:00,2185-08-28 16:40:00,32,NaT,...,2185-08-10 21:14:55,True,18.96,37,NaT,0,1,2.329787,True,6
2,31382786,Trauma SICU (TSICU),11975614,29023602,M,ASIAN,2181-06-06 17:18:00,2181-07-14 17:15:00,30,NaT,...,2181-06-29 22:27:28,True,23.21,38,NaT,0,1,1.966667,True,4
3,31617347,Trauma SICU (TSICU),17873103,27750553,M,WHITE,2169-06-29 04:57:00,2169-07-17 15:10:00,36,NaT,...,2169-07-11 17:39:27,True,12.46,18,NaT,0,1,2.571429,True,5
4,33521917,Trauma SICU (TSICU),11312502,25289892,F,BLACK/AFRICAN AMERICAN,2202-11-02 14:26:00,2202-12-03 16:55:00,62,NaT,...,2202-11-29 18:34:19,True,27.08,31,NaT,0,1,1.800000,True,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8828,39690469,Medical/Surgical Intensive Care Unit (MICU/SICU),18098524,21320524,F,WHITE,2120-10-30 16:54:00,2120-11-06 13:28:00,89,NaT,...,2120-11-02 18:50:45,True,2.88,7,NaT,0,1,1.636364,True,4
8829,39696826,Medical/Surgical Intensive Care Unit (MICU/SICU),10070932,24727163,F,WHITE,2146-05-11 18:07:00,2146-05-16 18:45:00,37,NaT,...,2146-05-13 09:54:29,True,1.54,5,NaT,0,1,1.613636,True,4
8830,39801252,Medical/Surgical Intensive Care Unit (MICU/SICU),16749537,24096764,M,WHITE,2127-09-09 22:13:00,2127-09-20 15:30:00,76,NaT,...,2127-09-11 18:51:49,True,1.75,11,NaT,0,1,1.423077,True,4
8831,39838873,Medical/Surgical Intensive Care Unit (MICU/SICU),15554479,20342520,M,ASIAN,2183-03-26 07:58:00,2183-03-30 16:19:00,49,NaT,...,2183-03-27 21:53:30,True,1.46,4,NaT,0,1,1.533333,True,4


In [55]:
nomid_weights = weight_data[weight_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_weights.describe()

Unnamed: 0,subject_id,stay_id,weight
count,7435.0,7435.0,7294.0
mean,15035790.0,34974790.0,82.313833
std,2872896.0,2894338.0,26.976087
min,10001880.0,30000480.0,1.0
25%,12619710.0,32430270.0,65.1
50%,15071760.0,34982370.0,78.0
75%,17524140.0,37505870.0,94.5
max,19999840.0,39998010.0,833.0


In [56]:
nomid_vital_signs = vital_sign_data[vital_sign_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_vital_signs.describe()

Unnamed: 0,subject_id,stay_id,heart_rate_mean,sbp_mean,dbp_mean,mbp_mean,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean
count,7435.0,7435.0,7432.0,7412.0,7411.0,7432.0,7431.0,7338.0,7432.0,7424.0
mean,15035790.0,34974790.0,90.225318,110.913061,60.146084,74.462255,20.581498,36.951915,96.938159,261.443685
std,2872896.0,2894338.0,17.364399,13.097881,9.203122,9.041522,4.237484,0.687472,2.467285,4843.017543
min,10001880.0,30000480.0,41.382353,61.5,22.5,46.157895,8.695652,31.2,54.095238,52.0
25%,12619710.0,32430270.0,77.520625,102.070019,54.018544,68.438643,17.460769,36.635714,95.76,113.0
50%,15071760.0,34982370.0,89.326667,108.698214,59.448276,73.316498,20.008475,36.933333,97.28,138.0
75%,17524140.0,37505870.0,102.316842,117.2,65.469669,79.230769,23.207615,37.321215,98.68,172.810526
max,19999840.0,39998010.0,161.44186,195.34375,111.625,124.314815,40.129032,40.104118,100.0,333400.0


In [57]:
nomid_labs = lab_tests_data[lab_tests_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_labs.describe()

Unnamed: 0,stay_id,hemoglobin,platelet,wbc,sodium,potassium,bicarbonate,chloride,bun,creatinine,lactate,ph,po2,pco2,bnp,troponin,creatinine_kinase
count,7435.0,7397.0,7397.0,7396.0,7415.0,7415.0,7413.0,7415.0,7410.0,7412.0,5940.0,6402.0,6404.0,6402.0,7435.0,7435.0,7435.0
mean,34974790.0,10.311289,206.338407,14.478205,138.530854,4.218724,21.565082,104.852639,31.241845,1.626252,2.627308,7.343701,112.006099,41.74779,0.049083,0.323158,0.376985
std,2894338.0,1.991281,119.898627,10.516124,5.371362,0.628749,4.8173,6.750818,23.841702,1.505012,2.189696,0.079372,61.990108,10.030961,0.193676,0.456443,0.468687
min,30000480.0,3.7,6.0,0.1,104.222222,2.533333,4.0,67.0,1.5,0.1,0.0,6.9825,17.0,11.166667,0.0,0.0,0.0
25%,32430270.0,8.8,126.333333,8.7,135.5,3.8,18.5,101.0,15.0,0.8,1.333333,7.294286,67.5,35.333333,0.0,0.0,0.0
50%,34982370.0,10.1,185.75,12.6,138.5,4.133333,21.333333,105.0,24.0,1.15,1.966667,7.349762,99.174242,40.2,0.0,0.0,0.0
75%,37505870.0,11.6,259.75,17.9,141.333333,4.575,24.4,109.0,39.5,1.85,3.1,7.398333,143.8,46.0,0.0,1.0,1.0
max,39998010.0,19.45,1297.0,293.38,171.6,7.6,45.0,152.25,219.333333,23.72,25.011111,7.611111,472.0,112.0,1.0,1.0,1.0


In [58]:
nomid_comorbidities = comorbidities_data[comorbidities_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_comorbidities.describe()

Unnamed: 0,stay_id,CPD_or_COPD_rate,Maligancy_rate,mild_liver_rate,severe_liver_rate,Renal_rate,CAD_rate,AFIB_rate
count,7435.0,7435.0,7435.0,7435.0,7435.0,7435.0,7435.0,7435.0
mean,34974790.0,0.285541,0.179287,0.162744,0.069132,0.215736,0.210491,0.314728
std,2894338.0,0.451702,0.383618,0.369157,0.253696,0.41136,0.407685,0.464439
min,30000480.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,32430270.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,34982370.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,37505870.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
max,39998010.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [59]:
nomid_basic = basic_data[basic_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_basic.describe()

Unnamed: 0,stay_id,sapsii,charlson_index
count,7435.0,7435.0,7435.0
mean,34974790.0,44.397714,5.666469
std,2894338.0,14.835829,2.739417
min,30000480.0,6.0,0.0
25%,32430270.0,34.0,4.0
50%,34982370.0,43.0,5.75
75%,37505870.0,53.0,7.5
max,39998010.0,114.0,19.0


In [60]:
nomid_mortality = mortality_data[mortality_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_mortality.describe()

Unnamed: 0,stay_id,mortality,mort_28_day
count,7435.0,7435.0,7435.0
mean,34974790.0,0.321453,0.256759
std,2894338.0,0.467065,0.436874
min,30000480.0,0.0,0.0
25%,32430270.0,0.0,0.0
50%,34982370.0,0.0,0.0
75%,37505870.0,1.0,1.0
max,39998010.0,1.0,1.0


In [61]:
nomid_interventions = interventions_data[interventions_data['stay_id'].isin(cohort_nomid['stay_id'].values)]
nomid_interventions.describe()

Unnamed: 0,stay_id,sedative_use,vasopressin_use,ventilation_use,dobutamine_use,dopamine_use,epinephrine_use,milrinone_use,norepinephrine_use,phenylephrine_use,vasopressor_use_sum,vasopressor_use
count,7435.0,7435.0,1768.0,6908.0,233.0,475.0,391.0,87.0,5790.0,3384.0,0.0,7435.0
mean,34974790.0,0.746066,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
std,2894338.0,0.43529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
min,30000480.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
25%,32430270.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
50%,34982370.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
75%,37505870.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
max,39998010.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0


## Creating the combined feature cohort table

In [62]:
# define the treatment group flag
cohort_mid['midodrine'] = 1
cohort_nomid['midodrine'] = 0

# define if the patient is in MICU
cohort_mid['MICU'] = cohort_mid['first_careunit'].str.contains('MICU', case=False).astype(np.int32)
cohort_nomid['MICU'] = cohort_nomid['first_careunit'].str.contains('MICU').astype(np.int32)

# define if the patient is female
cohort_mid['Gender'] = (cohort_mid['gender']=='F').astype(np.int32)
cohort_nomid['Gender'] = (cohort_nomid['gender']=='F').astype(np.int32)

# select the needed feature
# midodrine injection group
mid_core = cohort_mid.loc[:, ['stay_id', 'hadm_id', 'subject_id', 'age', 'Gender','ethnicity', 'midodrine', 'avg_doses_per_24_hrs','MICU', 'icu_intime', 'adm_weekday']]
mid_merge_1 = pd.merge(mid_core, mid_weights, on='stay_id', how='left')
mid_merge_2 = pd.merge(mid_merge_1, mid_basic, on='stay_id', how='left')
mid_merge_3 = pd.merge(mid_merge_2, mid_comorbidities, on='stay_id', how='left')
mid_merge_4 = pd.merge(mid_merge_3, mid_labs, on='stay_id', how='left')
mid_merge_5 = pd.merge(mid_merge_4, mid_vital_signs, on='stay_id', how='left')
mid_merge_6 = pd.merge(mid_merge_5, mid_interventions.loc[:, ['stay_id', 'sedative_use', 'ventilation_use', 'vasopressor_use']], on='stay_id', how='left')
mid_group = pd.merge(mid_merge_6, mid_mortality, on='stay_id', how='left')

# no-midodrine injection group
nomid_core = cohort_nomid.loc[:, ['stay_id', 'hadm_id', 'subject_id', 'age', 'Gender', 'ethnicity', 'midodrine', 'avg_doses_per_24_hrs','MICU', 'icu_intime', 'adm_weekday']]
nomid_merge_1 = pd.merge(nomid_core, nomid_weights, on='stay_id', how='left')
nomid_merge_2 = pd.merge(nomid_merge_1, nomid_basic, on='stay_id', how='left')
nomid_merge_3 = pd.merge(nomid_merge_2, nomid_comorbidities, on='stay_id', how='left')
nomid_merge_4 = pd.merge(nomid_merge_3, nomid_labs, on='stay_id', how='left')
nomid_merge_5 = pd.merge(nomid_merge_4, nomid_vital_signs, on='stay_id', how='left')
nomid_merge_6 = pd.merge(nomid_merge_5, nomid_interventions.loc[:, ['stay_id', 'sedative_use', 'ventilation_use', 'vasopressor_use']], on='stay_id', how='left')
nomid_group = pd.merge(nomid_merge_6, nomid_mortality, on='stay_id', how='left')

# # concat the dataframe
table_cohort = pd.concat((mid_group, nomid_group), axis=0)

table_cohort['admit_hour'] = table_cohort['icu_intime'].dt.hour
table_cohort = table_cohort.drop(columns=['subject_id','subject_id_y','icu_intime'])
table_cohort.rename(columns = {'subject_id_x':'subject_id'}, inplace=True)

table_cohort



Unnamed: 0,stay_id,hadm_id,subject_id,age,Gender,ethnicity,midodrine,avg_doses_per_24_hrs,MICU,adm_weekday,...,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean,sedative_use,ventilation_use,vasopressor_use,mortality,mort_28_day,admit_hour
0,36014877,27162817,11861017,87,0,WHITE,1,1.595745,0,1,...,17.600000,36.416667,100.000000,247.375000,1.0,1.0,1,0,0,15
1,32272859,25963696,11259141,60,1,WHITE,1,1.633333,0,7,...,17.333333,36.898333,97.259259,120.200000,1.0,1.0,1,0,0,18
2,37039117,21396430,10184327,89,0,WHITE,1,1.183333,1,2,...,16.875000,38.020000,98.777778,137.000000,1.0,1.0,1,1,1,20
3,34023828,21782431,18010960,59,0,UNKNOWN,1,1.589041,0,1,...,15.392857,36.786667,96.629630,98.000000,1.0,1.0,1,0,0,15
4,32309766,29272306,15703353,59,1,WHITE,1,1.517241,0,6,...,21.740741,37.372000,98.178571,136.800000,1.0,1.0,1,0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7430,39690469,21320524,18098524,89,1,WHITE,0,1.636364,1,4,...,28.380952,36.555000,97.300000,101.000000,0.0,1.0,1,0,0,21
7431,39696826,24727163,10070932,37,1,WHITE,0,1.613636,1,4,...,19.560000,38.145000,97.360000,123.000000,0.0,,1,0,0,20
7432,39801252,24096764,16749537,76,0,WHITE,0,1.423077,1,4,...,17.115385,36.930000,97.800000,186.500000,1.0,1.0,1,0,0,0
7433,39838873,20342520,15554479,49,0,ASIAN,0,1.533333,1,4,...,23.555556,36.028333,97.851852,124.000000,0.0,,1,0,0,10


In [64]:
table_cohort.describe()



Unnamed: 0,stay_id,hadm_id,subject_id,age,Gender,midodrine,avg_doses_per_24_hrs,MICU,adm_weekday,weight,...,resp_rate_mean,temperature_mean,spo2_mean,glucose_mean,sedative_use,ventilation_use,vasopressor_use,mortality,mort_28_day,admit_hour
count,8287.0,8287.0,8287.0,8287.0,8287.0,8287.0,8270.0,8287.0,8287.0,8118.0,...,8283.0,8185.0,8284.0,8274.0,8287.0,7703.0,8287.0,8287.0,8287.0,8287.0
mean,34978190.0,25023490.0,15031010.0,65.572101,0.450947,0.102812,1.569321,0.624231,3.984071,82.595073,...,20.500234,36.937299,96.938804,281.128755,0.740678,1.0,1.0,0.335707,0.263425,12.828647
std,2891607.0,2875628.0,2869349.0,15.978234,0.497618,0.303731,0.270145,0.48435,2.001022,26.94092,...,4.246135,0.674685,2.445068,5059.816946,0.438289,0.0,0.0,0.472265,0.440517,7.460821
min,30000480.0,20001300.0,10001880.0,18.0,0.0,0.0,0.888889,0.0,1.0,1.0,...,8.695652,31.2,54.095238,52.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,32437150.0,22572860.0,12605820.0,56.0,0.0,0.0,1.388889,0.0,2.0,65.5,...,17.362284,36.625,95.76,112.0,0.0,1.0,1.0,0.0,0.0,6.0
50%,34994610.0,25042360.0,15063390.0,67.0,0.0,0.0,1.536157,1.0,4.0,78.1,...,19.946429,36.915,97.269231,137.0,1.0,1.0,1.0,0.0,0.0,15.0
75%,37473500.0,27528530.0,17498370.0,77.0,1.0,0.0,1.712121,1.0,6.0,95.0,...,23.094542,37.298333,98.666667,172.0,1.0,1.0,1.0,1.0,1.0,19.0
max,39999230.0,29999100.0,19999840.0,102.0,1.0,1.0,4.653061,1.0,7.0,833.0,...,40.129032,40.104118,100.0,333400.0,1.0,1.0,1.0,1.0,1.0,23.0


In [65]:
table_cohort.info() #Only lactate is below the 50% mark (17650)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8287 entries, 0 to 7434
Data columns (total 50 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   stay_id               8287 non-null   int64  
 1   hadm_id               8287 non-null   int64  
 2   subject_id            8287 non-null   int64  
 3   age                   8287 non-null   int64  
 4   Gender                8287 non-null   int32  
 5   ethnicity             8287 non-null   object 
 6   midodrine             8287 non-null   int64  
 7   avg_doses_per_24_hrs  8270 non-null   float64
 8   MICU                  8287 non-null   int32  
 9   adm_weekday           8287 non-null   int64  
 10  weight                8118 non-null   float64
 11  sapsii                8287 non-null   float64
 12  charlson_index        8287 non-null   float64
 13  CPD_or_COPD_rate      8287 non-null   float64
 14  Maligancy_rate        8287 non-null   float64
 15  mild_liver_rate      

# Save csv to drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
table_cohort.to_csv('/content/drive/MyDrive/table_cohort.csv', index=False)