In [115]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

import sqlite3
import random
import os

random.seed(137)
np.random.seed(137)

pd.options.mode.chained_assignment = None  # default="warn"
%matplotlib inline

## HR & SBP

In [116]:
def is_numeric(x):
    try:
        float(x)
        return True
    except:
        return False

In [117]:
raw_data_dir = "data/raw/eicu/eicu-2.0"
processed_data_dir = "data/processed/eicu"

In [118]:
nurse_charting = pd.read_csv(os.path.join(raw_data_dir, "nurseCharting.csv"))
nurse_charting

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue
0,221469923,141924,5714,5714,Vital Signs,Temperature,Temperature (C),36.6
1,277425167,141924,12964,12964,Other Vital Signs and Infusions,Genitourinary Assessment,Value,X
2,236464484,141924,3844,3844,Vital Signs,Heart Rate,Heart Rate,97
3,253552049,141924,124,124,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,106
4,147240923,141924,1489,1489,Other Vital Signs and Infusions,MAP (mmHg),Value,71
...,...,...,...,...,...,...,...,...
151604227,80599950,141924,4774,4774,Other Vital Signs and Infusions,Patient s Comfort/Function (Pain) GOAL At Rest,Value,3
151604228,180586115,141924,5714,5714,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,100
151604229,95065377,141924,5269,5269,Vital Signs,Temperature,Temperature Location,ORAL
151604230,168147793,141924,9064,9064,Vital Signs,Temperature,Temperature Location,ORAL


In [119]:
nurse_charting["nursingchartcelltypevalname"].unique()

array(['Temperature (C)', 'Value', 'Heart Rate',
       'Non-Invasive BP Systolic', 'Non-Invasive BP Diastolic',
       'Pain Score', 'Respiratory Rate', 'Temperature (F)',
       'O2 Saturation', 'Temperature Location', 'GCS Total',
       'Invasive BP Systolic', 'Invasive BP Diastolic',
       'Non-Invasive BP Mean', 'CI', 'Bedside Glucose',
       'Invasive BP Mean', 'O2 L/%', 'O2 Admin Device', 'Verbal', 'Eyes',
       'Motor', 'PVR', 'End Tidal CO2', 'CVP', 'Delirium Score',
       'Sedation Score', 'Delirium Scale', 'Sedation Scale', 'SV', 'SVR',
       'CO', 'PA Diastolic', 'PA Mean', 'PA Systolic', 'CPP', 'ICP',
       'PAOP', 'Pain Goal', 'SVO2', 'SVRI', 'PVRI', 'Flow Rate',
       'Sedation Goal', 'Mode', 'Electrolyte Replacement', 'QT', 'QRS',
       'PR', 'IAP', 'QTc', 'Fall Risk'], dtype=object)

In [120]:
nurse_charting = nurse_charting[nurse_charting["nursingchartcelltypevalname"].isin(["Heart Rate", "Non-Invasive BP Systolic", "Invasive BP Systolic"])]
nurse_charting

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue
2,236464484,141924,3844,3844,Vital Signs,Heart Rate,Heart Rate,97
3,253552049,141924,124,124,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,106
10,74023594,141924,1744,1744,Vital Signs,Heart Rate,Heart Rate,104
12,110024075,141924,1024,1024,Vital Signs,Heart Rate,Heart Rate,120
13,252633992,141924,10924,10924,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,110
...,...,...,...,...,...,...,...,...
151604214,91946104,141924,304,304,Vital Signs,Heart Rate,Heart Rate,117
151604217,303717534,141924,764,764,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,86
151604221,110017799,141924,8216,8216,Vital Signs,Heart Rate,Heart Rate,100
151604228,180586115,141924,5714,5714,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,100


In [121]:
numeric_mask = nurse_charting["nursingchartvalue"].apply(is_numeric)
nurse_charting = nurse_charting[numeric_mask]
nurse_charting["nursingchartvalue"] = nurse_charting["nursingchartvalue"].astype(float)

In [122]:
# 300 >= sbp >= 30
sbp = nurse_charting[nurse_charting["nursingchartcelltypevalname"].isin(["Non-Invasive BP Systolic", "Invasive BP Systolic"])]
sbp = sbp[(sbp["nursingchartvalue"]>=30) & (sbp["nursingchartvalue"]<=300)]
sbp

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue
3,253552049,141924,124,124,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,106.0
13,252633992,141924,10924,10924,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,110.0
15,109412537,141924,12394,12394,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,108.0
19,198530434,141924,1984,1984,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,132.0
21,109114106,141924,859,859,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,115.0
...,...,...,...,...,...,...,...,...
151604173,217474746,141924,774,774,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,107.0
151604196,303465851,141924,244,244,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,123.0
151604206,109030987,141924,849,849,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,115.0
151604217,303717534,141924,764,764,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,86.0


In [123]:
# 300 >= hr 
hr = nurse_charting[nurse_charting["nursingchartcelltypevalname"]=="Heart Rate"]
hr = hr[hr["nursingchartvalue"]<=300]
hr

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue
2,236464484,141924,3844,3844,Vital Signs,Heart Rate,Heart Rate,97.0
10,74023594,141924,1744,1744,Vital Signs,Heart Rate,Heart Rate,104.0
12,110024075,141924,1024,1024,Vital Signs,Heart Rate,Heart Rate,120.0
17,128507618,141924,864,864,Vital Signs,Heart Rate,Heart Rate,110.0
18,290804921,141924,1414,1414,Vital Signs,Heart Rate,Heart Rate,126.0
...,...,...,...,...,...,...,...,...
151604197,236826124,141924,1309,1309,Vital Signs,Heart Rate,Heart Rate,122.0
151604198,254833734,141924,2404,2404,Vital Signs,Heart Rate,Heart Rate,109.0
151604214,91946104,141924,304,304,Vital Signs,Heart Rate,Heart Rate,117.0
151604221,110017799,141924,8216,8216,Vital Signs,Heart Rate,Heart Rate,100.0


In [124]:
nurse_charting = pd.concat([hr, sbp], ignore_index=True)

In [125]:
icustay_ids = nurse_charting["patientunitstayid"].unique().tolist()
len(icustay_ids)

184871

## icu stays

In [127]:
icu_stays = pd.read_csv(os.path.join(raw_data_dir, "patient.csv"))
icu_stays

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,...,unitadmitsource,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid
0,141168,128919,Female,70,Caucasian,59,91,"Rhythm disturbance (atrial, supraventricular)",152.4,15:54:00,...,Direct Admit,1,admit,84.3,85.8,03:50:00,3596,Death,Expired,002-34851
1,141178,128927,Female,52,Caucasian,60,83,,162.6,08:56:00,...,Emergency Department,1,admit,54.4,54.4,09:18:00,8,Step-Down Unit (SDU),Alive,002-33870
2,141179,128927,Female,52,Caucasian,60,83,,162.6,08:56:00,...,ICU to SDU,2,stepdown/other,,60.4,19:20:00,2042,Home,Alive,002-33870
3,141194,128941,Male,68,Caucasian,73,92,"Sepsis, renal/UTI (including bladder)",180.3,18:18:40,...,Floor,1,admit,73.9,76.7,15:31:00,4813,Floor,Alive,002-5276
4,141196,128943,Male,71,Caucasian,67,109,,162.6,20:21:00,...,ICU to SDU,2,stepdown/other,,63.2,22:23:00,1463,Floor,Alive,002-37665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200854,3353235,2743084,Male,50,Caucasian,458,1109,"CHF, congestive heart failure",175.3,04:55:00,...,Emergency Department,1,admit,90.0,99.2,23:18:00,1069,Telemetry,Alive,035-16382
200855,3353237,2743086,Female,79,Caucasian,458,1106,"Embolus, pulmonary",162.6,01:45:00,...,Direct Admit,1,admit,78.4,81.4,23:08:00,1269,Step-Down Unit (SDU),Alive,035-751
200856,3353251,2743099,Male,73,African American,458,1104,Cardiac arrest (with or without respiratory ar...,177.8,12:51:00,...,Emergency Department,1,admit,102.0,96.2,23:16:00,16259,Telemetry,Alive,035-5166
200857,3353254,2743102,Male,81,Caucasian,459,1108,"Bleeding, lower GI",185.4,07:43:00,...,Emergency Department,1,admit,83.9,92.9,19:25:00,431,Step-Down Unit (SDU),Alive,035-19511


In [128]:
len(icu_stays["patienthealthsystemstayid"].unique())

166355

In [129]:
patients = icu_stays.loc[icu_stays["patientunitstayid"].isin(icustay_ids), "patienthealthsystemstayid"].unique()
len(patients)

155275

In [130]:
icu_stays = icu_stays[icu_stays["patienthealthsystemstayid"].isin(patients)]
icu_stays

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,...,unitadmitsource,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid
0,141168,128919,Female,70,Caucasian,59,91,"Rhythm disturbance (atrial, supraventricular)",152.4,15:54:00,...,Direct Admit,1,admit,84.3,85.8,03:50:00,3596,Death,Expired,002-34851
1,141178,128927,Female,52,Caucasian,60,83,,162.6,08:56:00,...,Emergency Department,1,admit,54.4,54.4,09:18:00,8,Step-Down Unit (SDU),Alive,002-33870
2,141179,128927,Female,52,Caucasian,60,83,,162.6,08:56:00,...,ICU to SDU,2,stepdown/other,,60.4,19:20:00,2042,Home,Alive,002-33870
3,141194,128941,Male,68,Caucasian,73,92,"Sepsis, renal/UTI (including bladder)",180.3,18:18:40,...,Floor,1,admit,73.9,76.7,15:31:00,4813,Floor,Alive,002-5276
4,141196,128943,Male,71,Caucasian,67,109,,162.6,20:21:00,...,ICU to SDU,2,stepdown/other,,63.2,22:23:00,1463,Floor,Alive,002-37665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200854,3353235,2743084,Male,50,Caucasian,458,1109,"CHF, congestive heart failure",175.3,04:55:00,...,Emergency Department,1,admit,90.0,99.2,23:18:00,1069,Telemetry,Alive,035-16382
200855,3353237,2743086,Female,79,Caucasian,458,1106,"Embolus, pulmonary",162.6,01:45:00,...,Direct Admit,1,admit,78.4,81.4,23:08:00,1269,Step-Down Unit (SDU),Alive,035-751
200856,3353251,2743099,Male,73,African American,458,1104,Cardiac arrest (with or without respiratory ar...,177.8,12:51:00,...,Emergency Department,1,admit,102.0,96.2,23:16:00,16259,Telemetry,Alive,035-5166
200857,3353254,2743102,Male,81,Caucasian,459,1108,"Bleeding, lower GI",185.4,07:43:00,...,Emergency Department,1,admit,83.9,92.9,19:25:00,431,Step-Down Unit (SDU),Alive,035-19511


In [131]:
len(icu_stays["patienthealthsystemstayid"].unique())

155275

In [132]:
icu_stays["unitstaytype"].unique()

array(['admit', 'stepdown/other', 'readmit', 'transfer'], dtype=object)

### age

In [133]:
mask = icu_stays["age"].apply(is_numeric)
icu_stays = icu_stays[mask]
icu_stays["age"] = icu_stays["age"].astype(float)

In [134]:
icu_stays = icu_stays[icu_stays["age"]>=18]
icu_stays.shape

(181879, 29)

In [135]:
len(icu_stays["patienthealthsystemstayid"].unique())

149192

In [136]:
icu_stays["age"].median()

64.0

In [137]:
icu_stays.loc[icu_stays["age"].isna(), "age"] = 64

In [138]:
icu_stays[icu_stays["age"]<65].shape[0], icu_stays[icu_stays["age"]>=65].shape[0]

(91250, 90629)

### gender

In [139]:
icu_stays["gender"].unique()

array(['Female', 'Male', nan, 'Other', 'Unknown'], dtype=object)

In [140]:
icu_stays = icu_stays[icu_stays["gender"].isin(["Female", "Male"])]
icu_stays

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,...,unitadmitsource,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid
0,141168,128919,Female,70.0,Caucasian,59,91,"Rhythm disturbance (atrial, supraventricular)",152.4,15:54:00,...,Direct Admit,1,admit,84.3,85.8,03:50:00,3596,Death,Expired,002-34851
1,141178,128927,Female,52.0,Caucasian,60,83,,162.6,08:56:00,...,Emergency Department,1,admit,54.4,54.4,09:18:00,8,Step-Down Unit (SDU),Alive,002-33870
2,141179,128927,Female,52.0,Caucasian,60,83,,162.6,08:56:00,...,ICU to SDU,2,stepdown/other,,60.4,19:20:00,2042,Home,Alive,002-33870
3,141194,128941,Male,68.0,Caucasian,73,92,"Sepsis, renal/UTI (including bladder)",180.3,18:18:40,...,Floor,1,admit,73.9,76.7,15:31:00,4813,Floor,Alive,002-5276
4,141196,128943,Male,71.0,Caucasian,67,109,,162.6,20:21:00,...,ICU to SDU,2,stepdown/other,,63.2,22:23:00,1463,Floor,Alive,002-37665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200854,3353235,2743084,Male,50.0,Caucasian,458,1109,"CHF, congestive heart failure",175.3,04:55:00,...,Emergency Department,1,admit,90.0,99.2,23:18:00,1069,Telemetry,Alive,035-16382
200855,3353237,2743086,Female,79.0,Caucasian,458,1106,"Embolus, pulmonary",162.6,01:45:00,...,Direct Admit,1,admit,78.4,81.4,23:08:00,1269,Step-Down Unit (SDU),Alive,035-751
200856,3353251,2743099,Male,73.0,African American,458,1104,Cardiac arrest (with or without respiratory ar...,177.8,12:51:00,...,Emergency Department,1,admit,102.0,96.2,23:16:00,16259,Telemetry,Alive,035-5166
200857,3353254,2743102,Male,81.0,Caucasian,459,1108,"Bleeding, lower GI",185.4,07:43:00,...,Emergency Department,1,admit,83.9,92.9,19:25:00,431,Step-Down Unit (SDU),Alive,035-19511


In [141]:
len(icu_stays["patienthealthsystemstayid"].unique())

149164

In [142]:
icu_stays.shape

(181849, 29)

In [143]:
icu_stays["gender"].value_counts()

Male      99225
Female    82624
Name: gender, dtype: int64

In [144]:
icu_stays.loc[icu_stays["gender"]=="Male", "gender"] = 1
icu_stays.loc[icu_stays["gender"]=="Female", "gender"] = 0

### ethnicity

In [145]:
icu_stays.loc[icu_stays["ethnicity"]=="Caucasian", "ethnicity"] = "WHITE"
icu_stays.loc[icu_stays["ethnicity"]=="African American", "ethnicity"] = "BLACK"
icu_stays.loc[icu_stays["ethnicity"]=="Hispanic", "ethnicity"] = "LATINO"

icu_stays.loc[icu_stays["ethnicity"]=="Asian", "ethnicity"] = "OTHER"
icu_stays.loc[icu_stays["ethnicity"]=="Other/Unknown", "ethnicity"] = "OTHER"
icu_stays.loc[icu_stays["ethnicity"]=="Native American", "ethnicity"] = "OTHER"

In [146]:
icu_stays["ethnicity"].value_counts()

WHITE     140865
BLACK      19363
OTHER      12930
LATINO      6748
Name: ethnicity, dtype: int64

In [147]:
icu_stays["race_w"] = 0
icu_stays["race_b"] = 0
icu_stays["race_l"] = 0
icu_stays["race_o"] = 0
icu_stays.loc[icu_stays["ethnicity"]=="WHITE", "race_w"] = 1
icu_stays.loc[icu_stays["ethnicity"]=="BLACK", "race_b"] = 1
icu_stays.loc[icu_stays["ethnicity"]=="LATINO", "race_l"] = 1
icu_stays.loc[icu_stays["ethnicity"]=="OTHER", "race_o"] = 1

### bmi

In [148]:
icu_stays = icu_stays.rename(columns={"admissionheight": "height", "admissionweight": "weight"})

In [149]:
icu_stays[["height", "weight"]].describe()

Unnamed: 0,height,weight
count,178449.0,166526.0
mean,169.464514,84.676701
std,13.608964,27.236303
min,0.0,0.0
25%,162.56,67.0
50%,170.1,80.7
75%,177.8,97.7
max,612.6,953.0


In [150]:
mask = icu_stays["height"].isna() | icu_stays["weight"].isna()
mask.sum()

16524

In [151]:
icu_stays["height"].median(), icu_stays["weight"].median()

(170.1, 80.7)

In [152]:
mask = icu_stays["height"].notna() & icu_stays["weight"].notna()
icu_stays["bmi"] = np.nan
icu_stays.loc[mask, "bmi"] =  icu_stays.loc[mask, "weight"] / (icu_stays.loc[mask, "height"]/100)**2

In [153]:
# availability columns
for c in ["height", "weight", "bmi"]:
    c_avail = f"{c}_avail"
    print(c)
    mask = icu_stays[c].isna()
    icu_stays.loc[mask, c_avail] = 0.0
    icu_stays.loc[~mask, c_avail] = 1.0

icu_stays

height
weight
bmi


Unnamed: 0,patientunitstayid,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,height,hospitaladmittime24,...,unitdischargestatus,uniquepid,race_w,race_b,race_l,race_o,bmi,height_avail,weight_avail,bmi_avail
0,141168,128919,0,70.0,WHITE,59,91,"Rhythm disturbance (atrial, supraventricular)",152.4,15:54:00,...,Expired,002-34851,1,0,0,0,36.295906,1.0,1.0,1.0
1,141178,128927,0,52.0,WHITE,60,83,,162.6,08:56:00,...,Alive,002-33870,1,0,0,0,20.575852,1.0,1.0,1.0
2,141179,128927,0,52.0,WHITE,60,83,,162.6,08:56:00,...,Alive,002-33870,1,0,0,0,,1.0,0.0,0.0
3,141194,128941,1,68.0,WHITE,73,92,"Sepsis, renal/UTI (including bladder)",180.3,18:18:40,...,Alive,002-5276,1,0,0,0,22.732803,1.0,1.0,1.0
4,141196,128943,1,71.0,WHITE,67,109,,162.6,20:21:00,...,Alive,002-37665,1,0,0,0,,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200854,3353235,2743084,1,50.0,WHITE,458,1109,"CHF, congestive heart failure",175.3,04:55:00,...,Alive,035-16382,1,0,0,0,29.287256,1.0,1.0,1.0
200855,3353237,2743086,0,79.0,WHITE,458,1106,"Embolus, pulmonary",162.6,01:45:00,...,Alive,035-751,1,0,0,0,29.653433,1.0,1.0,1.0
200856,3353251,2743099,1,73.0,BLACK,458,1104,Cardiac arrest (with or without respiratory ar...,177.8,12:51:00,...,Alive,035-5166,0,1,0,0,32.265371,1.0,1.0,1.0
200857,3353254,2743102,1,81.0,WHITE,459,1108,"Bleeding, lower GI",185.4,07:43:00,...,Alive,035-19511,1,0,0,0,24.408579,1.0,1.0,1.0


In [154]:
for c in ["height", "weight", "bmi"]:
    print(c)
    mask = icu_stays[c].isna()
    
    median_to_fill = icu_stays.loc[~mask, c].median()
    icu_stays.loc[mask, c] = median_to_fill

height
weight
bmi


In [155]:
# remove abnormal height and weight
icu_stays = icu_stays[(icu_stays["height"]>100) & (icu_stays["weight"]>30)]

In [156]:
icu_stays[["height", "weight", "bmi"]].describe()

Unnamed: 0,height,weight,bmi
count,181201.0,181201.0,181201.0
mean,169.838167,84.385915,29.185781
std,11.521149,25.991076,8.565679
min,100.7,30.03,1.599843
25%,162.6,68.04,24.103007
50%,170.1,80.7,27.75781
75%,177.8,95.5,32.407407
max,612.6,953.0,386.628261


In [157]:
icu_stays[icu_stays["bmi"]<30].shape[0], icu_stays[icu_stays["bmi"]>=30].shape[0]

(118911, 62290)

In [158]:
icu_stays.columns

Index(['patientunitstayid', 'patienthealthsystemstayid', 'gender', 'age',
       'ethnicity', 'hospitalid', 'wardid', 'apacheadmissiondx', 'height',
       'hospitaladmittime24', 'hospitaladmitoffset', 'hospitaladmitsource',
       'hospitaldischargeyear', 'hospitaldischargetime24',
       'hospitaldischargeoffset', 'hospitaldischargelocation',
       'hospitaldischargestatus', 'unittype', 'unitadmittime24',
       'unitadmitsource', 'unitvisitnumber', 'unitstaytype', 'weight',
       'dischargeweight', 'unitdischargetime24', 'unitdischargeoffset',
       'unitdischargelocation', 'unitdischargestatus', 'uniquepid', 'race_w',
       'race_b', 'race_l', 'race_o', 'bmi', 'height_avail', 'weight_avail',
       'bmi_avail'],
      dtype='object')

In [159]:
icu_stays = icu_stays[[
    "patientunitstayid", "patienthealthsystemstayid", "uniquepid", "gender", "age", "height", "weight", "bmi", "height_avail", "weight_avail", "bmi_avail",
    "hospitaladmitoffset", "hospitaldischargestatus", "hospitaldischargeoffset", "unitdischargeoffset", "unitdischargestatus",
    "race_w", "race_b", "race_l", "race_o"
]]
icu_stays

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,uniquepid,gender,age,height,weight,bmi,height_avail,weight_avail,bmi_avail,hospitaladmitoffset,hospitaldischargestatus,hospitaldischargeoffset,unitdischargeoffset,unitdischargestatus,race_w,race_b,race_l,race_o
0,141168,128919,002-34851,0,70.0,152.4,84.3,36.295906,1.0,1.0,1.0,0,Expired,3596,3596,Expired,1,0,0,0
1,141178,128927,002-33870,0,52.0,162.6,54.4,20.575852,1.0,1.0,1.0,-14,Alive,2050,8,Alive,1,0,0,0
2,141179,128927,002-33870,0,52.0,162.6,80.7,27.757810,1.0,0.0,0.0,-22,Alive,2042,2042,Alive,1,0,0,0
3,141194,128941,002-5276,1,68.0,180.3,73.9,22.732803,1.0,1.0,1.0,-780,Alive,12492,4813,Alive,1,0,0,0
4,141196,128943,002-37665,1,71.0,162.6,80.7,27.757810,1.0,0.0,0.0,-99,Alive,5460,1463,Alive,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200854,3353235,2743084,035-16382,1,50.0,175.3,90.0,29.287256,1.0,1.0,1.0,-34,Alive,3811,1069,Alive,1,0,0,0
200855,3353237,2743086,035-751,0,79.0,162.6,78.4,29.653433,1.0,1.0,1.0,-14,Alive,9665,1269,Alive,1,0,0,0
200856,3353251,2743099,035-5166,1,73.0,177.8,102.0,32.265371,1.0,1.0,1.0,-206,Alive,19098,16259,Alive,0,1,0,0
200857,3353254,2743102,035-19511,1,81.0,185.4,83.9,24.408579,1.0,1.0,1.0,-271,Alive,6144,431,Alive,1,0,0,0


In [160]:
icu_stays = icu_stays[icu_stays["hospitaldischargestatus"].notna()]
icu_stays.shape

(180017, 20)

In [161]:
patients_to_remove = icu_stays[icu_stays["unitdischargestatus"].isna()]["patienthealthsystemstayid"].to_list()
patients_to_remove

[264941,
 343396,
 361566,
 458422,
 503381,
 509455,
 514651,
 585386,
 608629,
 637949,
 674779,
 751339,
 775026,
 775494,
 811526,
 811526,
 842814,
 896829,
 896829,
 1241713,
 1241713,
 1348014,
 1387052,
 1568070,
 1896227,
 2150433,
 2543236,
 2543236,
 2727320,
 2727320]

In [162]:
icu_stays = icu_stays[~icu_stays["patienthealthsystemstayid"].isin(patients_to_remove)]
icu_stays

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,uniquepid,gender,age,height,weight,bmi,height_avail,weight_avail,bmi_avail,hospitaladmitoffset,hospitaldischargestatus,hospitaldischargeoffset,unitdischargeoffset,unitdischargestatus,race_w,race_b,race_l,race_o
0,141168,128919,002-34851,0,70.0,152.4,84.3,36.295906,1.0,1.0,1.0,0,Expired,3596,3596,Expired,1,0,0,0
1,141178,128927,002-33870,0,52.0,162.6,54.4,20.575852,1.0,1.0,1.0,-14,Alive,2050,8,Alive,1,0,0,0
2,141179,128927,002-33870,0,52.0,162.6,80.7,27.757810,1.0,0.0,0.0,-22,Alive,2042,2042,Alive,1,0,0,0
3,141194,128941,002-5276,1,68.0,180.3,73.9,22.732803,1.0,1.0,1.0,-780,Alive,12492,4813,Alive,1,0,0,0
4,141196,128943,002-37665,1,71.0,162.6,80.7,27.757810,1.0,0.0,0.0,-99,Alive,5460,1463,Alive,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200854,3353235,2743084,035-16382,1,50.0,175.3,90.0,29.287256,1.0,1.0,1.0,-34,Alive,3811,1069,Alive,1,0,0,0
200855,3353237,2743086,035-751,0,79.0,162.6,78.4,29.653433,1.0,1.0,1.0,-14,Alive,9665,1269,Alive,1,0,0,0
200856,3353251,2743099,035-5166,1,73.0,177.8,102.0,32.265371,1.0,1.0,1.0,-206,Alive,19098,16259,Alive,0,1,0,0
200857,3353254,2743102,035-19511,1,81.0,185.4,83.9,24.408579,1.0,1.0,1.0,-271,Alive,6144,431,Alive,1,0,0,0


In [163]:
len(icu_stays["patienthealthsystemstayid"].unique())

147619

In [39]:
# icu_stays.loc[icu_stays["hospitaldischargestatus"]=="Expired", "hospitaldischargestatus"] = "Dead"
# icu_stays.loc[icu_stays["unitdischargestatus"]=="Expired", "unitdischargestatus"] = "Dead"

In [41]:
# dst_dir = "/N/project/waveform_mortality/JL/Xiang's Model"
# icustays.to_csv(os.path.join(dst_dir, "eICU_icustays.csv"), index=False)

### comorbidity

In [164]:
# comorbidity = pd.read_csv(os.path.join(comorbidity_dir, "eICU_comorbidity_admissions.csv"))
comorbidity = pd.read_csv(os.path.join(processed_data_dir, "comorbidity.csv"))
# comorbidity = comorbidity.drop(columns=["index"], axis=1)
comorbidity

Unnamed: 0,patientunitstayid,metacanc,aids,msld,hp,rend,diab,mi,chf,pvd,cevd,cpd,dementia,rheumd,pud,mld,canc,diabwc,CCI
0,141168,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0.0,5.0
1,141178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0
2,141179,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0
3,141194,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0.0,3.0
4,141196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200854,3353235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0
200855,3353237,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0
200856,3353251,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0.0,1.0
200857,3353254,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.0,1.0


In [165]:
comorbidity = comorbidity[
    ["patientunitstayid", "mi", "chf", "pvd", "cevd", "dementia", "cpd", "rheumd", "pud", "mld", "diab", "diabwc", "hp", "rend", "canc", "msld", "metacanc", "aids", "CCI"]
]

In [166]:
comorbidity[comorbidity["CCI"]<1].shape[0], comorbidity[comorbidity["CCI"]>=1].shape[0]

(85285, 115574)

In [167]:
icu_stays["patientunitstayid"].shape

(179957,)

In [168]:
icu_stays = icu_stays.merge(comorbidity, on=["patientunitstayid"], how="left")
icu_stays

Unnamed: 0,patientunitstayid,patienthealthsystemstayid,uniquepid,gender,age,height,weight,bmi,height_avail,weight_avail,...,mld,diab,diabwc,hp,rend,canc,msld,metacanc,aids,CCI
0,141168,128919,002-34851,0,70.0,152.4,84.3,36.295906,1.0,1.0,...,0,0,0.0,0,1,0,0,0,0,5.0
1,141178,128927,002-33870,0,52.0,162.6,54.4,20.575852,1.0,1.0,...,0,0,0.0,0,0,0,0,0,0,0.0
2,141179,128927,002-33870,0,52.0,162.6,80.7,27.757810,1.0,0.0,...,0,0,0.0,0,0,0,0,0,0,0.0
3,141194,128941,002-5276,1,68.0,180.3,73.9,22.732803,1.0,1.0,...,0,1,0.0,0,1,0,0,0,0,3.0
4,141196,128943,002-37665,1,71.0,162.6,80.7,27.757810,1.0,0.0,...,0,0,0.0,0,0,0,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179952,3353235,2743084,035-16382,1,50.0,175.3,90.0,29.287256,1.0,1.0,...,0,0,0.0,0,0,0,0,0,0,0.0
179953,3353237,2743086,035-751,0,79.0,162.6,78.4,29.653433,1.0,1.0,...,0,0,0.0,0,0,0,0,0,0,0.0
179954,3353251,2743099,035-5166,1,73.0,177.8,102.0,32.265371,1.0,1.0,...,0,1,0.0,0,0,0,0,0,0,1.0
179955,3353254,2743102,035-19511,1,81.0,185.4,83.9,24.408579,1.0,1.0,...,0,0,0.0,0,0,0,0,0,0,1.0


In [169]:
icu_stays["CCI"].isna().sum()

0

In [170]:
icu_stays.iloc[:, 20:39]

Unnamed: 0,mi,chf,pvd,cevd,dementia,cpd,rheumd,pud,mld,diab,diabwc,hp,rend,canc,msld,metacanc,aids,CCI
0,0,1,0,0,0,1,1,0,0,0,0.0,0,1,0,0,0,0,5.0
1,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
2,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
3,0,0,0,0,0,0,0,0,0,1,0.0,0,1,0,0,0,0,3.0
4,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179952,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
179953,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
179954,0,0,0,0,0,0,0,0,0,1,0.0,0,0,0,0,0,0,1.0
179955,0,0,0,0,0,1,0,0,0,0,0.0,0,0,0,0,0,0,1.0


In [172]:
icu_stays.iloc[:, 20:39] = icu_stays.iloc[:, 20:39].fillna(0)
icu_stays.iloc[:, 20:39]

Unnamed: 0,mi,chf,pvd,cevd,dementia,cpd,rheumd,pud,mld,diab,diabwc,hp,rend,canc,msld,metacanc,aids,CCI
0,0,1,0,0,0,1,1,0,0,0,0.0,0,1,0,0,0,0,5.0
1,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
2,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
3,0,0,0,0,0,0,0,0,0,1,0.0,0,1,0,0,0,0,3.0
4,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179952,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
179953,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0.0
179954,0,0,0,0,0,0,0,0,0,1,0.0,0,0,0,0,0,0,1.0
179955,0,0,0,0,0,1,0,0,0,0,0.0,0,0,0,0,0,0,1.0


### sofa scores

In [173]:
sofa_scores = pd.read_csv(os.path.join(processed_data_dir, "sofa_scores.csv"))
sofa_scores

Unnamed: 0,patientunitstayid,icu_hour,mean_arterial_pressure,rate_dopamine,norepinephrine,rate_dobutamine,pf_ratio,urine_output,creatinine_max,platelet_min,...,cns,startoffset,endoffset,imputed_renal,imputed_respiration,imputed_coagulation,imputed_liver,imputed_cns,imputed_cardiovascular,sofa_total
0,1037475,22,98.0,,,,4.761905,0.0,,,...,,1320,1380,4,0,0,0,0,0,4
1,1037475,23,86.0,,,,4.761905,,,,...,1.0,1380,1440,4,0,0,0,1,0,5
2,1037475,24,104.0,,,,4.761905,,,,...,,1440,1500,4,0,0,0,1,0,5
3,1037475,25,97.0,,,,4.761905,,,,...,,1500,1560,4,0,0,0,1,0,5
4,1037475,26,94.0,,,,4.761905,,,,...,,1560,1620,4,0,0,0,1,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13215587,1037475,17,87.0,,,,4.761905,,,,...,,1020,1080,2,0,0,0,1,0,3
13215588,1037475,18,104.0,,,,4.761905,,0.5,,...,,1080,1140,0,0,0,0,1,0,1
13215589,1037475,19,103.0,,,,4.761905,,,,...,,1140,1200,0,0,0,0,1,0,1
13215590,1037475,20,91.0,,,,4.761905,,,,...,1.0,1200,1260,0,0,0,0,1,0,1


In [175]:
sofa_scores = sofa_scores[
    ["patientunitstayid", "startoffset", "endoffset", "imputed_respiration", "imputed_coagulation", "imputed_liver", "imputed_cardiovascular", "imputed_cns", "imputed_renal", "sofa_total"]]
sofa_scores = sofa_scores.rename(columns={
    "imputed_respiration": "respiration", "imputed_coagulation": "coagulation", "imputed_liver": "liver", "imputed_cardiovascular": "cardiovascular", "imputed_cns": 'cns', "imputed_renal": "renal"})

In [176]:
sofa_scores.isnull().sum()

patientunitstayid    0
startoffset          0
endoffset            0
respiration          0
coagulation          0
liver                0
cardiovascular       0
cns                  0
renal                0
sofa_total           0
dtype: int64

In [48]:
# worst_sofa_socre = []
# for n, g in sofa_scores.groupby("patientunitstayid"):
#     data_in_g = g[g["icu_hour"]<=24]
#     worst_sofa_socre.append({"patientunitstayid": n, "worst_sofa_score_24h": data_in_g["sofa_total"].max()})

In [51]:
# worst_sofa_socre = pd.DataFrame(worst_sofa_socre)
# worst_sofa_socre

Unnamed: 0,patientunitstayid,worst_sofa_score_24h
0,141168,6
1,141178,0
2,141179,4
3,141194,5
4,141196,4
...,...,...
200852,3353235,4
200853,3353237,5
200854,3353251,11
200855,3353254,5


In [53]:
# icu_stays = icu_stays.merge(worst_sofa_socre, on="patientunitstayid")

In [65]:
# dst_dir = "/N/project/waveform_mortality/JL/Xiang's Model"
# icustays.to_csv(os.path.join(dst_dir, "eICU_icustays.csv"), index=False)

### mv

In [177]:
# mv = pd.read_csv("/N/project/waveform_mortality/xiang/Projects/hr_sbp_contour/data/mv_raw.csv")
mv = pd.read_csv(os.path.join(processed_data_dir, "mechanical_ventilation.csv"))
mv

Unnamed: 0,patientunitstayid,ventnum,activeupondischarge,vent_start,charttime,unitdischargeoffset,oxygen_therapy_type,supp_oxygen,vent_end
0,141168,1,False,396,2329,3596,4.0,1,2389
1,141179,1,False,12,642,2042,-1.0,1,702
2,141194,1,False,-3,6127,4813,-1.0,1,6187
3,141196,1,False,840,5303,1463,-1.0,1,5363
4,141227,1,False,-13,1573,1652,4.0,1,1633
...,...,...,...,...,...,...,...,...,...
185024,3353235,1,True,16,331,1069,2.0,1,1069
185025,3353237,1,True,-14,1201,1269,2.0,1,1269
185026,3353251,1,True,0,16183,16259,4.0,1,16259
185027,3353254,1,False,-58,5926,431,2.0,1,5986


In [178]:
mv["mv"] = "none"
mv.loc[mv["oxygen_therapy_type"]<=1, "mv"] = "oxygen_therapy"
mv.loc[mv["oxygen_therapy_type"]==2, "mv"] = "mv_unknown"
mv.loc[mv["oxygen_therapy_type"]==3, "mv"] = "mv_non_vasive"
mv.loc[mv["oxygen_therapy_type"]==4, "mv"] = "mv_invasive"

## chart events

#### HR events

In [179]:
hr

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue
2,236464484,141924,3844,3844,Vital Signs,Heart Rate,Heart Rate,97.0
10,74023594,141924,1744,1744,Vital Signs,Heart Rate,Heart Rate,104.0
12,110024075,141924,1024,1024,Vital Signs,Heart Rate,Heart Rate,120.0
17,128507618,141924,864,864,Vital Signs,Heart Rate,Heart Rate,110.0
18,290804921,141924,1414,1414,Vital Signs,Heart Rate,Heart Rate,126.0
...,...,...,...,...,...,...,...,...
151604197,236826124,141924,1309,1309,Vital Signs,Heart Rate,Heart Rate,122.0
151604198,254833734,141924,2404,2404,Vital Signs,Heart Rate,Heart Rate,109.0
151604214,91946104,141924,304,304,Vital Signs,Heart Rate,Heart Rate,117.0
151604221,110017799,141924,8216,8216,Vital Signs,Heart Rate,Heart Rate,100.0


In [180]:
conn = sqlite3.connect(":memory:")

In [181]:
mv.to_sql("mv", conn, index=False)
hr.to_sql("hr", conn, index=False)
sofa_scores.to_sql("sofa", conn, index=False)

13215592

In [182]:
sofa_scores

Unnamed: 0,patientunitstayid,startoffset,endoffset,respiration,coagulation,liver,cardiovascular,cns,renal,sofa_total
0,1037475,1320,1380,0,0,0,0,0,4,4
1,1037475,1380,1440,0,0,0,0,1,4,5
2,1037475,1440,1500,0,0,0,0,1,4,5
3,1037475,1500,1560,0,0,0,0,1,4,5
4,1037475,1560,1620,0,0,0,0,1,4,5
...,...,...,...,...,...,...,...,...,...,...
13215587,1037475,1020,1080,0,0,0,0,1,2,3
13215588,1037475,1080,1140,0,0,0,0,1,0,1
13215589,1037475,1140,1200,0,0,0,0,1,0,1
13215590,1037475,1200,1260,0,0,0,0,1,0,1


In [183]:
query = """
    SELECT hr.*, mv.mv, 
    sofa.respiration, sofa.coagulation, sofa.liver, sofa.cardiovascular, sofa.cns, sofa.renal, sofa.sofa_total
    FROM hr
    LEFT JOIN mv
    ON hr.patientunitstayid=mv.patientunitstayid and hr.nursingchartoffset>=mv.vent_start and hr.nursingchartoffset<mv.vent_end
    LEFT JOIN sofa
    ON hr.patientunitstayid=sofa.patientunitstayid and hr.nursingchartoffset>=sofa.startoffset and hr.nursingchartoffset<sofa.endoffset
"""

hr_events_mv_sofa = pd.read_sql_query(query, conn)
hr_events_mv_sofa

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue,mv,respiration,coagulation,liver,cardiovascular,cns,renal,sofa_total
0,236464484,141924,3844,3844,Vital Signs,Heart Rate,Heart Rate,97.0,oxygen_therapy,,,,,,,
1,74023594,141924,1744,1744,Vital Signs,Heart Rate,Heart Rate,104.0,oxygen_therapy,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,110024075,141924,1024,1024,Vital Signs,Heart Rate,Heart Rate,120.0,oxygen_therapy,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,128507618,141924,864,864,Vital Signs,Heart Rate,Heart Rate,110.0,oxygen_therapy,0.0,0.0,0.0,0.0,0.0,2.0,2.0
4,290804921,141924,1414,1414,Vital Signs,Heart Rate,Heart Rate,126.0,oxygen_therapy,0.0,0.0,0.0,0.0,0.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17674017,236826124,141924,1309,1309,Vital Signs,Heart Rate,Heart Rate,122.0,oxygen_therapy,0.0,0.0,0.0,0.0,0.0,2.0,2.0
17674018,254833734,141924,2404,2404,Vital Signs,Heart Rate,Heart Rate,109.0,oxygen_therapy,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17674019,91946104,141924,304,304,Vital Signs,Heart Rate,Heart Rate,117.0,oxygen_therapy,0.0,0.0,0.0,1.0,0.0,2.0,3.0
17674020,110017799,141924,8216,8216,Vital Signs,Heart Rate,Heart Rate,100.0,oxygen_therapy,,,,,,,


In [184]:
conn.close()

In [185]:
hr_events_mv_sofa[hr_events_mv_sofa["sofa_total"].isna()]

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue,mv,respiration,coagulation,liver,cardiovascular,cns,renal,sofa_total
0,236464484,141924,3844,3844,Vital Signs,Heart Rate,Heart Rate,97.0,oxygen_therapy,,,,,,,
6,237167808,141924,11904,11904,Vital Signs,Heart Rate,Heart Rate,113.0,oxygen_therapy,,,,,,,
14,146093319,141924,13669,13669,Vital Signs,Heart Rate,Heart Rate,137.0,oxygen_therapy,,,,,,,
16,182463044,141924,13694,13694,Vital Signs,Heart Rate,Heart Rate,130.0,oxygen_therapy,,,,,,,
23,273310323,141924,4744,4744,Vital Signs,Heart Rate,Heart Rate,101.0,oxygen_therapy,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17674012,146861417,141924,8387,8387,Vital Signs,Heart Rate,Heart Rate,88.0,oxygen_therapy,,,,,,,
17674013,91936970,141924,-308,-308,Vital Signs,Heart Rate,Heart Rate,142.0,,,,,,,,
17674016,236783213,141924,11329,11329,Vital Signs,Heart Rate,Heart Rate,130.0,oxygen_therapy,,,,,,,
17674020,110017799,141924,8216,8216,Vital Signs,Heart Rate,Heart Rate,100.0,oxygen_therapy,,,,,,,


In [186]:
hr_events_mv_sofa.iloc[:, 9:16]

Unnamed: 0,respiration,coagulation,liver,cardiovascular,cns,renal,sofa_total
0,,,,,,,
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,2.0,2.0
4,0.0,0.0,0.0,0.0,0.0,4.0,4.0
...,...,...,...,...,...,...,...
17674017,0.0,0.0,0.0,0.0,0.0,2.0,2.0
17674018,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17674019,0.0,0.0,0.0,1.0,0.0,2.0,3.0
17674020,,,,,,,


In [187]:
hr_events_mv_sofa.iloc[:, 9:16] = hr_events_mv_sofa.iloc[:, 9:16].fillna(0)

In [188]:
hr_events_mv_sofa[hr_events_mv_sofa["mv"].isna()]

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue,mv,respiration,coagulation,liver,cardiovascular,cns,renal,sofa_total
30,74004798,141924,-218,-218,Vital Signs,Heart Rate,Heart Rate,124.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34,110866063,141924,-38,-38,Vital Signs,Heart Rate,Heart Rate,118.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
38,236352269,141924,-268,-268,Vital Signs,Heart Rate,Heart Rate,132.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
47,254534099,141924,-326,-326,Vital Signs,Heart Rate,Heart Rate,160.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
51,254467313,141924,-288,-288,Vital Signs,Heart Rate,Heart Rate,140.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17673941,254548107,141923,205,205,Vital Signs,Heart Rate,Heart Rate,131.0,,0.0,0.0,0.0,1.0,0.0,0.0,1.0
17673972,255200895,141924,-98,-98,Vital Signs,Heart Rate,Heart Rate,118.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17673980,128928097,141924,-128,-128,Vital Signs,Heart Rate,Heart Rate,118.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17673993,92606633,141924,-188,-188,Vital Signs,Heart Rate,Heart Rate,122.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [189]:
hr_events_mv_sofa["mv"].unique()

array(['oxygen_therapy', None, 'mv_unknown', 'mv_invasive',
       'mv_non_vasive'], dtype=object)

In [190]:
hr_events_mv_sofa["mv_invasive"] = 0
hr_events_mv_sofa["mv_non_vasive"] = 0
hr_events_mv_sofa["mv_oxygen_therapy"] = 0
hr_events_mv_sofa["mv_none"] = 0
hr_events_mv_sofa["mv_unknown"] = 0
hr_events_mv_sofa.loc[hr_events_mv_sofa["mv"] == "invasive", "mv_invasive"] = 1
hr_events_mv_sofa.loc[hr_events_mv_sofa["mv"] == "non_invasive", "mv_non_vasive"] = 1
hr_events_mv_sofa.loc[hr_events_mv_sofa["mv"] == "oxygen_therapy", "mv_oxygen_therapy"] = 1
# hr_events_mv_sofa.loc[hr_events_mv_sofa["mv"] == "None", "mv_none"] = 1
hr_events_mv_sofa.loc[hr_events_mv_sofa["mv"].isna(), "mv_none"] = 1
hr_events_mv_sofa.loc[hr_events_mv_sofa["mv"] == "mv_unknown", "mv_unknown"] = 1
hr_events_mv_sofa

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue,mv,respiration,...,liver,cardiovascular,cns,renal,sofa_total,mv_invasive,mv_non_vasive,mv_oxygen_therapy,mv_none,mv_unknown
0,236464484,141924,3844,3844,Vital Signs,Heart Rate,Heart Rate,97.0,oxygen_therapy,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0
1,74023594,141924,1744,1744,Vital Signs,Heart Rate,Heart Rate,104.0,oxygen_therapy,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0
2,110024075,141924,1024,1024,Vital Signs,Heart Rate,Heart Rate,120.0,oxygen_therapy,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0
3,128507618,141924,864,864,Vital Signs,Heart Rate,Heart Rate,110.0,oxygen_therapy,0.0,...,0.0,0.0,0.0,2.0,2.0,0,0,1,0,0
4,290804921,141924,1414,1414,Vital Signs,Heart Rate,Heart Rate,126.0,oxygen_therapy,0.0,...,0.0,0.0,0.0,4.0,4.0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17674017,236826124,141924,1309,1309,Vital Signs,Heart Rate,Heart Rate,122.0,oxygen_therapy,0.0,...,0.0,0.0,0.0,2.0,2.0,0,0,1,0,0
17674018,254833734,141924,2404,2404,Vital Signs,Heart Rate,Heart Rate,109.0,oxygen_therapy,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0
17674019,91946104,141924,304,304,Vital Signs,Heart Rate,Heart Rate,117.0,oxygen_therapy,0.0,...,0.0,1.0,0.0,2.0,3.0,0,0,1,0,0
17674020,110017799,141924,8216,8216,Vital Signs,Heart Rate,Heart Rate,100.0,oxygen_therapy,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0


In [191]:
hr_events_mv_sofa.columns

Index(['nursingchartid', 'patientunitstayid', 'nursingchartoffset',
       'nursingchartentryoffset', 'nursingchartcelltypecat',
       'nursingchartcelltypevallabel', 'nursingchartcelltypevalname',
       'nursingchartvalue', 'mv', 'respiration', 'coagulation', 'liver',
       'cardiovascular', 'cns', 'renal', 'sofa_total', 'mv_invasive',
       'mv_non_vasive', 'mv_oxygen_therapy', 'mv_none', 'mv_unknown'],
      dtype='object')

In [192]:
hr_events_mv_sofa = hr_events_mv_sofa.drop(columns=["mv"])
hr_events_mv_sofa

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue,respiration,coagulation,liver,cardiovascular,cns,renal,sofa_total,mv_invasive,mv_non_vasive,mv_oxygen_therapy,mv_none,mv_unknown
0,236464484,141924,3844,3844,Vital Signs,Heart Rate,Heart Rate,97.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0
1,74023594,141924,1744,1744,Vital Signs,Heart Rate,Heart Rate,104.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0
2,110024075,141924,1024,1024,Vital Signs,Heart Rate,Heart Rate,120.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0
3,128507618,141924,864,864,Vital Signs,Heart Rate,Heart Rate,110.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0,0,1,0,0
4,290804921,141924,1414,1414,Vital Signs,Heart Rate,Heart Rate,126.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17674017,236826124,141924,1309,1309,Vital Signs,Heart Rate,Heart Rate,122.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0,0,1,0,0
17674018,254833734,141924,2404,2404,Vital Signs,Heart Rate,Heart Rate,109.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0
17674019,91946104,141924,304,304,Vital Signs,Heart Rate,Heart Rate,117.0,0.0,0.0,0.0,1.0,0.0,2.0,3.0,0,0,1,0,0
17674020,110017799,141924,8216,8216,Vital Signs,Heart Rate,Heart Rate,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1,0,0


In [193]:
merged_hr_events = hr_events_mv_sofa.merge(icu_stays, on=["patientunitstayid"])
merged_hr_events

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue,respiration,coagulation,...,mld,diab,diabwc,hp,rend,canc,msld,metacanc,aids,CCI
0,236464484,141924,3844,3844,Vital Signs,Heart Rate,Heart Rate,97.0,0.0,0.0,...,0,1,0.0,0,0,0,0,0,0,1.0
1,74023594,141924,1744,1744,Vital Signs,Heart Rate,Heart Rate,104.0,0.0,0.0,...,0,1,0.0,0,0,0,0,0,0,1.0
2,110024075,141924,1024,1024,Vital Signs,Heart Rate,Heart Rate,120.0,0.0,0.0,...,0,1,0.0,0,0,0,0,0,0,1.0
3,128507618,141924,864,864,Vital Signs,Heart Rate,Heart Rate,110.0,0.0,0.0,...,0,1,0.0,0,0,0,0,0,0,1.0
4,290804921,141924,1414,1414,Vital Signs,Heart Rate,Heart Rate,126.0,0.0,0.0,...,0,1,0.0,0,0,0,0,0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17020309,236984818,141923,310,310,Vital Signs,Heart Rate,Heart Rate,62.0,0.0,0.0,...,0,1,0.0,0,0,0,0,0,0,1.0
17020310,218468143,141923,350,350,Vital Signs,Heart Rate,Heart Rate,0.0,0.0,0.0,...,0,1,0.0,0,0,0,0,0,0,1.0
17020311,219044914,141923,295,295,Vital Signs,Heart Rate,Heart Rate,108.0,0.0,0.0,...,0,1,0.0,0,0,0,0,0,0,1.0
17020312,218584013,141923,335,335,Vital Signs,Heart Rate,Heart Rate,66.0,0.0,0.0,...,0,1,0.0,0,0,0,0,0,0,1.0


In [194]:
merged_hr_events.columns

Index(['nursingchartid', 'patientunitstayid', 'nursingchartoffset',
       'nursingchartentryoffset', 'nursingchartcelltypecat',
       'nursingchartcelltypevallabel', 'nursingchartcelltypevalname',
       'nursingchartvalue', 'respiration', 'coagulation', 'liver',
       'cardiovascular', 'cns', 'renal', 'sofa_total', 'mv_invasive',
       'mv_non_vasive', 'mv_oxygen_therapy', 'mv_none', 'mv_unknown',
       'patienthealthsystemstayid', 'uniquepid', 'gender', 'age', 'height',
       'weight', 'bmi', 'height_avail', 'weight_avail', 'bmi_avail',
       'hospitaladmitoffset', 'hospitaldischargestatus',
       'hospitaldischargeoffset', 'unitdischargeoffset', 'unitdischargestatus',
       'race_w', 'race_b', 'race_l', 'race_o', 'mi', 'chf', 'pvd', 'cevd',
       'dementia', 'cpd', 'rheumd', 'pud', 'mld', 'diab', 'diabwc', 'hp',
       'rend', 'canc', 'msld', 'metacanc', 'aids', 'CCI'],
      dtype='object')

In [195]:
merged_hr_events.loc[merged_hr_events["unitdischargestatus"]=="Expired", "unitdischargeoffset"]

3052        1081
3053        1081
3054        1081
3055        1081
3056        1081
            ... 
17012851    2459
17012852    2459
17012853    2459
17012854    2459
17012855    2459
Name: unitdischargeoffset, Length: 1521996, dtype: int64

In [196]:
merged_hr_events.columns

Index(['nursingchartid', 'patientunitstayid', 'nursingchartoffset',
       'nursingchartentryoffset', 'nursingchartcelltypecat',
       'nursingchartcelltypevallabel', 'nursingchartcelltypevalname',
       'nursingchartvalue', 'respiration', 'coagulation', 'liver',
       'cardiovascular', 'cns', 'renal', 'sofa_total', 'mv_invasive',
       'mv_non_vasive', 'mv_oxygen_therapy', 'mv_none', 'mv_unknown',
       'patienthealthsystemstayid', 'uniquepid', 'gender', 'age', 'height',
       'weight', 'bmi', 'height_avail', 'weight_avail', 'bmi_avail',
       'hospitaladmitoffset', 'hospitaldischargestatus',
       'hospitaldischargeoffset', 'unitdischargeoffset', 'unitdischargestatus',
       'race_w', 'race_b', 'race_l', 'race_o', 'mi', 'chf', 'pvd', 'cevd',
       'dementia', 'cpd', 'rheumd', 'pud', 'mld', 'diab', 'diabwc', 'hp',
       'rend', 'canc', 'msld', 'metacanc', 'aids', 'CCI'],
      dtype='object')

In [197]:
merged_hr_events[f"death_in_24_hours"].unique()

KeyError: 'death_in_24_hours'

In [198]:
for h in [6, 12, 24, 24*2, 24*3, 24*4, 24*5, 24*6, 24*7, 24*8, 24*9, 24*10, 7200]:
    merged_hr_events[f"death_in_{h}_hours"] = (merged_hr_events["nursingchartoffset"]+60*h) >= merged_hr_events["unitdischargeoffset"]
    merged_hr_events.loc[merged_hr_events["unitdischargestatus"]=="Alive", f"death_in_{h}_hours"] = False

In [199]:
for h in [6, 12, 24, 24*2, 24*3, 24*4, 24*5, 24*6, 24*7, 24*8, 24*9, 24*10, 7200]:
    merged_hr_events[f"death_in_{h}_hours"] = merged_hr_events[f"death_in_{h}_hours"].astype(int)

In [200]:
dst_dir = "data/data_for_training/eicu"
merged_hr_events.to_csv(os.path.join(dst_dir, "merged_hr_events.csv"), index=False)

### SBP

In [201]:
sbp

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue
3,253552049,141924,124,124,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,106.0
13,252633992,141924,10924,10924,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,110.0
15,109412537,141924,12394,12394,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,108.0
19,198530434,141924,1984,1984,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,132.0
21,109114106,141924,859,859,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,115.0
...,...,...,...,...,...,...,...,...
151604173,217474746,141924,774,774,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,107.0
151604196,303465851,141924,244,244,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,123.0
151604206,109030987,141924,849,849,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,115.0
151604217,303717534,141924,764,764,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,86.0


In [202]:
merged_sbp_events = sbp.merge(icu_stays, on=["patientunitstayid"], how="left")
merged_sbp_events

Unnamed: 0,nursingchartid,patientunitstayid,nursingchartoffset,nursingchartentryoffset,nursingchartcelltypecat,nursingchartcelltypevallabel,nursingchartcelltypevalname,nursingchartvalue,patienthealthsystemstayid,uniquepid,...,mld,diab,diabwc,hp,rend,canc,msld,metacanc,aids,CCI
0,253552049,141924,124,124,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,106.0,129508.0,002-21222,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,252633992,141924,10924,10924,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,110.0,129508.0,002-21222,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,109412537,141924,12394,12394,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,108.0,129508.0,002-21222,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,198530434,141924,1984,1984,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,132.0,129508.0,002-21222,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,109114106,141924,859,859,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,115.0,129508.0,002-21222,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17086693,217474746,141924,774,774,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,107.0,129508.0,002-21222,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
17086694,303465851,141924,244,244,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,123.0,129508.0,002-21222,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
17086695,109030987,141924,849,849,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,115.0,129508.0,002-21222,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
17086696,303717534,141924,764,764,Vital Signs,Non-Invasive BP,Non-Invasive BP Systolic,86.0,129508.0,002-21222,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [203]:
merged_sbp_events.columns

Index(['nursingchartid', 'patientunitstayid', 'nursingchartoffset',
       'nursingchartentryoffset', 'nursingchartcelltypecat',
       'nursingchartcelltypevallabel', 'nursingchartcelltypevalname',
       'nursingchartvalue', 'patienthealthsystemstayid', 'uniquepid', 'gender',
       'age', 'height', 'weight', 'bmi', 'height_avail', 'weight_avail',
       'bmi_avail', 'hospitaladmitoffset', 'hospitaldischargestatus',
       'hospitaldischargeoffset', 'unitdischargeoffset', 'unitdischargestatus',
       'race_w', 'race_b', 'race_l', 'race_o', 'mi', 'chf', 'pvd', 'cevd',
       'dementia', 'cpd', 'rheumd', 'pud', 'mld', 'diab', 'diabwc', 'hp',
       'rend', 'canc', 'msld', 'metacanc', 'aids', 'CCI'],
      dtype='object')

In [204]:
dst_dir = "data/data_for_training/eicu"
merged_sbp_events.to_csv(os.path.join(dst_dir, "merged_sbp_events.csv"), index=False)

## Dataset

### Check # subjects

In [151]:
hr_suject_ids = merged_hr_events["subject_id"].unique()
len(hr_suject_ids)

50872

In [152]:
sbp_subject_ids = merged_sbp_events["subject_id"].unique()
len(sbp_subject_ids)

50911

In [153]:
def merge_measurements(measurement_1, measurement_2, start_time, end_time):
    m1_clean = measurement_1[(measurement_1["time_diff"] >= start_time) & (measurement_1["time_diff"] < end_time)]
    m2_clean = measurement_2[(measurement_2["time_diff"] >= start_time) & (measurement_2["time_diff"] < end_time)]
    m1_clean.rename(columns={"valuenum": "m1_valuenum"}, inplace=True)
    m2_clean = m2_clean[["stay_id", "valuenum"]]
    m2_clean.rename(columns={"valuenum": "m2_valuenum"}, inplace=True)
    return pd.merge(m1_clean, m2_clean, on="stay_id")

In [183]:
prediction_window, mop, outcome_window = 6, 6, 6
start_time = pd.to_timedelta(mop, unit="h")
end_time = start_time + pd.to_timedelta(prediction_window, unit="h")

In [156]:
merged_measurements = merge_measurements(
    merged_hr_events, merged_sbp_events, start_time, end_time
)
merged_measurements

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,m1_valuenum,intime,outtime,los,icu_history,gender,...,race_l,race_o,death_in_6_hours,death_in_12_hours,death_in_24_hours,death_in_36_hours,death_in_48_hours,death_in_60_hours,death_in_72_hours,m2_valuenum
0,10000032,29079034,39553978,2180-07-23 22:00:00,94.0,2180-07-23 14:00:00,2180-07-23 23:50:47,0.410266,0,0,...,0,0,0,False,0,0,0,0,0,82.0
1,10000032,29079034,39553978,2180-07-23 22:00:00,94.0,2180-07-23 14:00:00,2180-07-23 23:50:47,0.410266,0,0,...,0,0,0,False,0,0,0,0,0,85.0
2,10000032,29079034,39553978,2180-07-23 22:00:00,94.0,2180-07-23 14:00:00,2180-07-23 23:50:47,0.410266,0,0,...,0,0,0,False,0,0,0,0,0,90.0
3,10000032,29079034,39553978,2180-07-23 20:00:00,100.0,2180-07-23 14:00:00,2180-07-23 23:50:47,0.410266,0,0,...,0,0,0,False,0,0,0,0,0,82.0
4,10000032,29079034,39553978,2180-07-23 20:00:00,100.0,2180-07-23 14:00:00,2180-07-23 23:50:47,0.410266,0,0,...,0,0,0,False,0,0,0,0,0,85.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4092247,19999987,23865745,36195440,2145-11-03 06:00:00,90.0,2145-11-02 22:59:00,2145-11-04 21:29:30,1.937847,0,0,...,0,1,0,False,0,0,0,0,0,106.0
4092248,19999987,23865745,36195440,2145-11-03 06:00:00,90.0,2145-11-02 22:59:00,2145-11-04 21:29:30,1.937847,0,0,...,0,1,0,False,0,0,0,0,0,90.0
4092249,19999987,23865745,36195440,2145-11-03 06:00:00,90.0,2145-11-02 22:59:00,2145-11-04 21:29:30,1.937847,0,0,...,0,1,0,False,0,0,0,0,0,110.0
4092250,19999987,23865745,36195440,2145-11-03 06:00:00,90.0,2145-11-02 22:59:00,2145-11-04 21:29:30,1.937847,0,0,...,0,1,0,False,0,0,0,0,0,113.0


In [158]:
# measurements median
measurements = merged_measurements.groupby("stay_id").agg({
    "m1_valuenum": "median", "m2_valuenum": "median"
})
measurements

Unnamed: 0_level_0,m1_valuenum,m2_valuenum
stay_id,Unnamed: 1_level_1,Unnamed: 2_level_1
30000153,123.0,136.0
30000213,84.0,127.0
30000484,84.5,91.0
30000646,78.0,90.0
30001148,80.0,108.0
...,...,...
39999301,58.0,116.0
39999384,69.0,158.0
39999552,90.0,128.0
39999562,97.0,135.0


In [166]:
mask = (measurements["m1_valuenum"]>=50) & (measurements["m1_valuenum"]<=60) & (measurements["m2_valuenum"]>=45) & (measurements["m2_valuenum"]<=50)
measurements[mask]

Unnamed: 0_level_0,m1_valuenum,m2_valuenum
stay_id,Unnamed: 1_level_1,Unnamed: 2_level_1
35679245,52.5,46.5
38939945,50.0,50.0


In [167]:
stay_ids = list(measurements[mask].index)
stay_ids

[35679245, 38939945]

In [179]:
merged_hr_events[merged_hr_events["stay_id"].isin(stay_ids)][["subject_id", "stay_id", "valuenum", "gender", "age", "race_w", "race_b", "race_l", "race_o"]]

Unnamed: 0,subject_id,stay_id,valuenum,gender,age,race_w,race_b,race_l,race_o
221644,10352831,38939945,50.0,1,85,0,1,0,0
221645,10352831,38939945,50.0,1,85,0,1,0,0
221646,10352831,38939945,50.0,1,85,0,1,0,0
221647,10352831,38939945,50.0,1,85,0,1,0,0
221648,10352831,38939945,49.0,1,85,0,1,0,0
221649,10352831,38939945,50.0,1,85,0,1,0,0
221650,10352831,38939945,51.0,1,85,0,1,0,0
221651,10352831,38939945,50.0,1,85,0,1,0,0
221652,10352831,38939945,50.0,1,85,0,1,0,0
221653,10352831,38939945,59.0,1,85,0,1,0,0


In [178]:
merged_hr_events[merged_hr_events["stay_id"].isin(stay_ids)].iloc[:, 11:29]

Unnamed: 0,mi,chf,pvd,cevd,dementia,cpd,rheumd,pud,mld,diab,diabwc,hp,rend,canc,msld,metacanc,aids,CCI
221644,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
221645,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
221646,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
221647,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
221648,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
221649,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
221650,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
221651,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
221652,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0
221653,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,4.0


In [176]:
merged_hr_events[merged_hr_events["stay_id"].isin(stay_ids)].iloc[:, 35:42]

Unnamed: 0,respiration,coagulation,liver,cardiovascular,cns,renal,sofa_24hours
221644,0.0,0.0,0.0,4.0,0.0,0.0,11.0
221645,0.0,0.0,0.0,4.0,0.0,0.0,11.0
221646,0.0,0.0,0.0,4.0,0.0,0.0,11.0
221647,0.0,0.0,0.0,4.0,0.0,0.0,11.0
221648,0.0,0.0,0.0,4.0,0.0,0.0,11.0
221649,0.0,0.0,0.0,4.0,0.0,0.0,11.0
221650,0.0,0.0,0.0,4.0,0.0,0.0,11.0
221651,0.0,0.0,0.0,4.0,0.0,0.0,11.0
221652,0.0,0.0,0.0,1.0,0.0,0.0,11.0
221653,0.0,0.0,0.0,1.0,0.0,0.0,11.0


In [186]:
prediction_window, mop, outcome_window = 6, 6, 12
data_dir = f"/N/project/waveform_mortality/xiang/Projects/icu-contour/results_icu_history/death_in_{outcome_window}_hours_smote_smooth"
npz_data = np.load(os.path.join(data_dir, f"auc_fpr_tpr__PW_{prediction_window}__MOP_{mop}__OW_{outcome_window}.npz"))
auc, fpr, tpr, thresholds = npz_data["auc"], npz_data["fpr"], npz_data["tpr"], npz_data["thresholds"]
thresholds

array([1.98, 0.98, 0.97, 0.96, 0.95, 0.94, 0.93, 0.92, 0.91, 0.9 , 0.89,
       0.88, 0.87, 0.86, 0.85, 0.84, 0.83, 0.82, 0.81, 0.8 , 0.79, 0.78,
       0.77, 0.76, 0.75, 0.74, 0.73, 0.72, 0.71, 0.7 , 0.69, 0.68, 0.67,
       0.66, 0.65, 0.64, 0.63, 0.62, 0.61, 0.6 , 0.59, 0.58, 0.57, 0.56,
       0.55, 0.54, 0.53, 0.52, 0.51, 0.5 , 0.49, 0.48, 0.47, 0.46, 0.45,
       0.44, 0.43, 0.42, 0.41, 0.39, 0.38, 0.37, 0.36, 0.35, 0.34, 0.33,
       0.32, 0.31, 0.3 , 0.29, 0.28, 0.27, 0.26, 0.25, 0.24, 0.23, 0.22,
       0.21, 0.2 , 0.19, 0.18, 0.17, 0.16, 0.15, 0.14, 0.13, 0.12, 0.11,
       0.1 , 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0.03, 0.02, 0.01, 0.  ])