In [3]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

import openmimic as om

# MySQL database connection settings
username = 'root'
# The password comes from the environment so it is never stored in the notebook.
password = os.getenv('AIMED_PW')
if password is None:
    # Do not abort here: create_engine() connects lazily, and the cells that
    # read cached CSV files work without any database access.
    print("WARNING: AIMED_PW is not set; queries through db_engine will fail to authenticate.")
host = '172.28.8.103'
port = '3306'  # e.g. '3306'
database = "MIMIC_III"
# URL-encode the password: characters such as '@', ':' or '/' would otherwise be
# parsed as URL delimiters and silently corrupt the connection string.
db_engine = create_engine(
    f'mysql+pymysql://{username}:{quote_plus(password or "")}@{host}:{port}/{database}'
)

# om configuration
om.Config.mimic_path = "../mimic3_csv/"
processed_data_path = "./processed_data/"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
import importlib

def reload_all():
    """Reload the `openmimic` package and then each of its submodules.

    The package object is reloaded first, followed by the submodules in the
    fixed order listed below. `om.cohort` is not reloaded (it is disabled in
    the list).
    """
    importlib.reload(om)
    submodule_names = (
        "chartevents",
        "chartevents_engineering",
        # "cohort",  # disabled: not reloaded
        "config",
        "inputevents_mv",
        "inputevents_mv_engineering",
        "mimic_preprocessor",
        "patient_static",
        "patient_static_engineering",
        "utils",
    )
    for submodule_name in submodule_names:
        # Look the attribute up inside the loop so every lookup happens after
        # the preceding reloads, exactly as a sequence of direct calls would.
        importlib.reload(getattr(om, submodule_name))
# Run the reload immediately so the cells below use the freshly reloaded openmimic modules.
reload_all()

In [31]:
# Map the numeric ITEMID prefix of each model feature name to a readable label.
import json

with open('X.feature_names.json') as f:
    x_feature_names = json.load(f)

d_items = pd.read_csv("../mimic3_csv/D_ITEMS.csv")  # D_ITEMS.csv
d_labitems = pd.read_csv("../mimic3_csv/D_LABITEMS.csv")  # D_LABITEMS.csv

# Build each ITEMID -> LABEL lookup once instead of scanning the whole table
# for every feature. drop_duplicates keeps the first row per ITEMID, which is
# the row a positional `.values[0]` lookup on the filtered frame would return.
chart_label_by_itemid = d_items.drop_duplicates("ITEMID").set_index("ITEMID")["LABEL"].to_dict()
lab_label_by_itemid = d_labitems.drop_duplicates("ITEMID").set_index("ITEMID")["LABEL"].to_dict()

chartitem_map = {}  # ITEMID -> label, for ids found in D_ITEMS
labitem_map = {}    # ITEMID -> label, for ids found only in D_LABITEMS

for feature_name in x_feature_names:
    # Feature names look like "<prefix>_<suffix>"; only all-digit prefixes are ITEMIDs.
    prefix = feature_name.split("_")[0]
    if not prefix.isdigit():
        continue
    item_id = int(prefix)
    # D_ITEMS takes precedence; D_LABITEMS is consulted only as a fallback.
    # Explicit membership tests replace the nested bare `except:` blocks, so
    # real problems (e.g. a missing column) surface instead of being swallowed.
    if item_id in chart_label_by_itemid:
        chartitem_map[item_id] = chart_label_by_itemid[item_id]
    elif item_id in lab_label_by_itemid:
        labitem_map[item_id] = lab_label_by_itemid[item_id]
    else:
        print("NOT FOUND: ", item_id)

In [32]:
chartitem_map

{220048: 'Heart Rhythm',
 220046: 'Heart rate Alarm - High',
 220047: 'Heart Rate Alarm - Low',
 220052: 'Arterial Blood Pressure mean',
 220056: 'Arterial Blood Pressure Alarm - Low',
 220058: 'Arterial Blood Pressure Alarm - High',
 220059: 'Pulmonary Artery Pressure systolic',
 220060: 'Pulmonary Artery Pressure diastolic',
 220061: 'Pulmonary Artery Pressure mean',
 220063: 'Pulmonary Artery Pressure Alarm - High',
 220066: 'Pulmonary Artery Pressure Alarm - Low',
 220072: 'Central Venous Pressure Alarm - High',
 220073: 'Central Venous Pressure  Alarm - Low',
 220074: 'Central Venous Pressure',
 220088: 'Cardiac Output (thermodilution)',
 220120: 'Intra Aortic Ballon Pump Setting',
 220181: 'Non Invasive Blood Pressure mean',
 220194: 'Temporary AV interval',
 220292: 'Minute Volume Alarm - Low',
 220293: 'Minute Volume Alarm - High',
 220339: 'PEEP set',
 220739: 'GCS - Eye Opening',
 220765: 'Intra Cranial Pressure',
 220862: 'Albumin 25%',
 220864: 'Albumin 5%',
 220949: 'Dextr

# Patient_static preprocessing

In [12]:
# Static patient tables are read from the CSV cache. The MySQL route is kept
# here for reference only:
#   patients_all = pd.read_sql("SELECT * FROM patient_static", db_engine)
#   patients_static.load(patients_all)
patients_static_csv = pd.read_csv(f"{processed_data_path}patients_static.csv")
patients_static_T_info_csv = pd.read_csv(f"{processed_data_path}patients_static_T_info.csv")

# Hand both cached frames to a fresh PatientStatic container.
patients_static = om.PatientStatic()
patients_static.load_processed(patients_static_csv, patients_static_T_info_csv)

# Chartevents preprocessing

In [12]:
# ITEMIDs of the chart variables to pull from CHARTEVENTS.
chartevents_items = (769, 220644, 772, 1521, 227456, 773, 225612, 227073, 770, 220587, 227443, 848, 225690, 1538, 225651, 803, 781, 1162, 225624, 225625, 786, 1522, 816, 225667, 116, 89, 90, 220074, 113, 220602, 226536, 1523, 788, 789, 1524, 220603, 787, 857, 225698, 777, 223679, 791, 1525, 220615, 224643, 225310, 220180, 8555, 220051, 8368, 8441, 8440, 227468, 1528, 806, 189, 727, 223835, 190, 198, 220621, 225664, 811, 807, 226537, 1529, 211, 220045, 226707, 226730, 1394, 813, 220545, 220228, 814, 818, 225668, 1531, 220635, 1532, 821, 456, 220181, 224, 225312, 220052, 52, 6702, 224322, 646, 834, 220277, 220227, 226062, 778, 220235, 779, 227466, 825, 1533, 535, 224695, 860, 223830, 1126, 780, 220274, 1534, 225677, 827, 224696, 543, 828, 227457, 224700, 506, 220339, 512, 829, 1535, 227464, 227442, 227467, 1530, 815, 1286, 824, 227465, 491, 492, 220059, 504, 833, 224422, 618, 220210, 224689, 614, 651, 224690, 615, 224688, 619, 837, 1536, 220645, 226534, 626, 442, 227243, 224167, 220179, 225309, 6701, 220050, 51, 455, 223761, 677, 676, 679, 678, 223762, 224685, 682, 224684, 683, 684, 224686, 1539, 849, 851, 227429, 859, 226531, 763, 224639, 226512, 861, 1542, 220546, 1127 )
# Render the IN list explicitly rather than relying on Python's tuple repr:
# a one-element tuple prints as "(769,)", which is not valid SQL. Casting each
# entry through int() also guarantees that only numeric literals reach the query.
itemid_sql_list = ", ".join(str(int(item_id)) for item_id in chartevents_items)
query = f"SELECT * FROM CHARTEVENTS WHERE ITEMID IN ({itemid_sql_list}) ORDER BY CHARTTIME;"
icu_patient_original = pd.read_sql(query, db_engine)

In [3]:
# Reload the raw CHARTEVENTS extract from the CSV cache instead of MySQL.
raw_chartevents_csv = f"{processed_data_path}raw_chartevents.csv"
icu_patient_original = pd.read_csv(raw_chartevents_csv)
# read_csv was not asked to parse dates, so convert CHARTTIME explicitly.
icu_patient_original["CHARTTIME"] = pd.to_datetime(icu_patient_original["CHARTTIME"])
# Display the distinct ICU stay ids present in the extract.
icu_patient_original["ICUSTAY_ID"].unique()

  icu_patient_original = pd.read_csv(processed_data_path+"icu_patient_original.csv")


array([202134., 293407., 222148., ..., 252411., 202836., 234115.])

In [4]:
# Filter and process the raw CHARTEVENTS rows with openmimic.
chartevents2 = om.Chartevents()
# The raw rows are loaded together with the static table's patients_T_info
# (presumably the per-stay time grid used for alignment — TODO confirm).
chartevents2.load(icu_patient_original, patients_static.patients_T_info)
# Per the recorded log, this removes unassociated columns, rows without an
# ICUSTAY_ID, error rows and lab items.
chartevents2.filter()
# Per the recorded log, this groups variables and runs an interval/shift alignment.
# NOTE(review): ["mean"] presumably selects the aggregation statistic(s) — confirm against openmimic.
chartevents2.process(["mean"])

Chartevents data updated!
-----------------------------------
Filtering...
-> filter_remove_unassociated_columns...	 Complete!	1.18s
-> filter_remove_no_ICUSTAY_ID...	 Complete!	2.26s
-> filter_remove_error...	 Complete!	1.75s
-> filter_remove_labitems...	 Complete!	1.21s
Chartevents data updated!
Filtering Complete!
=> Before: 66,635,422, After: 66,534,355 : 99.85% remained.
-----------------------------------
Processing...
-> process_group_variables_from_fiddle...	 Complete!	19.97s
Chartevents data updated!
########
ICUSTAY_ID
-> process_interval_shift_alignment...	 Complete!	4.61s
Processing Complete!


# Inputevents_MV preprocessing

In [3]:
# Select every INPUTEVENTS_MV column. A narrower projection used to be assigned
# to `columns` first, but it was overwritten on the very next line and never
# reached the query; it is preserved here as a comment in case it is wanted:
#   "ROW_ID, ICUSTAY_ID, STARTTIME, ENDTIME, ITEMID, AMOUNT, AMOUNTUOM, RATE, RATEUOM, PATIENTWEIGHT"
columns = "*"
query = f"SELECT {columns} FROM INPUTEVENTS_MV"
inputevents_mv_all = pd.read_sql(query, db_engine)

In [4]:
# Filter and process the raw INPUTEVENTS_MV rows with openmimic.
inputevents_mv = om.InputeventsMV()
# Loaded together with the static table's patients_T_info, as for chart events.
inputevents_mv.load(inputevents_mv_all, patients_static.patients_T_info)
# NOTE(review): no explicit .filter() call here, yet the recorded log shows the
# filtering steps running — process() (or load()) appears to trigger them; confirm.
# The recorded log shows the process_transform_T_cohort step alone taking about 4 minutes.
inputevents_mv.process()

-----------------------------------
Filtering...
-> filter_remove_unassociated_columns...	 Complete!	0.27s
-> filter_remove_no_ICUSTAY_ID...	 Complete!	0.35s
-> filter_remove_error...	 Complete!	0.67s
-> filter_remove_zero_input...	 Complete!	0.60s
-> filter_remove_continuous_uom_missing...	 Complete!	0.73s
Filtering Complete!
=> Before: 3,618,991, After: 2,535,497 : 70.06% remained.
-----------------------------------
Processing...
-> process_rateuom_into_hour_unit...	 Complete!	3.15s
-> process_unite_convertable_uom_by_D_ITEMS...	 Complete!	4.80s
-> process_split_ITEMID_by_unit...	 Complete!	3.58s
-> process_transform_T_cohort...	########
ICUSTAY_ID
 Complete!	4m 6.22s
Processing Complete!


# Cohort Integration


In [17]:
# Read the processed tables from the CSV cache. The raw frames get their own
# names so `chartevents` / `inputevents_mv` only ever refer to openmimic objects.
chartevents_processed = pd.read_csv(f"{processed_data_path}chartevents2.csv")
inputevents_mv_processed = pd.read_csv(f"{processed_data_path}inputevents_mv.csv")

# ITEMID -> LABEL lookup built from D_ITEMS.
d_chartitems = {
    itemid: label for itemid, label in zip(d_items["ITEMID"], d_items["LABEL"])
}

# load_processed() is used for its return value here, so it must hand back the container.
chartevents = om.Chartevents().load_processed(chartevents_processed)
chartevents.d_chartitems = d_chartitems
inputevents_mv = om.InputeventsMV().load_processed(inputevents_mv_processed)

In [19]:
class Cohort:
    """Combine static patient data with chart and input events into one table.

    A ready-made cohort DataFrame may be supplied through ``cohort``; in that
    case ``make_cohort`` returns it unchanged and performs no merging.
    """

    def __init__(self, patients_static=None, chartevents=None, inputevents_mv=None, cohort=None):
        """Store the source containers and, if given, a pre-built cohort frame."""
        self.patients_static = patients_static
        self.chartevents = chartevents
        self.inputevents_mv = inputevents_mv
        # Only an actual DataFrame counts as a pre-built cohort; any other value is ignored.
        prebuilt = isinstance(cohort, pd.DataFrame)
        self.cohort_present = prebuilt
        self.data = cohort if prebuilt else None

    def make_cohort(self):
        """Build the merged cohort table (once) and return it.

        Starts from ``patients_static.data``, left-joins chart events on
        ``ICUSTAY_ID`` and then input events on ``ICUSTAY_ID`` + ``T``. A source
        that is not the expected openmimic type is skipped with a printed notice.
        Subsequent calls return the stored result.
        """
        if self.cohort_present:
            return self.data

        self.data = self.patients_static.data

        if not isinstance(self.chartevents, om.Chartevents):
            print("chartevents is not openmimic.Chartevents object")
        else:
            self.data = self.data.merge(self.chartevents.data, on="ICUSTAY_ID", how="left")

        if not isinstance(self.inputevents_mv, om.InputeventsMV):
            print("inputevents_mv is not openmimic.InputeventsMV object")
        else:
            self.data = self.data.merge(self.inputevents_mv.data, on=["ICUSTAY_ID", "T"], how="left")

        self.cohort_present = True
        return self.data



cohort = Cohort(
    patients_static=patients_static,
    chartevents=chartevents,
    inputevents_mv=inputevents_mv,
)
# make_cohort() stores the merged frame on cohort.data and returns that same
# frame; sort_values() yields a sorted copy for display and leaves cohort.data as is.
cohort.make_cohort().sort_values(by=["ICUSTAY_ID", "T"])

Unnamed: 0,SUBJECT_ID,AGE,GENDER,ADMISSION_TYPE,ADMISSION_LOCATION,ICUSTAY_ID,LOS,FIRST_CAREUNIT,FIRST_WARDID,HADM_to_ICU_time,...,226046.0,226045.0,225876.1,227978.0,225991.0,225996.0,227518.0,227691.1,225896.0,225909.1
1680923,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
1680924,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
1680925,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
1680926,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
1680927,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
347284,7630,48.7644,M,ELECTIVE,PHYS REFERRAL/NORMAL DELI,299999,1.2978,CSRU,14,0,...,,,,,,,,,,
347285,7630,48.7644,M,ELECTIVE,PHYS REFERRAL/NORMAL DELI,299999,1.2978,CSRU,14,0,...,,,,,,,,,,
347286,7630,48.7644,M,ELECTIVE,PHYS REFERRAL/NORMAL DELI,299999,1.2978,CSRU,14,0,...,,,,,,,,,,
347287,7630,48.7644,M,ELECTIVE,PHYS REFERRAL/NORMAL DELI,299999,1.2978,CSRU,14,0,...,,,,,,,,,,


In [57]:
inputevents_mv.data

Unnamed: 0,ICUSTAY_ID,T,220949.0,221347.0,225152.0,225158.0,225837.0,225863.0,225883.0,225907.0,...,226046.0,226045.0,225876.1,227978.0,225991.0,225996.0,227518.0,227691.1,225896.0,225909.1
0,200001.0,0,,,,,,,,,...,,,,,,,,,,
1,200001.0,1,,,,,,,,,...,,,,,,,,,,
2,200001.0,2,,,,,,,,,...,,,,,,,,,,
3,200001.0,3,,,,,,,,,...,,,,,,,,,,
4,200001.0,4,,,,,,2.0,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874828,299998.0,42,,,,,,,,,...,,,,,,,,,,
874829,299998.0,43,,,,,,,,,...,,,,,,,,,,
874830,299998.0,44,,,,,,,,,...,,,,,,,,,,
874831,299998.0,45,,,,,,,,,...,,,,,,,,,,


In [54]:
'SysBP_mean_(137.0, 2998.0]'.isdigit()

False

In [10]:
# Compare the largest T recorded for one ICU stay across the three tables.
id = 200001
static_T_info = patients_static.patients_T_info
(
    static_T_info[static_T_info["ICUSTAY_ID"] == id]["T"].max(),
    chartevents2[chartevents2["ICUSTAY_ID"] == id]["T"].max(),
    inputevents_mv[inputevents_mv["ICUSTAY_ID"] == id]["T"].max(),
)

(47, 45.0, 44)

In [11]:
# Show the rows belonging to ICU stay 200001.
# NOTE(review): this indexes inputevents_mv like a DataFrame, while another cell
# binds that name to an om.InputeventsMV object — confirm which one is in scope here.
target = inputevents_mv
target[target["ICUSTAY_ID"] == 200001]

Unnamed: 0,ICUSTAY_ID,T,220949.0,221347.0,225152.0,225158.0,225837.0,225863.0,225883.0,225907.0,...,226046.0,226045.0,225876.1,227978.0,225991.0,225996.0,227518.0,227691.1,225896.0,225909.1
0,200001.0,0,,,,,,,,,...,,,,,,,,,,
1,200001.0,1,,,,,,,,,...,,,,,,,,,,
2,200001.0,2,,,,,,,,,...,,,,,,,,,,
3,200001.0,3,,,,,,,,,...,,,,,,,,,,
4,200001.0,4,,,,,,2.0,,,...,,,,,,,,,,
5,200001.0,5,,,,10.0,,,,,...,,,,,,,,,,
6,200001.0,6,,,,10.0,,,,,...,,,,,,,,,,
7,200001.0,7,,,,10.0,,,,,...,,,,,,,,,,
8,200001.0,8,,,,10.0,,,,,...,,,,,,,,,,
9,200001.0,9,,,,10.0,,,,,...,,,,,,,,,,


In [20]:
cohort.patients_static

<openmimic.patient_static.PatientStatic at 0x7fba36701250>