In [1]:
import os
import pandas as pd
from openpyxl.styles.builtins import output
from sipbuild.generator.outputs import output_api
from sqlalchemy import create_engine
from sympy.physics.units import katal

import openmimic as om

# MySQL 데이터베이스 연결 정보
username = 'root'
password = os.getenv('AIMED_PW')
host = '172.28.8.103'
port = '3306'  # 예: '3306'
database = "MIMIC_III"
db_engine = create_engine(f'mysql+pymysql://{username}:{password}@{host}:{port}/{database}')

# om configuration
om.Config.mimic_path = "../mimic3_csv/"
processed_data_path = "./processed_data/"

In [9]:
import importlib

def reload_all():
    importlib.reload(om)
    importlib.reload(om.chartevents)
    importlib.reload(om.chartevents_engineering)
    # importlib.reload(om.cohort)
    importlib.reload(om.config)
    importlib.reload(om.utils)
    importlib.reload(om.inputevents_mv)
    importlib.reload(om.inputevents_mv_engineering)
    importlib.reload(om.mimic_preprocessor)
    importlib.reload(om.patient_static)
    importlib.reload(om.patient_static_engineering)
    importlib.reload(om.outputevents)
    importlib.reload(om.outputevents_engineering)

reload_all()

# Patient_static

In [2]:
# query = "SELECT * FROM patient_static"
# patients_all = pd.read_sql(query, db_engine)
patients_static_csv = pd.read_csv(processed_data_path+"patients_static.csv")
patients_static_T_info_csv = pd.read_csv(processed_data_path+"patients_static_T_info.csv")

patients_static = om.PatientStatic()
patients_static.load_processed(patients_static_csv, patients_static_T_info_csv)
# patients_static.load(patients_all)
# patients_static.patients_T_info

<openmimic.patient_static.PatientStatic at 0x7f1d5a39cbf0>

# Chartevents

In [12]:
chartevents_items = (769, 220644, 772, 1521, 227456, 773, 225612, 227073, 770, 220587, 227443, 848, 225690, 1538, 225651, 803, 781, 1162, 225624, 225625, 786, 1522, 816, 225667, 116, 89, 90, 220074, 113, 220602, 226536, 1523, 788, 789, 1524, 220603, 787, 857, 225698, 777, 223679, 791, 1525, 220615, 224643, 225310, 220180, 8555, 220051, 8368, 8441, 8440, 227468, 1528, 806, 189, 727, 223835, 190, 198, 220621, 225664, 811, 807, 226537, 1529, 211, 220045, 226707, 226730, 1394, 813, 220545, 220228, 814, 818, 225668, 1531, 220635, 1532, 821, 456, 220181, 224, 225312, 220052, 52, 6702, 224322, 646, 834, 220277, 220227, 226062, 778, 220235, 779, 227466, 825, 1533, 535, 224695, 860, 223830, 1126, 780, 220274, 1534, 225677, 827, 224696, 543, 828, 227457, 224700, 506, 220339, 512, 829, 1535, 227464, 227442, 227467, 1530, 815, 1286, 824, 227465, 491, 492, 220059, 504, 833, 224422, 618, 220210, 224689, 614, 651, 224690, 615, 224688, 619, 837, 1536, 220645, 226534, 626, 442, 227243, 224167, 220179, 225309, 6701, 220050, 51, 455, 223761, 677, 676, 679, 678, 223762, 224685, 682, 224684, 683, 684, 224686, 1539, 849, 851, 227429, 859, 226531, 763, 224639, 226512, 861, 1542, 220546, 1127 )
query = f"SELECT * FROM CHARTEVENTS WHERE ITEMID IN {chartevents_items} ORDER BY CHARTTIME;"
chartevents_raw = pd.read_sql(query, db_engine)

In [3]:
chartevents_raw = pd.read_csv(processed_data_path+"raw_chartevents.csv")
chartevents_raw["CHARTTIME"] = pd.to_datetime(chartevents_raw["CHARTTIME"])
chartevents_raw["ICUSTAY_ID"].unique()

  chartevents_raw = pd.read_csv(processed_data_path+"raw_chartevents.csv")


array([202134., 293407., 222148., ..., 252411., 202836., 234115.])

In [4]:
chartevents2 = om.Chartevents()
chartevents2.load(chartevents_raw, patients_static.patients_T_info)
chartevents2.process()

../mimic3_csv/
Chartevents data updated!
-----------------------------------
Filtering...
-> filter_remove_unassociated_columns...	 Complete!	1.20s
-> filter_remove_no_ICUSTAY_ID...	 Complete!	2.33s
-> filter_remove_error...	 Complete!	1.82s
-> filter_remove_labitems...	../mimic3_csv/
 Complete!	1.25s
Chartevents data updated!
=> Before: 66,635,422, After: 66,534,355 : 99.85% remained.
Filtering Complete!
-----------------------------------
Processing...
-> process_group_variables_from_fiddle...	 Complete!	20.60s
Chartevents data updated!
-> process_aggregator...	 Complete!	6m 25.68s
-> process_interval_shift_alignment...	 Complete!	4.50s
Processing Complete!


# Inputevents_MV

In [4]:
columns = "ROW_ID, ICUSTAY_ID, STARTTIME, ENDTIME, ITEMID, AMOUNT, AMOUNTUOM, RATE, RATEUOM, PATIENTWEIGHT"
columns = "*"
query = f"SELECT {columns} FROM INPUTEVENTS_MV"
inputevents_mv_raw = pd.read_sql(query, db_engine)

In [5]:
inputevents_mv = om.InputeventsMV()
inputevents_mv.load(inputevents_mv_raw, patients_static.patients_T_info)
inputevents_mv.process()

-----------------------------------
Filtering...
-> filter_remove_unassociated_columns...	 Complete!	0.26s
-> filter_remove_no_ICUSTAY_ID...	 Complete!	0.34s
-> filter_remove_error...	 Complete!	0.63s
-> filter_remove_zero_input...	 Complete!	0.58s
-> filter_remove_continuous_uom_missing...	 Complete!	0.69s
=> Before: 3,618,991, After: 2,535,497 : 70.06% remained.
Filtering Complete!
-----------------------------------
Processing...
-> process_rateuom_into_hour_unit...	 Complete!	3.02s
-> process_unite_convertable_uom_by_D_ITEMS...	 Complete!	4.57s
-> process_split_ITEMID_by_unit...	 Complete!	3.49s
-> process_transform_T_cohort...	 Complete!	4m 8.95s
Processing Complete!


# Outputevents

In [10]:
columns = "*"
query = f"SELECT {columns} FROM OUTPUTEVENTS"
outputevents_raw = pd.read_sql(query, db_engine)

In [8]:
outputevents = om.Outputevents()
outputevents.load(outputevents_raw, patients_static.patients_T_info)
outputevents.process()

-----------------------------------
Filtering...
-> filter_remove_unassociated_columns...	 Complete!	0.00s
-> filter_remove_no_ICUSTAY_ID...	 Complete!	0.00s
-> filter_remove_error...	 Complete!	0.00s
-> filter_remove_zero_output...	 Complete!	0.00s
Outputevents data updated!
Filtering Complete!
=> Before: 100,000, After: 93,308 : 93.31% remained.
-----------------------------------
Processing...
-> process_aggregator...	 Complete!	5.37s
-> process_interval_shift_alignment...	 Complete!	0.09s
Processing Complete!


# Cohort Integration


In [8]:
chartevents = pd.read_csv(processed_data_path+"chartevents2.csv")
inputevents_mv = pd.read_csv(processed_data_path+"inputevents_mv.csv")


chartevents = om.Chartevents().load_processed(chartevents)
inputevents_mv = om.InputeventsMV().load_processed(inputevents_mv)

../mimic3_csv/
../mimic3_csv/


In [9]:
class Cohort:
    def __init__(self, patients_static=None, chartevents=None, inputevents_mv=None, outputevents=None, cohort=None):
        self.patients_static = patients_static
        self.chartevents = chartevents
        self.inputevents_mv = inputevents_mv
        self.outputevents = outputevents
        self.cohort_present = False
        self.data = None
        if isinstance(cohort, pd.DataFrame):
            self.data = cohort
            self.cohort_present = True


    def make_cohort(self):
        if self.cohort_present:
            return self.data
        merged_table = []
        self.cnvrt_column()
        self.data = self.patients_static.data
        merged_table.append("patients_static")
        if isinstance(self.chartevents, om.Chartevents):
            merged_table.append("chartevents")
            self.data = self.data.merge(self.chartevents.data, on="ICUSTAY_ID", how="left")
        if isinstance(self.inputevents_mv, om.InputeventsMV):
            merged_table.append("inputevents_mv")
            self.data = self.data.merge(self.inputevents_mv.data, on=["ICUSTAY_ID", "T"], how="left")
        if isinstance(self.outputevents, om.Outputevents):
            merged_table.append("outputevents")
            self.data = self.data.merge(self.outputevents.data, on=["ICUSTAY_ID", "T"], how="left")
        self.cohort_present = True
        print(f"Tables merged: {merged_table}")
        return self.data


    def cnvrt_column(self):
        if isinstance(self.chartevents, om.Chartevents):
            self.chartevents.cnvrt_column()
        if isinstance(self.inputevents_mv, om.InputeventsMV):
            self.inputevents_mv.cnvrt_column()


cohort = Cohort(patients_static, chartevents, inputevents_mv)
cohort.make_cohort()
cohort.data.sort_values(by=["ICUSTAY_ID", "T"])

Unnamed: 0,SUBJECT_ID,AGE,GENDER,ADMISSION_TYPE,ADMISSION_LOCATION,ICUSTAY_ID,LOS,FIRST_CAREUNIT,FIRST_WARDID,HADM_to_ICU_time,...,Replete (3/4) #0,Replete (2/3) #0,Imipenem/Cilastatin #1,Boost Glucose Control (3/4) #0,Resource Fruit Beverage #0,Mighty Shake (no sugar added) #0,Nutren 2.0 (3/4) #0,Keflex #1,Quinine #0,Lansoprazole (Prevacid) #1
1680923,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
1680924,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
1680925,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
1680926,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
1680927,55973,61.1096,F,EMERGENCY,CLINIC REFERRAL/PREMATURE,200001,3.0786,MICU,23,7,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
347284,7630,48.7644,M,ELECTIVE,PHYS REFERRAL/NORMAL DELI,299999,1.2978,CSRU,14,0,...,,,,,,,,,,
347285,7630,48.7644,M,ELECTIVE,PHYS REFERRAL/NORMAL DELI,299999,1.2978,CSRU,14,0,...,,,,,,,,,,
347286,7630,48.7644,M,ELECTIVE,PHYS REFERRAL/NORMAL DELI,299999,1.2978,CSRU,14,0,...,,,,,,,,,,
347287,7630,48.7644,M,ELECTIVE,PHYS REFERRAL/NORMAL DELI,299999,1.2978,CSRU,14,0,...,,,,,,,,,,


In [10]:
id = 200001
patients_static.patients_T_info[patients_static.patients_T_info["ICUSTAY_ID"] == id]["T"].max(), chartevents2[chartevents2["ICUSTAY_ID"] == id]["T"].max(), inputevents_mv[inputevents_mv["ICUSTAY_ID"] == id]["T"].max()

NameError: name 'chartevents2' is not defined

In [11]:
target = inputevents_mv
target[target["ICUSTAY_ID"] == 200001]

TypeError: 'InputeventsMV' object is not subscriptable