In [None]:
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

import mipipe as mip

# MySQL database connection settings.
username = 'root'
# The password is read from the environment so it never appears in the notebook.
password = os.getenv('AIMED_PW')
if password is None:
    # Without this guard the f-string below would embed the literal text
    # "None" as the password and the failure would only surface at query time.
    raise RuntimeError("Environment variable AIMED_PW is not set; cannot connect to MySQL.")
host = '172.28.8.103'
port = '3306'  # e.g. '3306'
database = "MIMIC_III"
# URL-escape the password: characters such as '@', ':' or '/' would otherwise
# be parsed as URL delimiters and corrupt the connection string.
db_engine = create_engine(
    f'mysql+pymysql://{username}:{quote_plus(password)}@{host}:{port}/{database}'
)

# mip configuration
# NOTE(review): presumably the directory mipipe reads the MIMIC-III CSV files from - confirm.
mip.Config.mimic_path = "../mimic3_csv/"

In [None]:
from importlib import reload

reload(mip)  # Reload the mip module so that recent changes to it take effect.

In [None]:
# read json file
import json

with open('X.feature_names.json') as f:
    x_feature_names = json.load(f)

d_items = pd.read_csv("../mimic3_csv/D_ITEMS.csv")  # D_ITEMS.csv
d_labitems = pd.read_csv("../mimic3_csv/D_LABITEMS.csv")  # D_LABITEMS.csv

# ITEMID -> LABEL lookup tables, built once instead of scanning the frames for
# every feature. drop_duplicates keeps the first row per ITEMID, matching a
# "first match wins" lookup. A missing ITEMID/LABEL column now raises here
# instead of being silently reported as "NOT FOUND" for every feature.
chart_labels = d_items.drop_duplicates("ITEMID").set_index("ITEMID")["LABEL"].to_dict()
lab_labels = d_labitems.drop_duplicates("ITEMID").set_index("ITEMID")["LABEL"].to_dict()

chartitem_map = {}  # ITEMID -> label, for IDs found in D_ITEMS
labitem_map = {}    # ITEMID -> label, for IDs found only in D_LABITEMS

for feature_name in x_feature_names:
    # Feature names are expected to start with "<ITEMID>_"; names whose first
    # token is not numeric are skipped.
    prefix = feature_name.split("_")[0]
    if not prefix.isdigit():
        continue
    itemid = int(prefix)
    # D_ITEMS takes precedence; D_LABITEMS is only consulted as a fallback.
    if itemid in chart_labels:
        chartitem_map[itemid] = chart_labels[itemid]
    elif itemid in lab_labels:
        labitem_map[itemid] = lab_labels[itemid]
    else:
        print("NOT FOUND: ", itemid)

In [None]:
import pandas as pd
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor


def process_group(group, x):
    """Return the mean of the ``value`` column of one group (example workload).

    Args:
        group: Mapping-like object (e.g. a DataFrame) whose ``"value"`` entry
            supports ``.mean()``.
        x: Accepted for call compatibility; not used by the computation.

    Returns:
        The mean of ``group["value"]``.
    """
    value_column = group["value"]
    return value_column.mean()


def parallel_process_dataframe(df):
    """Run ``process_group`` on every ``ID`` group of ``df`` in worker processes.

    One task is submitted per group, using a pool with one worker per CPU.

    Args:
        df: DataFrame with an ``ID`` column to group by.

    Returns:
        A list with one result per group, in the order the groups were
        produced by ``df.groupby('ID')``. The group keys are not included.
    """
    worker_count = mp.cpu_count()
    with ProcessPoolExecutor(max_workers=worker_count) as pool:
        pending = [
            pool.submit(process_group, chunk, 1)
            for _, chunk in df.groupby('ID')
        ]

    # Leaving the ``with`` block waits for all tasks; collect results in
    # submission order.
    return [task.result() for task in pending]


# Example DataFrame: three IDs with two values each.
df = pd.DataFrame({
    'ID': [1, 1, 2, 2, 3, 3],
    'value': [10, 20, 30, 40, 50, 60]
})

# NOTE: despite its name, result_df is the plain list returned by
# parallel_process_dataframe (one entry per ID group), not a DataFrame.
result_df = parallel_process_dataframe(df)
print(result_df)

# Chartevents preprocessing

In [None]:
# Read up to 10,000,000 CHARTEVENTS rows into memory in a single query.
# NOTE(review): LIMIT without ORDER BY does not guarantee which rows are
# returned, so the sample may differ between runs - confirm this is acceptable.
query = "SELECT * FROM CHARTEVENTS LIMIT 10000000"
chartevents_all = pd.read_sql(query, db_engine)

# Hand the raw rows to a mipipe Chartevents object.
chartevents = mip.Chartevents()
chartevents.load(chartevents_all)

In [None]:
# NOTE(review): assumes item_desc_info is a DataFrame of per-item summary
# statistics with "50%" and "std" columns - confirm against mipipe.
data = chartevents.item_desc_info
# Keep only items whose median ("50%") is at most 30. The explicit .copy()
# makes `data` an independent frame so the column assignment below does not
# raise SettingWithCopyWarning.
data = data[data["50%"] <= 30].copy()

# Truncate the median to an integer (NaN medians were excluded by the filter).
data["50%"] = data["50%"].astype(int)

# Scatter plot: x = median ("50%"), y = standard deviation ("std").

import matplotlib.pyplot as plt

plt.scatter(data["50%"], data["std"])
plt.xlabel("50% quantile")
plt.ylabel("std")
plt.title("50% quantile vs std")
plt.show()


In [None]:
# Fetch the first 100,000 CHARTEVENTS rows, ordered by CHARTTIME, for a fixed
# list of ITEMIDs.
query = "SELECT * FROM CHARTEVENTS WHERE ITEMID IN (220045, 224167, 227243, 220050, 220179, 225309, 224639, 226512, 226531, 224690, 220210) ORDER BY CHARTTIME LIMIT 100000;"
icu_patient_original_2 = pd.read_sql(query, db_engine)

# Disabled: optional narrower ITEMID filter, kept for reference.
# icu_patient_original_2 = icu_patient_original_2[icu_patient_original_2["ITEMID"].isin([220045, 220179, 220180])]

In [None]:
# Load the ITEMID-filtered rows into a second Chartevents object and process them.
# NOTE(review): ["mean", "min"] presumably selects the statistics computed by
# process() - confirm against mipipe.
chartevents2 = mip.Chartevents()
chartevents2.load(icu_patient_original_2)
chartevents2.process(["mean", "min"])

# Inputevents MV preprocessing

In [17]:
# Read an explicit column subset of INPUTEVENTS_MV, capped at 1,000,000 rows.
columns = "ROW_ID, ICUSTAY_ID, STARTTIME, ENDTIME, ITEMID, AMOUNT, AMOUNTUOM,RATE, RATEUOM, PATIENTWEIGHT"
query = f"SELECT {columns} FROM INPUTEVENTS_MV LIMIT 1000000"
inputevents_mv_all = pd.read_sql(query, db_engine)

# Load the rows into a mipipe InputeventsMV object and run its processing.
# The recorded cell output reports a "process_convert_rateuom_into_hour" step.
inputevents_mv = mip.InputeventsMV()
inputevents_mv.load(inputevents_mv_all)
inputevents_mv.process()

-----------------------------------
Processing...
process_convert_rateuom_into_hour... Complete!
Processing Complete!


In [18]:
# Display the processed INPUTEVENTS_MV frame (the notebook shows a truncated preview).
inputevents_mv.data

Unnamed: 0,ROW_ID,ICUSTAY_ID,STARTTIME,ENDTIME,ITEMID,AMOUNT,AMOUNTUOM,RATE,RATEUOM,PATIENTWEIGHT
121561,121562,200038.0,2143-10-24 21:30:00,2143-10-24 21:43:00,222042,125.0,mg,576923.0400,mcg/hour,72.0
121562,121563,200038.0,2143-10-24 21:30:00,2143-10-24 21:43:00,225158,250.0,ml,1153.8500,mL/hour,72.0
121573,121574,200038.0,2143-10-25 07:30:00,2143-10-25 10:51:00,225158,33.5,ml,10.0000,mL/hour,72.0
121571,121572,200038.0,2143-10-25 08:00:00,2143-10-25 08:01:00,226453,30.0,ml,,,72.0
121575,121576,200038.0,2143-10-25 08:00:00,2143-10-25 08:01:00,225799,50.0,ml,,,72.0
...,...,...,...,...,...,...,...,...,...,...
313631,313632,299929.0,2145-01-16 14:00:00,2145-01-16 14:01:00,226452,240.0,ml,,,93.0
899185,899186,299943.0,2151-08-25 05:30:00,2151-08-25 16:12:00,225158,107.0,ml,10.0000,mL/hour,72.4
899184,899185,299943.0,2151-08-25 11:00:00,2151-08-25 11:01:00,226452,150.0,ml,,,72.4
343643,343644,,2119-12-17 00:00:00,2119-12-17 02:46:00,222168,1000.0,mg,361445.9184,mcg/hour,80.4


In [21]:
# Inspect the processed row(s) whose ROW_ID equals 236.
inputevents_mv.data[inputevents_mv.data["ROW_ID"] == 236]

Unnamed: 0,ROW_ID,ICUSTAY_ID,STARTTIME,ENDTIME,ITEMID,AMOUNT,AMOUNTUOM,RATE,RATEUOM,PATIENTWEIGHT
235,236,223259.0,2133-02-04 14:53:00,2133-02-04 14:54:00,221906,0.008325,mg,499.479552,mcg/hour,83.2
