In [1]:
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

import mipipe as mip

# MySQL database connection settings.
username = 'root'
# The password is read from the AIMED_PW environment variable so it is never
# stored in the notebook; os.getenv returns None when the variable is unset.
password = os.getenv('AIMED_PW')
host = '172.28.8.103'
port = '3306'  # e.g. '3306'
database = "MIMIC_III"
# Percent-encode the credentials before embedding them in the URL: characters
# such as '@', ':' or '/' in a raw password would otherwise be parsed as URL
# delimiters. quote(..., safe="") is used (not quote_plus) so that spaces become
# %20 and round-trip regardless of how the URL is later unquoted.
# An unset password is sent as an empty string instead of the literal text "None".
db_engine = create_engine(
    f'mysql+pymysql://{quote(username, safe="")}:{quote(password or "", safe="")}'
    f'@{host}:{port}/{database}'
)

# mip configuration
mip.Config.mimic_path = "../mimic3_csv/"

In [30]:
from importlib import reload

reload(mip)  # Reload the module so that changes made to its source are picked up.

<module 'mipipe' from '/home/jgpark/PycharmProjects/MIMIC_preprocessing/mipipe/__init__.py'>

In [4]:
# Read the feature-name JSON file and map each numeric item id it mentions to
# its human-readable label from D_ITEMS (chart items) or D_LABITEMS (lab items).
import json

with open('X.feature_names.json', encoding='utf-8') as f:
    x_feature_names = json.load(f)

d_items = pd.read_csv("../mimic3_csv/D_ITEMS.csv")  # D_ITEMS.csv
d_labitems = pd.read_csv("../mimic3_csv/D_LABITEMS.csv")  # D_LABITEMS.csv

# Build ITEMID -> LABEL lookups once instead of scanning the whole frame for
# every feature. drop_duplicates keeps the first row per ITEMID, which matches
# taking `.values[0]` of the filtered frame.
chart_labels = d_items.drop_duplicates("ITEMID").set_index("ITEMID")["LABEL"].to_dict()
lab_labels = d_labitems.drop_duplicates("ITEMID").set_index("ITEMID")["LABEL"].to_dict()

chartitem_map = {}
labitem_map = {}

for feature_name in x_feature_names:
    # Feature names look like "<prefix>_<suffix>"; only all-digit prefixes are item ids.
    prefix = feature_name.split("_")[0]
    if not prefix.isdigit():
        continue
    feature_item_id = int(prefix)
    # Chart items take precedence; lab items are the fallback. Explicit
    # membership tests replace the bare `except:` blocks, which also swallowed
    # unrelated errors (and KeyboardInterrupt).
    if feature_item_id in chart_labels:
        chartitem_map[feature_item_id] = chart_labels[feature_item_id]
    elif feature_item_id in lab_labels:
        labitem_map[feature_item_id] = lab_labels[feature_item_id]
    else:
        print("NOT FOUND: ", feature_item_id)

In [13]:
import pandas as pd
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor


def process_group(group, x):
    """Return the mean of the ``"value"`` column of ``group``.

    Example worker for the parallel demo below.

    Args:
        group: Object indexable by ``"value"`` whose result has ``.mean()``
            (a pandas DataFrame in the example usage).
        x: Extra argument accepted by the signature; it is not used.

    Returns:
        The mean of ``group["value"]``.
    """
    values = group["value"]
    return values.mean()


def parallel_process_dataframe(df):
    """Run ``process_group`` on every ``'ID'`` group of ``df`` in a process pool.

    One task is submitted per group, with the constant ``1`` as the second
    argument. The pool uses ``mp.cpu_count()`` workers.

    Args:
        df: DataFrame containing an ``'ID'`` column to group by.

    Returns:
        A list with one ``process_group`` result per group, in the order the
        groups were produced by ``df.groupby('ID')``.
    """
    with ProcessPoolExecutor(max_workers=mp.cpu_count()) as pool:
        pending = [
            pool.submit(process_group, frame, 1)
            for _, frame in df.groupby('ID')
        ]

    # Leaving the `with` block waits for all tasks, so the results are
    # collected only after the pool has shut down.
    return [task.result() for task in pending]


# Example dataframe: three IDs with two values each.
df = pd.DataFrame(
    [(1, 10), (1, 20), (2, 30), (2, 40), (3, 50), (3, 60)],
    columns=['ID', 'value'],
)

# Despite its name, result_df holds the plain list returned by
# parallel_process_dataframe (one entry per ID group).
result_df = parallel_process_dataframe(df)
print(result_df)

[15.0, 35.0, 55.0]


# Chartevents preprocessing

In [3]:
# Fetch at most 100000 CHARTEVENTS rows, ordered by CHARTTIME, restricted to
# the five ITEMIDs listed in the IN clause.
query = "SELECT * FROM CHARTEVENTS WHERE ITEMID IN (226707, 226730, 224639, 226512, 226531) ORDER BY CHARTTIME LIMIT 100000;"
icu_patient_original = pd.read_sql(query, db_engine)

# Load the raw query result into a mipipe Chartevents object.
chartevents = mip.Chartevents()
chartevents.load(icu_patient_original)

In [4]:
# Display item_interval_info for the loaded data. In the run shown it is a dict
# whose keys (0 and 24) map to lists of ITEMIDs.
# NOTE(review): the keys are presumably a recording interval — confirm in mipipe.
chartevents.item_interval_info

{0: [226512], 24: [226531, 224639, 226730, 226707]}

In [2]:
# Second extract: at most 100000 CHARTEVENTS rows, ordered by CHARTTIME, for
# the eleven ITEMIDs listed in the IN clause. Note that this rebinds `query`.
query = "SELECT * FROM CHARTEVENTS WHERE ITEMID IN (220045, 224167, 227243, 220050, 220179, 225309, 224639, 226512, 226531, 224690, 220210) ORDER BY CHARTTIME LIMIT 100000;"
icu_patient_original_2 = pd.read_sql(query, db_engine)

# Disabled experiment: narrow the extract to three ITEMIDs before processing.
# icu_patient_original_2 = icu_patient_original_2[icu_patient_original_2["ITEMID"].isin([220045, 220179, 220180])]

In [3]:
# Load the second extract into a fresh Chartevents object and run process()
# with "mean" and "min". Per the logged output of this run, process() applies
# three filters, then groups variables and aggregates.
chartevents2 = mip.Chartevents()
chartevents2.load(icu_patient_original_2)
chartevents2.process(["mean", "min"])

Chartevents data updated!
-----------------------------------
Filtering...
chartevents_filter_remove_no_ICUSTAY_ID... Complete!
chartevents_filter_remove_error... Complete!
chartevents_filter_remove_labitems... Complete!
Chartevents data updated!
Filtered Complete!
-----------------------------------
Processing...
chartevents_group_variables... Complete!
Chartevents data updated!
chartevents_aggregator... Complete!
Processed Complete!


In [4]:
# Display the processed frame. In the run shown, its columns are ICUSTAY_ID, T,
# and a mean/min pair for each ITEMID that remained after processing.
chartevents2.data

Unnamed: 0_level_0,ICUSTAY_ID,T,220045,220045,220179,220179,220210,220210,224639,224639
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,min,mean,min,mean,min,mean,min
0,200975,0,84.0,84.0,132.0,132.0,19.0,19.0,79.8,79.8
1,200975,1,90.0,90.0,129.0,129.0,14.0,14.0,,
2,200975,2,83.0,83.0,116.0,116.0,16.0,16.0,,
3,200975,3,84.0,84.0,116.0,116.0,15.0,15.0,,
4,200975,4,84.0,84.0,124.0,124.0,19.0,19.0,,
...,...,...,...,...,...,...,...,...,...,...
28478,299853,51,82.0,82.0,101.0,101.0,11.0,11.0,,
28479,299853,52,91.0,91.0,113.0,113.0,2.0,2.0,,
28480,299853,53,89.0,89.0,104.0,104.0,14.0,14.0,,
28481,299853,54,86.0,86.0,111.0,110.0,19.0,19.0,,
