In [1]:
import os
import numpy as numpy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

from tqdm import tqdm
from utils.utils import get_readable_file_size

In [2]:
# Root directory of the MIMIC-IV files used by this notebook.
# The location can be overridden with the MIMIC_IV_DIR environment variable so
# the notebook is not tied to one user's home directory; when the variable is
# unset, the original hard-coded path is used unchanged.
ehr_data_dir = os.environ.get("MIMIC_IV_DIR", "/home/mengliang/DatasetFolder/mimiciv/2.2")

In [3]:
# Read the gzip-compressed ICU input events table into a DataFrame.
icu_inputevents_path = os.path.join(ehr_data_dir, "icu/inputevents.csv.gz")
df_icu_inputevents = pd.read_csv(
    icu_inputevents_path,
    compression="gzip",
    index_col=False,  # do not use the first column as the index
)
# Show the first rows as the cell output.
df_icu_inputevents.head()

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,starttime,endtime,storetime,itemid,amount,amountuom,...,ordercomponenttypedescription,ordercategorydescription,patientweight,totalamount,totalamountuom,isopenbag,continueinnextdept,statusdescription,originalamount,originalrate
0,10000032,29079034,39553978,66056,2180-07-23 21:10:00,2180-07-23 21:11:00,2180-07-23 21:10:00,226452,100.0,ml,...,Main order parameter,Bolus,39.4,100.0,ml,0,0,FinishedRunning,100.0,100.0
1,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:01:00,2180-07-23 18:56:00,226452,200.0,ml,...,Main order parameter,Bolus,39.4,200.0,ml,0,0,FinishedRunning,200.0,200.0
2,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:30:00,2180-07-23 17:02:00,220862,49.999999,ml,...,Main order parameter,Continuous IV,39.4,50.0,ml,0,0,FinishedRunning,50.0,100.0
3,10000032,29079034,39553978,88981,2180-07-23 17:33:00,2180-07-23 18:03:00,2180-07-23 18:16:00,220862,49.999999,ml,...,Main order parameter,Continuous IV,39.4,50.0,ml,0,0,FinishedRunning,50.0,100.0
4,10000032,29079034,39553978,88981,2180-07-23 18:56:00,2180-07-23 18:57:00,2180-07-23 18:56:00,226452,100.0,ml,...,Main order parameter,Bolus,39.4,100.0,ml,0,0,FinishedRunning,100.0,100.0


In [4]:
# Print the number of distinct values of each identifier column, in the order
# subject_id, hadm_id, stay_id. dropna=False keeps the count identical to
# len(series.unique()), which also counts a missing value as one entry.
for id_column in ("subject_id", "hadm_id", "stay_id"):
    print(df_icu_inputevents[id_column].nunique(dropna=False))
# Then print the dtype of every column.
print(df_icu_inputevents.dtypes)

50755
65986
72690
subject_id                         int64
hadm_id                            int64
stay_id                            int64
caregiver_id                       int64
starttime                         object
endtime                           object
storetime                         object
itemid                             int64
amount                           float64
amountuom                         object
rate                             float64
rateuom                           object
orderid                            int64
linkorderid                        int64
ordercategoryname                 object
secondaryordercategoryname        object
ordercomponenttypedescription     object
ordercategorydescription          object
patientweight                    float64
totalamount                      float64
totalamountuom                    object
isopenbag                          int64
continueinnextdept                 int64
statusdescription                 objec

In [5]:
# Read the item dictionary (itemid -> label, category, unit, ...).
d_items_path = os.path.join(ehr_data_dir, "icu/d_items.csv.gz")
df_items = pd.read_csv(d_items_path)
df_items.head()

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,220001,Problem List,Problem List,chartevents,General,,Text,,
1,220003,ICU Admission date,ICU Admission date,datetimeevents,ADT,,Date and time,,
2,220045,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
3,220046,Heart rate Alarm - High,HR Alarm - High,chartevents,Alarms,bpm,Numeric,,
4,220047,Heart Rate Alarm - Low,HR Alarm - Low,chartevents,Alarms,bpm,Numeric,,


In [6]:
# Attach the item dictionary columns to every input event.
# A left join keeps every event row even if its itemid has no dictionary entry.
df_icu_inputevents_merge = df_icu_inputevents.merge(
    df_items,
    on='itemid',
    how='left',
)
df_icu_inputevents_merge.head()

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,starttime,endtime,storetime,itemid,amount,amountuom,...,originalamount,originalrate,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,10000032,29079034,39553978,66056,2180-07-23 21:10:00,2180-07-23 21:11:00,2180-07-23 21:10:00,226452,100.0,ml,...,100.0,100.0,PO Intake,PO Intake,inputevents,Fluids/Intake,mL,Solution,,
1,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:01:00,2180-07-23 18:56:00,226452,200.0,ml,...,200.0,200.0,PO Intake,PO Intake,inputevents,Fluids/Intake,mL,Solution,,
2,10000032,29079034,39553978,88981,2180-07-23 17:00:00,2180-07-23 17:30:00,2180-07-23 17:02:00,220862,49.999999,ml,...,50.0,100.0,Albumin 25%,Albumin 25%,inputevents,Blood Products/Colloids,mL,Solution,,
3,10000032,29079034,39553978,88981,2180-07-23 17:33:00,2180-07-23 18:03:00,2180-07-23 18:16:00,220862,49.999999,ml,...,50.0,100.0,Albumin 25%,Albumin 25%,inputevents,Blood Products/Colloids,mL,Solution,,
4,10000032,29079034,39553978,88981,2180-07-23 18:56:00,2180-07-23 18:57:00,2180-07-23 18:56:00,226452,100.0,ml,...,100.0,100.0,PO Intake,PO Intake,inputevents,Fluids/Intake,mL,Solution,,


In [7]:
# Keep only the columns needed downstream and lower-case the item label.
selected_columns = ["subject_id", "hadm_id", "stay_id", "starttime", "itemid", "label",
                    "amount", "amountuom"]

# Build the result in one expression with .assign instead of writing a column
# into a column-subset of the merged frame: the original in-place assignment
# on the subset triggers pandas' SettingWithCopyWarning.
df_icu_inputevents_merge = df_icu_inputevents_merge[selected_columns].assign(
    label=lambda frame: frame["label"].str.lower()
)

df_icu_inputevents_merge.head()

Unnamed: 0,subject_id,hadm_id,stay_id,starttime,itemid,label,amount,amountuom
0,10000032,29079034,39553978,2180-07-23 21:10:00,226452,po intake,100.0,ml
1,10000032,29079034,39553978,2180-07-23 17:00:00,226452,po intake,200.0,ml
2,10000032,29079034,39553978,2180-07-23 17:00:00,220862,albumin 25%,49.999999,ml
3,10000032,29079034,39553978,2180-07-23 17:33:00,220862,albumin 25%,49.999999,ml
4,10000032,29079034,39553978,2180-07-23 18:56:00,226452,po intake,100.0,ml


In [8]:
# Row count of the merged table.
print(len(df_icu_inputevents_merge))

# Frequency of each amount unit. The recorded output shows the unit strings are
# not normalised (for example both 'mEq' and 'mEq.' occur).
amountuom_counts = df_icu_inputevents_merge["amountuom"].value_counts()
print(amountuom_counts)

# Column names that remain after the selection step.
print(df_icu_inputevents_merge.columns)

8978893
amountuom
ml                     4811933
mg                     2090004
dose                    794051
units                   511549
mcg                     294538
grams                   195015
mEq                     184023
mEq.                     51311
mmol                     46370
L                           56
ounces                      13
International Units          9
pg                           8
/hour                        4
cm3                          2
uL                           2
nL                           1
ml/hr                        1
mm^3                         1
nMol/ml/min                  1
pL                           1
Name: count, dtype: int64
Index(['subject_id', 'hadm_id', 'stay_id', 'starttime', 'itemid', 'label',
       'amount', 'amountuom'],
      dtype='object')


In [9]:
# Load the ICU/CXR cohort file and reduce it to the distinct
# (subject_id, hadm_id) pairs, keeping the first occurrence of each pair
# together with its original row index.
icu_cxr = pd.read_csv(os.path.join("outputs", "icu_cxr.csv.gz"), compression="gzip")
selected_columns = ["subject_id", "hadm_id"]
icu_subject_hadm_df = icu_cxr[selected_columns].drop_duplicates(keep="first")
icu_subject_hadm_df.head()

Unnamed: 0,subject_id,hadm_id
0,10001217,27703517
1,10001884,26184834
6,10002013,23581541
10,10002428,28662225
11,10002428,23473524


In [10]:
# Restrict the input events to the admissions present in the cohort:
# an inner join on (subject_id, hadm_id) drops events for all other admissions.
df_icu_inputevents_result = icu_subject_hadm_df.merge(
    df_icu_inputevents_merge,
    how="inner",
    on=["subject_id", "hadm_id"],
)
df_icu_inputevents_result.head()

Unnamed: 0,subject_id,hadm_id,stay_id,starttime,itemid,label,amount,amountuom
0,10001217,27703517,34592300,2157-12-19 19:30:00,225158,nacl 0.9%,99.999998,ml
1,10001217,27703517,34592300,2157-12-19 19:39:00,225154,morphine sulfate,2.0,mg
2,10001217,27703517,34592300,2157-12-19 20:00:00,220949,dextrose 5%,200.0,ml
3,10001217,27703517,34592300,2157-12-19 20:00:00,225798,vancomycin,1.0,dose
4,10001217,27703517,34592300,2157-12-19 23:36:00,226452,po intake,360.0,ml


In [11]:
# Persist the cohort-restricted input events as a gzip-compressed CSV,
# without writing the DataFrame index as a column.
icu_inputevents_out_path = os.path.join("outputs", "icu_inputevents.csv.gz")
df_icu_inputevents_result.to_csv(
    icu_inputevents_out_path,
    compression="gzip",
    index=False,
)

In [12]:
# Report the on-disk size of the file written above, using the project helper.
written_file_path = os.path.join("outputs", "icu_inputevents.csv.gz")
readable_file_size = get_readable_file_size(written_file_path)
print(f"File size: {readable_file_size}")

File size: 29.36 MB


In [13]:
# Restrict which GPUs this process may see. The variable is set BEFORE torch is
# imported so the restriction is guaranteed to be in place before anything can
# initialise CUDA; the original set it only after the import.
# NOTE(review): the value contains a space after the comma ("5, 6") — confirm
# that the CUDA runtime in use accepts this form.
os.environ["CUDA_VISIBLE_DEVICES"] = "5, 6"

import torch
import ollama

# "cuda" when torch can see a usable GPU, otherwise "cpu".
# NOTE(review): the visible cells never use `device`, and the Ollama model is
# presumably served by a separate process that this variable does not affect —
# confirm whether this setup is still needed.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [14]:
# Load the column description of the exported table as one string.
with open('files/inputevents_description.txt', 'r') as description_file:
    description_text = description_file.read()

# Load the instruction prompt that is sent to the model with each question.
with open('files/prompt_text.txt', 'r') as prompt_file:
    prompt_text = prompt_file.read()

# Echo both texts so the exact model input is visible in the notebook.
print(description_text)
print(prompt_text)

This is the description to the icu_inputevents.csv.gz file. This file is located in outputs/icu_inputevents.csv.gz.
subject_id: A unique identifier for each patient. Every patient has a distinct subject_id to differentiate between individuals.
hadm_id: A unique identifier for each hospital admission. Each hospital visit generates a new hadm_id to distinguish between different admissions for the same patient.
stay_id: In MIMIC-IV, this is a unique identifier for each ICU stay. Every ICU visit has a unique stay_id to differentiate multiple ICU stays for the same patient during a single admission or across multiple admissions.
starttime: The start time of the input event. This records the exact time (to the minute or second) when an input, like an infusion or medication, was administered.
itemid: A unique identifier for the item or medication. Each itemid corresponds to a specific type of input, such as a particular medication, solution, or medical supply.
label: The name or label of the 

In [15]:
# Ask the model for pandas code that answers a question about the exported
# input events table. The model input is: table description + instruction
# prompt + question, separated by single spaces.
# Fix: the original question literal contained "\does", an invalid escape
# sequence that also placed a stray backslash in the text sent to the model.
question_text = "How many times albumin 25% does the patient 10001217 have during the hospital stay 27703517?"
input_text = description_text + " " + prompt_text + " " + question_text

# Single-turn, non-streaming chat request.
# (llama3.1:8b was an alternative model left commented out in the original cell.)
response = ollama.chat(model="qwen2.5:14b", messages=[{"role": "user", "content": input_text}], stream=False)
answer_text = response["message"]["content"]
print(answer_text)

```python
import pandas as pd

# Load the data from the compressed CSV file
file_path = 'outputs/icu_inputevents.csv.gz'
data = pd.read_csv(file_path)

# Filter for patient with subject_id 10001217 and hadm_id 27703517 where label is "albumin 25%"
filtered_data = data[(data['subject_id'] == 10001217) & (data['hadm_id'] == 27703517) & (data['label'] == 'albumin 25%')]

# Count the number of times albumin 25% is administered
result = len(filtered_data)
print(result)
```


In [16]:
# Code taken from the model answer above: count the 'albumin 25%' rows for
# subject 10001217 during admission 27703517.
# (pandas is already imported as `pd` in the first cell.)
file_path = 'outputs/icu_inputevents.csv.gz'
data = pd.read_csv(file_path)

# One boolean mask per condition, combined with a logical AND.
is_patient = data['subject_id'] == 10001217
is_admission = data['hadm_id'] == 27703517
is_albumin = data['label'] == 'albumin 25%'
filtered_data = data[is_patient & is_admission & is_albumin]

# The number of matching rows is the number of recorded administrations.
result = filtered_data.shape[0]
print(result)

0


In [17]:
# Ask the model for pandas code that totals the 'po intake' amount for one
# admission. The model input is: table description + instruction prompt +
# question, joined with single spaces.
question_text = "What is the total amount of po intake does the patient 10001217 have during the hospital stay 27703517?"
input_text = " ".join([description_text, prompt_text, question_text])

# Single-turn, non-streaming chat request.
chat_messages = [{"role": "user", "content": input_text}]
response = ollama.chat(model="qwen2.5:14b", messages=chat_messages, stream=False)
answer_text = response["message"]["content"]
print(answer_text)

```python
import pandas as pd

# Load the csv file
file_path = 'outputs/icu_inputevents.csv.gz'
data = pd.read_csv(file_path)

# Filter data for patient 10001217 during hospital stay 27703517 with po intake
filtered_data = data[(data['subject_id'] == 10001217) & (data['hadm_id'] == 27703517) & (data['label'] == 'po intake')]

# Calculate total amount of po intake
total_amount = filtered_data['amount'].sum()

# Output the result
result = total_amount
print(result)
```


In [18]:
# Code taken from the model answer above: sum the 'po intake' amounts for
# subject 10001217 during admission 27703517.
# (pandas is already imported as `pd` in the first cell.)
file_path = 'outputs/icu_inputevents.csv.gz'
data = pd.read_csv(file_path)

# Rows that match the patient, the admission and the item label.
row_mask = (
    data['subject_id'].eq(10001217)
    & data['hadm_id'].eq(27703517)
    & data['label'].eq('po intake')
)
filtered_data = data[row_mask]

# Sum of the 'amount' column over the matching rows.
total_amount = filtered_data['amount'].sum()

result = total_amount
print(result)


1440.0
