In [1]:
import os
import numpy as numpy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

from tqdm import tqdm
from utils.utils import get_readable_file_size

In [2]:
ehr_data_dir = "/home/mengliang/DatasetFolder/mimiciv/2.2"

In [3]:
icu_chartevents_path = os.path.join(ehr_data_dir, "icu/chartevents.csv.gz")
df_icu_chartevents = pd.read_csv(icu_chartevents_path, index_col=False, compression="gzip")
df_icu_chartevents.head()

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning
0,10000032,29079034,39553978,47007.0,2180-07-23 21:01:00,2180-07-23 22:15:00,220179,82,82.0,mmHg,0.0
1,10000032,29079034,39553978,47007.0,2180-07-23 21:01:00,2180-07-23 22:15:00,220180,59,59.0,mmHg,0.0
2,10000032,29079034,39553978,47007.0,2180-07-23 21:01:00,2180-07-23 22:15:00,220181,63,63.0,mmHg,0.0
3,10000032,29079034,39553978,47007.0,2180-07-23 22:00:00,2180-07-23 22:15:00,220045,94,94.0,bpm,0.0
4,10000032,29079034,39553978,47007.0,2180-07-23 22:00:00,2180-07-23 22:15:00,220179,85,85.0,mmHg,0.0


In [4]:
print(len(df_icu_chartevents["subject_id"].unique()))
print(len(df_icu_chartevents["hadm_id"].unique()))
print(len(df_icu_chartevents["stay_id"].unique()))
print(df_icu_chartevents.dtypes)

50920
66239
73176
subject_id        int64
hadm_id           int64
stay_id           int64
caregiver_id    float64
charttime        object
storetime        object
itemid            int64
value            object
valuenum        float64
valueuom         object
dtype: object


In [5]:
df_items = pd.read_csv(os.path.join(ehr_data_dir, "icu/d_items.csv.gz"))
df_items.head()

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,220001,Problem List,Problem List,chartevents,General,,Text,,
1,220003,ICU Admission date,ICU Admission date,datetimeevents,ADT,,Date and time,,
2,220045,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
3,220046,Heart rate Alarm - High,HR Alarm - High,chartevents,Alarms,bpm,Numeric,,
4,220047,Heart Rate Alarm - Low,HR Alarm - Low,chartevents,Alarms,bpm,Numeric,,


In [6]:
df_icu_chartevents_merge = pd.merge(df_icu_chartevents, df_items, how='left', on='itemid')
df_icu_chartevents_merge.head()

Unnamed: 0,subject_id,hadm_id,stay_id,caregiver_id,charttime,storetime,itemid,value,valuenum,valueuom,warning,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,10000032,29079034,39553978,47007.0,2180-07-23 21:01:00,2180-07-23 22:15:00,220179,82,82.0,mmHg,0.0,Non Invasive Blood Pressure systolic,NBPs,chartevents,Routine Vital Signs,mmHg,Numeric,,
1,10000032,29079034,39553978,47007.0,2180-07-23 21:01:00,2180-07-23 22:15:00,220180,59,59.0,mmHg,0.0,Non Invasive Blood Pressure diastolic,NBPd,chartevents,Routine Vital Signs,mmHg,Numeric,,
2,10000032,29079034,39553978,47007.0,2180-07-23 21:01:00,2180-07-23 22:15:00,220181,63,63.0,mmHg,0.0,Non Invasive Blood Pressure mean,NBPm,chartevents,Routine Vital Signs,mmHg,Numeric,,
3,10000032,29079034,39553978,47007.0,2180-07-23 22:00:00,2180-07-23 22:15:00,220045,94,94.0,bpm,0.0,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
4,10000032,29079034,39553978,47007.0,2180-07-23 22:00:00,2180-07-23 22:15:00,220179,85,85.0,mmHg,0.0,Non Invasive Blood Pressure systolic,NBPs,chartevents,Routine Vital Signs,mmHg,Numeric,,


In [7]:
selected_columns = ["subject_id", "hadm_id", "stay_id", "charttime", "itemid", "label", 
                    "value", "valueuom"]
df_icu_chartevents_merge = df_icu_chartevents_merge[selected_columns]
df_icu_chartevents_merge["label"] = df_icu_chartevents_merge["label"].str.lower()

df_icu_chartevents_merge.head()

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,itemid,label,value,valueuom
0,10000032,29079034,39553978,2180-07-23 21:01:00,220179,non invasive blood pressure systolic,82,mmHg
1,10000032,29079034,39553978,2180-07-23 21:01:00,220180,non invasive blood pressure diastolic,59,mmHg
2,10000032,29079034,39553978,2180-07-23 21:01:00,220181,non invasive blood pressure mean,63,mmHg
3,10000032,29079034,39553978,2180-07-23 22:00:00,220045,heart rate,94,bpm
4,10000032,29079034,39553978,2180-07-23 22:00:00,220179,non invasive blood pressure systolic,85,mmHg


In [11]:
a = df_icu_chartevents_merge[df_icu_chartevents_merge["label"] == "safety measures"]
a.head()

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,itemid,label,value,valueuom
29,10000032,29079034,39553978,2180-07-23 20:00:00,227969,safety measures,Adequate lighting,
30,10000032,29079034,39553978,2180-07-23 20:00:00,227969,safety measures,Bed alarm activated,
31,10000032,29079034,39553978,2180-07-23 20:00:00,227969,safety measures,Bed locked in low position,
32,10000032,29079034,39553978,2180-07-23 20:00:00,227969,safety measures,Hazard-free environment,
33,10000032,29079034,39553978,2180-07-23 20:00:00,227969,safety measures,Lines and tubes concealed,


In [13]:
value_counts = df_icu_chartevents_merge['label'].value_counts()
value_counts_df = value_counts.reset_index()
value_counts_df.columns = ['label', 'count']

value_counts_df.to_csv('outputs/chartevents_label_counts.csv', index=False)

print("Counts have been saved to 'chartevents_label_counts.csv'")

Counts have been saved to 'chartevents_label_counts.csv'


In [15]:
print(len(df_icu_chartevents_merge))
print(df_icu_chartevents_merge["valueuom"].value_counts())
print(df_icu_chartevents_merge.columns)

313645063
valueuom
mmHg                 25401973
insp/min              9642142
%                     9312983
bpm                   7827348
cmH2O                 3351546
mEq/L                 3064966
L/min                 2819673
mg/dL                 2781756
mL                    2662414
sec                   1524360
°F                    1515987
ml/hr                  993580
cm                     968376
°C                     958887
K/uL                   877493
units                  525828
IU/L                   473024
mmol/L                 448477
g/dl                   428841
kg                     277540
ml/min                 242012
cmH2O/L/seconds        173637
mA                     138292
ng/mL                  111153
mV                      98445
g/dL                    58054
ug/mL                   44789
min                     44066
Inch                    33707
mOsm/kg                 22412
mL/beat                 20836
mmHg.                   19059
RPM                  

In [16]:
print(df_icu_chartevents_merge.columns)

Index(['subject_id', 'hadm_id', 'stay_id', 'charttime', 'itemid', 'label',
       'value', 'valueuom'],
      dtype='object')


In [17]:
label_selected_list = ["heart rate", "respiratory rate", "o2 saturation pulseoxymetry", "heart rhythm",
                       "arterial blood pressure mean", "arterial blood pressure systolic",
                       "arterial blood pressure diastolic", "temperature fahrenheit", "rll lung sounds",
                       "lll lung sounds", "rul lung sounds", "lul lung sounds"]

df_icu_chartevents_merge_filter = df_icu_chartevents_merge[df_icu_chartevents_merge['label'].isin(label_selected_list)]
df_icu_chartevents_merge_filter.head()

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,itemid,label,value,valueuom
3,10000032,29079034,39553978,2180-07-23 22:00:00,220045,heart rate,94,bpm
7,10000032,29079034,39553978,2180-07-23 22:00:00,220210,respiratory rate,20,insp/min
8,10000032,29079034,39553978,2180-07-23 22:00:00,220277,o2 saturation pulseoxymetry,95,%
9,10000032,29079034,39553978,2180-07-23 19:00:00,220045,heart rate,97,bpm
13,10000032,29079034,39553978,2180-07-23 19:00:00,220210,respiratory rate,16,insp/min


In [18]:
df_icu_chartevents_merge_filter.to_csv(os.path.join("outputs", "icu_chartevents.csv.gz"), 
                                       index=False, compression="gzip")

In [19]:
readable_file_size = get_readable_file_size(os.path.join("outputs", "icu_chartevents.csv.gz"))
print(f"File size: {readable_file_size}")

File size: 172.50 MB


In [32]:
import torch
import ollama

os.environ["CUDA_VISIBLE_DEVICES"] = "5, 6"
device = "cuda" if torch.cuda.is_available() else "cpu"

In [33]:
# Open the file in read mode and read the content as a string
with open('files/chartevents_description.txt', 'r') as file:
    file_content = file.read()

# Print the content of the file
print(file_content)

This is the description to the icu_chartevents.csv.gz file. This file is located in outputs/icu_chartevents.csv.gz.
subject_id: A unique identifier for each patient. This ID is specific to each patient and helps differentiate individuals in the database.
hadm_id: A unique identifier for each hospital admission. Each hospital stay has a distinct hadm_id, allowing separation of different admissions for the same patient.
stay_id: In MIMIC-IV, this represents a unique identifier for each ICU stay. Each ICU visit generates a unique stay_id to differentiate multiple ICU stays for the same patient within or across admissions.
charttime: The time when the chart event was recorded. This timestamp indicates when the specific measurement or observation was taken, typically down to the minute.
itemid: A unique identifier for the charted item. This code represents a specific type of measurement or observation, such as heart rate, blood pressure, or temperature.
label: A descriptive name or label fo

In [34]:
# extract information from discharge summary
description_text = file_content
prompt_text = "Please generate python code to answer the question. Only generate code for the question. no explanation and other description. use print to putput the result. the final result variable should be named as result."
question_text = "What is the maximum heart rate achieved by patient 10000032 during their ICU stay?"
input_text = description_text + " " + prompt_text + " " + question_text

response = ollama.chat(model="qwen2.5:14b", messages = [{"role": "user", "content": input_text,}], stream=False)
#response = ollama.chat(model="llama3.1:8b", messages = [{"role": "user", "content": input_text,}], stream=False)
answer_text = response["message"]["content"]
print(answer_text)

```python
import pandas as pd

# Load the data
filename = 'outputs/icu_chartevents.csv.gz'
data = pd.read_csv(filename)

# Filter the data for patient with subject_id 10000032 and itemid corresponding to heart rate
patient_data = data[(data['subject_id'] == 10000032) & (data['label'] == 'heart rate')]

# Find the maximum value of heart rate during ICU stay
max_heart_rate = patient_data['value'].str.replace(',', '').astype(float).max()

result = max_heart_rate
print(result)
```


In [29]:
import pandas as pd

# Load the data
filename = 'outputs/icu_chartevents.csv.gz'
data = pd.read_csv(filename)

# Filter for patient with subject_id 10000032 and itemid corresponding to heart rate
heart_rate_data = data[(data['subject_id'] == 10000032) & (data['label'] == 'heart rate')]

# Extract the value column and convert it to numeric values, ignoring failures
heart_rate_data['value'] = pd.to_numeric(heart_rate_data['value'], errors='coerce')

# Find the maximum heart rate value during their ICU stay
result = heart_rate_data['value'].max()
print(result)

105


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  heart_rate_data['value'] = pd.to_numeric(heart_rate_data['value'], errors='coerce')


In [30]:
# extract information from discharge summary
description_text = file_content
prompt_text = "Please generate python code to answer the question. Only generate code for the question. no explanation and other description. use print to putput the result. the final result variable should be named as result."
question_text = "How many times did the patient 10000032 have heart rate checks during the icu stay?"
input_text = description_text + " " + prompt_text + " " + question_text

response = ollama.chat(model="qwen2.5:14b", messages = [{"role": "user", "content": input_text,}], stream=False)
#response = ollama.chat(model="llama3.1:8b", messages = [{"role": "user", "content": input_text,}], stream=False)
answer_text = response["message"]["content"]
print(answer_text)

```python
import pandas as pd

# Load the data
filename = 'outputs/icu_chartevents.csv.gz'
data = pd.read_csv(filename)

# Filter for specific patient and heart rate measurements
patient_id = 10000032
heart_rate_label = "heart rate"
filtered_data = data[(data['subject_id'] == patient_id) & (data['label'] == heart_rate_label)]

# Count the number of heart rate checks
result = len(filtered_data)
print(result)
```


In [31]:
import pandas as pd

# Load the data
filename = 'outputs/icu_chartevents.csv.gz'
data = pd.read_csv(filename)

# Filter for specific patient and heart rate measurements
patient_id = 10000032
heart_rate_label = "heart rate"
filtered_data = data[(data['subject_id'] == patient_id) & (data['label'] == heart_rate_label)]

# Count the number of heart rate checks
result = len(filtered_data)
print(result)

10
