In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

from tqdm import tqdm
from utils.utils import get_readable_file_size

In [2]:
ehr_data_dir = "/home/mengliang/DatasetFolder/mimiciv/2.2"

In [3]:
ehr_presrciptions_path = os.path.join(ehr_data_dir, "hosp/prescriptions.csv.gz")
df_ehr_presrciptions = pd.read_csv(ehr_presrciptions_path, index_col=False)

selected_columns = ['subject_id', 'hadm_id', 'starttime', 'stoptime', 'drug', 'dose_val_rx', 'dose_unit_rx', 'route']
df_ehr_presrciptions = df_ehr_presrciptions[selected_columns]

df_ehr_presrciptions.head()

  df_ehr_presrciptions = pd.read_csv(ehr_presrciptions_path, index_col=False)


Unnamed: 0,subject_id,hadm_id,starttime,stoptime,drug,dose_val_rx,dose_unit_rx,route
0,10000032,22595853,2180-05-07 01:00:00,2180-05-07 22:00:00,Acetaminophen,500,mg,PO/NG
1,10000032,22595853,2180-05-07 00:00:00,2180-05-07 22:00:00,Sodium Chloride 0.9% Flush,3,mL,IV
2,10000032,22595853,2180-05-08 08:00:00,2180-05-07 22:00:00,Furosemide,40,mg,PO/NG
3,10000032,22595853,2180-05-07 01:00:00,2180-05-07 22:00:00,Raltegravir,400,mg,PO
4,10000032,22595853,2180-05-07 00:00:00,2180-05-07 22:00:00,Heparin,5000,UNIT,SC


In [4]:
icu_cxr = pd.read_csv(os.path.join("outputs", "icu_cxr.csv.gz"), compression="gzip")
selected_columns = ["subject_id", "hadm_id"]
icu_subject_hadm_df = icu_cxr[selected_columns].copy()
icu_subject_hadm_df.drop_duplicates(inplace=True, keep="first")
icu_subject_hadm_df.head()

Unnamed: 0,subject_id,hadm_id
0,10001217,27703517
1,10001884,26184834
6,10002013,23581541
10,10002428,28662225
11,10002428,23473524


In [5]:
df_icu_prescriptions = pd.merge(icu_subject_hadm_df, df_ehr_presrciptions, 
                                on=["subject_id", "hadm_id"], 
                                how="inner")
df_icu_prescriptions.head()

Unnamed: 0,subject_id,hadm_id,starttime,stoptime,drug,dose_val_rx,dose_unit_rx,route
0,10001217,27703517,2157-12-19 16:00:00,2157-12-24 19:00:00,Bisacodyl,10,mg,PO
1,10001217,27703517,2157-12-19 16:00:00,2157-12-24 19:00:00,OxycoDONE (Immediate Release),5-10,mg,PO/NG
2,10001217,27703517,2157-12-19 16:00:00,2157-12-24 19:00:00,OxycoDONE (Immediate Release),5-10,mg,PO/NG
3,10001217,27703517,2157-12-18 20:00:00,2157-12-18 19:00:00,LeVETiracetam,1000,mg,PO
4,10001217,27703517,2157-12-19 22:00:00,2157-12-20 14:00:00,Vial,1,VIAL,IV


In [6]:
df_icu_prescriptions["drug"] = df_icu_prescriptions["drug"].str.lower()
df_icu_prescriptions["dose_unit_rx"] = df_icu_prescriptions["dose_unit_rx"].str.lower()
df_icu_prescriptions["route"] = df_icu_prescriptions["route"].str.lower()

df_icu_prescriptions.head()

Unnamed: 0,subject_id,hadm_id,starttime,stoptime,drug,dose_val_rx,dose_unit_rx,route
0,10001217,27703517,2157-12-19 16:00:00,2157-12-24 19:00:00,bisacodyl,10,mg,po
1,10001217,27703517,2157-12-19 16:00:00,2157-12-24 19:00:00,oxycodone (immediate release),5-10,mg,po/ng
2,10001217,27703517,2157-12-19 16:00:00,2157-12-24 19:00:00,oxycodone (immediate release),5-10,mg,po/ng
3,10001217,27703517,2157-12-18 20:00:00,2157-12-18 19:00:00,levetiracetam,1000,mg,po
4,10001217,27703517,2157-12-19 22:00:00,2157-12-20 14:00:00,vial,1,vial,iv


In [7]:
print(df_icu_prescriptions.columns)

Index(['subject_id', 'hadm_id', 'starttime', 'stoptime', 'drug', 'dose_val_rx',
       'dose_unit_rx', 'route'],
      dtype='object')


In [8]:
df_icu_prescriptions.to_csv(os.path.join("outputs", "icu_prescriptions.csv.gz"),
                            index=False, compression="gzip")

In [9]:
readable_file_size = get_readable_file_size(os.path.join("outputs", "icu_prescriptions.csv.gz"))
print(f"File size: {readable_file_size}")

File size: 15.74 MB


In [10]:
import torch
import ollama

os.environ["CUDA_VISIBLE_DEVICES"] = "5, 6"
device = "cuda" if torch.cuda.is_available() else "cpu"

In [11]:
# Open the file in read mode and read the content as a string
with open('files/prescriptions_description.txt', 'r') as file:
    description_text = file.read()

with open('files/prompt_text.txt', 'r') as file:
    prompt_text = file.read()
# Print the content of the file
print(description_text)
print(prompt_text)

This is the description to the icu_prescriptions.csv.gz file. This file is located in outputs/icu_prescriptions.csv.gz.
subject_id: A unique identifier for each patient. This ID differentiates each patient in the database.
hadm_id: A unique identifier for each hospital admission. Each admission is associated with a unique hadm_id, distinguishing multiple admissions for the same patient.
starttime: The time when the prescription was started. This timestamp records when the medication administration began, often down to the minute.
stoptime: The time when the prescription was stopped. This indicates the end time for the medication administration.
drug: The name of the prescribed medication. This provides a human-readable name for the drug that was administered to the patient. All characters are in lowercase.
dose_val_rx: The prescribed dosage value. This represents the amount of medication to be administered per dose.
dose_unit_rx: The unit of measurement for the dosage. This specifies t

In [12]:
# extract information from discharge summary
description_text = description_text
prompt_text = prompt_text
question_text = "List the prescription names of patient 10001217 during last admission."
input_text = description_text + " " + prompt_text + " " + question_text

response = ollama.chat(model="qwen2.5:14b", messages = [{"role": "user", "content": input_text,}],
                       stream=False)
answer_text = response["message"]["content"]
print(answer_text)

```python
import pandas as pd

# Load the data
filename = 'outputs/icu_prescriptions.csv.gz'
data = pd.read_csv(filename)

# Filter data for subject_id 10001217
patient_data = data[data['subject_id'] == 10001217]

# Find the last admission ID (hadm_id) of patient 10001217 based on max stoptime
last_admission = patient_data.groupby('hadm_id')['stoptime'].transform(max) == patient_data['stoptime']
last_hadm_id = patient_data[last_admission]['hadm_id'].max()

# Filter prescriptions for the last admission of patient 10001217
prescriptions_last_admission = patient_data[(patient_data['subject_id'] == 10001217) & (patient_data['hadm_id'] == last_hadm_id)]['drug']

# Get unique prescription names
result = prescriptions_last_admission.unique().tolist()
print(result)
```


In [13]:
import pandas as pd

# Load the data
filename = 'outputs/icu_prescriptions.csv.gz'
data = pd.read_csv(filename)

# Filter data for subject_id 10001217
patient_data = data[data['subject_id'] == 10001217]

# Find the last admission ID (hadm_id) of patient 10001217 based on max stoptime
last_admission = patient_data.groupby('hadm_id')['stoptime'].transform(max) == patient_data['stoptime']
last_hadm_id = patient_data[last_admission]['hadm_id'].max()

# Filter prescriptions for the last admission of patient 10001217
prescriptions_last_admission = patient_data[(patient_data['subject_id'] == 10001217) & (patient_data['hadm_id'] == last_hadm_id)]['drug']

# Get unique prescription names
result = prescriptions_last_admission.unique().tolist()
print(result)


['bisacodyl', 'oxycodone (immediate release) ', 'levetiracetam', 'vial', 'pantoprazole', '0.9% sodium chloride (mini bag plus)', 'meropenem', 'bag', 'magnesium sulfate', 'ondansetron', 'docusate sodium', 'acetaminophen', 'morphine sulfate', '0.9% sodium chloride', 'hydralazine', '5% dextrose', 'vancomycin', 'sodium chloride 0.9%  flush', 'syringe (0.9% sodium chloride)', 'alteplase (catheter clearance)', 'iso-osmotic dextrose', 'senna', 'lactated ringers', 'heparin flush (10 units/ml)', 'heparin', '*nf* ertapenem sodium', 'influenza virus vaccine']


  last_admission = patient_data.groupby('hadm_id')['stoptime'].transform(max) == patient_data['stoptime']


In [14]:
# extract information from discharge summary
description_text = description_text
prompt_text = prompt_text
question_text = "Did patient 10001217 receive heparin during their admission?"
input_text = description_text + " " + prompt_text + " " + question_text

response = ollama.chat(model="qwen2.5:14b", messages = [{"role": "user", "content": input_text,}],
                       stream=False)
answer_text = response["message"]["content"]
print(answer_text)

```python
import pandas as pd

# Load the data from csv.gz file
filename = 'outputs/icu_prescriptions.csv.gz'
data = pd.read_csv(filename)

# Filter for patient with subject_id 10001217 and drug heparin
patient_data = data[(data['subject_id'] == 10001217) & (data['drug'] == 'heparin')]

# Check if the filtered dataframe is empty or not
result = not patient_data.empty
print(result)
```


In [15]:
import pandas as pd

# Load the data from csv.gz file
filename = 'outputs/icu_prescriptions.csv.gz'
data = pd.read_csv(filename)

# Filter for patient with subject_id 10001217 and drug heparin
patient_data = data[(data['subject_id'] == 10001217) & (data['drug'] == 'heparin')]

# Check if the filtered dataframe is empty or not
result = not patient_data.empty
print(result)


True
