In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

from tqdm import tqdm
from utils.utils import get_readable_file_size

In [2]:
ehr_data_dir = "/home/mengliang/DatasetFolder/mimiciv/2.2"

In [3]:
ehr_procedures_path = os.path.join(ehr_data_dir, "hosp/procedures_icd.csv.gz")
df_ehr_procedures = pd.read_csv(ehr_procedures_path, index_col=False)

df_ehr_procedures.head()

Unnamed: 0,subject_id,hadm_id,seq_num,chartdate,icd_code,icd_version
0,10000032,22595853,1,2180-05-07,5491,9
1,10000032,22841357,1,2180-06-27,5491,9
2,10000032,25742920,1,2180-08-06,5491,9
3,10000068,25022803,1,2160-03-03,8938,9
4,10000117,27988844,1,2183-09-19,0QS734Z,10


In [4]:
icu_cxr = pd.read_csv(os.path.join("outputs", "icu_cxr.csv.gz"), compression="gzip")
selected_columns = ["subject_id", "hadm_id"]
icu_subject_hadm_df = icu_cxr[selected_columns].copy()
icu_subject_hadm_df.drop_duplicates(inplace=True, keep="first")
icu_subject_hadm_df.head()

Unnamed: 0,subject_id,hadm_id
0,10001217,27703517
1,10001884,26184834
6,10002013,23581541
10,10002428,28662225
11,10002428,23473524


In [5]:
df_icu_procedures = pd.merge(icu_subject_hadm_df, df_ehr_procedures, 
                            on=["subject_id", "hadm_id"], 
                            how="inner")
df_icu_procedures.head()

Unnamed: 0,subject_id,hadm_id,seq_num,chartdate,icd_code,icd_version
0,10001217,27703517,1,2157-12-19,0139,9
1,10001884,26184834,1,2131-01-11,5A1945Z,10
2,10001884,26184834,2,2131-01-12,5A1955Z,10
3,10001884,26184834,3,2131-01-11,0BH17EZ,10
4,10001884,26184834,4,2131-01-11,5A1223Z,10


In [6]:
icd_procedures_path = os.path.join(ehr_data_dir, "hosp/d_icd_procedures.csv.gz")
df_ehr_icd_procedures = pd.read_csv(icd_procedures_path)
df_ehr_icd_procedures.head()

Unnamed: 0,icd_code,icd_version,long_title
0,1,9,Therapeutic ultrasound of vessels of head and ...
1,2,9,Therapeutic ultrasound of heart
2,3,9,Therapeutic ultrasound of peripheral vascular ...
3,9,9,Other therapeutic ultrasound
4,1,10,"Central Nervous System and Cranial Nerves, Bypass"


In [7]:
df_icu_procedures_result = pd.merge(df_icu_procedures, df_ehr_icd_procedures,  
                                    on=["icd_code", "icd_version"], 
                                    how="left")

selected_columns = ["subject_id", "hadm_id", "icd_code", "icd_version", "long_title"]
df_icu_procedures_result = df_icu_procedures_result[selected_columns]
df_icu_procedures_result["long_title"] = df_icu_procedures_result["long_title"].str.lower()
df_icu_procedures_result.head()

Unnamed: 0,subject_id,hadm_id,icd_code,icd_version,long_title
0,10001217,27703517,0139,9,other incision of brain
1,10001884,26184834,5A1945Z,10,"respiratory ventilation, 24-96 consecutive hours"
2,10001884,26184834,5A1955Z,10,"respiratory ventilation, greater than 96 conse..."
3,10001884,26184834,0BH17EZ,10,"insertion of endotracheal airway into trachea,..."
4,10001884,26184834,5A1223Z,10,"performance of cardiac pacing, continuous"


In [8]:
print(df_icu_procedures_result.columns)

Index(['subject_id', 'hadm_id', 'icd_code', 'icd_version', 'long_title'], dtype='object')


In [9]:
df_icu_procedures_result.to_csv(os.path.join("outputs", "icu_procedures.csv.gz"),
                                index=False, compression="gzip")

In [10]:
readable_file_size = get_readable_file_size(os.path.join("outputs", "icu_procedures.csv.gz"))
print(f"File size: {readable_file_size}")

File size: 689.38 KB


In [11]:
import torch
import ollama

os.environ["CUDA_VISIBLE_DEVICES"] = "5, 6"
device = "cuda" if torch.cuda.is_available() else "cpu"

In [12]:
# Open the file in read mode and read the content as a string
with open('files/procedures_description.txt', 'r') as file:
    description_text = file.read()

with open('files/prompt_text.txt', 'r') as file:
    prompt_text = file.read()
# Print the content of the file
print(description_text)
print(prompt_text)

This is the description to the icu_procedures.csv.gz file. This file is located in outputs/icu_procedures.csv.gz.
subject_id: A unique identifier for each patient. This ID is specific to each patient, distinguishing them within the dataset.
hadm_id: A unique identifier for each hospital admission. Each admission has its own hadm_id, allowing for separation of different admissions for the same patient.
icd_code: The ICD (International Classification of Diseases) code for the procedure. This code represents the specific procedure performed on the patient.
icd_version: The version of the ICD coding system used. This indicates which version of the ICD codes (e.g., ICD-9 or ICD-10) was applied to classify the procedure.
long_title: A descriptive title for the ICD code. This provides a human-readable description of the procedure, such as “appendectomy” or “coronary artery bypass.”. All characters are in lowercase.
Please generate python code to answer the question. Only generate code for the

In [13]:
# extract information from discharge summary
description_text = description_text
prompt_text = prompt_text
question_text = "How many times did the patient 10001217 receive left heart cardiac catheterization?"
input_text = description_text + " " + prompt_text + " " + question_text

response = ollama.chat(model="qwen2.5:14b", messages = [{"role": "user", "content": input_text,}],
                       stream=False)
answer_text = response["message"]["content"]
print(answer_text)

```python
import pandas as pd

# Load the CSV file
file_path = 'outputs/icu_procedures.csv.gz'
data = pd.read_csv(file_path)

# Filter for patient with subject_id 10001217 and the specific procedure
patient_id = 10001217
procedure_code = "42659"  # Assuming this is the ICD code for left heart cardiac catheterization

filtered_data = data[(data['subject_id'] == patient_id) & (data['icd_code'] == procedure_code)]

# Count occurrences of the procedure for the given patient
result = len(filtered_data)
print(result)
```


In [14]:
import pandas as pd

# Load the CSV file
file_path = 'outputs/icu_procedures.csv.gz'
data = pd.read_csv(file_path)

# Filter for patient with subject_id 10001217 and the specific procedure
patient_id = 10001217
procedure_code = "42659"  # Assuming this is the ICD code for left heart cardiac catheterization

filtered_data = data[(data['subject_id'] == patient_id) & (data['icd_code'] == procedure_code)]

# Count occurrences of the procedure for the given patient
result = len(filtered_data)
print(result)


0
