## Setup

In [None]:
%%capture

from datetime import datetime
from typing import Any, Dict, List, Optional
import os

# PyHealth Packages
from pyhealth.datasets import MIMIC4Dataset
from pyhealth.tasks.multimodal_mimic4 import ClinicalNotesMIMIC4, ClinicalNotesICDLabsMIMIC4
from pyhealth.tasks.base_task import BaseTask

# Load MIMIC4 Files
# There are probably better ways of dealing with this on the cluster, but working locally for now
# (see: https://github.com/sunlabuiuc/PyHealth/blob/master/examples/mortality_prediction/multimodal_mimic4_minimal.py)

# Which task this notebook runs. The tasks are meant to be additive, so the
# value of adding more modalities can be evaluated.
TASK = "ClinicalNotesICDLabsMIMIC4"

# Root of the local PyHealth checkout. Defaults to the original developer path;
# set the PYHEALTH_REPO_ROOT environment variable to run on another machine
# without editing this cell. An empty value falls back to the default.
PYHEALTH_REPO_ROOT = os.environ.get("PYHEALTH_REPO_ROOT") or '/Users/wpang/Desktop/PyHealth'

# Data locations, all resolved relative to the repo root.
EHR_ROOT = os.path.join(PYHEALTH_REPO_ROOT, "local_data/local/data/physionet.org/files/mimiciv/2.2")
NOTE_ROOT = os.path.join(PYHEALTH_REPO_ROOT, "local_data/local/data/physionet.org/files/mimic-iv-note/2.2")
CXR_ROOT = os.path.join(PYHEALTH_REPO_ROOT, "local_data/local/data/physionet.org/files/mimic-cxr-jpg/2.0.0")
CACHE_DIR = os.path.join(PYHEALTH_REPO_ROOT, "local_data/local/data/wp/pyhealth_cache")


# Both supported tasks read the same EHR and note tables, so the dataset is
# built once up front instead of being duplicated in each branch.
if TASK not in ("ClinicalNotesMIMIC4", "ClinicalNotesICDLabsMIMIC4"):
    # Fail here with a clear message; otherwise `dataset` would never be
    # defined and a later cell would die with a confusing NameError.
    raise ValueError(f"Unsupported TASK: {TASK!r}")

dataset = MIMIC4Dataset(
    ehr_root=EHR_ROOT,
    note_root=NOTE_ROOT,
    ehr_tables=["diagnoses_icd", "procedures_icd", "prescriptions", "labevents"],
    note_tables=["discharge", "radiology"],
    cache_dir=CACHE_DIR,
    num_workers=8,
    dev=True,
)

if TASK == "ClinicalNotesMIMIC4":
    # Apply the notes-only task across the whole dataset and print the first
    # sample as a quick sanity check.
    task = ClinicalNotesMIMIC4()
    samples = dataset.set_task(task)
    sample = samples[0]
    print(sample)
# For "ClinicalNotesICDLabsMIMIC4" only the dataset is built here; that task
# is applied to a single patient in a later cell.

In [None]:
# Patient identifier passed to dataset.get_patient() in the next cell.
# Presumably the commented line below lists a few candidate IDs to choose
# from -- confirm against the dataset API before relying on it.
# dataset._unique_patient_ids[:5]
ID = "10095258"

In [None]:
# Fetch one patient by ID, then run the multimodal task on that patient
# directly instead of applying it across the whole dataset.
patient = dataset.get_patient(ID)

task = ClinicalNotesICDLabsMIMIC4()
samples = task(patient)

## Radiology Notes Preview

In [None]:
# Note category to preview; used as the event_type passed to
# patient.get_events(). Swap in the commented line to preview discharge notes.
TYPE = "radiology"
#TYPE = "discharge"
# Label printed above each admission's notes. Derived from TYPE so the two
# cannot drift apart when the category is switched.
NOTE = f"{TYPE}_notes"
# Admission (hadm_id) used to filter notes in the count/listing cell.
HADM_ID = '22880743'

# Number of leading characters of each note shown by the per-admission loop;
# 0 leaves the text preview empty.
CHARACTERS_PREVIEW = 0

In [None]:
# %%capture

# Print every admission of the selected patient and collect the hadm_ids so
# later cells can iterate over them.
print("----", "Admission IDs (hadm_id)", sep="\n")

admission_ids = []
for admission in patient.get_events(event_type="admissions"):
    hadm_id = admission.attr_dict['hadm_id']
    print(f"{hadm_id} -> Admission Time: {admission.timestamp}")
    admission_ids.append(hadm_id)

In [None]:
%%capture

# Summarise the notes of category TYPE attached to admission HADM_ID:
# first the count, then one line per note with its ID, timestamp and a short
# text preview.

# Run the filtered query once and reuse it for both the count and the listing.
hadm_notes = patient.get_events(event_type=TYPE, filters=[("hadm_id", "==", HADM_ID)])
# Single source for the preview length so the label and the slice always agree.
preview_chars = 100

print("----")
print(f"Count of {TYPE} notes for hadm_id: {HADM_ID}")
print(len(hadm_notes))
print("----")
print(f"Note ID for {TYPE} notes for hadm_id: {HADM_ID}")
for note in hadm_notes:
    print(f"{note.attr_dict['note_id']} -> Note Timestamp: {note.timestamp} -> First {preview_chars} Characters: {note.text[:preview_chars]}")
print("----")

In [None]:
# Walk through every collected admission and list its notes of category TYPE,
# showing the first CHARACTERS_PREVIEW characters of each note's text.
for hadm_id in admission_ids:
    print(f"{NOTE}: Admission ID: {hadm_id}", "----", sep="\n")
    notes_for_admission = patient.get_events(
        event_type=TYPE,
        filters=[("hadm_id", "==", hadm_id)],
    )
    for note in notes_for_admission:
        note_id = note.attr_dict['note_id']
        preview = note.text[:CHARACTERS_PREVIEW]
        print(f"{note_id} -> Note Timestamp: {note.timestamp} -> First {CHARACTERS_PREVIEW} Characters: {preview}")
        print("\n\n")

## ICD-10 Codes