In [None]:
import pandas as pd
import numpy as np
from clifpy import ClifOrchestrator
import sys
import os

# Setup output path

In [None]:
# Directory layout: final outputs (with graphs) and intermediate tables
output_folder = "../output"
intermediate_path = "../private_tables"

# Create every directory up front; directories that already exist are left as-is.
for _directory in (
    output_folder,
    f'{output_folder}/final',
    f'{output_folder}/final/graphs',
    intermediate_path,
):
    os.makedirs(_directory, exist_ok=True)

# Initialization

In [None]:
# Build the orchestrator from the YAML configuration file under ../config.
CONFIG_PATH = "../config/config.yaml"
co = ClifOrchestrator(config_path=CONFIG_PATH)

# Load Data

## Required variables

- hospitalization: age_at_admission
- labs: pco2_arterial, ph_arterial, po2_arterial, wbc, lymphocytes_absolute, neutrophils_absolute, hemoglobin, platelet_count,
    albumin, bicarbonate, creatinine, bilirubin_total, ferritin, lactate, ldh, procalcitonin, crp
- vitals: temp_c, heart_rate, map, respiratory_rate, spo2
- respiratory_support: peep_set, fio2_set, plateau_pressure_obs
- medication_admin_continuous: norepinephrine
- patient_assessments: gcs_eye, gcs_motor, RASS
- crrt_therapy: crrt_flag
- ecmo_mcs: ecmo_flag

In [None]:
# Required columns and category filters for each table.
# Each entry maps a table name to:
#   'columns': the columns requested from that table
#   'filters': {column_name: [allowed values]}; an empty dict declares no filter
table_requirements = {
    'hospitalization': {
        'columns': ['hospitalization_id', 'patient_id', 'admission_dttm', 'discharge_dttm', 'age_at_admission',
                    'admission_type_category', 'discharge_category'],
        'filters': {}
    },
    'patient': {
        'columns': ['patient_id', 'race_category', 'ethnicity_category', 'sex_category', 'death_dttm'],
        'filters': {}
    },
    'adt': {
        'columns': ['hospitalization_id', 'hospital_id', 'in_dttm', 'out_dttm', 'location_category', 'location_type'],
        'filters': {}
    },
    'labs': {
        'columns': ['hospitalization_id', 'lab_result_dttm', 'lab_value_numeric', 'lab_category'],
        # 'pco2_arterial' (not 'co2_arterial') is the category named in the
        # required-variables list; the misspelled value would match no rows.
        'filters': {'lab_category': ['pco2_arterial', 'ph_arterial', 'po2_arterial', 'wbc',
                                     'lymphocytes_absolute', 'neutrophils_absolute', 'hemoglobin',
                                     'platelet_count', 'albumin', 'bicarbonate', 'creatinine', 'bilirubin_total',
                                     'ferritin', 'lactate', 'ldh', 'procalcitonin', 'crp']
                    }
    },
    'vitals': {
        'columns': ['hospitalization_id', 'recorded_dttm', 'vital_value', 'vital_category'],
        # 'map' is listed as a required vital and was missing from the filter.
        # 'sbp' is kept because it was requested previously.
        # NOTE(review): confirm whether 'sbp' is still needed downstream.
        'filters': {'vital_category': ['temp_c', 'heart_rate', 'map', 'sbp', 'spo2', 'respiratory_rate']}
    },
    'respiratory_support': {
        'columns': ['hospitalization_id', 'recorded_dttm', 'device_category', 'mode_category', 'tracheostomy',
                    'peep_set', 'fio2_set', 'plateau_pressure_obs'],
        'filters': {}
    },
    'medication_admin_continuous': {
        'columns': ['hospitalization_id', 'admin_dttm', 'med_category', 'med_route_category', 'med_dose', 'med_dose_unit'],
        'filters': {'med_category': ['norepinephrine']}
    },
    'patient_assessments': {
        'columns': ['hospitalization_id', 'recorded_dttm', 'assessment_category', 'numerical_value'],
        'filters': {'assessment_category': ['gcs_eye', 'gcs_motor', 'RASS']}
    },
    'crrt_therapy': {
        'columns': ['hospitalization_id', 'recorded_dttm'],
        'filters': {}
    },
    # NOTE(review): ecmo_mcs is listed under required variables (ecmo_flag) but is
    # currently disabled here; re-enable once the table is available.
    # 'ecmo_mcs': {
    #     'columns': ['hospitalization_id', 'recorded_dttm'],
    #     'filters': {}
    # }

}

In [None]:
# Initialize the orchestrator with the tables declared in table_requirements,
# passing each table's requested columns and category filters.
co.initialize(
    # Pass a concrete list of table names rather than a dict_keys view.
    tables=list(table_requirements),
    columns={table: spec['columns'] for table, spec in table_requirements.items()},
    filters={table: spec['filters'] for table, spec in table_requirements.items()},
)

# Cohort Identification Criteria:
1. Age >= 18
2. Patient admitted to ICU between 2018-01-01 and 2024-12-31
3. Received mechanical ventilation for > 48 hours during the ICU stay
4. 

# Data Preparation Workflow:
1. Aggregate daily average measurements and pivot to create one row per ICU-day
2. VAP start dttm: 
3. ecmo_flag: set to 1 if any ECMO record (recorded dttm) falls within an ICU-day
4. crrt_flag: set to 1 if any CRRT record (recorded dttm) falls within an ICU-day