# Epidemiology of Sedation in Mechanical Ventilation

In [1]:
import os

# Make the repository root the working directory so that the project-local
# `utils` package imported by this notebook can be resolved.
# The move is guarded: an unconditional os.chdir("..") climbs one more level
# every time the cell is re-run, which silently breaks later relative imports.
# NOTE(review): assumes a `utils/` folder sits at the repository root and not
# next to the notebook itself -- confirm.
if not os.path.isdir("utils"):
    os.chdir("..")
os.getcwd()

'/Users/wliao0504/code/clif/CLIF-epi-of-sedation'

In [53]:
import pandas as pd
import duckdb
from utils import pyCLIF as pc
from utils.waterfall import process_resp_support_waterfall
import pytest

## Cohort Identification

In [70]:
# Load the ADT and hospitalization tables; both are inputs to the
# encounter-stitching step that follows.
adt = pc.load_data(table="clif_adt")
hospitalization = pc.load_data(table="clif_hospitalization")

Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_adt.parquet
Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_hospitalization.parquet


### Create ICU-stay level unique id

In [72]:
import warnings

# Ignore FutureWarning only while the stitching call runs; the previous
# warning filters are restored as soon as the `with` block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=FutureWarning)
    stitched_encounters = pc.stitch_encounters(hospitalization, adt)

In [None]:
# create a mapping table
# The query keeps the distinct (patient_id, hospitalization_id, encounter_block)
# combinations found in `stitched_encounters`. DuckDB resolves that name to the
# Python variable defined by the stitching cell, so that cell must run first.
query = """
SELECT DISTINCT patient_id, hospitalization_id, encounter_block
FROM stitched_encounters
"""
hosp_to_enc_blk_mapper = duckdb.sql(query).to_df()

In [159]:
# For every stitched-encounter row whose location is an ICU, record the hour
# (in_dttm truncated to the hour) at which that location begins and tag it
# with new_icu_stay = 1.
query = """
SELECT hospitalization_id
    , encounter_block
    , date_trunc('hour', in_dttm) as in_date_hr
    , 1 as new_icu_stay
FROM stitched_encounters
WHERE location_category = 'icu'
"""
new_icu_start_hours = duckdb.sql(query).df()

# Hospitalizations with at least one ICU location, in order of first appearance.
hosp_ids_w_icu_stays = (
    new_icu_start_hours["hospitalization_id"].drop_duplicates().tolist()
)

### Identify hours 24 and 72 on IMV

In [94]:
# Load respiratory support only for hospitalizations that have an ICU location.
resp = pc.load_data(
    table="clif_respiratory_support",
    filters={"hospitalization_id": hosp_ids_w_icu_stays},
)

Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_respiratory_support.parquet


In [95]:
# Run the respiratory-support waterfall on the loaded table.
# In the recorded run the "Waterfall fill by mode_name_id" phase alone took
# about 2m46s, so expect this cell to be slow on a full re-run.
resp_f = process_resp_support_waterfall(resp)

✦ Phase 0: initialise & create hourly scaffold
FIO2_SET mean= 0.4902933168490583 is within the required range
Creating hourly scaffold for each encounter


Creating hourly scaffolds: 100%|██████████| 68063/68063 [00:03<00:00, 17112.98it/s]


✦ Phase 1: heuristic inference of device / mode
✦ Phase 2: build device / mode hierarchical IDs
✦ Phase 3: numeric down/up-fill inside mode_name_id blocks
Applying waterfall fill to 68063 encounters...


  return getattr(df, df_function)(wrapper, **kwargs)
Waterfall fill by mode_name_id: 100%|██████████| 417289/417289 [02:46<00:00, 2502.64it/s]


✦ Phase 4: final deduplication & ordering
[OK] Respiratory-support waterfall complete.


In [None]:
# Hospitalization ids (kept as strings) used for spot checks in this notebook.
focal_hosp_ids = ["21738444", "20004088", "20006154", "20018306"]

In [206]:
# Count the ADT rows with location_category = 'icu' for the focal hospitalizations.
# Each id is emitted as a quoted SQL string literal. Splicing the raw ids in
# unquoted makes them numeric literals, which relies on an implicit
# string/number cast and fails outright for ids that are not purely numeric.
focal_ids_sql = ", ".join(f"'{hosp_id}'" for hosp_id in focal_hosp_ids)
query = f"""
SELECT SUM(CASE WHEN location_category = 'icu' THEN 1 ELSE 0 END) as total_icu_stays
FROM adt
WHERE hospitalization_id IN ({focal_ids_sql})
"""
duckdb.sql(query).to_df()

Unnamed: 0,total_icu_stays
0,6.0


In [207]:
# Hourly bucket of each respiratory-support record. This adds a column to
# resp_f in place; re-running the cell just recomputes the same values.
resp_f['date_hr'] = resp_f['recorded_dttm'].dt.floor('h')

# Build df1: one row per (hospitalization_id, date_hr, encounter_block) with
#   - new_icu_start_from_adt: 1 when an ICU location starts in that hour,
#   - on_imv: 1 when any record in that hour has device_category = 'imv',
#   - new_icu_stay: 1 on the first hour of a hospitalization or on an ICU start.
# This is deliberately a plain string, not an f-string: nothing is interpolated.
# The earlier version still evaluated `focal_hosp_ids` inside a commented-out
# SQL line, which made this cell fail on a fresh kernel unless the debugging
# cell defining that list had been run.
query = """
SELECT t1.hospitalization_id
    , t3.encounter_block
    , t1.date_hr
    , MAX(CASE WHEN t2.new_icu_stay = 1 THEN 1 ELSE 0 END) as new_icu_start_from_adt
    , MAX(CASE WHEN t1.device_category = 'imv' THEN 1 ELSE 0 END) as on_imv
    , ROW_NUMBER() OVER (PARTITION BY t1.hospitalization_id ORDER BY t1.date_hr) as rn_by_hosp
    , CASE WHEN (
        rn_by_hosp = 1 -- new hospitalization
        OR new_icu_start_from_adt = 1 -- new icu stay
    ) THEN 1 ELSE 0 END as new_icu_stay
FROM resp_f as t1
LEFT JOIN new_icu_start_hours AS t2
    ON t1.hospitalization_id = t2.hospitalization_id
    AND t1.date_hr = t2.in_date_hr
LEFT JOIN hosp_to_enc_blk_mapper AS t3
    ON t1.hospitalization_id = t3.hospitalization_id
GROUP BY t1.hospitalization_id, t1.date_hr, t3.encounter_block
ORDER BY t1.hospitalization_id, t1.date_hr
"""
df1 = duckdb.sql(query).to_df()

In [208]:
# Build df2: one row per hour of every ICU stay that has at least one IMV hour.
#   t1    assigns icu_stay_id as the running sum of new_icu_stay.
#   t2    assigns imv_streak_id with the row-number-difference technique.
#   t3    counts IMV hours within each streak and marks the 24th / 72nd hour.
#   final counts the distinct runs in the first 72 hours after the first IMV
#         hour and derives the *_noreintub flags.
#
# Why on_imv appears next to imv_streak_id below:
# (rn_overall - rn_by_imv_status) is unique only together with on_imv. An
# off-IMV run can receive the same number as an IMV run in the same ICU stay
# (e.g. hours [imv, off, imv] get ids 0, 1, 1). Without on_imv,
#   * an off-IMV run sharing an id with the preceding IMV run inherits that
#     run's cumulative IMV-hour count, so off-IMV hours can be flagged as
#     "hour 24/72 on IMV";
#   * COUNT(DISTINCT imv_streak_id) undercounts runs, so a reintubation can be
#     missed.
# The imv_streak_id column itself is left unchanged for downstream users.
#
# NOTE(review): n_imv_streaks_in_72_hrs counts both on- and off-IMV runs. The
# "<= 2" threshold for hour 24 presumably allows one IMV run followed by one
# off-IMV run -- confirm that this asymmetry with "= 1" for hour 72 is intended.
query = """
-- generate unique icu stay ids
WITH t1 AS (
    SELECT hospitalization_id
        , encounter_block
        , date_hr
        , on_imv
        , new_icu_stay
        , SUM(new_icu_stay) OVER (ORDER BY hospitalization_id, date_hr) as icu_stay_id
    FROM df1
    -- keep only hospitalizations that have at least one hour on imv
    WHERE hospitalization_id IN (
        SELECT DISTINCT hospitalization_id
        FROM df1
        GROUP BY hospitalization_id
        HAVING MAX(on_imv) = 1
    )
),
-- generate imv streak ids (unique only in combination with on_imv)
t2 AS (
    SELECT hospitalization_id
        , icu_stay_id
        -- , encounter_block
        , date_hr
        , on_imv
        , ROW_NUMBER() OVER (PARTITION BY icu_stay_id ORDER BY date_hr) as rn_overall
        , ROW_NUMBER() OVER (PARTITION BY icu_stay_id, on_imv ORDER BY date_hr) as rn_by_imv_status
        , rn_overall - rn_by_imv_status as imv_streak_id
    FROM t1
    -- keep only icu stays that have at least one hour on imv
    WHERE icu_stay_id IN (
        SELECT DISTINCT icu_stay_id
        FROM t1
        GROUP BY icu_stay_id
        HAVING MAX(on_imv) = 1
    )
    ORDER BY hospitalization_id, icu_stay_id, date_hr
),
-- mark the 24th and 72nd hour of each imv streak
t3 AS (
    SELECT hospitalization_id, icu_stay_id, date_hr
        , imv_streak_id, on_imv
        -- partition by on_imv as well, so off-imv hours never inherit an imv streak's count
        , SUM(on_imv) OVER (PARTITION BY icu_stay_id, on_imv, imv_streak_id ORDER BY date_hr) as imv_hrs_in_streak
        , CASE WHEN (imv_hrs_in_streak = 24) THEN 1 ELSE 0 END as hr_24_on_imv
        , CASE WHEN (imv_hrs_in_streak = 72) THEN 1 ELSE 0 END as hr_72_on_imv
        -- calculate hour since first intubation within each icu stay
        , MIN(CASE WHEN on_imv = 1 THEN date_hr END) OVER (PARTITION BY icu_stay_id) as first_imv_hr_in_icu_stay
        -- can only calculate diff in secs, so convert to hrs
        ,  EXTRACT(EPOCH FROM (date_hr - first_imv_hr_in_icu_stay)) / 3600 as hrs_since_first_imv
    FROM t2
    ORDER BY hospitalization_id, icu_stay_id, date_hr
    )
-- exclude cases with reintubation within 72 hours
SELECT hospitalization_id, icu_stay_id, date_hr
    , imv_streak_id, on_imv, imv_hrs_in_streak, hrs_since_first_imv
    , hr_24_on_imv, hr_72_on_imv
    -- 2 * imv_streak_id + on_imv encodes the (streak id, imv status) pair as one integer,
    -- so on-imv and off-imv runs that share a streak id are counted separately
    , COUNT(DISTINCT CASE WHEN hrs_since_first_imv BETWEEN 0 AND 72 THEN 2 * imv_streak_id + on_imv END)
        OVER (PARTITION BY icu_stay_id) as n_imv_streaks_in_72_hrs
    , CASE WHEN n_imv_streaks_in_72_hrs <= 2 AND hr_24_on_imv = 1 THEN 1 ELSE 0 END as hr_24_on_imv_noreintub
    , CASE WHEN n_imv_streaks_in_72_hrs = 1 AND hr_72_on_imv = 1 THEN 1 ELSE 0 END as hr_72_on_imv_noreintub
FROM t3
ORDER BY hospitalization_id, icu_stay_id, date_hr
"""
df2 = duckdb.sql(query).to_df()

In [209]:
# Cohort selection: keep every df1 row belonging to a hospitalization that has
# at least one hour flagged hr_24_on_imv = 1 in df2.
query = """
SELECT *
FROM df1
WHERE hospitalization_id IN (
    SELECT DISTINCT hospitalization_id
    FROM df2
    WHERE hr_24_on_imv = 1
)
"""
df3 = duckdb.sql(query).df()

# Cohort hospitalization ids, de-duplicated in order of first appearance.
cohort_hosp_ids = df3["hospitalization_id"].drop_duplicates().tolist()

In [None]:
%%ipytest

# sanity tests against the MIMIC-IV data
@pytest.mark.parametrize("hospitalization_id,date_hr,expected_hr,expected_flag,expected_flag_noreintub", [
    # two separate 24-hr imv streaks in the same hospitalization -- the hour-24 marks exist,
    # but the "no reintubation" variants are expected to be 0
    (21738444, "2186-09-14 17:00:00-06:00", 24, 1, 0),
    (21738444, "2186-09-14 18:00:00-06:00", 24, 0, 0),
    (21738444, "2186-09-16 18:00:00-06:00", 24, 1, 0), # second streak within the hosp
    # not on imv for the first few hrs but long streak afterwards
    (20004088, "2159-09-30 09:00:00-06:00", 24, 1, 1),
    (20004088, "2159-10-02 09:00:00-06:00", 72, 1, 1),
    # very short streaks: 20006154
    # 3 icu stays within the same hospitalization
    (20018306, "2136-05-16 05:00:00-06:00", 24, 1, 1),
    (20018306, "2136-07-01 19:00:00-06:00", 24, 1, 1),
    # (20018306, "2136-06-01 03:00:00-06:00", 24, 0), # in a icu stay that was filtered out in the df because of no imv ever
])
def test_if_on_imv_at_hr_x(hospitalization_id, date_hr, expected_hr, expected_flag, expected_flag_noreintub):
    """Check the hour-mark flags in ``df2`` for one hospitalization and hour.

    Args:
        hospitalization_id: Hospitalization to look up in ``df2``.
        date_hr: Hour to look up, as a timestamp string.
        expected_hr: Which hour mark to check (24 or 72); selects the columns
            ``hr_<expected_hr>_on_imv`` and ``hr_<expected_hr>_on_imv_noreintub``.
        expected_flag: Expected value of ``hr_<expected_hr>_on_imv``.
        expected_flag_noreintub: Expected value of
            ``hr_<expected_hr>_on_imv_noreintub``.
    """
    # The id is passed as a quoted literal so the comparison does not depend on
    # an implicit string/number cast of the hospitalization_id column.
    query = f"""
    SELECT hr_{expected_hr}_on_imv, hr_{expected_hr}_on_imv_noreintub
    FROM df2
    WHERE hospitalization_id = '{hospitalization_id}'
    AND date_hr = '{date_hr}'
    """
    result = duckdb.sql(query).to_df()
    # Fail with a readable message instead of an IndexError from .iloc[0]
    # when the requested hour is not present in df2.
    assert not result.empty, (
        f"no row in df2 for hospitalization_id={hospitalization_id} at date_hr={date_hr}"
    )
    observed_flag = result[f'hr_{expected_hr}_on_imv'].iloc[0]
    observed_flag_noreintub = result[f'hr_{expected_hr}_on_imv_noreintub'].iloc[0]
    assert observed_flag == expected_flag
    assert observed_flag_noreintub == expected_flag_noreintub

[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                                                                      [100%][0m
[32m[32m[1m7 passed[0m[32m in 0.07s[0m[0m


## Vitals

In [211]:
# Load vitals only for the hospitalizations retained in the cohort.
vitals = pc.load_data(
    table="clif_vitals",
    filters={"hospitalization_id": cohort_hosp_ids},
)

Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_vitals.parquet


In [212]:
# Display the loaded vitals table; the rendered output is truncated to the
# first and last rows.
vitals

Unnamed: 0,hospitalization_id,recorded_dttm,vital_name,vital_category,vital_value,meas_site_name
0,29888503,2137-07-13 05:00:00+00:00,O2 saturation pulseoxymetry,spo2,100.0,
1,29888503,2137-07-13 06:00:00+00:00,Arterial Blood Pressure diastolic,dbp,60.0,
2,29888503,2137-07-13 06:00:00+00:00,Arterial Blood Pressure mean,map,80.0,
3,29888503,2137-07-13 06:00:00+00:00,O2 saturation pulseoxymetry,spo2,100.0,
4,29888503,2137-07-13 07:00:00+00:00,Arterial Blood Pressure diastolic,dbp,50.0,
...,...,...,...,...,...,...
30242666,27791218,2162-08-11 19:00:00+00:00,Respiratory Rate,respiratory_rate,20.0,
30242667,27791218,2162-08-11 20:00:00+00:00,Respiratory Rate,respiratory_rate,22.0,
30242668,27791218,2162-08-11 21:00:00+00:00,Respiratory Rate,respiratory_rate,25.0,
30242669,27791218,2162-08-11 22:00:00+00:00,Respiratory Rate,respiratory_rate,26.0,
