# Epidemiology of Sedation in Mechanical Ventilation

In [1]:
import os
os.chdir("..")
os.getcwd()

'/Users/wliao0504/code/clif/CLIF-epi-of-sedation'

In [53]:
import pandas as pd
import duckdb
from utils import pyCLIF as pc
from utils.waterfall import process_resp_support_waterfall
import pytest

## Cohort Identification

In [70]:
adt = pc.load_data("clif_adt")
hospitalization = pc.load_data("clif_hospitalization")

Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_adt.parquet
Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_hospitalization.parquet


### Create ICU-stay level unique id

In [72]:
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    stitched_encounters = pc.stitch_encounters(hospitalization, adt)

In [None]:
# create a mapping table
query = """
SELECT DISTINCT patient_id, hospitalization_id, encounter_block
FROM stitched_encounters
"""
hosp_to_enc_blk_mapper = duckdb.sql(query).to_df()

In [113]:
query = """
SELECT hospitalization_id
    , encounter_block
    , date_trunc('hour', in_dttm) as in_date_hr
    , 1 as new_icu_stay
FROM stitched_encounters
WHERE location_category = 'icu'
"""
new_icu_start_hours = duckdb.sql(query).to_df()

hosp_ids_w_icu_stays = new_icu_start_hours['hospitalization_id'].unique().tolist()

### Hr 24

In [94]:
resp = pc.load_data(
    table = "clif_respiratory_support",
    filters = {
        "hospitalization_id": hosp_ids_w_icu_stays
    }
    )

Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_respiratory_support.parquet


In [95]:
resp_f = process_resp_support_waterfall(resp)

✦ Phase 0: initialise & create hourly scaffold
FIO2_SET mean= 0.4902933168490583 is within the required range
Creating hourly scaffold for each encounter


Creating hourly scaffolds: 100%|██████████| 68063/68063 [00:03<00:00, 17112.98it/s]


✦ Phase 1: heuristic inference of device / mode
✦ Phase 2: build device / mode hierarchical IDs
✦ Phase 3: numeric down/up-fill inside mode_name_id blocks
Applying waterfall fill to 68063 encounters...


  return getattr(df, df_function)(wrapper, **kwargs)
Waterfall fill by mode_name_id: 100%|██████████| 417289/417289 [02:46<00:00, 2502.64it/s]


✦ Phase 4: final deduplication & ordering
[OK] Respiratory-support waterfall complete.


In [None]:
focal_hosp_ids = ['21738444', '20004088', '20006154', '20018306']

In [112]:
query = f"""
SELECT SUM(CASE WHEN location_category = 'icu' THEN 1 ELSE 0 END) as total_icu_stays
FROM adt
WHERE hospitalization_id IN ({",".join(focal_hosp_ids)})
"""
duckdb.sql(query).to_df()

Unnamed: 0,total_icu_stays
0,6.0


In [None]:
resp_f['date_hr'] = resp_f['recorded_dttm'].dt.floor('h')

query = f"""
SELECT t1.hospitalization_id
    , t3.encounter_block
    , t1.date_hr
    , MAX(CASE WHEN t2.new_icu_stay = 1 THEN 1 ELSE 0 END) as new_icu_stay
    , MAX(CASE WHEN device_category = 'imv' THEN 1 ELSE 0 END) as on_imv
    , SUM(on_imv) OVER (
        PARTITION BY t1.hospitalization_id, on_imv
        ORDER BY date_hr 
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as total_hrs_on_imv
    , ROW_NUMBER() OVER (
        PARTITION BY t1.hospitalization_id, on_imv
        ORDER BY date_hr) as hr_index
    , CASE WHEN (on_imv = 1) AND (hr_index = 24) AND (total_hrs_on_imv = 24) THEN 1 ELSE 0 END as hr_24_on_imv
    , CASE WHEN (on_imv = 1) AND (hr_index = 72) AND (total_hrs_on_imv = 72) THEN 1 ELSE 0 END as hr_72_on_imv
FROM resp_f as t1
LEFT JOIN new_icu_start_hours AS t2
    ON t1.hospitalization_id = t2.hospitalization_id
    AND t1.date_hr = t2.in_date_hr
LEFT JOIN hosp_to_enc_blk_mapper AS t3
    ON t1.hospitalization_id = t3.hospitalization_id
WHERE t1.hospitalization_id IN ({",".join(focal_hosp_ids)})
GROUP BY t1.hospitalization_id, t1.date_hr, t3.encounter_block
ORDER BY t1.hospitalization_id, t1.date_hr
"""
df1 = duckdb.sql(query).to_df()

BinderException: Binder Error: window function calls cannot be nested

In [None]:
query = """
SELECT *
FROM df1
WHERE hospitalization_id IN (
    SELECT DISTINCT hospitalization_id
    FROM df1
    WHERE hr_24_on_imv = 1
)
"""
df2 = duckdb.sql(query).to_df()

In [None]:
%%ipytest

@pytest.mark.parametrize("hospitalization_id,date_hr,expected_hr,expected_result", [
    # on imv for 24-hrs twice during the same hospitalization -- should not count the second streak?
    (21738444, "2186-09-14 17:00:00-06:00", 24, 1),  
    (21738444, "2186-09-14 18:00:00-06:00", 24, 0),  
    (21738444, "2186-09-16 18:00:00-06:00", 24, 0),
    # not on imv for the first few hrs
    (20004088, "2159-09-30 09:00:00-06:00", 24, 1),
    (20004088, "2159-10-02 09:00:00-06:00", 72, 1),
    # very short streaks: 20006154
])
def test_if_on_imv_at_hr_x(hospitalization_id, date_hr, expected_hr, expected_result):
    query = f"""
    SELECT hr_{expected_hr}_on_imv
    FROM df1
    WHERE hospitalization_id = {hospitalization_id}
    AND date_hour = '{date_hr}'
    """
    result = duckdb.sql(query).to_df()
    actual_result = result[f'hr_{expected_hr}_on_imv'].iloc[0]
    assert actual_result == expected_result

[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                                                                        [100%][0m
[32m[32m[1m5 passed[0m[32m in 0.07s[0m[0m


## Resp