# Init

In [1]:
import os
os.chdir('..')
os.getcwd()

'/Users/wliao0504/code/clif/CLIF-epi-of-sedation'

## Import

In [2]:
from clifpy import ClifOrchestrator
import pandas as pd
import duckdb
from pathlib import Path
from clifpy.utils.unit_converter import convert_dose_units_by_med_category

In [3]:
site = 'mimic'

In [4]:
q = """
SET TimeZone='US/Eastern'
"""
duckdb.sql(q)

## Utils

In [5]:
def run_query_from_file(sql_file_path: str) -> duckdb.DuckDBPyRelation:
    """
    Loads a query from a .sql file and executes it using the given DuckDB connection.

    Args:
        con: An active DuckDB connection.
        sql_file_path: The absolute path to the .sql file.

    Returns:
        A pandas DataFrame with the results of the query.
    """
    print(f"--- Loading and executing query from {sql_file_path} ---")
    
    # Read the entire content of the .sql file
    query = Path(sql_file_path).read_text()

    # Execute the query and return as a pandas DataFrame
    result = duckdb.sql(query)

    print("Query executed successfully.")
    return result

In [49]:
def add_day_shift_id(df: pd.DataFrame, timestamp_name = 'event_dttm') -> pd.DataFrame:
    df['_dh'] = df[timestamp_name].dt.floor('h')
    df['_hr'] = df[timestamp_name].dt.hour
    q = """
    WITH day_starts AS (
        FROM df
        SELECT *
            , _shift: CASE WHEN _hr >= 7 AND _hr < 19 THEN 'day' ELSE 'night' END
            , _is_day_start: CASE
                WHEN _hr = 7 AND COALESCE(LAG(_hr) OVER w, -1) != 7 THEN 1
                ELSE 0 END
        WINDOW w AS (PARTITION BY hospitalization_id ORDER BY _dh)       
    )
    FROM day_starts
    -- INNER JOIN cohort_hosp_ids_df USING (hospitalization_id)
    SELECT *
        , _nth_day: SUM(_is_day_start) OVER w
        , _day_shift: 'day' || _nth_day::INT::TEXT || '_' || _shift
    WINDOW w AS (PARTITION BY hospitalization_id ORDER BY _dh)       
    ORDER BY hospitalization_id, _dh
    """
    return duckdb.sql(q).df()

# Proprocess

In [6]:
co = ClifOrchestrator(config_path="config/mimic_config.json")

游닉 ClifOrchestrator initialized


In [7]:
cohort_hosp_ids_df = pd.read_csv('data/cohort_hosp_ids.csv')
cohort_hosp_ids = cohort_hosp_ids_df['hospitalization_id'].astype(str).tolist()

## Patient & Hosp

In [8]:
from clifpy import Patient, Hospitalization
pt = Patient.from_file(
    config_path = 'config/config.json',
    columns = ['patient_id', 'death_dttm'],
    )
pt_df = pt.df

hosp = Hospitalization.from_file(
    config_path = 'config/config.json',
    columns = ['patient_id', 'hospitalization_id', 'discharge_dttm', 'discharge_category'],
    )
hosp_df = hosp.df

q = """
FROM hosp_df
INNER JOIN cohort_hosp_ids_df USING (hospitalization_id)
SELECT DISTINCT patient_id, hospitalization_id
"""
pt_to_hosp_id_mapper = duckdb.sql(q).df()

cohort_pt_ids = pt_to_hosp_id_mapper['patient_id'].tolist()

游닉 Configuration loaded from config/config.json
游닉 Initialized patient table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/patient_schema.yaml
游닉 Loaded outlier configuration
游닉 Configuration loaded from config/config.json
游닉 Initialized hospitalization table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/hospitalization_schema.yaml
游닉 Loaded outlier configuration


## Vitals

In [9]:
vitals_path = co.data_directory + '/clif_vitals.parquet'
q = f"""
-- find the latest recorded vital for each hospitalization
FROM '{vitals_path}'
SELECT hospitalization_id
    , MAX(recorded_dttm) AS recorded_dttm
GROUP BY hospitalization_id
"""
last_vitals_df = duckdb.sql(q).df()

In [10]:
vitals = co.load_table(
    'vitals', 
    columns = ['hospitalization_id', 'recorded_dttm', 'vital_category', 'vital_value'],
    filters = {
        'vital_category': ['spo2', 'weight_kg', 'respiratory_rate', 'heart_rate'],
        'hospitalization_id': cohort_hosp_ids
    }
    )
vitals_df = vitals.df

游닉 Initialized vitals table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/vitals_schema.yaml
游닉 Loaded outlier configuration


In [11]:
q = """
WITH w AS (
PIVOT_WIDER vitals_df
ON vital_category
USING MAX(vital_value)
)
SELECT *
    , _tachy: CASE WHEN heart_rate > 130 THEN 1 ELSE 0 END
    , _brady: CASE WHEN heart_rate < 60 THEN 1 ELSE 0 END
    , _rr_high: CASE WHEN respiratory_rate > 35 THEN 1 ELSE 0 END
    , _rr_low: CASE WHEN respiratory_rate < 8 THEN 1 ELSE 0 END
FROM w
ORDER BY hospitalization_id, recorded_dttm
"""
vitals_w = duckdb.sql(q).df()
vitals_w.head()

Unnamed: 0,hospitalization_id,recorded_dttm,heart_rate,respiratory_rate,spo2,weight_kg,_tachy,_brady,_rr_high,_rr_low
0,20001305,2178-03-25 02:59:00-05:00,,,,44.0,0,0,0,0
1,20001305,2178-03-25 05:32:00-05:00,76.0,18.0,,,0,0,0,0
2,20001305,2178-03-25 05:33:00-05:00,,,,43.9,0,0,0,0
3,20001305,2178-03-25 05:49:00-05:00,,,100.0,,0,0,0,0
4,20001305,2178-03-25 06:00:00-05:00,73.0,25.0,100.0,,0,0,0,0


## Code status

In [12]:
from clifpy import CodeStatus
cs = CodeStatus.from_file(
    config_path = 'config/config.json',
    columns = ['patient_id', 'start_dttm', 'code_status_category'],
    filters = {
        'patient_id': cohort_pt_ids
    }
    )
cs_df = cs.df
q = """
FROM cs_df
LEFT JOIN pt_to_hosp_id_mapper USING (patient_id)
SELECT hospitalization_id, start_dttm, code_status_category
"""
cs_df = duckdb.sql(q).df()

游닉 Configuration loaded from config/config.json
游닉 Initialized code_status table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/code_status_schema.yaml
游닉 Loaded outlier configuration


## Resp

In [13]:
co.initialize(tables = ['respiratory_support'])

游닉 Initialized respiratory_support table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/respiratory_support_schema.yaml
游닉 Loaded outlier configuration


In [14]:
# processed_bf = co.respiratory_support.waterfall(bfill = True)

In [15]:
# processed_bf.df.to_parquet(f"output/intermediate/{site}_resp_processed_bf.parquet")

In [16]:
# resp_p = processed_bf.df
resp_p = pd.read_parquet(f"output/intermediate/{site}_resp_processed_bf.parquet")

In [17]:
resp_p['tracheostomy'] = resp_p['tracheostomy'].astype(int)

In [18]:
# sbt_blocks_df = run_query_from_file('code/sbt.sql')

In [51]:
resp_view = run_query_from_file('code/sbt.sql').df()
# assert len(resp_view) == len(resp_p), 'length altered'

--- Loading and executing query from code/sbt.sql ---
Query executed successfully.


In [45]:
q = """
-- look for represenative examples to eyeball
FROM resp_view
SELECT hospitalization_id
    , MAX(_trach_1st) AS _trach_1st
    , MAX(_fail_extub) AS _fail_extub
    , COUNT(*) AS _n
GROUP BY hospitalization_id
HAVING _trach_1st = 1 AND _fail_extub = 1 AND _N > 100
ORDER BY _n
LIMIT 10
"""
duckdb.sql(q).df()

Unnamed: 0,hospitalization_id,_trach_1st,_fail_extub,_n
0,23817409,1,1,102
1,25006664,1,1,114
2,28357467,1,1,117
3,24922752,1,1,130
4,21903132,1,1,131
5,25898987,1,1,133
6,27370800,1,1,139
7,29168199,1,1,143
8,29525590,1,1,144
9,21544901,1,1,145


### Trajectory

In [53]:
resp_view_w_ids = add_day_shift_id(resp_view)
assert len(resp_view_w_ids) == len(resp_view), 'length altered'

In [70]:
q = """
WITH agg as (
    FROM resp_view_w_ids
    SELECT hospitalization_id
        , _nth_day
        , sbt_done: COALESCE(MAX(sbt_done), 0)
        , _extub_1st: COALESCE(MAX(_extub_1st), 0)
        , success_extub: COALESCE(MAX(_success_extub), 0)
        , _intub: COALESCE(MAX(_intub), 0)
        , _trach_1st: COALESCE(MAX(_trach_1st), 0)
        , _fail_extub: COALESCE(MAX(_fail_extub), 0)
        , _withdrawl_lst: COALESCE(MAX(_withdrawl_lst), 0)
        , _death_after_extub_wo_reintub: COALESCE(MAX(_death_after_extub_wo_reintub), 0)
        , discharge: ANY_VALUE(discharge_category)
        , code_status: ANY_VALUE(code_status_category ORDER BY cs_start_dttm DESC)

    --WHERE hospitalization_id in ('20001361', '20004088', '20005024')
    GROUP BY hospitalization_id, _nth_day
)
, aug as (
    FROM agg
    SELECT *
        , _exit_sum: success_extub + _trach_1st + _fail_extub + _withdrawl_lst + _death_after_extub_wo_reintub
        , _exit: _exit_sum::BOOL::INT
)
SELECT *
FROM aug
ORDER BY hospitalization_id, _nth_day
"""
resp_traj_by_days = duckdb.sql(q).df()
# resp_traj_by_days.head()

## ADT

In [28]:
adt = co.load_table(
    'adt',
    columns = ['hospitalization_id', 'in_dttm', 'out_dttm', 'location_name', 'location_category'],
    filters = {
        'hospitalization_id': cohort_hosp_ids
    }
)
adt_df = adt.df

游닉 Initialized adt table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/adt_schema.yaml
游닉 Loaded outlier configuration


## PA

In [29]:
co.load_table(
    'patient_assessments',
    columns = ['hospitalization_id', 'recorded_dttm', 'assessment_category', 'numerical_value'],
    filters = {
        'assessment_category': ['gcs_total'],
        'hospitalization_id': cohort_hosp_ids
    }
)

pa_df = co.patient_assessments.df

游닉 Initialized patient_assessments table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/patient_assessments_schema.yaml
游닉 Loaded outlier configuration


In [30]:
q = """
PIVOT_WIDER pa_df
ON assessment_category
USING MAX(numerical_value)
"""
pa_w = duckdb.sql(q).df()
pa_w.head()

Unnamed: 0,hospitalization_id,recorded_dttm,gcs_total
0,26520382,2122-10-18 04:57:00-05:00,15.0
1,27593240,2121-03-02 08:00:00-05:00,8.0
2,27247343,2133-01-07 12:00:00-05:00,15.0
3,27210310,2180-10-06 08:41:00-05:00,15.0
4,27210310,2180-10-09 00:39:00-05:00,9.0


## Cont Meds

In [31]:
from clifpy import MedicationAdminContinuous

In [32]:
mac = MedicationAdminContinuous.from_file(
    config_path = 'config/config.json',
    columns = ['hospitalization_id', 'admin_dttm', 'med_name', 'med_category', 'med_dose', 'med_dose_unit'],
    filters = {
        'med_group': ['vasoactives', 'sedation'], 
        'hospitalization_id': cohort_hosp_ids
    }
    )

游닉 Configuration loaded from config/config.json
游닉 Initialized medication_admin_continuous table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/medication_admin_continuous_schema.yaml
游닉 Loaded outlier configuration


In [33]:
from typing import override
from clifpy.utils.unit_converter import convert_dose_units_by_med_category

preferred_units = {
    'dopamine': 'mcg/kg/min',
    'dobutamine': 'mcg/kg/min',
    'norepinephrine': 'mcg/kg/min',
    'epinephrine': 'mcg/kg/min',
    'phenylephrine': 'mcg/kg/min',
    'angiotensin': 'mcg/kg/min',
    'vasopressin': 'u/min',
    'milrinone': 'mcg/kg/min',
    'propofol': 'mcg/min',
    'dexmedetomidine': 'mcg/min',
    'ketamine': 'mcg/min',
    'midazolam': 'mcg/min',
    'fentanyl': 'mcg/min',
    'hydromorphone': 'mcg/min',
    'morphine': 'mcg/min',
    'remifentanil': 'mcg/min',
    'pentobarbital': 'mcg/min',
    'lorazepam': 'mcg/min'
    }

mac_converted, mac_summary = convert_dose_units_by_med_category(
    mac.df,
    vitals_df = vitals_df,
    preferred_units = preferred_units,
    override = True
)

No weight_kg column found, adding the most recent from vitals
The following med_categories are given a preferred unit but not found in the input med_df: {'remifentanil'}


In [34]:
q = """
WITH t1 AS (
    SELECT hospitalization_id
        , admin_dttm
        , med_category_unit: med_category || '_' || REPLACE(med_dose_unit_converted, '/', '_')
        , med_dose_converted
    FROM mac_converted
)
, t2 AS (
    PIVOT_WIDER t1
    ON med_category_unit
    USING FIRST(med_dose_converted)
)
SELECT *
FROM t2
ORDER BY hospitalization_id, admin_dttm
"""
mac_w = duckdb.sql(q).df()

## Intm Meds

In [35]:
intm_meds = co.load_table(
    'medication_admin_intermittent',
    columns = ['hospitalization_id', 'admin_dttm', 'med_name', 'med_category', 'med_dose', 'med_dose_unit'],
    filters = {
        'hospitalization_id': cohort_hosp_ids,
        'med_group': ['sedation']
    }
)

游닉 Initialized medication_admin_intermittent table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/medication_admin_intermittent_schema.yaml
游닉 Loaded outlier configuration


In [36]:
from typing import override
from clifpy.utils.unit_converter import convert_dose_units_by_med_category

intm_preferred_units = {
    'propofol': 'mcg',
    'dexmedetomidine': 'mcg',
    'ketamine': 'mcg',
    'midazolam': 'mcg',
    'fentanyl': 'mcg',
    'hydromorphone': 'mcg',
    'morphine': 'mcg',
    'remifentanil': 'mcg',
    'pentobarbital': 'mcg',
    'lorazepam': 'mcg'
    }

mai_converted, mai_summary = convert_dose_units_by_med_category(
    intm_meds.df,
    vitals_df = vitals_df,
    preferred_units = intm_preferred_units,
    override = True
)

No weight_kg column found, adding the most recent from vitals
The following med_categories are given a preferred unit but not found in the input med_df: {'fentanyl', 'morphine', 'dexmedetomidine', 'lorazepam', 'remifentanil', 'pentobarbital', 'hydromorphone'}


## Wide

In [37]:
# co.create_wide_dataset(
#     tables_to_load = ['vitals', 'patient_assessments'],
#     batch_size = -1
# )
# wide = co.wide_df

# Timestamps

In [38]:
q = """
SELECT hospitalization_id, recorded_dttm AS event_dttm FROM resp_view
UNION
SELECT hospitalization_id, recorded_dttm AS event_dttm FROM vitals_w
UNION
SELECT hospitalization_id, admin_dttm AS event_dttm FROM mac_w
UNION
SELECT hospitalization_id, recorded_dttm AS event_dttm FROM pa_w
--UNION
--SELECT hospitalization_id, in_dttm AS event_dttm FROM adt_df
--UNION
--SELECT hospitalization_id, out_dttm AS event_dttm FROM adt_df
"""
all_timestamps = duckdb.sql(q).df()

all_timestamps['_dh'] = all_timestamps['event_dttm'].dt.floor('h')
all_timestamps['_hr'] = all_timestamps['event_dttm'].dt.hour

In [39]:
q = """
WITH day_starts AS (
    FROM all_timestamps
    SELECT *
        , _shift: CASE WHEN _hr >= 7 AND _hr < 19 THEN 'day' ELSE 'night' END
        , _is_day_start: CASE
            WHEN _hr = 7 AND COALESCE(LAG(_hr) OVER w, -1) != 7 THEN 1
            ELSE 0 END
    WINDOW w AS (PARTITION BY hospitalization_id ORDER BY event_dttm)       
)
FROM day_starts
INNER JOIN cohort_hosp_ids_df USING (hospitalization_id)
SELECT *
    , _nth_day: SUM(_is_day_start) OVER w
    , _day_shift_id: 'day' || _nth_day::INT::TEXT || '_' || _shift
WINDOW w AS (PARTITION BY hospitalization_id ORDER BY event_dttm)       
ORDER BY hospitalization_id, event_dttm
"""
timestamps_w_ids = duckdb.sql(q).df()

# Sedation dose

In [40]:
sed = MedicationAdminContinuous.from_file(
    config_path = 'config/config.json',
    columns = [
        'hospitalization_id', 'admin_dttm', 'med_name', 'med_category', 'med_dose', 'med_dose_unit',
        'mar_action_name', 'mar_action_category'
        ],
    filters = {
        'med_group': ['sedation'], 
        'hospitalization_id': cohort_hosp_ids
    }
    )

sed_preferred_units = {
    # 'dopamine': 'mcg/kg/min',
    # 'dobutamine': 'mcg/kg/min',
    # 'norepinephrine': 'mcg/kg/min',
    # 'epinephrine': 'mcg/kg/min',
    # 'phenylephrine': 'mcg/kg/min',
    # 'angiotensin': 'mcg/kg/min',
    # 'vasopressin': 'u/min',
    # 'milrinone': 'mcg/kg/min',
    'propofol': 'mg/min',
    'dexmedetomidine': 'mcg/min',
    'ketamine': 'mg/min',
    'midazolam': 'mg/min',
    'fentanyl': 'mcg/min',
    'hydromorphone': 'mg/min',
    'morphine': 'mg/min',
    'remifentanil': 'mcg/min',
    'pentobarbital': 'mg/min',
    'lorazepam': 'mg/min'
    }

sed_converted, sed_summary = convert_dose_units_by_med_category(
    sed.df,
    vitals_df = vitals_df,
    preferred_units = sed_preferred_units,
    override = True
)

游닉 Configuration loaded from config/config.json
游닉 Initialized medication_admin_continuous table
游닉 Data directory: /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test
游닉 File type: parquet
游닉 Timezone: US/Eastern
游닉 Output directory: /Users/wliao0504/code/clif/CLIF-epi-of-sedation/output
游닉 Loaded schema from /Users/wliao0504/code/clif/CLIF-epi-of-sedation/.venv/lib/python3.12/site-packages/clifpy/schemas/medication_admin_continuous_schema.yaml
游닉 Loaded outlier configuration
No weight_kg column found, adding the most recent from vitals
The following med_categories are given a preferred unit but not found in the input med_df: {'remifentanil'}


In [41]:
q = """
WITH t1 AS (
    SELECT hospitalization_id
        , admin_dttm as event_dttm
        , med_category_unit: med_category || '_' || REPLACE(med_dose_unit_converted, '/', '_')
        , med_dose_converted
    FROM sed_converted
)
, t2 AS (
    PIVOT_WIDER t1
    ON med_category_unit
    USING FIRST(med_dose_converted)
)
SELECT *
FROM t2
ORDER BY hospitalization_id, event_dttm
"""
sed_w = duckdb.sql(q).df()

## Grid

In [42]:
q = """
FROM all_timestamps
SELECT hospitalization_id
    , MIN(event_dttm) AS _start_dttm
    , MAX(event_dttm) AS _end_dttm
GROUP BY hospitalization_id
ORDER BY hospitalization_id
"""
time_ranges = duckdb.sql(q).df()
time_ranges['_start_hr'] = time_ranges['_start_dttm'].dt.floor('h')
time_ranges['_end_hr'] = time_ranges['_end_dttm'].dt.ceil('h')

q = """
SELECT 
    hospitalization_id,
    unnest(generate_series(_start_hr, _end_hr, INTERVAL '1 hour')) AS event_dttm
FROM time_ranges
ORDER BY hospitalization_id, event_dttm
"""
hrly_grids = duckdb.sql(q).df()

In [43]:
q = """
-- create the hourly grid for the wide sedation table
FROM hrly_grids g
FULL JOIN sed_w m USING (hospitalization_id, event_dttm)
ORDER BY hospitalization_id, event_dttm
"""
sed_wg = duckdb.sql(q).df()
sed_wg['_dh'] = sed_wg['event_dttm'].dt.floor('h')
sed_wg['_hr'] = sed_wg['event_dttm'].dt.hour

## Cumulative dose

In [44]:
sed_dose_by_hr = run_query_from_file('code/sed_dose_by_hr.sql').df()

--- Loading and executing query from code/sed_dose_by_hr.sql ---
Query executed successfully.


In [45]:
q = """
WITH day_starts AS (
    FROM sed_dose_by_hr
    SELECT *
        , _shift: CASE WHEN _hr >= 7 AND _hr < 19 THEN 'day' ELSE 'night' END
        , _is_day_start: CASE
            WHEN _hr = 7 AND COALESCE(LAG(_hr) OVER w, -1) != 7 THEN 1
            ELSE 0 END
    WINDOW w AS (PARTITION BY hospitalization_id ORDER BY _dh)       
)
FROM day_starts
-- INNER JOIN cohort_hosp_ids_df USING (hospitalization_id)
SELECT *
    , _nth_day: SUM(_is_day_start) OVER w
    , _day_shift: 'day' || _nth_day::INT::TEXT || '_' || _shift
WINDOW w AS (PARTITION BY hospitalization_id ORDER BY _dh)       
ORDER BY hospitalization_id, _dh
"""
sed_dose_by_hr_w_ids = duckdb.sql(q).df()
assert len(sed_dose_by_hr_w_ids) == len(sed_dose_by_hr), 'length altered'

In [46]:
q = """
--
FROM sed_dose_by_hr_w_ids
SELECT hospitalization_id, _day_shift
    , SUM(COLUMNS('_min'))
GROUP BY hospitalization_id, _day_shift
ORDER BY hospitalization_id, _day_shift
"""
sed_dose_by_day_shift = duckdb.sql(q).df()

# Join

In [47]:
q = """
WITH t3 AS (
    FROM all_timestamps t
    LEFT JOIN mac_w m ON
        t.hospitalization_id = m.hospitalization_id
        AND t.event_dttm = m.admin_dttm
    SELECT t.hospitalization_id, t.event_dttm
        , LAST_VALUE(COLUMNS('_min') IGNORE NULLS) OVER (
            PARTITION BY t.hospitalization_id ORDER BY event_dttm
        )
), t4 AS (
    SELECT hospitalization_id, event_dttm
        , COALESCE(COLUMNS('_min'), 0)
    FROM t3
), t5 AS (
    SELECT *
        -- ref: https://doi.org/10.1016/j.jcrc.2020.11.002
        , _nee: norepinephrine_mcg_kg_min 
            + epinephrine_mcg_kg_min 
            + phenylephrine_mcg_kg_min / 10.0 
            + dopamine_mcg_kg_min / 100.0 
            + vasopressin_u_min * 2.5 
            + angiotensin_mcg_kg_min * 10
        , _hemo_stable_by_nee: CASE WHEN _nee <= 0.2 THEN 1 ELSE 0 END
        -- to cover the two vasos not in the formula: milrinone and dobutamine
        , _hemo_stable_by_abc: CASE WHEN dobutamine_mcg_kg_min < 0.5
            AND milrinone_mcg_kg_min = 0 THEN 1 ELSE 0 END
        , _hemo_stable: CASE WHEN _hemo_stable_by_nee AND _hemo_stable_by_abc THEN 1 ELSE 0 END
    FROM t4
)
SELECT *
FROM t5
ORDER BY hospitalization_id, event_dttm
"""
# a = augmented
mac_wa = duckdb.sql(q).df()

In [48]:
q = """
WITH t1 AS (
FROM timestamps_w_ids t
LEFT JOIN resp_view r ON
    t.hospitalization_id = r.hospitalization_id
    AND t.event_dttm = r.recorded_dttm
LEFT JOIN vitals_w v ON
    t.hospitalization_id = v.hospitalization_id
    AND t.event_dttm = v.recorded_dttm
    AND v.spo2 IS NOT NULL
LEFT JOIN mac_wa m ON
    t.hospitalization_id = m.hospitalization_id
    AND t.event_dttm = m.event_dttm
LEFT JOIN pa_w p ON
    t.hospitalization_id = p.hospitalization_id
    AND t.event_dttm = p.recorded_dttm
SELECT _hospitalization_id: t.hospitalization_id
    , _time: t.event_dttm
    , _nth_day
    , _day_shift_id
    --, _nth_hr: ROW_NUMBER() OVER (PARTITION BY _hospitalization_id ORDER BY _time)
    , device_name, device_category
    , mode_name, mode_category
    , fio2_set
    , peep_set
    , pressure_support_set
    , tracheostomy
    , gcs_total
    , spo2
    , _spo2: LAST_VALUE(v.spo2 IGNORE NULLS) OVER (PARTITION BY _hospitalization_id ORDER BY _time)
    , _resp_stable: CASE
        WHEN fio2_set <= 0.5
            AND peep_set <= 8
            AND _spo2 >= 88
        THEN 1 ELSE 0 END
    , m.* 
    , _stable: CASE WHEN _resp_stable AND _hemo_stable THEN 1 ELSE 0 END
    , sbt_eligible: CASE WHEN device_category = 'imv' AND _stable = 1 AND tracheostomy = 0 
        THEN 1 ELSE 0 END
    , sbt_done
    , extub
)
SELECT *
FROM t1
ORDER BY _hospitalization_id, _time
"""
full_view = duckdb.sql(q).df()

BinderException: Binder Error: Referenced column "extub" not found in FROM clause!
Candidate bindings: "_extub_1st", "_intub", "_death_after_extub_wo_reintub", "_hemo_stable", "_hemo_stable_by_abc"

In [None]:
q = """
SELECT _hospitalization_id
    , _time
    , sbt_eligible
    , sbt_done
    , extub
    , device_category, device_name
    , mode_category, mode_name
    , fio2_set
    , peep_set
    , pressure_support_set
    , _spo2
FROM full_view
--WHERE hospitalization_id in ('20001361', '20004088', '20005024')
"""
check_view = duckdb.sql(q).df()

q = """
SELECT _hospitalization_id
    , _nth_day
    , sbt_eligible: COALESCE(MAX(sbt_eligible), 0)
    , sbt_done: COALESCE(MAX(sbt_done), 0)
    , extub: COALESCE(MAX(extub), 0)
FROM full_view
--WHERE hospitalization_id in ('20001361', '20004088', '20005024')
GROUP BY _hospitalization_id, _nth_day
ORDER BY _hospitalization_id, _nth_day
"""
agg_view = duckdb.sql(q).df()