In [None]:
# Debug: Check what columns are in mac_cohort_dosage_w
print("Columns in mac_cohort_dosage_w:")
print(mac_cohort_dosage_w.columns.tolist())
print("\nShape:", mac_cohort_dosage_w.shape)
print("\nFirst few rows:")
print(mac_cohort_dosage_w.head())
print("\nData types:")
print(mac_cohort_dosage_w.dtypes)


In [None]:
# Also check what med_category values you're trying to use as column names
print("Unique med_category values:")
print(mac_cohort_dosage['med_category'].unique().tolist())
print("\nColumns in mac_cohort_dosage:")
print(mac_cohort_dosage.columns.tolist())


In [None]:
# Fixed TableOne call
# The continuous parameter should be actual column names from mac_cohort_dosage_w, not category names
med_columns = [col for col in mac_cohort_dosage_w.columns if col not in ['hospitalization_id', 'date_hr', 'cohort_flag']]

print("Using these columns as continuous variables:")
print(med_columns)

# Create TableOne with correct column names
table1 = tableone.TableOne(
    mac_cohort_dosage_w, 
    continuous=med_columns,  # Use actual column names from the dataframe
    categorical=['cohort_flag'], 
    groupby='cohort_flag'
)

print("\nTableOne results:")
print(table1.tabulate(tablefmt="github"))


# Epidemiology of Sedation in Mechanical Ventilation

In [107]:
import os
os.chdir("..")
os.getcwd()

'/Users/wliao0504/code/clif'

## Import

In [122]:
from importlib import reload
import pandas as pd
import duckdb
from utils import pyCLIF as pc
reload(pc)
from utils.waterfall import process_resp_support_waterfall
import ipytest
import tableone
from utils.data_cleaner import remove_outliers_with_timing

Loaded configuration from config.json


## Cohort Identification

In [10]:
adt = pc.load_data("clif_adt")
hospitalization = pc.load_data("clif_hospitalization")

Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_adt.parquet
Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_hospitalization.parquet


### Create ICU-stay level unique id

In [11]:
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    stitched_encounters = pc.stitch_encounters(hospitalization, adt)

In [12]:
# create a mapping table
query = """
SELECT DISTINCT patient_id, hospitalization_id, encounter_block
FROM stitched_encounters
"""
hosp_to_enc_blk_mapper = duckdb.sql(query).to_df()

In [13]:
query = """
SELECT hospitalization_id
    , encounter_block
    , date_trunc('hour', in_dttm) as in_date_hr
    , 1 as new_icu_stay
FROM stitched_encounters
WHERE location_category = 'icu'
"""
new_icu_start_hours = duckdb.sql(query).to_df()

hosp_ids_w_icu_stays = new_icu_start_hours['hospitalization_id'].unique().tolist()

### Hour 24 and 72

In [14]:
# resp = pc.load_data(
#     table = "clif_respiratory_support",
#     filters = {
#         "hospitalization_id": hosp_ids_w_icu_stays
#     }
#     )

In [15]:
# resp_f = process_resp_support_waterfall(resp)
resp_f = pd.read_parquet("resp_f.parquet")

In [16]:
focal_hosp_ids = [
    '21738444', 
    '20004088', 
    '20006154', 
    '20018306'
    ]

In [17]:
# query = f"""
# SELECT SUM(CASE WHEN location_category = 'icu' THEN 1 ELSE 0 END) as total_icu_stays
# FROM adt
# WHERE hospitalization_id IN ({",".join(focal_hosp_ids)})
# """
# duckdb.sql(query).to_df()

In [18]:
resp_f['date_hr'] = resp_f['recorded_dttm'].dt.floor('h')

query = f"""
SELECT t1.hospitalization_id
    , t3.encounter_block
    , t1.date_hr
    , MAX(CASE WHEN t2.new_icu_stay = 1 THEN 1 ELSE 0 END) as new_icu_start_from_adt
    , MAX(CASE WHEN t1.device_category = 'imv' THEN 1 ELSE 0 END) as on_imv
    , MAX(CASE WHEN t1.tracheostomy is True OR t1.tracheostomy = 1 THEN 1 ELSE 0 END) as trach_ever
    , ROW_NUMBER() OVER (PARTITION BY t1.hospitalization_id ORDER BY t1.date_hr) as rn_by_hosp
    , CASE WHEN (
        rn_by_hosp = 1 -- new hospitalization
        OR new_icu_start_from_adt = 1 -- new icu stay
    ) THEN 1 ELSE 0 END as new_icu_stay
FROM resp_f as t1
LEFT JOIN new_icu_start_hours AS t2
    ON t1.hospitalization_id = t2.hospitalization_id
    AND t1.date_hr = t2.in_date_hr
LEFT JOIN hosp_to_enc_blk_mapper AS t3
    ON t1.hospitalization_id = t3.hospitalization_id
-- WHERE t1.hospitalization_id IN ({",".join(focal_hosp_ids)})
GROUP BY t1.hospitalization_id, t1.date_hr, t3.encounter_block
ORDER BY t1.hospitalization_id, t1.date_hr
"""
df1 = duckdb.sql(query).to_df()

In [19]:
query = """
-- generate unique icu stay ids
WITH t1 AS (
    SELECT hospitalization_id
        , encounter_block
        , date_hr
        , on_imv
        , new_icu_stay
        , SUM(new_icu_stay) OVER (ORDER BY hospitalization_id, date_hr) as icu_stay_id
    FROM df1
    -- keep only hospitalizations that have at least one hour on imv and no tracheostomy
    WHERE hospitalization_id IN (
        SELECT DISTINCT hospitalization_id
        FROM df1
        GROUP BY hospitalization_id
        HAVING MAX(on_imv) = 1 AND MAX(trach_ever) = 0
    )
),
-- generate unique imv streak ids
t2 AS (
    SELECT hospitalization_id
        , icu_stay_id
        -- , encounter_block
        , date_hr
        , on_imv
        , ROW_NUMBER() OVER (PARTITION BY icu_stay_id ORDER BY date_hr) as rn_overall
        , ROW_NUMBER() OVER (PARTITION BY icu_stay_id, on_imv ORDER BY date_hr) as rn_by_imv_status
        , rn_overall - rn_by_imv_status as imv_streak_id
    FROM t1
    -- keep only icu stays that have at least one hour on imv
    WHERE icu_stay_id IN (
        SELECT DISTINCT icu_stay_id
        FROM t1
        GROUP BY icu_stay_id
        HAVING MAX(on_imv) = 1
    )
    ORDER BY hospitalization_id, icu_stay_id, date_hr
),
-- mark the 24th and 72th hour of each imv streak
t3 AS (
    SELECT hospitalization_id, icu_stay_id, date_hr
        , imv_streak_id, on_imv
        , SUM(on_imv) OVER (PARTITION BY icu_stay_id, imv_streak_id ORDER BY date_hr) as imv_hrs_in_streak
        , CASE WHEN (imv_hrs_in_streak = 24) THEN 1 ELSE 0 END as hr_24_on_imv
        , CASE WHEN (imv_hrs_in_streak = 72) THEN 1 ELSE 0 END as hr_72_on_imv
        -- calculate hour since first intubation within each icu stay
        , MIN(CASE WHEN on_imv = 1 THEN date_hr END) OVER (PARTITION BY icu_stay_id) as first_imv_hr_in_icu_stay
        -- can only calculate diff in secs, so convert to hrs
        ,  EXTRACT(EPOCH FROM (date_hr - first_imv_hr_in_icu_stay)) / 3600 + 1 as hrs_since_first_imv
    FROM t2
    ORDER BY hospitalization_id, icu_stay_id, date_hr
    )
-- exclude cases with reintubation within 72 hours
SELECT hospitalization_id, icu_stay_id, date_hr
    , imv_streak_id, on_imv, imv_hrs_in_streak, hrs_since_first_imv
    , hr_24_on_imv, hr_72_on_imv
    , COUNT(DISTINCT CASE WHEN hrs_since_first_imv BETWEEN 0 AND 72 THEN imv_streak_id END) 
        OVER (PARTITION BY icu_stay_id) as n_imv_streaks_in_72_hrs
    , CASE WHEN n_imv_streaks_in_72_hrs <= 2 AND hr_24_on_imv = 1 THEN 1 ELSE 0 END as hr_24_on_imv_noreintub
    , CASE WHEN n_imv_streaks_in_72_hrs = 1 AND hr_72_on_imv = 1 THEN 1 ELSE 0 END as hr_72_on_imv_noreintub
FROM t3
ORDER BY hospitalization_id, icu_stay_id, date_hr
"""
df2 = duckdb.sql(query).to_df()

In [100]:
%%ipytest

# sanity tests against the MIMIC-IV data
@pytest.mark.parametrize("hospitalization_id,date_hr,expected_hr,expected_flag,expected_flag_noreintub", [
    # on imv for 24-hrs twice during the same hospitalization -- so would be excluded if no reintubation within 72 hrs
    (21738444, "2186-09-14 17:00:00-06:00", 24, 1, 0),  
    (21738444, "2186-09-14 18:00:00-06:00", 24, 0, 0),  
    (21738444, "2186-09-16 18:00:00-06:00", 24, 1, 0), # second streak within the hosp
    # not on imv for the first few hrs but long streak afterwards
    (20004088, "2159-09-30 09:00:00-06:00", 24, 1, 1),
    (20004088, "2159-10-02 09:00:00-06:00", 72, 1, 1),
    # very short streaks: 20006154
    # 3 icu stays within the same hospitalization
    (20018306, "2136-05-16 05:00:00-06:00", 24, 1, 1),
    (20018306, "2136-07-01 19:00:00-06:00", 24, 1, 1),
    # (20018306, "2136-06-01 03:00:00-06:00", 24, 0), # in a icu stay that was filtered out in the df because of no imv ever
])
def test_if_on_imv_at_hr_x(hospitalization_id, date_hr, expected_hr, expected_flag, expected_flag_noreintub):
    query = f"""
    SELECT hr_{expected_hr}_on_imv, hr_{expected_hr}_on_imv_noreintub
    FROM df2
    WHERE hospitalization_id = {hospitalization_id}
    AND date_hr = '{date_hr}'
    """
    result = duckdb.sql(query).to_df()
    observed_flag = result[f'hr_{expected_hr}_on_imv'].iloc[0]
    observed_flag_noreintub = result[f'hr_{expected_hr}_on_imv_noreintub'].iloc[0]
    assert observed_flag == expected_flag
    assert observed_flag_noreintub == expected_flag_noreintub

UsageError: Cell magic `%%ipytest` not found.


### The Cohort

In [23]:
# keep the cohort
query = """
SELECT hospitalization_id
    , encounter_block
    --, icu_stay_id
    , date_hr
    , CASE WHEN hr_24_on_imv_noreintub = 1 THEN 'hr_24'
        WHEN hr_72_on_imv_noreintub = 1 THEN 'hr_72'
        ELSE NULL END as cohort_flag
FROM df2
LEFT JOIN hosp_to_enc_blk_mapper USING (hospitalization_id)
WHERE hr_24_on_imv_noreintub = 1 OR hr_72_on_imv_noreintub = 1
"""
cohort = duckdb.sql(query).to_df()

cohort_hosp_ids = cohort['hospitalization_id'].unique().tolist()

In [None]:
query = """
SELECT *
    , 
FROM cohort
WHERE cohort_flag IS NOT NULL AND med_category is NOT NULL
"""
mac_cohort_hrs = duckdb.sql(query).to_df()

BinderException: Binder Error: Referenced column "med_category" not found in FROM clause!
Candidate bindings: "date_hr", "encounter_block"

## Vitals

In [27]:
vital_categories = [
    "weight_kg",
    "height_cm",
    "temp_c"
    "heart_rate",
    "respiratory_rate",
    "temperature",
    "sbp",
    "dbp",
    "spo2",
    "map"
]

In [28]:
vitals_required_columns = [
    "hospitalization_id",
    "recorded_dttm",
    "vital_category",
    "vital_value"
]

vitals = pc.load_data(
    table = "clif_vitals",
    filters = {
        "hospitalization_id": cohort_hosp_ids
    },
    columns = vitals_required_columns
)

vitals['date_hr'] = vitals['recorded_dttm'].dt.floor('h')

Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_vitals.parquet


In [119]:
vitals = remove_outliers_with_timing(vitals, 'vitals', 'vital_value', file_path='config/outliers.json')

Outliers found in vitals
Category 'map': 3876 outliers (0.13%) have been set to NaN.
Category 'spo2': 1592 outliers (0.06%) have been set to NaN.
Category 'temp_c': 1104 outliers (0.14%) have been set to NaN.
Category 'respiratory_rate': 610 outliers (0.02%) have been set to NaN.
Category 'weight_kg': 499 outliers (0.26%) have been set to NaN.
Category 'dbp': 424 outliers (0.01%) have been set to NaN.
Category 'sbp': 86 outliers (0.00%) have been set to NaN.
Category 'height_cm': 32 outliers (0.29%) have been set to NaN.
Category 'heart_rate': 12 outliers (0.00%) have been set to NaN.
⏱️ Outlier removal completed in 7.83 seconds
🟢 Processed 18,115,764 rows


In [29]:
query = f"""
SELECT *
FROM cohort c
CROSS JOIN (SELECT DISTINCT vital_category FROM vitals) v
ORDER BY hospitalization_id, date_hr, vital_category
"""
cohort_hrs_cross_vital_categories = duckdb.sql(query).to_df()

In [82]:
query = """
-- fill any missing values in the cohort hours with the nearest in time (in the past or future)
SELECT hospitalization_id
    , date_hr
    , cohort_flag
    , vital_category
    , MEAN(vital_value) as mean_value
    
    , LAG(mean_value) OVER (PARTITION BY hospitalization_id, vital_category ORDER BY date_hr) as mean_value_lag
    , LAST_VALUE(mean_value IGNORE NULLS) OVER (
        PARTITION BY hospitalization_id, vital_category 
        ORDER BY date_hr
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) as mean_value_final
    -- , COALESCE(mean_value, mean_value_last) as mean_value_final
FROM cohort_hrs_cross_vital_categories
FULL OUTER JOIN vitals USING (hospitalization_id, date_hr, vital_category)
GROUP BY hospitalization_id, date_hr, cohort_flag, vital_category
ORDER BY hospitalization_id, vital_category, date_hr, cohort_flag
"""
vitals_hrly = duckdb.sql(query).to_df()

In [83]:
query = """
SELECT hospitalization_id
    , date_hr
    , cohort_flag
    , vital_category
    , mean_value
    , mean_value_final
FROM vitals_hrly
WHERE cohort_flag IS NOT NULL
ORDER BY hospitalization_id, vital_category, date_hr, cohort_flag
"""
vitals_cohort_hrs = duckdb.sql(query).to_df()

## Medications

In [147]:
sed_med_categories = [
    "midazolam", "lorazepam", "hydromorphone", "fentanyl", "propofol", "dexmedetomidine", "ketamine"
]

mac_required_columns = [
    "hospitalization_id", 
    "admin_dttm",
    "med_category",
    "med_dose",
    "med_dose_unit",
    "mar_action_name"
]

mac = pc.load_data(
    table = "clif_medication_admin_continuous",
    columns = mac_required_columns,
    filters = {
        "hospitalization_id": cohort_hosp_ids,
        "med_category": sed_med_categories
    }
)

mac['date_hr'] = mac['admin_dttm'].dt.floor('h')

Data loaded successfully from /Users/wliao0504/code/clif/CLIF-MIMIC/output/rclif-dev-test-copy/clif_medication_admin_continuous.parquet


### Check dosage unit

In [148]:
query = """
SELECT med_category, med_dose_unit, COUNT(*) as n
FROM mac
GROUP BY med_category, med_dose_unit
ORDER BY med_category, n DESC
"""
med_units_count = duckdb.sql(query).to_df()
med_units_count

Unnamed: 0,med_category,med_dose_unit,n
0,dexmedetomidine,mcg/kg/hour,89430
1,fentanyl,mcg/hour,113807
2,fentanyl,mcg/kg/hour,2
3,hydromorphone,mg/hour,9356
4,ketamine,mg/kg/hour,7513
5,ketamine,mg/hour,101
6,ketamine,mcg/kg/min,33
7,ketamine,mcg/min,11
8,lorazepam,mg/hour,128
9,midazolam,mg/hour,46571


### Look up patient weights

In [149]:
mac.columns

Index(['hospitalization_id', 'admin_dttm', 'med_category', 'med_dose',
       'med_dose_unit', 'mar_action_name', 'date_hr'],
      dtype='object')

In [150]:
query = """
SELECT m.*
    , v.vital_value as weight_kg
    , v.recorded_dttm as weight_recorded_dttm
    -- rn = 1 for the weight w/ the latest recorded_dttm (and thus most recent)
    , ROW_NUMBER() OVER (
        PARTITION BY m.hospitalization_id, m.admin_dttm, m.med_category
        ORDER BY v.recorded_dttm DESC
        ) as rn
FROM mac m
LEFT JOIN vitals v 
    ON m.hospitalization_id = v.hospitalization_id 
    AND v.vital_category = 'weight_kg'
    AND v.recorded_dttm <= m.admin_dttm  -- only past weights
QUALIFY (rn = 1) OR (weight_kg IS NULL) -- include meds even if no weight found
ORDER BY m.hospitalization_id, m.admin_dttm, m.med_category, rn
"""
mac_w_wt = duckdb.sql(query).to_df()

### Standardize dosage unit

In [151]:
def standardize_dose_unit(df_name: str) -> pd.DataFrame:
    """
    Standardize everything to mcg/min.
    Assumes the presentation of the following columns:
    - med_dose_unit: the original unit of the dose
    - med_dose: the original dose
    - weight_kg: the (imputed, most recent) weight of the patient
    """
    query = f"""
    SELECT *
        , LOWER(med_dose_unit) AS med_dose_unit_lower
        , CASE WHEN regexp_matches(med_dose_unit_lower, '/h(r|our)?\\b') THEN 1/60.0
            WHEN regexp_matches(med_dose_unit_lower, '/m(in|inute)?\\b') THEN 1.0
            ELSE NULL END as time_multiplier
        , CASE WHEN contains(med_dose_unit_lower, '/kg/') THEN weight_kg
            ELSE 1 END AS pt_weight_adjustment
        , CASE WHEN contains(med_dose_unit_lower, 'mcg/') THEN 1.0
            WHEN contains(med_dose_unit_lower, 'mg/') THEN 1000.0
            WHEN contains(med_dose_unit_lower, 'ng/') THEN 0.001
            WHEN contains(med_dose_unit_lower, 'milli') THEN 0.001
            ELSE NULL END as dose_mass_multiplier
        , med_dose * time_multiplier * pt_weight_adjustment * dose_mass_multiplier as med_dose_converted
        , CASE WHEN med_dose_unit_lower ~ 'units/' THEN 'units/min'
            ELSE 'mcg/min' END as med_dose_unit_converted
    FROM {df_name}
    """
    return duckdb.sql(query).to_df()

mac_converted = standardize_dose_unit('mac_w_wt')

### Scaffold cohort with med_categories

In [152]:
query = f"""
SELECT *
FROM cohort c
CROSS JOIN (SELECT DISTINCT med_category FROM mac) m
ORDER BY hospitalization_id, date_hr, med_category
"""
cohort_hrs_cross_med_categories = duckdb.sql(query).to_df()

### Calculate cumulative dosage

In [165]:
query = """
-- insert the cohort hours into the mac record
SELECT hospitalization_id
    , cohort_flag
    , med_category
    , date_hr
    , admin_dttm
    , mar_action_name
    -- , med_dose, med_dose_unit
    , med_dose_converted as med_dose
    , med_dose_unit_converted as med_dose_unit
    , weight_kg
    , LAST_VALUE(admin_dttm IGNORE NULLS) OVER (
        PARTITION BY hospitalization_id, med_category 
        ORDER BY date_hr, admin_dttm 
        ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) as admin_dttm_last
    , LAST_VALUE(med_dose IGNORE NULLS) OVER (
        PARTITION BY hospitalization_id, med_category 
        ORDER BY date_hr, admin_dttm 
        ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) as med_dose_last
    , LAST_VALUE(mar_action_name IGNORE NULLS) OVER (
        PARTITION BY hospitalization_id, med_category 
        ORDER BY date_hr, admin_dttm 
        ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) as mar_action_name_last
    -- add helper flags for first and last streak of the same med within the hour -- which needs special handling
    , CASE WHEN admin_dttm = MIN(admin_dttm) OVER (PARTITION BY hospitalization_id, med_category, date_hr) 
        THEN 1 ELSE 0 END as is_first_streak
    , CASE WHEN admin_dttm = MAX(admin_dttm) OVER (PARTITION BY hospitalization_id, med_category, date_hr) 
        THEN 1 ELSE 0 END as is_last_streak
    , date_hr + INTERVAL '1 hour' as date_hr_next
FROM cohort_hrs_cross_med_categories c
FULL OUTER JOIN mac_converted m USING (hospitalization_id, date_hr, med_category)
ORDER BY hospitalization_id, med_category, date_hr, admin_dttm
"""
mac_hrly = duckdb.sql(query).to_df()

In [None]:
# query = """
# SELECT *

# FROM mac_hrly
# ORDER BY hospitalization_id, med_category, date_hr, admin_dttm
# """
# # forward filled
# mac_hrly_ff = duckdb.sql(query).to_df()

In [163]:
query = """
-- keep only the med admins that are within the cohort hours
SELECT *
FROM mac_hrly
WHERE cohort_flag IS NOT NULL AND admin_dttm_last IS NOT NULL
ORDER BY hospitalization_id, med_category, date_hr, admin_dttm
"""
mac_cohort_hrs = duckdb.sql(query).to_df()

In [198]:
mask = mac_cohort_hrs['cohort_flag'] == 'hr_24'
mask.sum()

np.int64(27790)

In [172]:
query = """
-- calculate the cumulative dosage within the hr
SELECT hospitalization_id, cohort_flag, med_category, date_hr
    , SUM(CASE 
        -- if no mac admin record within the cohort hour, use the last observed dose and assume it runs the entire 60 mins
        WHEN admin_dttm IS NULL
            THEN 60.0 * med_dose_last 
        -- otherwise, calculate the cumulative dosage within the hr
        WHEN is_first_streak = 1
            THEN EXTRACT(EPOCH FROM (admin_dttm - date_hr))/60.0 * med_dose_last 
        WHEN is_first_streak != 1 AND admin_dttm IS NOT NULL
            THEN EXTRACT(EPOCH FROM (admin_dttm - admin_dttm_last))/60.0 * med_dose_last 
        WHEN is_last_streak = 1
            THEN EXTRACT(EPOCH FROM (date_hr_next - admin_dttm))/60.0 * med_dose
        ELSE 0 END) as total_dosage
    , 'mcg' as med_dose_unit
FROM mac_cohort_hrs
GROUP BY hospitalization_id, med_category, date_hr, cohort_flag, med_dose_unit
ORDER BY hospitalization_id, med_category, date_hr
"""
mac_cohort_dosage = duckdb.sql(query).to_df()

In [190]:
mac_cohort_dosage_w = mac_cohort_dosage.pivot_table(
    index=['hospitalization_id', 'date_hr', 'cohort_flag'], 
    columns='med_category', 
    values='total_dosage', 
    fill_value=0
).reset_index()
mac_cohort_dosage_w.columns.name = None

In [None]:
# query = """
# SELECT hospitalization_id
#     , date_hr
#     , cohort_flag
#     , med_category
#     , COALESCE(total_dosage, 0) as total_dosage
#     , med_dose_unit
# FROM cohort_hrs_cross_med_categories c
# FULL OUTER JOIN mac_cohort_dosage m USING (hospitalization_id, date_hr, med_category, cohort_flag)
# ORDER BY hospitalization_id, cohort_flag, med_category, date_hr
# """
# df = duckdb.sql(query).to_df()

In [180]:
url="https://raw.githubusercontent.com/tompollard/data/master/primary-biliary-cirrhosis/pbc.csv"
data=pd.read_csv(url)

In [182]:
mac_cohort_dosage['med_category'].unique().tolist()

['dexmedetomidine',
 'fentanyl',
 'midazolam',
 'propofol',
 'hydromorphone',
 'ketamine',
 'lorazepam']

In [197]:
tableone.TableOne(
    mac_cohort_dosage_w, 
    continuous=mac_cohort_dosage['med_category'].unique().tolist(), 
    categorical=[], 
    groupby='cohort_flag')

Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by cohort_flag,Grouped by cohort_flag,Grouped by cohort_flag,Grouped by cohort_flag
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,hr_24,hr_72
n,,,17319,12806,4513
"dexmedetomidine, mean (SD)",,0.0,3.8 (14.3),2.9 (12.2),6.2 (18.8)
"fentanyl, mean (SD)",,0.0,3043.8 (13359.1),3117.8 (15301.4),2834.0 (4523.4)
"hydromorphone, mean (SD)",,0.0,2.1 (21.2),1.6 (18.1),3.5 (28.1)
"ketamine, mean (SD)",,0.0,0.3 (3.4),0.2 (3.1),0.4 (4.2)
"lorazepam, mean (SD)",,0.0,0.1 (4.6),0.1 (4.8),0.1 (4.0)
"midazolam, mean (SD)",,0.0,38.4 (165.3),37.0 (115.0),42.3 (259.3)
"propofol, mean (SD)",,0.0,755.6 (1104.9),813.7 (1138.7),590.6 (984.4)
