# Init

In [None]:
import os
# Move the working directory one level up; the `src.` imports in the next cell
# are resolved after this change (presumably the notebook sits one folder below
# the repository root - confirm).
# NOTE(review): the path is relative, so re-running this cell without restarting
# the kernel moves up one more level each time.
os.chdir("..")
# Bare last expression: displays the resulting working directory as cell output.
os.getcwd()

## Import

In [4]:
# src/tables/medication_admin_continuous.py
import numpy as np
import pandas as pd
import logging
from importlib import reload
import src.utils
reload(src.utils)
import duckdb
from src.utils import construct_mapper_dict, fetch_mimic_events, load_mapping_csv, \
    get_relevant_item_ids, find_duplicates, rename_and_reorder_cols, save_to_rclif, \
    convert_and_sort_datetime, setup_logging, search_mimic_items
# from fuzzywuzzy import process

loaded configuration from /Users/wliao0504/code/clif/CLIF-MIMIC/src/../config/config.json


In [3]:
# Initialise logging through the project helper imported from src.utils.
setup_logging()

# Column names of the medication_admin_continuous (MAC) output table
# (presumably also the target column order - confirm against the code that consumes it).
MAC_COLUMNS = [
    "hospitalization_id", "med_order_id", "admin_dttm", "med_name", "med_category", "med_group", 
    "med_route_name", "med_route_category", "med_dose", "med_dose_unit", "mar_action_name", "mar_action_category"
]

# Source column name -> output column name.
# NOTE(review): both "rateuom" and "amountuom" map to "med_dose_unit"; renaming a
# frame that contains both would yield two columns with the same name - confirm
# that only one of them is present when this mapper is applied.
MAC_COL_RENAME_MAPPER = {
    "dose": "med_dose",
    "rateuom": "med_dose_unit",
    "amountuom": "med_dose_unit",
    "new_mar": "mar_action_name", 
    "linkorderid": "med_order_id",
    "recorded_dttm": "admin_dttm"
}

# Raw-GitHub URL of a CSV in the CLIF consortium repository (med categories for
# this table, judging by the file name). Not used in the cells shown here.
MAC_MCIDE_URL = "https://raw.githubusercontent.com/clif-consortium/CLIF/main/mCIDE/clif_medication_admin_continuous_med_categories.csv"

def map_name_to_category(name, categories, threshold=80):
    '''
    Map a medication name to its closest category using fuzzy matching.

    The previous implementation called `process.extractOne`, but `process`
    (fuzzywuzzy) is not imported in this notebook, so every call raised a
    NameError. This version relies on the standard library only.

    NOTE(review): similarity is now `difflib.SequenceMatcher.ratio()` scaled to
    0-100 on lower-cased, stripped strings. That is not the same scorer as
    fuzzywuzzy's default, so borderline names may land on the other side of
    the threshold - re-validate the cut-off before relying on the output.

    Parameters
    ----------
    name : str
        Medication name to classify. Non-string values (e.g. NaN) yield None.
    categories : iterable of str
        Candidate category names. Non-string entries are ignored.
    threshold : float, default 80
        Minimum similarity score (0-100) required to accept the best match.

    Returns
    -------
    str or None
        The best-scoring category exactly as given in `categories`, or None
        when there is no candidate or the best score is below `threshold`.
    '''
    # Function-scope import keeps the block runnable without touching the import cell.
    from difflib import SequenceMatcher

    if not isinstance(name, str):
        return None

    needle = name.strip().lower()
    best_match, best_score = None, -1.0
    for category in categories:
        if not isinstance(category, str):
            continue
        score = 100 * SequenceMatcher(None, needle, category.strip().lower()).ratio()
        # Strict '>' keeps the first candidate on ties, so the result is deterministic.
        if score > best_score:
            best_match, best_score = category, score

    if best_match is None or best_score < threshold:
        return None
    return best_match

def are_doses_close(doses, rel_tol=0.1):
    """
    Tell whether the first two doses differ by at most `rel_tol`, relative to the larger one.

    Only positions 0 and 1 of `doses` are read. For two positive doses the
    result matches the original expression
    `abs(a - b) / max(a, b) <= 0.1`.

    Edge cases handled here that the original expression got wrong:
    - both doses are 0: the original computed 0/0 (NaN) and reported "not close";
      two identical zero doses are now close.
    - negative values: the original divided by a negative maximum, producing a
      negative ratio that always passed; the magnitude is used instead.
    - missing values: a NaN/None dose makes the pair not close, independent of
      argument order.

    Parameters
    ----------
    doses : pandas.Series
        Series with at least two numeric entries.
    rel_tol : float, default 0.1
        Maximum allowed relative difference.

    Returns
    -------
    bool
    """
    first, second = doses.iloc[0], doses.iloc[1]

    # A missing dose cannot be compared; treat the pair as not close.
    if pd.isna(first) or pd.isna(second):
        return False

    scale = max(abs(first), abs(second))
    if scale == 0:
        # Both doses are exactly zero, hence identical.
        return True

    return bool(abs(first - second) / scale <= rel_tol)

# drop the row with the shorter mar_action_name
def drop_shorter_action_name(group):
    """
    Collapse a two-row group with close doses to the row with the longest `mar_action_name`.

    Any group that does not have exactly two rows, or whose `med_dose` values
    are not close according to `are_doses_close`, is returned unchanged.
    """
    is_pair = len(group) == 2
    # `and` short-circuits, so the dose comparison only runs for two-row groups.
    if not (is_pair and are_doses_close(group['med_dose'])):
        return group

    name_lengths = group['mar_action_name'].str.len()
    longest_label = name_lengths.idxmax()
    # List-based .loc selection keeps the result a DataFrame rather than a Series.
    return group.loc[[longest_label]]

2025-01-07 02:40:53,431 - INFO - initialized logging at logs/test.log


# Dev

## Load

In [15]:
# Load the "mac" mapping table through the project helper.
mac_mapping = load_mapping_csv("mac")

logging.info("parsing the mapping files to identify relevant items and fetch corresponding events...")
# Select item ids from the mapping using its "decision" column and the labels below
# (presumably rows carrying one of these labels are excluded - exact semantics
# live in src.utils.get_relevant_item_ids; confirm there).
mac_item_ids = get_relevant_item_ids(
    mapping_df = mac_mapping, 
    decision_col = "decision", 
    excluded_labels = ["NO MAPPING", "UNSURE", "MAPPED ELSEWHERE", "NOT AVAILABLE", "TO MAP, ELSEWHERE"]
    ) 

# Fetch the events for those item ids, then pass the result through
# convert_and_sort_datetime.
mac_events = fetch_mimic_events(mac_item_ids).pipe(convert_and_sort_datetime)



## Relevant columns

In [26]:
# Exploratory check: frequency of each ordercomponenttypedescription value in the
# fetched events. The printed header records the author's conclusion that this
# column is not needed.
print("--- does not seem relevant:\n")
print(mac_events.value_counts("ordercomponenttypedescription"))

--- does not seem relevant:

ordercomponenttypedescription
Main order parameter                                                                                    3418848
Mixed solution                                                                                          1155586
Additives                                         Ampoule                                                 99056
Name: count, dtype: int64


In [27]:
# Exploratory check: frequency of each secondaryordercategoryname value; the
# counts are shown as the cell's last-expression output.
print("--- does not seem relevant:\n")
mac_events.value_counts('secondaryordercategoryname')

--- does not seem relevant:



secondaryordercategoryname
02-Fluids (Crystalloids)    3327069
Additive (Crystalloid)       235017
Additives (PN)                19554
Name: count, dtype: int64

In [50]:
# Join every event to its mapping row on itemid and keep the columns of interest.
# The SQL refers to the in-scope Python variables mac_events and mac_mapping by name.
# duration_1min is 1 when endtime - starttime is exactly one minute, else 0.
# LEFT JOIN keeps events that have no matching mapping row (their med_category is NULL).
query = f"""
SELECT subject_id, hadm_id
    , starttime, endtime --, storetime
    , linkorderid
    , statusdescription
    , med_category
    , rate, rateuom
    , amount, amountuom
    , patientweight
    , totalamount, totalamountuom, originalamount, originalrate
    , ordercategoryname, ordercategorydescription
    , e.label
    , CASE WHEN (endtime - starttime) = INTERVAL '1 minute'
        THEN 1 ELSE 0 END AS duration_1min
FROM mac_events e
LEFT JOIN mac_mapping m USING (itemid)
ORDER BY hadm_id, starttime, linkorderid, med_category, endtime
"""
# Materialise the query result as a pandas DataFrame.
mac_selected_and_mapped = duckdb.sql(query).df()

In [None]:
# Frequency of each statusdescription value in the selected events; the printed
# note highlights the 'Bolus' value, which the next section uses as one of the
# criteria for intermittent doses.
print("note that there is a 'Bolus' category under statusdescription, which is the MAR action")
mac_selected_and_mapped.value_counts("statusdescription")

statusdescription
FinishedRunning    2515685
ChangeDose/Rate    2090635
Stopped             284600
Paused              240503
Bolus                 2621
Name: count, dtype: int64

## Remove intermittents

First identify the intermittents and observe their pattern.

In [51]:
# SQL predicate that marks a row as an intermittent dose: any one of the three
# criteria is enough. It is interpolated into later queries, both to select the
# intermittents and (negated) to exclude them.
find_intm_where_clause = """
ordercategoryname = '05-Med Bolus'
    OR ordercategorydescription = 'Drug Push'
    OR statusdescription = 'Bolus'
"""

In [52]:
# Collect the rows matched by the intermittent predicate and add two sets of flags:
#   intm_by_*  - which of the three criteria the row satisfies;
#   not_1min_* - the row satisfies that criterion but duration_1min is 0.
# The not_1min_* expressions reference the intm_by_* aliases defined earlier in
# the same SELECT list.
query = f"""
SELECT *
    -- flags to identify intermittents
    , CASE WHEN ordercategoryname = '05-Med Bolus'
        THEN 1 ELSE 0 END AS intm_by_ordercategoryname
    , CASE WHEN ordercategorydescription = 'Drug Push'
        THEN 1 ELSE 0 END AS intm_by_ordercategorydescription
    , CASE WHEN statusdescription = 'Bolus'
        THEN 1 ELSE 0 END AS intm_by_statusdescription
    -- flags to count discrepencies, i.e. if all of the ostensible intermittents satisfy the 1min duration rule
    , CASE WHEN intm_by_ordercategorydescription = 1 AND duration_1min = 0
        THEN 1 ELSE 0 END AS not_1min_ordercategorydescription
    , CASE WHEN intm_by_ordercategoryname = 1 AND duration_1min = 0
        THEN 1 ELSE 0 END AS not_1min_ordercategoryname
    , CASE WHEN intm_by_statusdescription = 1 AND duration_1min = 0
        THEN 1 ELSE 0 END AS not_1min_statusdescription
FROM mac_selected_and_mapped
WHERE {find_intm_where_clause}

"""
mac_intm = duckdb.sql(query).df()

# Summing each 0/1 flag gives the number of rows that match the criterion without
# having the exact 1-minute duration.
print(f"# of intermittents identified through ordercategoryname that failed the 1-min rule: {mac_intm.not_1min_ordercategoryname.sum()}")
print(f"# of intermittents identified through ordercategorydescription that failed the 1-min rule: {mac_intm.not_1min_ordercategorydescription.sum()}")
print(f"# of intermittents identified through statusdescription that failed the 1-min rule: {mac_intm.not_1min_statusdescription.sum()}")

# of intermittents identified through ordercategoryname that failed the 1-min rule: 0
# of intermittents identified through ordercategorydescription that failed the 1-min rule: 0
# of intermittents identified through statusdescription that failed the 1-min rule: 1262


From the result it seems `intm_by_statusdescription` is particularly messy, while the other two can be used to safely identify intermittents.

In [53]:
# Pull the rows flagged statusdescription = 'Bolus' that do not have the exact
# 1-minute duration, for manual inspection.
# NOTE(review): `df` is a generic name that any later cell can overwrite.
query = """
SELECT *
FROM mac_intm
WHERE not_1min_statusdescription = 1
"""
df = duckdb.sql(query).df()

In [54]:
# Build the continuous-only frame by excluding every row matched by the
# intermittent predicate. (Whether any remaining row still has a 1-minute
# duration is checked separately, on the resulting frame.)
#
# A plain `NOT (a OR b OR c)` evaluates to NULL - and the row is silently
# dropped - whenever one of the compared columns is NULL and none of the
# comparisons is true. COALESCE(..., FALSE) treats such "unknown" rows as
# non-intermittent so they are kept. Rows with no NULLs are filtered exactly
# as before.
query = f"""
SELECT *
FROM mac_selected_and_mapped
WHERE NOT COALESCE(({find_intm_where_clause}), FALSE)
"""
intermittent_removed = duckdb.sql(query).df()

In [55]:
# Among the rows left after removing intermittents, collect those that still
# have the exact 1-minute duration (duration_1min = 1) for inspection.
query = f"""
SELECT *
FROM intermittent_removed
WHERE duration_1min = 1
"""
cont_yet_1min = duckdb.sql(query).df()

## Resolve duplicates

In [60]:
# Keep every row that shares its (hadm_id, med_category, starttime,
# statusdescription) combination with at least one other row. QUALIFY filters
# on the window count, so all members of each duplicated group are returned.
query = """
SELECT *
FROM intermittent_removed
QUALIFY COUNT(*) OVER (PARTITION BY hadm_id, med_category, starttime, statusdescription) > 1
"""
mac_dups = duckdb.sql(query).df()

In [None]:
# final check: confirm that no duplicates remain
# mac_ldfd.duplicated(subset=meds_keycols, keep=False).sum()

SyntaxError: invalid syntax (434724452.py, line 2)