# Init

In [1]:
cd ..

/Users/wliao0504/code/clif/CLIF-MIMIC


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
pwd

'/Users/wliao0504/code/clif/CLIF-MIMIC'

In [11]:
# src/tables/patient_assessments.py
import numpy as np
import pandas as pd
import logging
import duckdb
import pandera as pa
from importlib import reload
import src.utils as utils
from typing import Dict, List
# reload(utils)
from src.utils import construct_mapper_dict, fetch_mimic_events, load_mapping_csv, \
    get_relevant_item_ids, find_duplicates, rename_and_reorder_cols, save_to_rclif, \
    convert_and_sort_datetime, setup_logging, con, REPO_ROOT, mimic_table_pathfinder, \
    search_mimic_items
from src.utils_qa import all_null_check

# Utils

In [None]:
CLIF_CRRT_SCHEMA = pa.DataFrameSchema(
    {
        "hospitalization_id": pa.Column(str, nullable=False),
        "recorded_dttm": pa.Column(pd.DatetimeTZDtype(unit="us", tz="UTC"), nullable=False),
        "crrt_mode_name": pa.Column(str, checks=[all_null_check], nullable=True),
        "crrt_mode_category": pa.Column(str, checks=[all_null_check], nullable=True),
        "dialysis_machine_name": pa.Column(str, checks=[all_null_check], nullable=True),
        "blood_flow_rate": pa.Column(float, nullable=True),
        "pre_filter_replacement_fluid_rate": pa.Column(float, nullable=True),
        "post_filter_replacement_fluid_rate": pa.Column(float, nullable=True),
        "dialysate_flow_rate": pa.Column(float, nullable=True),
        "ultrafiltration_out": pa.Column(float, nullable=True),
    },  
    strict=True,
)

CRRT_COLUMNS: List[str] = list(CLIF_CRRT_SCHEMA.columns.keys())

# Dev

In [10]:
dialysis_mapping = load_mapping_csv("dialysis")
dialysis_mapping.head()

Unnamed: 0,clif_variable,table,itemid,label,abbreviation,linksto,category,unitname,param_type,count,value_instances,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14
0,,,224146,System Integrity,System Integrity,chartevents,Dialysis,,Text,241835,"{'Active': 176983, 'Clots Present': 43537, 'Ne...",,,,
1,ultrafiltration_out,crrt_therapy,226457,Ultrafiltrate Output,Ultrafiltrate Output,chartevents,Dialysis,mL,Numeric,217653,"Max: 16800.0, Min: -600.0, Mean: 365.13",,,this acounts for rr in addition to dr,should be highest for H > DF > D
2,,,224191,Hourly Patient Fluid Removal,Hourly PFR,chartevents,Dialysis,mL,Numeric,209426,"Max: 150179.0, Min: -950.0, Mean: 374.84",,,,HDF > HD > H
3,,,224149,Access Pressure,Access Pressure,chartevents,Dialysis,mmHg,Numeric,206268,"Max: 500.0, Min: -308135.0, Mean: -52.29",,,,
4,,,224150,Filter Pressure,Filter Pressure,chartevents,Dialysis,mmHg,Numeric,206263,"Max: 60132.0, Min: -503.0, Mean: 122.07",,,,


In [5]:
crrt_items = dialysis_mapping[dialysis_mapping["table"].astype(str).str.contains("crrt")]
crrt_items

Unnamed: 0,clif_variable,table,itemid,label,abbreviation,linksto,category,unitname,param_type,count,value_instances,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14
1,ultrafiltration_out,crrt_therapy,226457,Ultrafiltrate Output,Ultrafiltrate Output,chartevents,Dialysis,mL,Numeric,217653,"Max: 16800.0, Min: -600.0, Mean: 365.13",,,this acounts for rr in addition to dr,should be highest for H > DF > D
8,dialysate_flow_rate,crrt_therapy,224154,Dialysate Rate,Dialysate Rate,chartevents,Dialysis,ml/hr,Numeric,200611,"Max: 1400000.0, Min: 0.0, Mean: 743.96",,,,
12,blood_flow_rate,crrt_therapy,224144,Blood Flow (ml/min),Blood Flow (ml/min),chartevents,Dialysis,ml/min,Numeric,196378,"Max: 1801300.0, Min: -120.0, Mean: 167.79",,,,
13,pre_filter_replacement_fluid_rate,crrt_therapy,228005,PBP (Prefilter) Replacement Rate,PBP (Prefilter) Replacement Rate,chartevents,Dialysis,ml/hr,Numeric,183428,"Max: 1822540.0, Min: -601600.0, Mean: 1590.45",,,,
14,post_filter_replacement_fluid_rate,crrt_therapy,228006,Post Filter Replacement Rate,Post Filter Replacement Rate,chartevents,Dialysis,ml/hr,Numeric,179745,"Max: 5000.0, Min: -35300.0, Mean: 232.86",,,,
18,crrt_mode_name,crrt_therapy,227290,CRRT mode,CRRT mode,chartevents,Dialysis,,Text,121114,"{'CVVHDF': 116065, 'CVVHD': 2874, 'CVVH': 2113...",,,,HDF: both rate


In [6]:
crrt_id_to_variable_mapper = dict(zip(crrt_items["itemid"], crrt_items["clif_variable"]))
crrt_id_to_variable_mapper

{226457: 'ultrafiltration_out',
 224154: 'dialysate_flow_rate',
 224144: 'blood_flow_rate',
 228005: 'pre_filter_replacement_fluid_rate',
 228006: 'post_filter_replacement_fluid_rate',
 227290: 'crrt_mode_name'}

In [7]:
crrt_item_ids = crrt_items["itemid"].tolist()
crrt_item_ids

[226457, 224154, 224144, 228005, 228006, 227290]

In [8]:
crrt_events = fetch_mimic_events(crrt_item_ids)

In [9]:
crrt_events.head()

Unnamed: 0,itemid,label,hadm_id,stay_id,time,value,valueuom
0,224154,Dialysate Rate,28317408,32824762,2150-05-21 07:00:00,800,ml/hr
1,228005,PBP (Prefilter) Replacement Rate,28317408,32824762,2150-05-21 07:00:00,1400,ml/hr
2,228006,Post Filter Replacement Rate,28317408,32824762,2150-05-21 07:00:00,200,ml/hr
3,224154,Dialysate Rate,28317408,32824762,2150-05-21 08:00:00,800,ml/hr
4,228005,PBP (Prefilter) Replacement Rate,28317408,32824762,2150-05-21 08:00:00,1400,ml/hr


In [15]:
crrt_events["crrt_variable"] = crrt_events["itemid"].map(crrt_id_to_variable_mapper)
crrt_events.head()

Unnamed: 0,itemid,label,hadm_id,stay_id,time,value,valueuom,crrt_variable
0,224154,Dialysate Rate,28317408,32824762,2150-05-21 07:00:00,800,ml/hr,dialysate_flow_rate
1,228005,PBP (Prefilter) Replacement Rate,28317408,32824762,2150-05-21 07:00:00,1400,ml/hr,pre_filter_replacement_fluid_rate
2,228006,Post Filter Replacement Rate,28317408,32824762,2150-05-21 07:00:00,200,ml/hr,post_filter_replacement_fluid_rate
3,224154,Dialysate Rate,28317408,32824762,2150-05-21 08:00:00,800,ml/hr,dialysate_flow_rate
4,228005,PBP (Prefilter) Replacement Rate,28317408,32824762,2150-05-21 08:00:00,1400,ml/hr,pre_filter_replacement_fluid_rate


In [36]:
crrt_events.value_counts(["crrt_variable", "valueuom"])

crrt_variable                       valueuom
ultrafiltration_out                 mL          365849
dialysate_flow_rate                 ml/hr       318185
blood_flow_rate                     ml/min      313631
pre_filter_replacement_fluid_rate   ml/hr       300423
post_filter_replacement_fluid_rate  ml/hr       295814
Name: count, dtype: int64

In [35]:
crrt_events_w = crrt_events.pivot(
    index=["hadm_id", "time"], columns=["crrt_variable"], values="value"
    ).reset_index().rename_axis(None, axis=1)

crrt_events_w.head()

Unnamed: 0,hadm_id,time,blood_flow_rate,crrt_mode_name,dialysate_flow_rate,post_filter_replacement_fluid_rate,pre_filter_replacement_fluid_rate,ultrafiltration_out
0,20003427,2184-05-17 01:00:00,,,,,,66.0
1,20003427,2184-05-17 01:40:00,150.0,,700.0,200.0,1600.0,
2,20003427,2184-05-17 02:00:00,150.0,,700.0,200.0,1600.0,471.0
3,20003427,2184-05-17 03:00:00,150.0,,700.0,200.0,1600.0,638.0
4,20003427,2184-05-17 04:00:00,150.0,,700.0,200.0,1600.0,527.0


In [29]:
crrt_events_w.value_counts("crrt_mode_name")

crrt_mode_name
CVVHDF    192933
CVVHD       5607
CVVH        4427
SCUF          68
Name: count, dtype: int64

In [40]:
query = """
SELECT
    CAST(hadm_id as VARCHAR) as hospitalization_id,
    CAST(time as TIMESTAMP) as recorded_dttm,
    CAST(crrt_mode_name as VARCHAR) as crrt_mode_name,
    CAST(lower(crrt_mode_name) as VARCHAR) as crrt_mode_category,
    CAST(NULL as VARCHAR) as dialysis_machine_name,
    CAST(blood_flow_rate as FLOAT) * 60 as blood_flow_rate, -- convert from mL/min to mL/hr
    CAST(pre_filter_replacement_fluid_rate as FLOAT) as pre_filter_replacement_fluid_rate,
    CAST(post_filter_replacement_fluid_rate as FLOAT) as post_filter_replacement_fluid_rate,
    CAST(dialysate_flow_rate as FLOAT) as dialysate_flow_rate,
    CAST(ultrafiltration_out as FLOAT) as ultrafiltration_out
FROM crrt_events_w
"""
df = con.execute(query).df()
df.head()

Unnamed: 0,hospitalization_id,recorded_dttm,crrt_mode_name,crrt_mode_category,dialysis_machine_name,blood_flow_rate,pre_filter_replacement_fluid_rate,post_filter_replacement_fluid_rate,dialysate_flow_rate,ultrafiltration_out
0,20003427,2184-05-17 01:00:00,,,,,,,,66.0
1,20003427,2184-05-17 01:40:00,,,,9000.0,1600.0,200.0,700.0,
2,20003427,2184-05-17 02:00:00,,,,9000.0,1600.0,200.0,700.0,471.0
3,20003427,2184-05-17 03:00:00,,,,9000.0,1600.0,200.0,700.0,638.0
4,20003427,2184-05-17 04:00:00,,,,9000.0,1600.0,200.0,700.0,527.0


# Validate

In [42]:
import src.tables.crrt_therapy as crrt_therapy
reload(crrt_therapy)

test_outcome = crrt_therapy._test()

{'schema_tested': True}
