# Unit Converter Demo

Demonstration of `standardize_dose_to_limited_units()` using CLIF medication data.

In [1]:
import pandas as pd
import sys
from pathlib import Path

# Add parent directory to path
sys.path.append(str(Path().absolute().parent))

from clifpy.utils.unit_converter import (
    standardize_dose_to_limited_units,
    convert_dose_units_by_med_category
)

In [2]:
test_df = pd.read_csv('../tests/fixtures/unit_converter/test_unit_converter - convert_dose_units_by_med_category.csv')

In [3]:
test_df

Unnamed: 0,rn,case,med_category,med_dose,med_dose_unit,med_dose_unit_normalized,unit_class,unit_subclass,med_dose_limited,med_dose_unit_limited,weight_kg,med_dose_unit_preferred,unit_class_preferred,unit_subclass_preferred,med_dose_converted,med_dose_unit_converted,convert_status,note
0,0.0,valid,propofol,6.0,MCG/KG/HR,mcg/kg/hr,rate,mass,7.0,mcg/min,70.0,mcg/kg/min,rate,mass,0.1,mcg/kg/min,success,add weight
1,1.0,valid,propofol,840.0,MCG/HR,mcg/hr,rate,mass,14.0,mcg/min,70.0,mcg/kg/min,rate,mass,0.2,mcg/kg/min,success,add weight
2,2.0,invalid,propofol,7.0,MCG,mcg,amount,mass,7.0,mcg,70.0,mcg/kg/min,rate,mass,7.0,mcg,cannot convert amount to rate,can't convert amount to rate
3,3.0,invalid,propofol,14.0,mcg,mcg,amount,mass,14.0,mcg,70.0,mcg/kg/min,rate,mass,14.0,mcg,cannot convert amount to rate,can't convert amount to rate
4,4.0,invalid,fentanyl,1000.0,mu/min,mu/min,rate,unit,1.0,u/min,74.0,mcg/hr,rate,mass,1.0,u/min,cannot convert unit to mass,can't convert u to mcg in rate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
985,985.0,,,,,,,,0.0,,1055.0,,,,,,,
986,986.0,,,,,,,,0.0,,1056.0,,,,,,,
987,987.0,,,,,,,,0.0,,1057.0,,,,,,,
988,988.0,,,,,,,,0.0,,1058.0,,,,,,,


## Load CLIF Medication Data

In [8]:
# Load medication data
med_df = pd.read_parquet('../clifpy/data/clif_demo/clif_medication_admin_continuous.parquet')

# Add weight_kg column (using dummy weights for demo)
med_df['weight_kg'] = 75.0  # Default weight for demo

print(f"Loaded {len(med_df)} medication records")
print(f"Unique dose units: {med_df['med_dose_unit'].nunique()}")
print("\nDose unit distribution:")
print(med_df.value_counts(['med_category', 'med_dose_unit'], dropna=False))

Loaded 6810 medication records
Unique dose units: 8

Dose unit distribution:
med_category        med_dose_unit
dextrose            mL/hour          1948
norepinephrine      mcg/kg/min       1035
propofol            mcg/kg/min        731
phenylephrine       mcg/kg/min        718
fentanyl            mcg/hour          477
insulin             units/hour        327
dexmedetomidine     mcg/kg/hour       300
heparin             units/hour        207
magnesium           mL/hour           121
midazolam           mg/hour           107
nicardipine         mcg/kg/min        103
amiodarone          mg/min             88
magnesium           NaN                86
furosemide          mg/hour            69
vasopressin         units/hour         67
dobutamine          mcg/kg/min         50
esmolol             mcg/kg/min         47
diltiazem           mg/hour            43
epinephrine         mcg/kg/min         41
tpn                 mL/hour            36
pantoprazole        mg/hour            35
dopamin

## Run Standardization

In [5]:
# Prepare required columns
input_df = med_df[['hospitalization_id', 'admin_dttm', 'med_dose', 'med_dose_unit', 'weight_kg']].copy()

# Rename for compatibility
# input_df = input_df.rename(columns={'med_dose_unit': 'med_dose_unit'})

# Run standardization
limited_df, counts_df = standardize_dose_to_limited_units(input_df)

print("Conversion complete!")

Conversion complete!


In [6]:
preferred_units = {
    'propofol': 'mcg/kg/min',
    'midazolam': 'mg/hr',
    # 'fentanyl': 'mcg/hr',
    # 'insulin': 'u/hr',
    # 'norepinephrine': 'ng/kg/min',
    # 'dextrose': 'g',
    # 'heparin': 'l/hr',
    # 'bivalirudin': 'ml/hr',
    # 'oxytocin': 'mu',
    # 'lactated_ringers_solution': 'ml',
    # 'liothyronine': 'u/hr',
    # 'zidovudine': 'iu/hr'
    }

preferred_units_df = convert_dose_units_by_med_category(
    med_df,
    preferred_units = preferred_units,
    override = False
)

## Output 1: Converted Data

In [7]:
# Show sample conversions
display_cols = ['med_dose', 'med_dose_unit', 'med_dose_unit_normalized', 
                'med_dose_converted', 'med_dose_unit_converted', 'unit_class']

print("Sample conversions:")
limited_df[display_cols].drop_duplicates('med_dose_unit').head(10)

Sample conversions:


KeyError: "['med_dose_converted', 'med_dose_unit_converted'] not in index"

## Output 2: Conversion Counts Summary

In [None]:
# Show conversion patterns and frequencies
print("Conversion patterns summary:")
counts_df #.sort_values('count', ascending=False)

Conversion patterns summary:


Unnamed: 0,med_dose_unit,med_dose_unit_normalized,med_dose_unit_converted,unit_class,count
0,mg/hour,mg/hr,mcg/min,rate,282
1,grams/hour,g/hr,mcg/min,rate,12
2,mcg/kg/min,mcg/kg/min,mcg/min,rate,2780
3,,,,unrecognized,124
4,mcg/hour,mcg/hr,mcg/min,rate,511
5,mL/hour,ml/hr,ml/min,rate,2105
6,units/hour,u/hr,u/min,rate,601
7,mg/min,mg/min,mcg/min,rate,95
8,mcg/kg/hour,mcg/kg/hr,mcg/min,rate,300


## Summary Statistics

In [None]:
# Conversion statistics
total = len(converted_df)
converted = converted_df['med_dose_unit_converted'].notna().sum()

print(f"Total records: {total}")
print(f"Successfully converted: {converted} ({100*converted/total:.1f}%)")
print(f"\nStandardized output units:")
for unit in sorted(converted_df['med_dose_unit_converted'].dropna().unique()):
    count = (converted_df['med_dose_unit_converted'] == unit).sum()
    print(f"  {unit}: {count} records")

Total records: 6810
Successfully converted: 6686 (98.2%)

Standardized output units:
  mcg/min: 3980 records
  ml/min: 2105 records
  u/min: 601 records
