# Unit Converter Demo

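Demonstration of `standardize_dose_to_limited_units()` using the CLIF demo continuous medication administration data. Every record is assigned the same placeholder weight of 75 kg, so converted values for weight-based units (e.g. `mcg/kg/min`) are illustrative only.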

In [1]:
import sys
from pathlib import Path

import pandas as pd

# Append the parent of the current working directory to the import path
# (presumably so the local clifpy checkout is importable -- confirm the
# notebook is launched from its own folder).
sys.path.append(str(Path.cwd().parent))

from clifpy.utils.unit_converter import standardize_dose_to_limited_units

## Load CLIF Medication Data

In [2]:
# Read the demo medication table and attach a weight_kg column in one step.
# The same dummy weight (75 kg) is used for every row; it exists only so the
# demo has a weight value available.
med_df = pd.read_parquet(
    '../clifpy/data/clif_demo/clif_medication_admin_continuous.parquet'
).assign(weight_kg=75.0)

# Quick overview of what was loaded
print(f"Loaded {len(med_df)} medication records")
print(f"Unique dose units: {med_df['med_dose_unit'].nunique()}")
print("\nDose unit distribution:", med_df['med_dose_unit'].value_counts(), sep="\n")

Loaded 6810 medication records
Unique dose units: 8

Dose unit distribution:
med_dose_unit
mcg/kg/min     2780
mL/hour        2105
units/hour      601
mcg/hour        511
mcg/kg/hour     300
mg/hour         282
mg/min           95
grams/hour       12
Name: count, dtype: int64


## Run Standardization

In [3]:
# Select the columns handed to the converter. .copy() gives an independent
# frame, so nothing done to input_df can alter med_df.
input_df = med_df[['hospitalization_id', 'admin_dttm', 'med_dose', 'med_dose_unit', 'weight_kg']].copy()

# Run standardization. Two frames come back: the converted records and a
# summary table of conversion patterns with their counts.
converted_df, counts_df = standardize_dose_to_limited_units(input_df)

print("Conversion complete!")

Conversion complete!


## Output 1: Converted Data

In [4]:
# Columns to preview: the original dose and unit, the normalized unit,
# the converted dose and unit, and the unit class.
display_cols = [
    'med_dose',
    'med_dose_unit',
    'med_dose_unit_normalized',
    'med_dose_converted',
    'med_dose_unit_converted',
    'unit_class',
]

print("Sample conversions:")
# Keep the first record seen for each original unit, then project the preview columns
converted_df.drop_duplicates('med_dose_unit')[display_cols].head(10)

Sample conversions:


Unnamed: 0,med_dose,med_dose_unit,med_dose_unit_normalized,med_dose_converted,med_dose_unit_converted,unit_class
0,5.0,mL/hour,ml/hr,0.083333,ml/min,rate
46,40.064104,mcg/kg/min,mcg/kg/min,3004.807793,mcg/min,rate
56,2.0,units/hour,u/hr,0.033333,u/min,rate
157,5.0,mg/hour,mg/hr,83.333325,mcg/min,rate
232,,,,,,unrecognized
247,25.000002,mcg/hour,mcg/hr,0.416667,mcg/min,rate
257,1.0,mg/min,mg/min,1000.0,mcg/min,rate
344,0.702905,mcg/kg/hour,mcg/kg/hr,0.878632,mcg/min,rate
1266,4.0,grams/hour,g/hr,66666.666667,mcg/min,rate


## Output 2: Conversion Counts Summary

In [7]:
# Show conversion patterns and frequencies.
# counts_df is the second frame returned by standardize_dose_to_limited_units().
print("Conversion patterns summary:")
counts_df  # shown in the order returned; chain .sort_values('count', ascending=False) to rank by frequency

Conversion patterns summary:


Unnamed: 0,med_dose_unit,med_dose_unit_normalized,med_dose_unit_converted,unit_class,count
0,mcg/kg/min,mcg/kg/min,mcg/min,rate,2780
1,,,,unrecognized,124
2,mg/hour,mg/hr,mcg/min,rate,282
3,mcg/hour,mcg/hr,mcg/min,rate,511
4,mL/hour,ml/hr,ml/min,rate,2105
5,units/hour,u/hr,u/min,rate,601
6,mg/min,mg/min,mcg/min,rate,95
7,mcg/kg/hour,mcg/kg/hr,mcg/min,rate,300
8,grams/hour,g/hr,mcg/min,rate,12


## Summary Statistics

In [6]:
# Conversion statistics: a record counts as converted when its standardized
# unit is non-null.
total = len(converted_df)
converted = converted_df['med_dose_unit_converted'].notna().sum()
# Guard the percentage so an empty frame reports 0.0% instead of nan
pct_converted = 100 * converted / total if total else 0.0

print(f"Total records: {total}")
print(f"Successfully converted: {converted} ({pct_converted:.1f}%)")
print("\nStandardized output units:")
# value_counts() tallies every unit in a single pass and skips nulls;
# sort_index() lists the units alphabetically.
unit_counts = converted_df['med_dose_unit_converted'].value_counts().sort_index()
for unit, count in unit_counts.items():
    print(f"  {unit}: {count} records")

Total records: 6810
Successfully converted: 6686 (98.2%)

Standardized output units:
  mcg/min: 3980 records
  ml/min: 2105 records
  u/min: 601 records
