In [3]:
import sys
from pathlib import Path
from clifpy import  RespiratorySupport

In [4]:
def find_project_root(start=None):
    """Locate the project root by walking upward from ``start``.

    A directory is treated as the root when it contains a ``pyproject.toml``
    file or a ``clifpy`` sub-directory.

    Parameters
    ----------
    start : str or os.PathLike, optional
        Directory to begin the search from. When omitted (or falsy), the
        current working directory is used.

    Returns
    -------
    pathlib.Path
        The first matching directory, checking ``start`` itself before each
        of its parents. If no marker is found anywhere, the resolved starting
        directory is returned unchanged.
    """
    # Resolve to an absolute path first: a relative path such as "." has no
    # entries in ``parents`` above the working directory, so without this the
    # upward walk would stop before it could reach the real root.
    p = Path(start or Path.cwd()).resolve()
    for d in [p, *p.parents]:
        if (d / "pyproject.toml").exists() or (d / "clifpy").is_dir():
            return d
    return p

# Locate the repository root once and make sure it is on the import path.
# NOTE(review): clifpy is already imported in the imports cell, i.e. before
# this sys.path entry is added — confirm that ordering is intended.
project_root = find_project_root()
if not any(entry == str(project_root) for entry in sys.path):
    sys.path.insert(0, str(project_root))

# Input data lives under clifpy/data/clif_demo; outputs go to examples/output.
DATA_DIR = project_root.joinpath("clifpy", "data", "clif_demo").resolve()
OUTPUT_DIR = project_root.joinpath("examples", "output").resolve()

# Settings handed to the table loader in the next cell.
FILETYPE = "parquet"
TIMEZONE = "US/Eastern"

In [5]:
# Load the respiratory-support table from DATA_DIR using the configured file
# type and timezone. Per the log printed below, this reads
# clif_respiratory_support.parquet and converts recorded_dttm from UTC to the
# requested timezone. Paths are passed as plain strings.
resp_support_table = RespiratorySupport.from_file(
    data_directory=str(DATA_DIR),
    filetype=FILETYPE,
    timezone=TIMEZONE,
    output_directory=str(OUTPUT_DIR),
)

Loading clif_respiratory_support.parquet
Data loaded successfully from clif_respiratory_support.parquet
recorded_dttm: null count before conversion= 0
recorded_dttm: Converted from UTC to your timezone (US/Eastern).
recorded_dttm: null count after conversion= 0


In [6]:
# Run validation on the loaded table. As the printed message says, any
# problems found are reported through the `errors` attribute.
resp_support_table.validate()

Validation completed with 7 error(s). See `errors` attribute.


In [7]:
# Show the validation findings: a list of dicts, each carrying a 'type' and
# the affected 'column' plus type-specific details.
resp_support_table.errors

[{'type': 'null_values', 'column': 'device_category', 'count': 763},
 {'type': 'invalid_category',
  'column': 'device_category',
  'values': ['imv',
   'face mask',
   'nasal cannula',
   'high flow nc',
   'other',
   'nippv',
   'cpap']},
 {'type': 'invalid_category',
  'column': 'mode_category',
  'values': ['assist control-volume control',
   'pressure support/cpap',
   'pressure-regulated volume control',
   'pressure control',
   'other',
   'volume support']},
 {'type': 'datatype_mismatch', 'column': 'tracheostomy', 'expected': 'INT'},
 {'type': 'datetime_timezone',
  'column': 'recorded_dttm',
  'timezone': 'US/Eastern',
  'expected': 'UTC',
 {'type': 'invalid_categorical_values',
  'column': 'device_category',
  'invalid_values': ['imv',
   'face mask',
   'nasal cannula',
   'high flow nc',
   'other',
   'nippv',
   'cpap'],
  'invalid_counts': {'imv': 1257,
   'face mask': 248,
   'nasal cannula': 848,
   'high flow nc': 81,
   'other': 3,
   'nippv': 28,
   'cpap': 4},
  

In [8]:
# Boolean validity check; it evaluates to False for this table.
# NOTE(review): presumably this reflects the errors recorded by validate() — confirm.
resp_support_table.isvalid()

False

In [9]:
# Inspect the column dtypes of the loaded frame. recorded_dttm is
# timezone-aware (US/Eastern) and tracheostomy is bool, which lines up with the
# datetime_timezone and datatype_mismatch (expected INT) entries in `errors`.
resp_support_table.df.dtypes

hospitalization_id                           string[python]
recorded_dttm                    datetime64[us, US/Eastern]
device_name                                          object
device_category                                      object
vent_brand_name                                      object
mode_name                                            object
mode_category                                        object
tracheostomy                                           bool
fio2_set                                            float64
lpm_set                                             float64
tidal_volume_set                                    float64
resp_rate_set                                       float64
pressure_control_set                                float64
pressure_support_set                                float64
flow_rate_set                                       float64
peak_inspiratory_pressure_set                       float64
inspiratory_time_set                    

In [10]:
# Run the waterfall processing step with progress logging enabled. The log
# below shows timestamps being converted to UTC for processing and back to
# US/Eastern afterwards; the result is kept in `processed` for inspection.
processed = resp_support_table.waterfall(verbose=True)

  & rs["mode_category"].str.contains(


Converting timezone from US/Eastern to UTC for waterfall processing
✦ Phase 0: initialise & create hourly scaffold
  • Building hourly scaffold via DuckDB
  • Scaffold rows created: 9,928
✦ Phase 1: heuristic inference of device & mode
✦ Phase 2: build hierarchical IDs
✦ Phase 3: forward-only numeric fill inside mode_name_id blocks
  • applying waterfall fill to 105 encounters


  return getattr(df, df_function)(wrapper, **kwargs)
Waterfall fill by mode_name_id: 100%|██████████| 533/533 [00:00<00:00, 4475.12it/s]

✦ Phase 4: final dedup & ordering
[OK] Respiratory-support waterfall complete.
Converting timezone from UTC back to US/Eastern after processing





In [11]:
# Display the processed frame (pandas elides the middle rows and columns).
# The output includes an `is_scaffold` flag and the device/mode ID columns
# (device_cat_id, device_id, mode_cat_id, mode_name_id).
processed.df

Unnamed: 0,hospitalization_id,recorded_dttm,device_name,device_category,vent_brand_name,mode_name,mode_category,tracheostomy,fio2_set,lpm_set,...,plateau_pressure_obs,peak_inspiratory_pressure_obs,peep_obs,minute_vent_obs,mean_airway_pressure_obs,is_scaffold,device_cat_id,device_id,mode_cat_id,mode_name_id
0,20044587,2113-08-25 12:00:00-05:00,endotracheal tube,imv,Drager,,,False,,,...,14.0,,5.0,,,False,1,1,1,1
1,20044587,2113-08-25 12:34:00-05:00,endotracheal tube,imv,,cmv/assist/autoflow,assist control-volume control,False,1.0,,...,,16.0,,8.9,8.0,False,1,1,2,2
2,20044587,2113-08-25 12:58:00-05:00,endotracheal tube,imv,,cmv/assist/autoflow,assist control-volume control,False,0.7,,...,,,,,,False,1,1,2,2
3,20044587,2113-08-25 12:59:59-05:00,endotracheal tube,imv,,cmv/assist/autoflow,assist control-volume control,False,0.7,,...,,,,,,True,1,1,2,2
4,20044587,2113-08-25 13:52:00-05:00,endotracheal tube,imv,,cmv/assist/autoflow,assist control-volume control,False,0.4,,...,,,,,,False,1,1,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13155,29974575,2131-03-08 01:59:59-05:00,nasal cannula,nasal cannula,,standby,,False,,3.0,...,,,,,,True,2,2,11,16
13156,29974575,2131-03-08 02:59:59-05:00,nasal cannula,nasal cannula,,standby,,False,,3.0,...,,,,,,True,2,2,11,16
13157,29974575,2131-03-08 03:59:59-05:00,nasal cannula,nasal cannula,,standby,,False,,3.0,...,,,,,,True,2,2,11,16
13158,29974575,2131-03-08 04:00:00-05:00,nasal cannula,nasal cannula,,standby,,False,,3.0,...,,,,,,False,2,2,11,16
