## Standard test for the wide dataset function

In [None]:
import pandas as pd
from clifpy import ClifOrchestrator

# Build the orchestrator against the bundled demo data.
# Point `data_directory` at your own data location if it differs.
co = ClifOrchestrator(
    data_directory='../clifpy/data/clif_demo/',  # Adjust this path
    filetype='parquet',  # or 'csv' depending on your data format
    timezone='UTC',
    output_directory=None,  # original note: creates an 'output' directory in the current working directory
)

# Two-hospitalization cohort; every row carries its own start/end window.
cohort_rows = [
    ('23559586', '2137-01-01 14:29:00-06:00', '2137-08-25 14:00:00-06:00'),
    ('20626031', '2132-12-14 08:00:00-06:00', '2132-12-20 01:00:00-06:00'),
]
cohort_df = pd.DataFrame({
    'hospitalization_id': [row[0] for row in cohort_rows],
    'start_time': pd.to_datetime([row[1] for row in cohort_rows]),
    'end_time': pd.to_datetime([row[2] for row in cohort_rows]),
})

# Tables are loaded explicitly here instead of through the `tables_to_load`
# argument of create_wide_dataset.
for table_name in ('vitals', 'labs', 'patient_assessments', 'medication_admin_continuous'):
    co.load_table(table_name)

# Categories to keep from each source table.
category_filters = {
    'labs': ['creatinine', 'platelet_count', 'po2_arterial', 'bilirubin_total'],
    'vitals': ['map', 'spo2', 'weight_kg'],
    'patient_assessments': ['gcs_total', 'sbt_delivery_pass_fail', 'braden_activity'],
    "medication_admin_continuous": [
        "norepinephrine", "epinephrine", "phenylephrine", "vasopressin",
        "dopamine", "angiotensin", "dobutamine", "milrinone",
    ],
}

# Create the wide dataset.
# Original note on `sample=True`: "Use 20 random hospitalizations".
# NOTE(review): it is passed together with `cohort_df` — confirm how the two interact.
wide_df = co.create_wide_dataset(
    category_filters=category_filters,
    sample=True,
    cohort_df=cohort_df,
)

# Summarise the result stored on the orchestrator.
print(f"Wide dataset created with shape: {co.wide_df.shape}")
print(f"Columns: {list(co.wide_df.columns)}")

## expected shape -> Wide dataset created with shape: (924, 29)

In [None]:
# Inspect the dtype of every column in the wide dataset built above.
co.wide_df.dtypes

## Wide dataset test with encounter stitching

In [4]:
import pandas as pd
from clifpy import ClifOrchestrator
import sys
from pathlib import Path
import pandas as pd
import numpy as np

def find_project_root(start=None):
    """Return the nearest ancestor directory that looks like the project root.

    Starting at ``start`` (default: the current working directory), walk
    upward and return the first directory that contains ``pyproject.toml``
    or a ``clifpy`` sub-directory.

    Parameters
    ----------
    start : str or Path, optional
        Directory to begin the search from. It is resolved to an absolute
        path first; a relative path such as ``"."`` has no parents, so
        without resolving it the upward walk would never leave it.

    Returns
    -------
    Path
        Absolute path of the first matching directory, or the resolved
        starting directory when no ancestor matches.
    """
    origin = Path(start or Path.cwd()).resolve()
    for candidate in (origin, *origin.parents):
        if (candidate / "pyproject.toml").exists() or (candidate / "clifpy").is_dir():
            return candidate
    return origin

# Locate the repository root and make it importable from this notebook.
project_root = find_project_root()
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)

# Run configuration.
FILETYPE = "parquet"
TIMEZONE = "US/Eastern"
DATA_DIR = project_root.joinpath("clifpy", "data", "clif_demo").resolve()
OUTPUT_DIR = project_root.joinpath("examples", "output").resolve()

print(f"Data directory: {DATA_DIR}")
print(f"Output directory: {OUTPUT_DIR}")


# Orchestrator settings, with encounter stitching switched on.
orchestrator_settings = dict(
    data_directory=str(DATA_DIR),
    filetype=FILETYPE,
    timezone=TIMEZONE,
    output_directory=str(OUTPUT_DIR),
    stitch_encounter=True,   # Enable encounter stitching
    stitch_time_interval=6,  # 6-hour window (default, per the original note)
)
clif = ClifOrchestrator(**orchestrator_settings)

# Only the hospitalization and ADT tables are initialized in this cell.
clif.initialize(['hospitalization', 'adt'])

# Fetch the encounter mapping and summarise it when one is returned.
encounter_mapping = clif.get_encounter_mapping()

if encounter_mapping is not None:
    print(f"Total hospitalizations: {len(encounter_mapping)}")
    print(f"Total encounter blocks: {encounter_mapping['encounter_block'].nunique()}")
    print(f"\nEncounter mapping shape: {encounter_mapping.shape}")

Data directory: /Users/sudo_sage/Documents/WORK/clifpy/clifpy/data/clif_demo
Output directory: /Users/sudo_sage/Documents/WORK/clifpy/examples/output
Using directly provided parameters
ClifOrchestrator initialized.
Using directly provided parameters
Loading clif_hospitalization.parquet
Data loaded successfully from clif_hospitalization.parquet
admission_dttm: null count before conversion= 0
admission_dttm: Converted from UTC to your timezone (US/Eastern).
admission_dttm: null count after conversion= 0
discharge_dttm: null count before conversion= 0
discharge_dttm: Converted from UTC to your timezone (US/Eastern).
discharge_dttm: null count after conversion= 0
Using directly provided parameters
Loading clif_adt.parquet
Data loaded successfully from clif_adt.parquet
in_dttm: null count before conversion= 0
in_dttm: Converted from UTC to your timezone (US/Eastern).
in_dttm: null count after conversion= 0
out_dttm: null count before conversion= 275
out_dttm: Converted from UTC to your time

In [5]:
# Two-hospitalization cohort; every row carries its own start/end window.
cohort_rows = [
    ('23559586', '2137-01-01 14:29:00-06:00', '2137-08-25 14:00:00-06:00'),
    ('20626031', '2132-12-14 08:00:00-06:00', '2132-12-20 01:00:00-06:00'),
]
cohort_df = pd.DataFrame({
    'hospitalization_id': [row[0] for row in cohort_rows],
    'start_time': pd.to_datetime([row[1] for row in cohort_rows]),
    'end_time': pd.to_datetime([row[2] for row in cohort_rows]),
})

# Tables are loaded explicitly here instead of through the `tables_to_load`
# argument of create_wide_dataset.
for table_name in ('vitals', 'labs', 'patient_assessments', 'medication_admin_continuous'):
    clif.load_table(table_name)

# Categories to keep from each source table.
category_filters = {
    'labs': ['creatinine', 'platelet_count', 'po2_arterial', 'bilirubin_total'],
    'vitals': ['map', 'spo2', 'weight_kg'],
    'patient_assessments': ['gcs_total'],
    "medication_admin_continuous": [
        "norepinephrine", "epinephrine", "phenylephrine", "vasopressin",
        "dopamine", "angiotensin", "dobutamine", "milrinone",
    ],
}

# Create the wide dataset; the result is read back from `clif.wide_df` below.
# Original note on `sample=True`: "Use 20 random hospitalizations".
# NOTE(review): it is passed together with `cohort_df` — confirm how the two interact.
clif.create_wide_dataset(
    category_filters=category_filters,
    sample=True,
    cohort_df=cohort_df,
)

# Summarise the result stored on the orchestrator.
print(f"Wide dataset created with shape: {clif.wide_df.shape}")
print(f"Columns: {list(clif.wide_df.columns)}")

Using directly provided parameters
Loading clif_vitals.parquet
Data loaded successfully from clif_vitals.parquet
recorded_dttm: null count before conversion= 0
recorded_dttm: Converted from UTC to your timezone (US/Eastern).
recorded_dttm: null count after conversion= 0
Using directly provided parameters
Loading clif_labs.parquet
Data loaded successfully from clif_labs.parquet
lab_order_dttm: null count before conversion= 43419
lab_order_dttm: Converted from UTC to your timezone (US/Eastern).
lab_order_dttm: null count after conversion= 43419
lab_collect_dttm: null count before conversion= 0
lab_collect_dttm: Converted from UTC to your timezone (US/Eastern).
lab_collect_dttm: null count after conversion= 0
lab_result_dttm: null count before conversion= 0
lab_result_dttm: Converted from UTC to your timezone (US/Eastern).
lab_result_dttm: null count after conversion= 0
Using directly provided parameters
Loading clif_patient_assessments.parquet
Data loaded successfully from clif_patient_a

## Test with medication dose-unit conversion

In [1]:
import pandas as pd
from clifpy import ClifOrchestrator
import sys
from pathlib import Path
import pandas as pd
import numpy as np

def find_project_root(start=None):
    """Return the nearest ancestor directory that looks like the project root.

    Starting at ``start`` (default: the current working directory), walk
    upward and return the first directory that contains ``pyproject.toml``
    or a ``clifpy`` sub-directory.

    Parameters
    ----------
    start : str or Path, optional
        Directory to begin the search from. It is resolved to an absolute
        path first; a relative path such as ``"."`` has no parents, so
        without resolving it the upward walk would never leave it.

    Returns
    -------
    Path
        Absolute path of the first matching directory, or the resolved
        starting directory when no ancestor matches.
    """
    origin = Path(start or Path.cwd()).resolve()
    for candidate in (origin, *origin.parents):
        if (candidate / "pyproject.toml").exists() or (candidate / "clifpy").is_dir():
            return candidate
    return origin

# Locate the repository root and make it importable from this notebook.
project_root = find_project_root()
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)

# Run configuration.
FILETYPE = "parquet"
TIMEZONE = "US/Eastern"
DATA_DIR = project_root.joinpath("clifpy", "data", "clif_demo").resolve()
OUTPUT_DIR = project_root.joinpath("examples", "output").resolve()

print(f"Data directory: {DATA_DIR}")
print(f"Output directory: {OUTPUT_DIR}")


# Orchestrator settings, with encounter stitching switched on.
orchestrator_settings = dict(
    data_directory=str(DATA_DIR),
    filetype=FILETYPE,
    timezone=TIMEZONE,
    output_directory=str(OUTPUT_DIR),
    stitch_encounter=True,   # Enable encounter stitching
    stitch_time_interval=6,  # 6-hour window (default, per the original note)
)
clif = ClifOrchestrator(**orchestrator_settings)

# Only the hospitalization and ADT tables are initialized in this cell.
clif.initialize(['hospitalization', 'adt'])

# Fetch the encounter mapping and summarise it when one is returned.
encounter_mapping = clif.get_encounter_mapping()

if encounter_mapping is not None:
    print(f"Total hospitalizations: {len(encounter_mapping)}")
    print(f"Total encounter blocks: {encounter_mapping['encounter_block'].nunique()}")
    print(f"\nEncounter mapping shape: {encounter_mapping.shape}")

Data directory: /Users/sudo_sage/Documents/WORK/clifpy/clifpy/data/clif_demo
Output directory: /Users/sudo_sage/Documents/WORK/clifpy/examples/output
Using directly provided parameters
ClifOrchestrator initialized.
Using directly provided parameters
Loading clif_hospitalization.parquet
Data loaded successfully from clif_hospitalization.parquet
admission_dttm: null count before conversion= 0
admission_dttm: Converted from UTC to your timezone (US/Eastern).
admission_dttm: null count after conversion= 0
discharge_dttm: null count before conversion= 0
discharge_dttm: Converted from UTC to your timezone (US/Eastern).
discharge_dttm: null count after conversion= 0
Using directly provided parameters
Loading clif_adt.parquet
Data loaded successfully from clif_adt.parquet
in_dttm: null count before conversion= 0
in_dttm: Converted from UTC to your timezone (US/Eastern).
in_dttm: null count after conversion= 0
out_dttm: null count before conversion= 275
out_dttm: Converted from UTC to your time

In [2]:
# Optional cohort restriction, disabled for this run. To enable it, uncomment
# the frame below and pass `sample=True` / `cohort_df=cohort_df` to
# create_wide_dataset.
# cohort_df = pd.DataFrame({
#     'hospitalization_id': ['23559586', '20626031'],
#     'start_time': pd.to_datetime(['2137-01-01 14:29:00-06:00', '2132-12-14 08:00:00-06:00']),
#     'end_time': pd.to_datetime(['2137-08-25 14:00:00-06:00', '2132-12-20 01:00:00-06:00'])
# })

# Tables are loaded explicitly here instead of through the `tables_to_load`
# argument of create_wide_dataset.
for table_name in ('vitals', 'labs', 'patient_assessments', 'medication_admin_continuous'):
    clif.load_table(table_name)

# Preferred dose unit per continuous medication.
# NOTE(review): none of these medications appear in the
# 'medication_admin_continuous' category filter below — confirm that is intended.
preferred_units_cont = {
    "propofol": "mcg/min",
    "fentanyl": "mcg/hr",
    "insulin": "u/hr",
    "midazolam": "mg/hr",
    "heparin": "u/min",
}

# Convert dose units before the wide dataset is assembled.
clif.convert_dose_units_for_continuous_meds(preferred_units_cont)

# Categories to keep from each source table.
category_filters = {
    'labs': ['creatinine', 'platelet_count', 'po2_arterial', 'bilirubin_total'],
    'vitals': ['map', 'spo2', 'weight_kg'],
    'patient_assessments': ['gcs_total'],
    "medication_admin_continuous": [
        "norepinephrine", "epinephrine", "phenylephrine", "vasopressin",
        "dopamine", "angiotensin", "dobutamine", "milrinone",
    ],
}

# Create the wide dataset; no `sample` or `cohort_df` argument is passed here.
clif.create_wide_dataset(category_filters=category_filters)

# Summarise the result stored on the orchestrator.
print(f"Wide dataset created with shape: {clif.wide_df.shape}")
print(f"Columns: {list(clif.wide_df.columns)}")

Using directly provided parameters
Loading clif_vitals.parquet
Data loaded successfully from clif_vitals.parquet
recorded_dttm: null count before conversion= 0
recorded_dttm: Converted from UTC to your timezone (US/Eastern).
recorded_dttm: null count after conversion= 0
Using directly provided parameters
Loading clif_labs.parquet
Data loaded successfully from clif_labs.parquet
lab_order_dttm: null count before conversion= 43419
lab_order_dttm: Converted from UTC to your timezone (US/Eastern).
lab_order_dttm: null count after conversion= 43419
lab_collect_dttm: null count before conversion= 0
lab_collect_dttm: Converted from UTC to your timezone (US/Eastern).
lab_collect_dttm: null count after conversion= 0
lab_result_dttm: null count before conversion= 0
lab_result_dttm: Converted from UTC to your timezone (US/Eastern).
lab_result_dttm: null count after conversion= 0
Using directly provided parameters
Loading clif_patient_assessments.parquet
Data loaded successfully from clif_patient_a

In [3]:
# Show the column labels of the wide dataset stored on the orchestrator.
clif.wide_df.columns

Index(['hospitalization_id', 'patient_id', 'age_at_admission', 'event_time',
       'dobutamine_mcg_min', 'dopamine_mcg_min', 'epinephrine_mcg_min',
       'milrinone_mcg_min', 'norepinephrine_mcg_min', 'phenylephrine_mcg_min',
       'vasopressin_u_min', 'map', 'spo2', 'weight_kg', 'bilirubin_total',
       'creatinine', 'platelet_count', 'po2_arterial', 'hospital_id',
       'in_dttm', 'out_dttm', 'location_category', 'location_type',
       'encounter_block', 'day_number', 'hosp_id_day_key', 'angiotensin',
       'gcs_total'],
      dtype='object')

In [5]:
# List the distinct `med_category` values in the loaded continuous-medication table.
clif.medication_admin_continuous.df.med_category.unique()

array(['dextrose', 'propofol', 'insulin', 'magnesium', 'heparin',
       'esmolol', 'diltiazem', 'phenylephrine', 'norepinephrine',
       'vasopressin', 'fentanyl', 'amiodarone', 'labetalol',
       'dexmedetomidine', 'sodium bicarbonate', 'pantoprazole', 'tpn',
       'nicardipine', 'dobutamine', 'dopamine', 'midazolam', 'furosemide',
       'morphine', 'octreotide', 'aminocaproic', 'epinephrine',
       'bumetanide', 'milrinone', 'rocuronium', 'hydromorphone'],
      dtype=object)

In [4]:
# Display the wide dataset; the saved output below is a truncated preview.
clif.wide_df

Unnamed: 0,hospitalization_id,patient_id,age_at_admission,event_time,norepinephrine_mcg_min,phenylephrine_mcg_min,vasopressin_u_min,map,spo2,weight_kg,...,location_type,encounter_block,day_number,hosp_id_day_key,epinephrine,dopamine,angiotensin,dobutamine,milrinone,gcs_total
0,20626031,10005817,66,2132-12-14 08:05:00-06:00,,,,,,,...,,,1,20626031_day_1,,,,,,
1,20626031,10005817,66,2132-12-14 11:02:00-06:00,,,,,,,...,,,1,20626031_day_1,,,,,,
2,20626031,10005817,66,2132-12-14 13:03:00-06:00,,,,,,,...,,,1,20626031_day_1,,,,,,
3,20626031,10005817,66,2132-12-14 13:04:00-06:00,,,,,,,...,,,1,20626031_day_1,,,,,,
4,20626031,10005817,66,2132-12-14 16:40:00-06:00,,,,,,,...,,,1,20626031_day_1,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
919,23559586,10003400,75,2137-08-25 12:00:00-06:00,,,,,99.0,,...,,,22,23559586_day_22,,,,,,
920,23559586,10003400,75,2137-08-25 12:01:00-06:00,,,,71.0,,,...,,,22,23559586_day_22,,,,,,
921,23559586,10003400,75,2137-08-25 13:00:00-06:00,,,,,99.0,,...,,,22,23559586_day_22,,,,,,
922,23559586,10003400,75,2137-08-25 13:01:00-06:00,,,,61.0,,,...,,,22,23559586_day_22,,,,,,
