In [1]:
# 📌 Step 0 — Environment setup and imports
# Imports standard-library helpers and `Position` from the `clifpy` package.
# NOTE: nothing in this cell modifies `sys.path`, so `clifpy` must already be
# importable from the active environment (e.g. installed in it).
import sys
import os
from pathlib import Path

from datetime import datetime, timedelta
from clifpy import Position
 

In [2]:
# 📌 Step 1 — Load the Position table
# `Position.from_file` loads the table from `data_directory`; the resulting
# DataFrame is available as `position.df` (used in the cells below).
# The data location can be overridden with the CLIF_DATA_DIR environment variable
# so the notebook is not tied to one machine; the original demo path is the fallback.
DATA_DIR = os.environ.get("CLIF_DATA_DIR", "/Users/dema/WD/clifpy/clifpy/data/clif_demo")
position = Position.from_file(data_directory=DATA_DIR, filetype="parquet", timezone="UTC")

Loading clif_position.parquet
Data loaded successfully from clif_position.parquet
recorded_dttm: null count before conversion= 0
recorded_dttm: Your timezone is UTC, Converting to your site timezone (UTC).
recorded_dttm: null count after conversion= 0


In [3]:
# 📌 Step 2 — Optional data wrangling
# 1. Convert every timezone-aware datetime column to the target zone.
# 2. Display the first few rows to verify the change.
# No columns are dropped in this step.
TARGET_TIMEZONE = "America/New_York"  # change to your preferred IANA zone name
datetime_cols = position.df.select_dtypes(include=["datetimetz"]).columns
position.df[datetime_cols] = position.df[datetime_cols].apply(
    lambda col: col.dt.tz_convert(TARGET_TIMEZONE)
)
position.df.head()

Unnamed: 0,hospitalization_id,recorded_dttm,position_name,position_category
0,23559586,2137-08-25 09:00:00-05:00,Sitting,not_prone
1,23559586,2137-08-25 10:00:00-05:00,Sitting,not_prone
2,23559586,2137-09-01 10:00:00-05:00,Left Side,not_prone
3,23559586,2137-08-25 12:00:00-05:00,Right Side,not_prone
4,23559586,2137-09-01 12:00:00-05:00,Left Side,not_prone


In [4]:
# 📌 Step 3 — Quick table summary
# In the recorded output the summary lists the table name, row and column counts,
# column names, memory usage (MB), validation-error count, and the validity flag.
position.get_summary()

Unnamed: 0,attribute,value
0,table_name,position
1,num_rows,4742
2,num_columns,4
3,columns,"['hospitalization_id', 'recorded_dttm', 'posit..."
4,memory_usage_mb,0.923881
5,validation_errors,0
6,is_valid,True


In [5]:
# 📌 Step 4 — Persist the summary to disk
# Saves the table summary; the expected destination is `output/summary_position.json`
# (the path is not visible from this notebook — TODO confirm).
# NOTE(review): this runs before `position.validate()` in Step 5 — confirm the
# validation fields in the saved summary are meaningful at this point.
position.save_summary()

In [6]:
# 📌 Step 5 — Run full validation suite
# Runs the validation checks for the Position table; any problems found are
# inspected through `position.errors` in the next step.
position.validate()

Validation completed successfully.


In [7]:
# 📌 Step 6 — Inspect validation errors (if any)
# Prints each entry of `position.errors` on its own line; nothing is printed
# when the collection is empty.
for error in position.errors:
    print(error)

In [8]:
# 📌 Step 7 — Explore position statistics by category
# In the recorded output this gives, for each `position_category`, the columns
# `count`, `unique`, and `missing_pct`.
position.get_position_category_stats()

Unnamed: 0_level_0,count,unique,missing_pct
position_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
not_prone,4741,128,0.0
prone,1,1,0.0
