# Examples of distribution methods

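This notebook demonstrates the distribution analysis methods added to the `BaseTable` class:
1. `analyze_categorical_distributions()` - summarizes the distribution of categorical variables
2. `calculate_ecdf()` - computes the empirical cumulative distribution function (ECDF) of continuous variables

The examples below also call the plotting methods `plot_categorical_distributions()` and `plot_stratified_ecdf()`.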


## Setup and Data Loading

In [None]:
import sys
from pathlib import Path

from clifpy.tables.labs import Labs
from clifpy.tables.vitals import Vitals


In [5]:
def find_project_root(start=None):
    """Locate the repository root by walking upward from ``start``.

    A directory counts as the root when it contains a ``pyproject.toml``
    file or a ``clifpy`` sub-directory.

    Parameters
    ----------
    start : str or os.PathLike, optional
        Directory to begin the search from. The current working directory
        is used when this is omitted (or otherwise falsy).

    Returns
    -------
    pathlib.Path
        The nearest matching directory (``start`` itself included), or the
        resolved ``start`` when no ancestor matches.
    """
    # Resolve before walking: a relative path such as Path(".") exposes no
    # (or only truncated) ``.parents``, so the upward search would otherwise
    # stop before reaching the real ancestors.
    origin = Path(start or Path.cwd()).resolve()
    for candidate in (origin, *origin.parents):
        if (candidate / "pyproject.toml").exists() or (candidate / "clifpy").is_dir():
            return candidate
    return origin

# Find the repository root once and put it at the front of sys.path,
# presumably so the in-repo `clifpy` package can be imported.
# NOTE(review): `clifpy` is already imported in the import cell above, before
# this statement runs - confirm whether the path tweak is still required.
project_root = find_project_root()
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Locations and load options reused by the `from_file` calls further down.
DATA_DIR = project_root.joinpath("clifpy", "data", "clif_demo").resolve()
OUTPUT_DIR = project_root.joinpath("examples", "output").resolve()
FILETYPE = "parquet"
TIMEZONE = "US/Eastern"

# Echo the resolved paths so a reader can verify them before loading data.
print(f"Data directory: {DATA_DIR}")
print(f"Output directory: {OUTPUT_DIR}")

Data directory: /Users/dema/WD/clifpy/clifpy/data/clif_demo
Output directory: /Users/dema/WD/clifpy/examples/output


## Load Labs Table

In [None]:
# Build the Labs table from the demo data using the options set in the
# configuration cell (parquet files, US/Eastern timezone).
# NOTE(review): what `from_file` writes to `output_directory` is not visible
# here - check the clifpy documentation.
labs_table = Labs.from_file(
    output_directory=OUTPUT_DIR,
    data_directory=DATA_DIR,
    timezone=TIMEZONE,
    filetype=FILETYPE,
)

## Example 1: Categorical Distribution Analysis

### Overall Categorical Distributions
- Percentage for a lab category = (unique hospitalizations with that lab category) / (total unique hospitalizations in the entire table) × 100

In [None]:
# Compute the categorical distributions for the labs table and keep the result
# so it can be inspected or reused later.
cat_distributions = labs_table.analyze_categorical_distributions()

# Bare trailing expression: the notebook renders the result as the cell output.
cat_distributions

In [None]:
# Plot the categorical distributions of the labs table; the returned object is
# kept in a variable and then shown as the cell output.
cat_distributions_plot = labs_table.plot_categorical_distributions() 
cat_distributions_plot

## Example 2: ECDF Calculation for Continuous Variables

In [None]:
# `Vitals` is imported alongside the other dependencies in the notebook's
# import cell, so every import lives in one place.
# Load the Vitals table with the same data directory, file type, timezone and
# output directory used for the Labs table.
vitals_table = Vitals.from_file(
    data_directory=DATA_DIR,
    filetype=FILETYPE,
    timezone=TIMEZONE,
    output_directory=OUTPUT_DIR,
)

In [None]:
# Stratified ECDF plot for the vitals table: values come from `vital_value`
# and `vital_category` is passed as the stratifying column.
ecdf_plot_vitals = vitals_table.plot_stratified_ecdf(
    'vital_value',
    'vital_category',
)

In [None]:
# Same stratified ECDF plot for the labs table, with `category_values` naming
# three lab categories (presumably restricting the plot to them - confirm).
ecdf_plot_B = labs_table.plot_stratified_ecdf(
    'lab_value_numeric',
    'lab_category',
    category_values=['bun', 'creatinine', 'sodium'],
)
