# ECDF Generator - Pandas Only

This notebook uses **Pandas exclusively** for the ECDF computation pipeline.

**Features:**
1. Extract ICU time windows from ADT table
2. Filter labs/vitals to values during ICU stays only
3. Standardize lab reference units
4. Compute ECDF (distinct value/probability pairs)
5. Compute quantile bins with auto-extreme-splitting

## Imports and Setup

In [None]:
import json
import yaml
import os
from pathlib import Path
from typing import Dict, List, Any, Tuple, Optional
import pandas as pd
import numpy as np
from datetime import datetime

# The notebook is assumed to run from a directory two levels below the
# project root, so the root is the grandparent of the working directory.
PROJECT_ROOT = Path.cwd().parent.parent
print(f"Project root: {PROJECT_ROOT}")

## Configuration Loading

In [None]:
def load_configs(
    clif_config_path: str = None,
    outlier_config_path: str = None,
    lab_vital_config_path: str = None
) -> Tuple[Dict, Dict, Dict]:
    """
    Read the three configuration files used by the ECDF pipeline.

    Any path left as None falls back to its default location under
    PROJECT_ROOT (config/config.json and modules/ecdf/config/*.yaml).

    Args:
        clif_config_path: JSON file with the CLIF settings
        outlier_config_path: YAML file with the outlier ranges
        lab_vital_config_path: YAML file with the lab/vital settings

    Returns:
        Tuple of (clif_config, outlier_config, lab_vital_config)
    """
    # Resolve defaults lazily so PROJECT_ROOT is only needed when a path is omitted.
    clif_source = (
        PROJECT_ROOT / 'config' / 'config.json'
        if clif_config_path is None else clif_config_path
    )
    outlier_source = (
        PROJECT_ROOT / 'modules' / 'ecdf' / 'config' / 'outlier_config.yaml'
        if outlier_config_path is None else outlier_config_path
    )
    lab_vital_source = (
        PROJECT_ROOT / 'modules' / 'ecdf' / 'config' / 'lab_vital_config.yaml'
        if lab_vital_config_path is None else lab_vital_config_path
    )

    with open(clif_source, 'r') as handle:
        clif_config = json.load(handle)

    with open(outlier_source, 'r') as handle:
        outlier_config = yaml.safe_load(handle)

    with open(lab_vital_source, 'r') as handle:
        lab_vital_config = yaml.safe_load(handle)

    return clif_config, outlier_config, lab_vital_config

In [None]:
# Load configurations from their default locations under PROJECT_ROOT
clif_config, outlier_config, lab_vital_config = load_configs()

# Echo the main settings so a wrong path or file type is visible right away
print("CLIF Config:")
print(f"  tables_path: {clif_config.get('tables_path')}")
print(f"  file_type: {clif_config.get('file_type')}")
print()
print(f"Outlier config tables: {list(outlier_config.get('tables', {}).keys())}")
print(f"Lab categories in config: {list(lab_vital_config.get('labs', {}).keys())}")
print(f"Vital categories in config: {list(lab_vital_config.get('vitals', {}).keys())}")

## Helper Functions

In [None]:
def sanitize_unit_for_filename(unit) -> str:
    """
    Turn a unit (or list of units) into a lowercase, filename-safe token.

    '/' becomes '_', '%' becomes 'pct' and the degree sign becomes 'deg';
    every other non-alphanumeric character becomes '_'. Runs of '_' are
    collapsed and leading/trailing '_' are removed.

    Args:
        unit: A unit string, a list/tuple of units (None entries are skipped
            and the rest joined with '_'), or None.

    Returns:
        The sanitized token, or "unknown" when the input is None, empty, or
        contains nothing usable in a filename.
    """
    if unit is None:
        return "unknown"

    # Handle a collection of units
    if isinstance(unit, (list, tuple)):
        unit = '_'.join(str(u) for u in unit if u is not None)

    if not unit:
        return "unknown"

    text = str(unit)
    replacements = (
        ('/', '_'),
        ('%', 'pct'),
        # Degree sign mis-decoded as Latin-1 ("A-circumflex + degree"); must be
        # handled before the plain degree sign so the stray letter is removed too.
        ('\u00c2\u00b0', 'deg'),
        ('\u00b0', 'deg'),
    )
    for old, new in replacements:
        text = text.replace(old, new)

    text = ''.join(c if c.isalnum() or c == '_' else '_' for c in text)
    # Dropping empty pieces collapses repeated '_' and trims both ends at once.
    sanitized = '_'.join(piece for piece in text.split('_') if piece).lower()

    return sanitized or "unknown"

## Lab Standardization (using clifpy)

In [None]:
import clifpy
from clifpy.tables import Labs
print(f"clifpy location: {clifpy.__file__}")

In [None]:
def standardize_labs_pandas(
    tables_path: str,
    file_type: str,
    output_path: str
) -> None:
    """
    Load the labs table, standardize its reference units via clifpy, and
    write the result to ``output_path`` as parquet.

    Args:
        tables_path: Directory containing ``clif_labs.<file_type>``
        file_type: File extension used to build the labs filename
            (the file is always read with ``pd.read_parquet``)
        output_path: Destination parquet file; its parent directory is also
            passed to clifpy as ``output_directory``
    """
    source_path = os.path.join(tables_path, f'clif_labs.{file_type}')
    print(f"Loading labs from {source_path}...")

    raw_labs = pd.read_parquet(source_path)
    print(f"Loaded {len(raw_labs):,} rows")

    labs_table = Labs(data=raw_labs)

    # NOTE(review): with inplace=False the call is expected to return a pandas
    # DataFrame; what save=True writes into output_directory is up to clifpy.
    clifpy_output_dir = str(Path(output_path).parent)
    standardized = labs_table.standardize_reference_units(
        save=True,
        output_directory=clifpy_output_dir,
        lowercase=True,
        inplace=False
    )

    standardized.to_parquet(output_path, index=False)
    print(f"Saved standardized labs to {output_path}")

In [None]:
# Standardize labs (run once)
# Destination of the unit-standardized labs table. The path is relative to the
# working directory; process_category_pandas reads labs from this same
# relative location, so keep the two in sync.
STANDARDIZED_LABS_PATH = '../../output/intermediate/clif_labs_standardized.parquet'
os.makedirs(os.path.dirname(STANDARDIZED_LABS_PATH), exist_ok=True)

# Uncomment to run standardization
# standardize_labs_pandas(
#     clif_config['tables_path'],
#     clif_config['file_type'],
#     STANDARDIZED_LABS_PATH
# )

## ICU Time Window Extraction

In [None]:
def extract_icu_time_windows(
    tables_path: str,
    file_type: str
) -> pd.DataFrame:
    """
    Read the ADT table and return one row per ICU location interval.

    Rows are kept when ``location_category`` equals 'icu' ignoring case.
    Timezone information, if present, is stripped from both bounds so they
    can be compared against timezone-naive timestamps.

    Returns:
        Pandas DataFrame with columns:
        - hospitalization_id
        - in_dttm: window start
        - out_dttm: window end
    """
    adt_path = os.path.join(tables_path, f'clif_adt.{file_type}')
    print(f"Loading ICU time windows from {adt_path}...")

    needed_columns = ['hospitalization_id', 'location_category', 'in_dttm', 'out_dttm']
    adt_df = pd.read_parquet(adt_path, columns=needed_columns)

    is_icu = adt_df['location_category'].str.lower() == 'icu'
    icu_windows_df = adt_df.loc[is_icu, ['hospitalization_id', 'in_dttm', 'out_dttm']].copy()

    # Drop the timezone (keeping local wall-clock time) on each bound that has one
    for bound in ('in_dttm', 'out_dttm'):
        if icu_windows_df[bound].dt.tz is not None:
            icu_windows_df[bound] = icu_windows_df[bound].dt.tz_localize(None)

    print(f"Found {len(icu_windows_df):,} ICU time windows")
    return icu_windows_df

In [None]:
# Extract ICU time windows
# Build the ICU windows once; the per-category processing below reuses them
icu_windows = extract_icu_time_windows(
    clif_config['tables_path'],
    clif_config['file_type']
)

print("\nSample ICU time windows:")
# Last expression of the cell, so the notebook renders the preview
icu_windows.head(10)

## ECDF Computation (Pandas)

In [None]:
def compute_ecdf_pandas(values: pd.Series) -> pd.DataFrame:
    """
    Compute the empirical CDF as distinct (value, probability) pairs.

    Missing values (NaN) are dropped before counting, so the largest value
    always maps to probability 1.0.

    Args:
        values: Pandas Series of numeric values

    Returns:
        Pandas DataFrame sorted by value with columns:
        - value: each distinct observed value
        - probability: fraction of non-missing observations <= value
        The frame is empty when there are no non-missing values.
    """
    observed = pd.Series(values).dropna()
    n = len(observed)
    if n == 0:
        return pd.DataFrame({'value': [], 'probability': []})

    # One row per distinct value. The cumulative count divided by n equals the
    # highest rank-based probability among duplicates of that value.
    counts = observed.value_counts(sort=False).sort_index()

    return pd.DataFrame({
        'value': counts.index.to_numpy(),
        'probability': counts.cumsum().to_numpy() / n
    })

In [None]:
# Test ECDF computation
# Small sample with repeated values so the de-duplication of ECDF rows is visible
test_values = pd.Series([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 5.0, 5.0, 6.0, 7.0])
ecdf_result = compute_ecdf_pandas(test_values)

print("Test ECDF computation:")
print(f"Input values: {test_values.tolist()}")
print(f"\nECDF result:")
# Last expression of the cell, so the notebook renders the table
ecdf_result

## Binning Functions (Pandas)

In [None]:
def create_quantile_bins_pandas(
    data: pd.Series,
    num_bins: int = 10
) -> List[Dict[str, Any]]:
    """
    Create quantile bins using Pandas qcut.

    Missing values are ignored. Bin bounds are the exact quantile edges
    returned by qcut (not the rounded interval labels), so the first bin
    starts at the true minimum. If fewer than ``2 * num_bins`` values are
    available, the number of bins is reduced to ``max(1, n // 2)``.
    Duplicate quantile edges are merged, so fewer bins than requested may be
    returned. When qcut cannot produce usable bins (for example, all values
    identical), a single bin covering the whole range is returned.

    Args:
        data: Pandas Series with values
        num_bins: Number of bins to create

    Returns:
        List of bin dictionaries with keys bin_num, bin_min, bin_max, count,
        percentage (of the non-missing values) and interval. The first bin
        is closed on both sides, later bins are left-open. Empty list when
        there is no data.
    """
    data = pd.Series(data).dropna()
    total = len(data)
    if total == 0:
        return []

    if total < num_bins * 2:
        num_bins = max(1, total // 2)

    def _single_bin() -> List[Dict[str, Any]]:
        # Fallback: one closed bin spanning every value.
        low, high = float(data.min()), float(data.max())
        return [{
            'bin_num': 1,
            'bin_min': low,
            'bin_max': high,
            'count': total,
            'percentage': 100.0,
            'interval': f"[{low:.2f}, {high:.2f}]"
        }]

    try:
        # labels=False yields integer bin codes; retbins gives the unrounded edges.
        codes, bin_edges = pd.qcut(
            data, q=num_bins, retbins=True, duplicates='drop', labels=False
        )
    except (ValueError, TypeError, IndexError):
        return _single_bin()

    codes = np.asarray(codes, dtype=float)
    # A NaN code means a value fell outside every bin (edges collapsed).
    if len(bin_edges) < 2 or np.isnan(codes).any():
        return _single_bin()

    counts = np.bincount(codes.astype(int), minlength=len(bin_edges) - 1)

    bins = []
    for i, count in enumerate(counts, 1):
        left = float(bin_edges[i - 1])
        right = float(bin_edges[i])
        opener = '[' if i == 1 else '('
        bins.append({
            'bin_num': i,
            'bin_min': left,
            'bin_max': right,
            'count': int(count),
            'percentage': float(count / total * 100),
            'interval': f"{opener}{left:.2f}, {right:.2f}]"
        })

    return bins

In [None]:
def _quantile_partition(values, num_bins: int):
    """Cut *values* into quantile bins; return (exact_edges, int_codes) or None if unusable."""
    try:
        codes, edges = pd.qcut(
            values,
            q=np.linspace(0, 1, num_bins + 1),
            retbins=True,
            duplicates='drop',
            labels=False,
        )
    except (ValueError, TypeError, IndexError):
        return None

    codes = np.asarray(codes, dtype=float)
    edges = np.asarray(edges, dtype=float)
    # A NaN code means a value landed in no bin (quantile edges collapsed).
    if len(edges) < 2 or np.isnan(codes).any():
        return None
    return edges, codes.astype(int)


def create_bins_for_segment_pandas(
    data: pd.Series,
    segment_min: float,
    segment_max: float,
    num_bins: int,
    segment_name: str,
    extra_bins_last: int = 0,
    split_first: bool = False
) -> List[Dict[str, Any]]:
    """
    Create quantile-based bins for one value segment using Pandas.

    Values within [segment_min, segment_max] (inclusive) are cut into up to
    ``num_bins`` quantile bins. Optionally the most extreme bin (the first
    when ``split_first`` is True, otherwise the last) is cut again into up
    to ``extra_bins_last`` quantile sub-bins.

    Bin bounds are the exact quantile edges, and the extreme bin is
    re-split using exactly the values qcut assigned to it, so every value
    is counted once and the bins stay contiguous.

    Args:
        data: Values to bin
        segment_min: Inclusive lower bound of the segment
        segment_max: Inclusive upper bound of the segment
        num_bins: Requested number of quantile bins
        segment_name: Label stored in each bin's 'segment' key
        extra_bins_last: Number of sub-bins for the extreme bin (0 disables)
        split_first: Split the first bin instead of the last

    Returns:
        List of bin dictionaries (segment, bin_num, bin_min, bin_max, count,
        percentage of the segment) numbered from 1. Empty when the segment
        holds no data. A single bin spanning the observed range is returned
        when ``num_bins`` is 1, when fewer than ``2 * num_bins`` values are
        available, or when quantile cutting yields no usable bins.
    """
    # Filter data to segment range
    segment_data = data[(data >= segment_min) & (data <= segment_max)]
    total = len(segment_data)

    if total == 0:
        return []

    whole_segment = [{
        'segment': segment_name,
        'bin_num': 1,
        'bin_min': float(segment_data.min()),
        'bin_max': float(segment_data.max()),
        'count': total,
        'percentage': 100.0
    }]

    if num_bins == 1 or total < num_bins * 2:
        return whole_segment

    partition = _quantile_partition(segment_data, num_bins)
    if partition is None:
        return whole_segment
    edges, codes = partition

    # Each span is (left_edge, right_edge, count)
    spans = [
        (edges[i], edges[i + 1], int(np.count_nonzero(codes == i)))
        for i in range(len(edges) - 1)
    ]

    # Handle extra bins for extreme values
    if extra_bins_last > 0:
        target = 0 if split_first else len(spans) - 1
        # Select by bin code rather than by comparing against the edges: the
        # bins are left-open, so an edge comparison would also pick up values
        # that belong to the neighbouring bin.
        members = segment_data[codes == target]

        if len(members) >= extra_bins_last * 2:
            sub_partition = _quantile_partition(members, extra_bins_last)
            if sub_partition is not None:
                sub_edges, sub_codes = sub_partition
                sub_edges = sub_edges.copy()
                # Pin the outer edges to the parent bin so no gap opens
                # between the sub-bins and their neighbours.
                sub_edges[0] = spans[target][0]
                sub_edges[-1] = spans[target][1]
                spans[target:target + 1] = [
                    (sub_edges[j], sub_edges[j + 1], int(np.count_nonzero(sub_codes == j)))
                    for j in range(len(sub_edges) - 1)
                ]

    return [
        {
            'segment': segment_name,
            'bin_num': bin_num,
            'bin_min': float(left),
            'bin_max': float(right),
            'count': count,
            'percentage': float(count / total * 100)
        }
        for bin_num, (left, right, count) in enumerate(spans, 1)
    ]

In [None]:
def create_all_bins_pandas(
    data: pd.Series,
    normal_lower: float,
    normal_upper: float,
    outlier_min: float,
    outlier_max: float,
    bins_below: int,
    bins_normal: int,
    bins_above: int,
    extra_bins_below: int = 0,
    extra_bins_above: int = 0
) -> List[Dict[str, Any]]:
    """
    Create bins for the below-normal, normal and above-normal segments.

    The normal segment covers [normal_lower, normal_upper] inclusive. When
    it is produced, values exactly equal to ``normal_lower`` or
    ``normal_upper`` are counted there only and are excluded from the
    neighbouring segments, so no value is counted twice.

    Args:
        data: Values to bin
        normal_lower: Lower bound of the normal range
        normal_upper: Upper bound of the normal range
        outlier_min: Lower bound of the below-normal segment
        outlier_max: Upper bound of the above-normal segment
        bins_below: Bins requested below normal (0 skips the segment)
        bins_normal: Bins requested for the normal range (0 skips it)
        bins_above: Bins requested above normal (0 skips the segment)
        extra_bins_below: Sub-bins for the lowest below-normal bin
        extra_bins_above: Sub-bins for the highest above-normal bin

    Returns:
        Bins of all produced segments in the order below, normal, above.
        ``bin_num`` restarts at 1 within each segment.
    """
    all_bins = []
    normal_requested = bins_normal > 0

    # Below normal segment
    if bins_below > 0 and outlier_min < normal_lower:
        # The boundary value belongs to the normal segment when that exists.
        below_data = data[data < normal_lower] if normal_requested else data
        all_bins.extend(create_bins_for_segment_pandas(
            below_data, outlier_min, normal_lower, bins_below, 'below',
            extra_bins_last=extra_bins_below,
            split_first=True
        ))

    # Normal segment
    if normal_requested:
        all_bins.extend(create_bins_for_segment_pandas(
            data, normal_lower, normal_upper, bins_normal, 'normal'
        ))

    # Above normal segment
    if bins_above > 0 and normal_upper < outlier_max:
        above_data = data[data > normal_upper] if normal_requested else data
        all_bins.extend(create_bins_for_segment_pandas(
            above_data, normal_upper, outlier_max, bins_above, 'above',
            extra_bins_last=extra_bins_above,
            split_first=False
        ))

    return all_bins

In [None]:
# Test binning
# Fixed seed so the synthetic sample (mean 100, std 20) is reproducible
np.random.seed(42)
test_values = pd.Series(np.random.randn(10000) * 20 + 100)

# Three segments: [0, 80] below, [80, 120] normal, [120, 200] above, with the
# outermost bin on each side split into up to 5 extra bins
bins = create_all_bins_pandas(
    data=test_values,
    normal_lower=80,
    normal_upper=120,
    outlier_min=0,
    outlier_max=200,
    bins_below=3,
    bins_normal=4,
    bins_above=3,
    extra_bins_below=5,
    extra_bins_above=5
)

print(f"Created {len(bins)} bins:")
for b in bins:
    print(f"  Bin {b['bin_num']:2d}: [{b['bin_min']:8.2f}, {b['bin_max']:8.2f}] - "
          f"segment: {b['segment']:10s}, count: {b['count']:5d}, pct: {b['percentage']:5.1f}%")

## Process Single Category (Labs/Vitals)

In [None]:
def process_category_pandas(
    table_type: str,
    category: str,
    unit: Optional[str],
    icu_windows: pd.DataFrame,
    tables_path: str,
    file_type: str,
    outlier_range: Dict[str, float],
    cat_config: Dict[str, Any],
    output_dir: str = None,
    extreme_bins_count: int = 5,
    save_output: bool = False
) -> Tuple[Dict[str, Any], pd.DataFrame, pd.DataFrame]:
    """
    Process a single lab/vital category using pure Pandas.

    Steps: load the category's rows, keep measurements taken during an ICU
    window, drop values outside the outlier range, then compute the ECDF and
    the segment bins. Each measurement is counted once even when it falls
    inside several ICU windows of the same hospitalization.

    Args:
        table_type: 'labs' reads the standardized labs parquet; any other
            value reads ``clif_vitals.<file_type>`` from ``tables_path``
        category: Value to match in the category column
        unit: Reference unit (string or list of strings) to keep; labs only
        icu_windows: Frame with hospitalization_id, in_dttm and out_dttm
        tables_path: Directory holding the CLIF tables
        file_type: File extension of the CLIF tables
        outlier_range: Dict with inclusive 'min' and 'max' bounds
        cat_config: Dict with 'bins' (below_normal, normal, above_normal)
            and 'normal_range' (lower, upper)
        output_dir: Base directory for saved parquet files
        extreme_bins_count: Extra sub-bins for the outermost bins; applied
            on a side only when that side has more than one bin
        save_output: Write ECDF and bins when True and ``output_dir`` is set

    Returns:
        Tuple of (stats, ecdf_df, bins_df). When no data remains, the ECDF
        frame is empty with its two columns and bins_df is an empty frame.

    Raises:
        FileNotFoundError: If the data file does not exist
    """
    # Determine file path and column names
    if table_type == 'labs':
        # Relative to the working directory; matches where the standardized
        # labs are written in this notebook.
        file_path = '../../output/intermediate/clif_labs_standardized.parquet'
        category_col = 'lab_category'
        value_col = 'lab_value_numeric'
        datetime_col = 'lab_result_dttm'
    else:  # vitals
        file_path = os.path.join(tables_path, f'clif_vitals.{file_type}')
        category_col = 'vital_category'
        value_col = 'vital_value'
        datetime_col = 'recorded_dttm'

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Data file not found: {file_path}")

    display_name = f"{category} ({unit})" if table_type == 'labs' and unit else category
    reported_unit = unit if table_type == 'labs' else None
    print(f"Loading {display_name}...")

    def _empty_result(original: int) -> Tuple[Dict[str, Any], pd.DataFrame, pd.DataFrame]:
        # Same stats keys as the normal return so callers can aggregate uniformly.
        return (
            {
                'category': category,
                'unit': reported_unit,
                'original_count': original,
                'clean_count': 0,
                'ecdf_distinct_pairs': 0,
                'num_bins': 0
            },
            pd.DataFrame({'value': [], 'probability': []}),
            pd.DataFrame()
        )

    # Load data - only necessary columns
    columns_to_load = ['hospitalization_id', datetime_col, value_col, category_col]
    if table_type == 'labs':
        columns_to_load.append('reference_unit')

    data_df = pd.read_parquet(file_path, columns=columns_to_load)

    # Filter to category
    data_df = data_df[data_df[category_col] == category]

    # Filter by unit for labs
    if table_type == 'labs' and unit:
        if isinstance(unit, list):
            data_df = data_df[data_df['reference_unit'].isin(unit)]
        else:
            data_df = data_df[data_df['reference_unit'] == unit]

    # Work on an independent copy before adding/overwriting columns
    data_df = data_df.copy()

    # Remove timezone from datetime if present
    if data_df[datetime_col].dt.tz is not None:
        data_df[datetime_col] = data_df[datetime_col].dt.tz_localize(None)

    # Tag each measurement so duplicates created by the merge can be removed
    data_df['_measurement_id'] = np.arange(len(data_df))

    # Merge with ICU windows
    data_df = data_df.merge(icu_windows, on='hospitalization_id', how='inner')

    # Filter to values during ICU stay
    data_df = data_df[
        (data_df[datetime_col] >= data_df['in_dttm']) &
        (data_df[datetime_col] <= data_df['out_dttm'])
    ]

    # Both window bounds are inclusive, so a measurement taken exactly at a
    # transfer time (or inside overlapping windows) matches more than once.
    data_df = data_df.drop_duplicates(subset='_measurement_id')

    original_count = len(data_df)
    print(f"  Original count: {original_count:,}")

    if original_count == 0:
        print(f"  WARNING: No data found for {display_name} during ICU stays")
        return _empty_result(0)

    # Remove outliers
    values_clean = data_df[
        (data_df[value_col] >= outlier_range['min']) &
        (data_df[value_col] <= outlier_range['max'])
    ][value_col]

    clean_count = len(values_clean)
    print(f"  After outlier removal: {clean_count:,} (removed {original_count - clean_count:,})")

    if clean_count == 0:
        print(f"  WARNING: No data remaining after outlier removal")
        return _empty_result(original_count)

    # Compute ECDF
    ecdf_df = compute_ecdf_pandas(values_clean)
    print(f"  ECDF: {len(ecdf_df):,} distinct pairs (compression: {clean_count / len(ecdf_df):.1f}x)")

    # Compute Bins
    bins_below = cat_config['bins']['below_normal']
    bins_normal = cat_config['bins']['normal']
    bins_above = cat_config['bins']['above_normal']

    # Only split the outermost bin on a side that has more than one bin
    extra_bins_below = extreme_bins_count if bins_below > 1 else 0
    extra_bins_above = extreme_bins_count if bins_above > 1 else 0

    bins = create_all_bins_pandas(
        data=values_clean,
        normal_lower=cat_config['normal_range']['lower'],
        normal_upper=cat_config['normal_range']['upper'],
        outlier_min=outlier_range['min'],
        outlier_max=outlier_range['max'],
        bins_below=bins_below,
        bins_normal=bins_normal,
        bins_above=bins_above,
        extra_bins_below=extra_bins_below,
        extra_bins_above=extra_bins_above
    )

    # Add interval notation (bin 1 of a segment is closed on both sides)
    for bin_info in bins:
        if bin_info['bin_num'] == 1:
            interval = f"[{bin_info['bin_min']:.2f}, {bin_info['bin_max']:.2f}]"
        else:
            interval = f"({bin_info['bin_min']:.2f}, {bin_info['bin_max']:.2f}]"
        bin_info['interval'] = interval

    bins_df = pd.DataFrame(bins) if bins else pd.DataFrame()
    print(f"  Bins: {len(bins_df)}")

    # Save output
    if save_output and output_dir:
        if table_type == 'labs' and unit:
            # With several accepted units, the first one names the file
            unit_for_filename = unit[0] if isinstance(unit, list) else unit
            unit_safe = sanitize_unit_for_filename(unit_for_filename)
            filename = f'{category}_{unit_safe}.parquet'
        else:
            filename = f'{category}.parquet'

        ecdf_dir = os.path.join(output_dir, 'ecdf', table_type)
        os.makedirs(ecdf_dir, exist_ok=True)
        ecdf_df.to_parquet(os.path.join(ecdf_dir, filename), index=False)

        bins_dir = os.path.join(output_dir, 'bins', table_type)
        os.makedirs(bins_dir, exist_ok=True)
        bins_df.to_parquet(os.path.join(bins_dir, filename), index=False)

        print(f"  Saved to {output_dir}")

    stats = {
        'category': category,
        'unit': reported_unit,
        'original_count': original_count,
        'clean_count': clean_count,
        'ecdf_distinct_pairs': len(ecdf_df),
        'num_bins': len(bins)
    }

    return stats, ecdf_df, bins_df

## Test: Process a Single Lab Category

In [None]:
# Per-category lab settings and the matching outlier ranges
labs_config = lab_vital_config.get('labs', {})
labs_outlier = outlier_config['tables']['labs']['lab_value_numeric']

# A category can only be processed when it also has an outlier range
print("Available lab categories in config:")
for cat in sorted(labs_config.keys()):
    if isinstance(labs_config.get(cat), dict):
        in_outlier = cat in labs_outlier
        print(f"  {cat}: outlier config = {in_outlier}")

In [None]:
# Lab category used for the single-category smoke test
TEST_LAB_CATEGORY = 'albumin'

if TEST_LAB_CATEGORY in labs_config and TEST_LAB_CATEGORY in labs_outlier:
    cat_config = labs_config[TEST_LAB_CATEGORY]
    if cat_config and isinstance(cat_config, dict):
        # Lowercase the configured unit(s); the full pipeline does the same to
        # match the standardized data
        test_unit = cat_config.get('reference_unit')
        if isinstance(test_unit, str):
            test_unit = test_unit.lower()
        elif isinstance(test_unit, list):
            test_unit = [u.lower() if isinstance(u, str) else u for u in test_unit]
        
        # Dry run: nothing is written to disk (save_output=False)
        stats, ecdf_df, bins_df = process_category_pandas(
            table_type='labs',
            category=TEST_LAB_CATEGORY,
            unit=test_unit,
            icu_windows=icu_windows,
            tables_path=clif_config['tables_path'],
            file_type=clif_config['file_type'],
            outlier_range=labs_outlier[TEST_LAB_CATEGORY],
            cat_config=cat_config,
            extreme_bins_count=5,
            save_output=False
        )
        
        print(f"\nStats: {stats}")
else:
    print(f"Category '{TEST_LAB_CATEGORY}' not found in config")

In [None]:
# Preview the ECDF produced by the lab test cell above
print("ECDF (first 20 rows):")
ecdf_df.head(20)

In [None]:
# Show every bin produced by the lab test cell above
print("Bins:")
bins_df

## Test: Process a Single Vital Category

In [None]:
# Per-category vital settings and the matching outlier ranges
vitals_config = lab_vital_config.get('vitals', {})
vitals_outlier = outlier_config['tables']['vitals']['vital_value']

# Vital category used for the single-category smoke test
TEST_VITAL_CATEGORY = 'heart_rate'

if TEST_VITAL_CATEGORY in vitals_config and TEST_VITAL_CATEGORY in vitals_outlier:
    # Height and weight get 5 extreme sub-bins; every other vital gets 10
    extreme_bins = 5 if TEST_VITAL_CATEGORY in ['height_cm', 'weight_kg'] else 10
    
    # Vitals have no unit filter; dry run without writing files
    vital_stats, vital_ecdf_df, vital_bins_df = process_category_pandas(
        table_type='vitals',
        category=TEST_VITAL_CATEGORY,
        unit=None,
        icu_windows=icu_windows,
        tables_path=clif_config['tables_path'],
        file_type=clif_config['file_type'],
        outlier_range=vitals_outlier[TEST_VITAL_CATEGORY],
        cat_config=vitals_config[TEST_VITAL_CATEGORY],
        extreme_bins_count=extreme_bins,
        save_output=False
    )
    
    print(f"\nStats: {vital_stats}")
else:
    print(f"Category '{TEST_VITAL_CATEGORY}' not found in config")

## Visualize ECDF

In [None]:
import matplotlib.pyplot as plt

def plot_ecdf(ecdf_df: pd.DataFrame, title: str = "ECDF"):
    """
    Draw the ECDF as a step curve and display the figure.

    Args:
        ecdf_df: Frame with 'value' and 'probability' columns
        title: Title shown above the plot
    """
    figure, axes = plt.subplots(figsize=(10, 6))

    # 'post' holds each probability until the next distinct value
    axes.step(
        ecdf_df['value'].values,
        ecdf_df['probability'].values,
        where='post',
        linewidth=1.5,
    )

    axes.set_title(title)
    axes.set_xlabel('Value')
    axes.set_ylabel('Cumulative Probability')
    axes.set_ylim(0, 1.05)
    axes.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Only plot when the lab test cell has run and produced a non-empty ECDF
if 'ecdf_df' in dir() and len(ecdf_df) > 0:
    plot_ecdf(ecdf_df, f"ECDF: {TEST_LAB_CATEGORY}")

## Run Full Pipeline

In [None]:
# Final ECDF/bin files are written below <project root>/output/final
OUTPUT_DIR = str(PROJECT_ROOT / 'output' / 'final')
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Process all labs from config
for category, cat_config in labs_config.items():
    if cat_config is None:
        print(f"WARNING: Category '{category}' has None config, skipping")
        continue

    # Non-dict entries are skipped silently
    if not isinstance(cat_config, dict):
        continue

    if category not in labs_outlier:
        print(f"WARNING: Category '{category}' not in outlier config, skipping")
        continue

    config_unit = cat_config.get('reference_unit')
    if config_unit is None:
        print(f"WARNING: Category '{category}' has no reference_unit in config, skipping")
        continue

    # Lowercase to match standardized data
    if isinstance(config_unit, str):
        unit = config_unit.lower()
    elif isinstance(config_unit, list):
        unit = [u.lower() if isinstance(u, str) else u for u in config_unit]
    else:
        unit = config_unit

    # Note: stats, ecdf_df and bins_df are rebound on every iteration, so after
    # the loop they refer to the last processed category
    stats, ecdf_df, bins_df = process_category_pandas(
        table_type='labs',
        category=category,
        unit=unit,
        icu_windows=icu_windows,
        tables_path=clif_config['tables_path'],
        file_type=clif_config['file_type'],
        outlier_range=labs_outlier[category],
        cat_config=cat_config,
        output_dir=OUTPUT_DIR,
        extreme_bins_count=5,
        save_output=True
    )