# Full Pipeline with Results Management

Minimal example: load data → preprocess → model all targets → save, visualize & compare across targets.

In [None]:
from src.utils.pipeline import load_combined_dataset, run_preprocessing_pipeline, ohe_features
from src.feature_config import ALL_CONTINUOUS_FEATURES, ALL_CATEGORICAL_FEATURES, ALL_BINARY_FEATURES
from src.target_definition.aggregate import aggregate_health_targets
from src.predictive import run_modeling_suite
from src.results_management import ResultsManager, ResultsVisualizer, compare_targets

## 1. Load & Preprocess Data

In [None]:
# Load the combined dataset from the two source files and pass it straight
# through the preprocessing pipeline; `df` is the working frame for every
# later cell.
df = run_preprocessing_pipeline(
    load_combined_dataset(
        "data/morphology_data_cleaned.csv",
        "data/synthetic_health_data.xlsx",
    )
)
print(f"Dataset: {df.shape[0]} rows, {df.shape[1]} columns")

In [None]:
# Build feature types map.
# Columns listed in the feature_config constants take their declared type,
# checked in the order continuous → categorical → binary. Any other column is
# inferred from its values: "binary" when it has at least one non-null value
# and every non-null value is 0 or 1, otherwise "continuous".
feature_types = {}
for col in df.columns:
    if col in ALL_CONTINUOUS_FEATURES:
        feature_types[col] = "continuous"
    elif col in ALL_CATEGORICAL_FEATURES:
        feature_types[col] = "categorical"
    elif col in ALL_BINARY_FEATURES:
        feature_types[col] = "binary"
    else:
        observed = df[col].dropna()
        # `.all()` is True on an empty selection, so a column with no
        # non-null values has to be ruled out explicitly; otherwise it would
        # be labelled "binary" without a single observed 0 or 1.
        is_binary = not observed.empty and observed.isin([0, 1]).all()
        feature_types[col] = "binary" if is_binary else "continuous"

# One-hot encode typology (categorical → binary columns)
# ohe_features returns both the updated frame and the updated type map.
df, feature_types = ohe_features(df, feature_types)
print(f"After OHE: {df.shape[1]} columns")

## 2. Aggregate Targets & Run Models

In [None]:
# Health outcomes to process; each is aggregated, modelled and saved in turn.
TARGETS = [
    "mental_health",
    "cardiovascular",
    "sleep_disorder",
    "respiratory",
]
# Single manager instance (base_dir="results") shared by the cells below.
manager = ResultsManager(base_dir="results")

for target in TARGETS:
    print(f"\n{'='*40}\nProcessing: {target.upper()}\n{'='*40}")

    # Aggregate this target; the result carries the modelling frame and a
    # feature-type map under the "data" and "feature_types" keys.
    agg = aggregate_health_targets(df, target, feature_types)
    data = agg["data"]
    ft = agg["feature_types"]

    # Run the modelling suite against the column named "target".
    results = run_modeling_suite(data, "target", feature_types=ft)

    # Save under "<target>_run" with overwrite enabled, then report the
    # best model recorded in the results.
    manager.save(
        results,
        f"{target}_run",
        target_variable=target,
        overwrite=True,
    )
    print(f"Best: {results.get('best_model_name')}")

## 3. View All Experiments

In [None]:
# Ask the manager for its experiment listing. This is the last expression in
# the cell, so whatever it returns is rendered as the cell output.
# NOTE(review): presumably this covers the "<target>_run" experiments saved
# above — confirm against ResultsManager.list_all.
manager.list_all()

## 4. Visualize Results

In [None]:
# Mental health: reload the saved "mental_health_run" experiment, then show
# its summary followed by every available plot.
mental_health_results = manager.load("mental_health_run")
viz = ResultsVisualizer(mental_health_results)
viz.summary()
viz.plot_all()

In [None]:
# Cardiovascular: reload the saved "cardiovascular_run" experiment, then show
# its summary followed by every available plot.
cardiovascular_results = manager.load("cardiovascular_run")
viz = ResultsVisualizer(cardiovascular_results)
viz.summary()
viz.plot_all()

## 5. Compare Across Targets

In [None]:
# Compare regression targets on R2
# NOTE(review): the runs above were saved as "<target>_run" but bare target
# names are passed here — confirm compare_targets looks experiments up by
# target variable rather than by experiment name.
compare_targets(["mental_health", "sleep_disorder"], metric="R2")

In [None]:
# Compare classification targets on Accuracy
# NOTE(review): the runs above were saved as "<target>_run" but bare target
# names are passed here — confirm compare_targets looks experiments up by
# target variable rather than by experiment name.
compare_targets(["cardiovascular", "respiratory"], metric="Accuracy")