# AKI Prediction - Training Example

This notebook demonstrates how to use the modular AKI prediction package to:
1. Load and preprocess data
2. Train multiple models with hyperparameter tuning
3. Evaluate models and save the best one
4. Generate SHAP explanations

## Simple Usage Example


In [None]:
# Make the project's modules importable from this notebook
import sys
import os

# Resolve the sibling `src` directory (relative to the kernel's current working
# directory) and put it at the FRONT of sys.path. The module names imported
# below (`utils`, `train`, `evaluate`, ...) are generic, so appending to the end
# of the path would let a same-named installed package shadow the project code.
# The membership check keeps repeated runs of this cell from adding duplicates.
SRC_DIR = os.path.abspath('../src')
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

# Import the functions used by this notebook from the project modules
from utils import (
    setup_plotting, load_vitaldb_data, preprocess_data, prepare_train_test_data
)
from train import (
    get_default_model_configs, hyperparameter_tuning, save_best_model
)
from evaluate import (
    evaluate_models, print_evaluation_summary
)
from visualization import (
    plot_roc_curves, plot_pr_curves, plot_model_comparison, plot_confusion_matrices
)
from shap_explainer import (
    explain_model_with_shap, analyze_logistic_regression_coefficients
)

# Run the project's plotting setup helper once, before any figure is drawn
setup_plotting()


ModuleNotFoundError: No module named 'src'

## 1. Data Loading and Preprocessing


In [None]:
# Step 1: fetch the raw dataset through the project loader.
df = load_vitaldb_data()

# Step 2: preprocessing returns three values: the feature matrix, the target
# and the list of feature names (the names are reused by the SHAP cell).
X, y, feature_names = preprocess_data(df)

# Step 3: bundle the train/test data. Later cells read the keys
# 'X_train_dict', 'y_train', 'X_test_dict' and 'y_test' from this dict.
data_dict = prepare_train_test_data(X, y)


## 2. Model Training with Hyperparameter Tuning


In [None]:
# Default configurations for the candidate models
models_config = get_default_model_configs()

# Tune every configured model on the training split; the result is indexed by
# model name in the evaluation and SHAP cells below.
tuned_models = hyperparameter_tuning(
    models_config, data_dict['X_train_dict'], data_dict['y_train']
)


## 3. Model Evaluation


In [None]:
# Model name -> data variant used for that model at evaluation time.
# The values ('scaled' / 'imputed') match the keys read from
# data_dict['X_test_dict'] elsewhere in this notebook.
model_data_mapping = dict(
    LogisticRegression='scaled',
    RandomForest='imputed',
    XGBoost='imputed',
    SVM='scaled',
)

# Score every tuned model on the held-out test split
results_df = evaluate_models(
    tuned_models, data_dict['X_test_dict'], data_dict['y_test'], model_data_mapping
)

# Show the evaluation summary
print_evaluation_summary(results_df)


## 4. Save Best Model


In [None]:
# Pick the best of the tuned models and persist it.
# NOTE(review): the test split is what gets passed in here, so selection
# presumably relies on test-set performance; if so, the test metrics reported
# for the chosen model are optimistic. Confirm, and consider a validation split.
best_model_name, best_model = save_best_model(
    tuned_models, data_dict['X_test_dict'], data_dict['y_test'], model_data_mapping
)


## 5. SHAP Explanations


In [None]:
# Generate SHAP explanations for LogisticRegression and XGBoost.
# These two models are explained whenever they were trained, independently of
# which model was saved as "best" in the previous section.
SHAP_MODEL_NAMES = ('LogisticRegression', 'XGBoost')
SHAP_MAX_DISPLAY = 15  # passed through as `max_display` to the SHAP helper

for model_name in SHAP_MODEL_NAMES:
    # Skip models that were not part of the tuning run
    if model_name not in tuned_models:
        continue
    model = tuned_models[model_name]

    # For the linear model, analyze the coefficients before the SHAP step
    if model_name == 'LogisticRegression':
        analyze_logistic_regression_coefficients(model, feature_names)

    # Take the data variant from model_data_mapping (defined in the evaluation
    # section) so the explanation uses the same test data as the evaluation
    # and the two cannot drift apart.
    explain_model_with_shap(
        model,
        data_dict['X_test_dict'][model_data_mapping[model_name]],
        feature_names,
        model_name,
        max_display=SHAP_MAX_DISPLAY
    )
