# Depth of Anesthesia Classification - Training Pipeline

### Step 1: Import necessary libraries

In [None]:
from src.models.tabularNN import TabularNN
from sklearn.naive_bayes import GaussianNB
import numpy as np
from src.dataset.eeg_dataset import EEGDataset
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
import pandas as pd
from catboost import CatBoostClassifier
from tabicl import TabICLClassifier
from pathlib import Path
from src.utils import Utils
from tabpfn import TabPFNClassifier
import torch
import random

### Step 2: Set your parameters

In [None]:
# --- Experiment switches ---------------------------------------------------
strategy = "FeatureBased"
preprocessing = True
feature_selection = True
depth_of_anesthesia = True
random_seed = 42
number_of_features = 50

# --- Windowing ---------------------------------------------------------------
# Candidate window / step lengths; `timing` is the one used for this run
# (presumably in seconds, given it is multiplied by the sampling rate below).
window_sizes = [1, 2, 5, 10, 30, 60]
step_sizes = [1, 2, 5, 10, 30, 60]
timing = 60  # window_sizes[5]
sampling_rate = 128
# Window and step share the same length, i.e. consecutive windows do not overlap.
window_size = step_size = sampling_rate * timing
# Half a window; handed to EEGDataset / Utils as `for_majority`.
for_majority = window_size // 2

# --- Locations ---------------------------------------------------------------
# Everything is resolved relative to the current working directory.
base_path = Path.cwd()
data_path = base_path.joinpath("EEG_data", "dataset")

In [None]:
# Echo the active configuration so the run can be identified from the cell output.
print(
    f"You are using {strategy} with a window size of {window_size} "
    f"and step size of {step_size} to predict Depth of Anesthesia"
)

### Step 3: Load the Data and create the features
If your feature dataset has already been created and saved as a CSV file, you can skip to Step 4.

Assign each patient/volunteer ID to the training, validation, or test set.

In [None]:
# Instantiate the dataset with the subject-ID split and the windowing
# parameters from Step 2. The resulting splits are read from its
# `train_df`, `val_df` and `test_df` attributes.
eeg_data = EEGDataset(
    data_dir=base_path / "EEG_data",
    training_ids=[1, 2, 3, 4, 6, 7, 12],
    validation_ids=[11, 10, 9],
    testing_ids=[8, 5],
    window_size=window_size,
    step_size=step_size,
    sampling_rate=sampling_rate,
    majority_voting=True,
    for_majority=for_majority,
    preprocessing=preprocessing,
    inference_mode=False,
    depth_of_anesthesia=depth_of_anesthesia,
    strategy=strategy,
)
training_data = eeg_data.train_df
test_data = eeg_data.test_df
validation_data = eeg_data.val_df

# `DataFrame.to_csv` does not create missing parent directories, so make sure
# the output folder exists before writing (no-op when it is already there).
data_path.mkdir(parents=True, exist_ok=True)

# Persist each split so Step 3 can be skipped on later runs; the file names
# encode the window and step size so different configurations do not clash.
training_data.to_csv(
    data_path / f"training_data_{window_size}_{step_size}.csv", index=False
)
test_data.to_csv(data_path / f"test_data_{window_size}_{step_size}.csv", index=False)
validation_data.to_csv(
    data_path / f"validation_data_{window_size}_{step_size}.csv", index=False
)

### Step 4: Read in the saved CSV feature set


In [None]:
# The three split files differ only in their prefix; the suffix encodes the
# window and step size used when the features were generated.
csv_suffix = f"_data_{window_size}_{step_size}.csv"
training_data = pd.read_csv(data_path / f"training{csv_suffix}")
test_data = pd.read_csv(data_path / f"test{csv_suffix}")
validation_data = pd.read_csv(data_path / f"validation{csv_suffix}")

In [None]:
# List every column of the training set, one per line
# (iterating a DataFrame yields its column labels).
for column in training_data:
    print(column)

### Step 5: Prepare and preprocess the datasets for each binary classification target

In [None]:
# The model directory and both result tables share one folder below the
# working directory.
ml_models_dir = base_path / "doA_classification" / "ml_models"

utils = Utils(
    window_size=window_size,
    step_size=step_size,
    sampling_rate=sampling_rate,
    for_majority=for_majority,
    preprocessing=preprocessing,
    random_seed=random_seed,
    model_dir=ml_models_dir,
    # Result file names are prefixed with the active strategy.
    results_validation_csv_path=ml_models_dir
    / f"{strategy}_validation_results_df.csv",
    results_test_csv_path=ml_models_dir / f"{strategy}_test_results_df.csv",
)
# Targets that are only handled when depth-of-anesthesia classification is on.
doa_targets = ["cr", "sspl", "burst_suppression"] if depth_of_anesthesia else []

# Columns that must not be used as model inputs: "Start"/"End" (presumably the
# window boundaries) plus every label column.
exclude_columns = ["Start", "End", "sleep", *doa_targets]
labels = ["sleep", *doa_targets]
labels_to_process = ["sleep", *doa_targets]

# Feature columns are whatever remains; errors="ignore" tolerates excluded
# columns that are absent from the frame.
features = training_data.drop(exclude_columns, axis="columns", errors="ignore").columns

# Preprocessed splits and NN data loaders, keyed by target label.
preprocessed_data_dict = {}

for label in labels_to_process:
    print(f"Processing {label}...")
    # Run the shared preprocessing once per target. The feature matrix is the
    # same for every label; only the target column changes.
    (
        X,
        y,
        X_val,
        y_val,
        X_test,
        y_test,
        train_loader_nn,
        val_loader_nn,
        test_loader_nn,
        input_size,
    ) = utils.preprocess_data(
        X=training_data[features],
        y=training_data[label],
        X_val=validation_data[features],
        y_val=validation_data[label],
        X_test=test_data[features],
        y_test=test_data[label],
        k=number_of_features,
        batch_size=16,
        # NOTE(review): "mps" is hard-coded here, while the model cell picks
        # CUDA or CPU at runtime - confirm which device the loaders need.
        device="mps",
        strategy=strategy,
        classification_type=label,
        # Honour the flag from the parameter cell instead of a hard-coded True,
        # so feature selection can actually be switched off.
        feature_selection=feature_selection,
    )

    # Keep everything needed for training and evaluation of this target.
    preprocessed_data_dict[label] = {
        "X": X,
        "y": y,
        "X_val": X_val,
        "y_val": y_val,
        "X_test": X_test,
        "y_test": y_test,
        "train_loader_nn": train_loader_nn,
        "val_loader_nn": val_loader_nn,
        "test_loader_nn": test_loader_nn,
        "input_size": input_size,
    }

print("Processing completed for all labels.")

In [None]:
# Bare expression: shows the configured number of features as the cell output.
number_of_features

### Step 6: Define the ML models

In [None]:
def set_seed(seed=random_seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Additionally switches cuDNN to deterministic mode and turns off its
    benchmark mode.

    Args:
        seed: Value passed to every generator; defaults to the notebook-level
            ``random_seed``.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


# Seed all random number generators before any model is instantiated.
set_seed(random_seed)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Model registry: display name -> model instance. Every model that accepts a
# seed gets the shared `random_seed`, so changing it in the parameter cell
# affects all of them consistently.
models = {
    "TabICLClassifier": TabICLClassifier(random_state=random_seed, device=device),
    "CatBoostClassifier": CatBoostClassifier(verbose=0, random_state=random_seed),
    "RandomForestClassifier": RandomForestClassifier(random_state=random_seed),
    "GaussianNB": GaussianNB(),
    "TabPFNClassifier": TabPFNClassifier(
        random_state=random_seed, device=device, ignore_pretraining_limits=True
    ),
    # NOTE(review): `input_size` is the value left over from the last iteration
    # of the preprocessing loop; this assumes every label yields the same
    # number of input features - confirm.
    "TabularNN": TabularNN(
        input_size=input_size, hidden_sizes=[input_size, 32, 16], dropout_rate=0.4
    ).to(device),
}

### Step 7: Train the Models for the different binary classification tasks

In [None]:
# Train every model on every target that was actually preprocessed. The
# original label order is kept, but labels missing from
# `preprocessed_data_dict` (e.g. when depth-of-anesthesia targets are
# disabled) are skipped instead of raising a KeyError.
training_labels = [
    label
    for label in ["sleep", "sspl", "cr", "burst_suppression"]
    if label in preprocessed_data_dict
]

# NOTE(review): the same model objects are passed for every label; confirm
# that `Utils.train_and_evaluate_model` re-fits / persists them per label.
for label in training_labels:
    label_data = preprocessed_data_dict[label]
    for model_name, model in models.items():
        utils.train_and_evaluate_model(
            name=model_name,
            model=model,
            train_loader_nn=label_data["train_loader_nn"],
            val_loader_nn=label_data["val_loader_nn"],
            X=label_data["X"],
            y=label_data["y"],
            X_val=label_data["X_val"],
            y_val=label_data["y_val"],
            # Fresh, seeded 5-fold stratified splitter for each call.
            skf=StratifiedKFold(n_splits=5, shuffle=True, random_state=random_seed),
            classification_type=label,
            strategy=strategy,
            number_of_features=number_of_features,
        )

### Step 8: Get the test results

In [None]:
# Evaluate every model on the held-out test split of every target that was
# actually preprocessed. The original label order is kept, but labels missing
# from `preprocessed_data_dict` (e.g. when depth-of-anesthesia targets are
# disabled) are skipped instead of raising a KeyError.
test_labels = [
    label
    for label in ["sleep", "sspl", "cr", "burst_suppression"]
    if label in preprocessed_data_dict
]

# NOTE(review): the in-memory objects in `models` are shared across labels;
# confirm that `Utils.evaluate_model_on_test` uses the model trained for the
# given `classification_type`.
for label in test_labels:
    label_data = preprocessed_data_dict[label]
    for model_name, model in models.items():
        # `test_results` holds the outcome of the most recent evaluation only.
        test_results = utils.evaluate_model_on_test(
            model_name=model_name,
            model=model,
            X_test=label_data["X_test"],
            y_test=label_data["y_test"],
            test_loader=label_data["test_loader_nn"],
            classification_type=label,
            strategy=strategy,
            number_of_features=number_of_features,
        )