### Step 1: Import necessary libraries

In [None]:
import numpy as np
import torch
import random
from src.dataset.eeg_dataset import EEGDataset
from pathlib import Path
import pandas as pd
import os
from src.utils import Utils

### Step 2: Set random seeds

In [None]:
random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)
torch.mps.manual_seed(random_seed)

### Step 3: Set your parameters

In [None]:
# --- Signal / windowing configuration ---------------------------------------
sampling_rate = 128

# Candidate window and step lengths in samples: 1, 2, 5, 10, 30 and 60 times 128.
window_sizes = [factor * 128 for factor in (1, 2, 5, 10, 30, 60)]
step_sizes = list(window_sizes)

# Active choice: the longest window (60 * 128) combined with the 30 * 128 step.
# Alternative setting kept for reference: window_size = 10 * 128, step_size = 5 * 128.
window_size = window_sizes[-1]
step_size = step_sizes[-2]
for_majority = window_size // 2  # half of the window length

# --- Pipeline switches --------------------------------------------------------
preprocessing = True
feature_selection = False
depth_of_anesthesia = True
strategy = "rawEEG"  # use "stftEEG" for stft

# Ids that make up the training split.
training_ids = [1, 2, 3, 4, 6, 7, 12]

# --- Locations ----------------------------------------------------------------
base_path = Path.cwd()
data_path = base_path / "data"


In [None]:
# Echo the active configuration so the cell output documents the run.
print(
    f"You are using {strategy} with a window size of {window_size} "
    f"and step size of {step_size} to predict Depth of Anesthesia"
)

### Step 4: Load the Data and create the features
If the dataset for the current settings was already created and saved as HDF5 (`.h5`) files, the cell below loads it from disk instead of rebuilding it.


In [None]:
# Cache locations: one HDF5 file per split, named after the active
# strategy / window size / step size so different settings never collide.
training_data_path = data_path / f"training_data_{strategy}_{window_size}_{step_size}.h5"
validation_data_path = data_path / f"validation_data_{strategy}_{window_size}_{step_size}.h5"
test_data_path = data_path / f"test_data_{strategy}_{window_size}_{step_size}.h5"

print(training_data_path)

# Every split is stored under the same key inside its own file.
hdf_key = "eeg_window"

if all(
    cached.exists()
    for cached in (training_data_path, validation_data_path, test_data_path)
):
    # All three splits are cached: load them instead of rebuilding the dataset.
    training_data = pd.read_hdf(training_data_path, key=hdf_key)
    test_data = pd.read_hdf(test_data_path, key=hdf_key)
    validation_data = pd.read_hdf(validation_data_path, key=hdf_key)
else:
    # At least one split is missing: rebuild all of them from data_dir.
    sleep_data = EEGDataset(
        data_dir=base_path / "EEG_data",
        training_ids=training_ids,
        validation_ids=[11, 10, 9],
        testing_ids=[8, 5],
        window_size=window_size,
        sampling_rate=sampling_rate,
        step_size=step_size,
        majority_voting=True,
        for_majority=for_majority,
        preprocessing=preprocessing,
        inference_mode=False,
        depth_of_anesthesia=depth_of_anesthesia,
        strategy=strategy,
    )
    training_data = sleep_data.train_df
    test_data = sleep_data.test_df
    validation_data = sleep_data.val_df

    # The data directory may be absent on a fresh checkout; create it so the
    # writes below cannot fail on a missing parent directory.
    data_path.mkdir(parents=True, exist_ok=True)

    # Reuse the path variables defined above so the existence check, the reads
    # and the writes can never drift apart.
    training_data.to_hdf(training_data_path, key=hdf_key, index=False)
    test_data.to_hdf(test_data_path, key=hdf_key, index=False)
    validation_data.to_hdf(validation_data_path, key=hdf_key, index=False)

In [None]:
# Quick sanity check of the training split: list every column name, one per line.
for column in training_data.columns:
    print(column)
# (rows, columns) of the training split.
print(training_data.shape)
# Last expression of the cell, so the notebook displays the first rows.
training_data.head()

### Step 5: Prepare and preprocess the datasets for the different binary classification targets

In [None]:
# Result CSVs and trained models all live in the same directory.
ml_models_dir = base_path / "doA_classification" / "ml_models"

utils = Utils(
    for_majority=for_majority,
    window_size=window_size,
    step_size=step_size,
    random_seed=random_seed,
    preprocessing=preprocessing,
    sampling_rate=sampling_rate,
    results_validation_csv_path=ml_models_dir / "validation_results_df.csv",
    results_test_csv_path=ml_models_dir / "test_results_df.csv",
    model_dir=ml_models_dir,
)

# "sleep" is always a target; the three additional targets are only used when
# depth_of_anesthesia is enabled.
doa_labels = ["cr", "sspl", "burst_suppression"] if depth_of_anesthesia else []
labels = ["sleep", *doa_labels]
labels_to_process = list(labels)

# Columns that must not be treated as features: the window bounds and every label.
exclude_columns = ["Start", "End", *labels]

# Feature columns are whatever remains; excluded names that are absent from the
# frame are ignored.
features = training_data.drop(columns=exclude_columns, errors="ignore").columns

In [None]:

# Pick the compute device instead of hard-coding "mps": Apple's MPS backend when
# it is available (same behavior as before on such machines), otherwise CUDA,
# otherwise the CPU.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Maps each label name to the arrays and data loaders prepared for that target.
preprocessed_data_dict = {}

for label in labels_to_process:
    print(f"Processing {label}...")
    # The window arrays are the same for every label; only the target column
    # differs between iterations.
    # NOTE(review): the stacked arrays are rebuilt for each label. They could be
    # stacked once before the loop if preprocess_data is confirmed not to modify
    # its inputs in place.
    (
        X,
        y,
        X_val,
        y_val,
        X_test,
        y_test,
        train_loader_nn,
        val_loader_nn,
        test_loader_nn,
        input_size,
    ) = utils.preprocess_data(
        X=np.vstack(training_data['eeg_window'].values),
        y=training_data[label],
        X_val=np.vstack(validation_data['eeg_window'].values),
        y_val=validation_data[label],
        X_test=np.vstack(test_data['eeg_window'].values),
        y_test=test_data[label],
        # Author's note: 1024 until 1280, 512 until 3840, then 128
        # (presumably by window size; confirm before changing).
        batch_size=11,
        device=device,
        strategy=strategy,
        classification_type=label,
        imbalanced=True,
    )

    # Keep everything that later steps look up by label.
    preprocessed_data_dict[label] = {
        "X": X,
        "y": y,
        "X_val": X_val,
        "y_val": y_val,
        "X_test": X_test,
        "y_test": y_test,
        "train_loader_nn": train_loader_nn,
        "val_loader_nn": val_loader_nn,
        "test_loader_nn": test_loader_nn,
        "input_size": input_size,
    }

print("Processing completed for all labels.")

### Step 6: Define ML-Models 

In [None]:
from src.models.LSTM import LSTMModel
from src.models.UCR import SimpleModel, FCN, UCRResNet

# Pick the compute device instead of hard-coding "mps": Apple's MPS backend when
# it is available (same behavior as before on such machines), otherwise CUDA,
# otherwise the CPU.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# One instance per architecture, keyed by the name used when training and
# evaluating. Every model has a single output; construction order is kept
# stable because it determines the order of random weight initialisation.
models = {
    "SimpleModel": SimpleModel(
        input_size=window_size,
        hidden_size=500,
        output_size=1,
    ).to(device),
    "FCN": FCN(
        input_shape=1,
        num_classes=1,
    ).to(device),
    "UCRResNet": UCRResNet(
        input_shape=1,
        n_feature_maps=64,
        nb_classes=1,
    ).to(device),
    "LSTMModel": LSTMModel(
        input_size=1,
        hidden_size=256,
        num_layers=2,
        output_size=1,
        dropout_rnn=0.3,
        dropout=0.1,
        bidirectional=False,
    ).to(device),
}

### Step 7: Train the Models for the different binary classification tasks

In [None]:
# Train every architecture on every label that was preprocessed in Step 5.
# Iterating over preprocessed_data_dict (insertion order = processing order)
# instead of a hard-coded label list avoids a KeyError when only "sleep" was
# prepared, i.e. when depth_of_anesthesia is False.
# NOTE(review): each entry of `models` is a single instance that is passed in
# again for every label. Confirm that train_and_evaluate_model resets or
# checkpoints the weights per label; otherwise training for one label starts
# from the weights learned on the previous one.
for label, prepared in preprocessed_data_dict.items():
    for model_name, model in models.items():
        utils.train_and_evaluate_model(
            name=model_name,
            model=model,
            train_loader_nn=prepared["train_loader_nn"],
            val_loader_nn=prepared["val_loader_nn"],
            classification_type=label,
            strategy=strategy,
            patience=3,
        )

### Step 8: Get Test Results:

In [None]:
# Set to False to skip the evaluation on the held-out test split.
get_test_results = True
if get_test_results:
    # Evaluate every architecture on every label that was preprocessed in
    # Step 5. Iterating over preprocessed_data_dict instead of a hard-coded
    # label list avoids a KeyError when only "sleep" was prepared
    # (depth_of_anesthesia False).
    # NOTE(review): the model objects passed here are the same instances used
    # during training for all labels. Confirm that evaluate_model_on_test
    # restores the weights belonging to `label` rather than using whatever the
    # instance holds after the last training run.
    for label, prepared in preprocessed_data_dict.items():
        for model_name, model in models.items():
            test_results = utils.evaluate_model_on_test(
                model_name=model_name,
                model=model,
                X_test=prepared["X_test"],
                y_test=prepared["y_test"],
                test_loader=prepared["test_loader_nn"],
                classification_type=label,
                strategy=strategy,
            )