In [2]:
import os
import random
import numpy as np
import pandas as pd
import warnings
import pickle
warnings.filterwarnings("ignore")

from sklearn.model_selection import StratifiedKFold  # (Not used now, but kept if needed elsewhere)
from sklearn.metrics import mean_squared_error

# Set deterministic behavior for CUDA (set before torch imports)
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

import optuna
import optuna.visualization

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # for multi-GPU
    cudnn.deterministic = True
    cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)
    return seed

# Global device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device} with {torch.cuda.device_count()} GPU(s)")

# Data directory (adjust as needed)
DL_DIR = "../../data/deep_learning"


Using device: cuda with 5 GPU(s)


### Import Data

In [3]:
# Load the regression split dictionary.
with open(f'{DL_DIR}/comb_reg_dict.pkl', 'rb') as f:
    comb_reg_dict = pickle.load(f)

with open(f'{DL_DIR}/fitbit_reg_dict.pkl', 'rb') as f:
    fitbit_reg_dict = pickle.load(f)

# Load the classification split dictionary.
with open(f'{DL_DIR}/comb_class_dict.pkl', 'rb') as f:
    comb_class_dict = pickle.load(f)

with open(f'{DL_DIR}/fitbit_class_dict.pkl', 'rb') as f:
    fitbit_class_dict = pickle.load(f) 

### Utility Functions & Dynamic Model Definition 

In [4]:
def conv_output_length(L_in, kernel_size, stride, padding, dilation):
    """Compute output length after a conv1d layer."""
    return (L_in + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1

def pool_output_length(L_in, pool_kernel):
    """Assuming stride == pool_kernel for pooling."""
    return L_in // pool_kernel

def create_subject_dataset(df, outcome_col="SI_mean"):
    """
    Aggregates records for each subject into a subject-level sample.
    Returns a DataFrame with:
      - 'X': predictors with shape (n_features, 39)
      - the outcome (given by outcome_col)
      - sample_weight (if provided)
      - a stratification column based on the outcome.
    
    This function assumes that each subject already has exactly 39 timepoints.
    For classification (when outcome_col=="is_SI"), if the SI_mean column is absent,
    it is not added.
    """
    exclude_cols = ["PatientID", "timepoints", "si_kde_weight", "SI_mean", "is_SI", "SI_level"]
    predictor_cols = [col for col in df.columns if col not in (exclude_cols + [outcome_col])]
    
    subject_data = []
    for pid, group in df.groupby("PatientID"):
        group_sorted = group.sort_values("timepoints")
        # Assume each subject has exactly 39 timepoints.
        X = group_sorted[predictor_cols].values.T  # shape: (n_features, 39)
        y = group_sorted[outcome_col].iloc[0]
        weight = group_sorted["si_kde_weight"].iloc[0] if "si_kde_weight" in group.columns else 1.0
        record = {"PatientID": pid, "X": X, outcome_col: y, "sample_weight": weight}
        if outcome_col == "is_SI" and "SI_mean" in group_sorted.columns:
            record["SI_mean"] = group_sorted["SI_mean"].iloc[0]
        subject_data.append(record)
    subj_df = pd.DataFrame(subject_data)
    subj_df[f"{outcome_col}_bin"] = np.round(subj_df[outcome_col]).astype(int)
    return subj_df, predictor_cols

def get_stratified_cv_splits(df, subject_id="PatientID", target_var="SI_mean", n_splits=5):
    """
    Performs stratified K-fold cross validation at the subject level.
    
    Parameters:
      df : pandas.DataFrame
          The original dataframe containing repeated measures.
      subject_id : str
          The column name for the subject ID (e.g., "PatientID").
      target_var : str
          The target variable; for regression use "SI_mean" and for classification use "is_SI".
      n_splits : int
          Number of folds for cross validation.
    
    Returns:
      splits : list of tuples
          A list where each element is a tuple (train_df, test_df) corresponding
          to one fold. Each dataframe contains all rows (i.e. repeated measures) for the patients in that fold.
    
    Behavior:
      - Isolates unique patient IDs and their target variable by dropping duplicates.
      - If target_var is "SI_mean", creates a new column "SI_mean_levels" (rounded SI_mean).
      - Uses the resulting column as the stratification column.
      - Performs stratified K-fold CV and then subsets the original dataframe based on the patient IDs.
    """
    # Create a subject-level dataframe (unique patient IDs with their target variable)
    subject_df = df[[subject_id, target_var]].drop_duplicates(subset=[subject_id]).copy()
    
    # For regression: create a new column with the rounded SI_mean values.
    if target_var == "SI_mean":
        subject_df["SI_mean_levels"] = subject_df[target_var].round().astype(int)
        strat_col = "SI_mean_levels"
    else:
        strat_col = target_var  # For classification, use the target directly.
    
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    splits = []
    
    # Get the subject IDs and stratification labels
    subjects = subject_df[subject_id].values
    strat_labels = subject_df[strat_col].values
    
    # For each fold, retrieve patient IDs and then subset the original dataframe.
    for train_idx, test_idx in skf.split(subjects, strat_labels):
        train_patient_ids = subject_df.iloc[train_idx][subject_id].values
        test_patient_ids  = subject_df.iloc[test_idx][subject_id].values
        train_split = df[df[subject_id].isin(train_patient_ids)]
        test_split  = df[df[subject_id].isin(test_patient_ids)]
        splits.append((train_split, test_split))
    
    return splits




#### Updated Dynamic Models with Dropout ####
class DynamicCNNRegression(nn.Module):
    """
    Builds a dynamic CNN model for regression based on hyperparameters from an Optuna trial.
    Dropout is applied after each ReLU activation.
    """
    def __init__(self, input_channels, seq_len, trial):
        super(DynamicCNNRegression, self).__init__()
        layers = []
        # Tune dropout probability as a hyperparameter
        dropout_prob = trial.suggest_float("dropout_prob", 0.1, 0.5)
        current_channels = input_channels
        current_seq_len = seq_len

        # ----- Convolutional Layers -----
        n_conv = trial.suggest_int("n_conv", 1, 3)
        for i in range(n_conv):
            n_filters = trial.suggest_int(f"n_filters_{i}", 8, 64, step=8)
            kernel_size = trial.suggest_int(f"kernel_size_{i}", 3, 7, step=2)
            dilation = trial.suggest_int(f"dilation_{i}", 1, 2)
            stride = trial.suggest_int(f"stride_{i}", 1, 2)
            padding = ((kernel_size - 1) // 2) * dilation
            layers.append(nn.Conv1d(in_channels=current_channels,
                                    out_channels=n_filters,
                                    kernel_size=kernel_size,
                                    stride=stride,
                                    dilation=dilation,
                                    padding=padding))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_prob))
            current_seq_len = conv_output_length(current_seq_len, kernel_size, stride, padding, dilation)
            
            # Optionally add pooling
            use_pool = trial.suggest_categorical(f"use_pool_{i}", [True, False])
            if use_pool:
                pool_type = trial.suggest_categorical(f"pool_type_{i}", ["max", "avg"])
                suggested_pool_kernel = trial.suggest_int(f"pool_kernel_{i}", 2, 4)
                pool_kernel = suggested_pool_kernel if current_seq_len >= suggested_pool_kernel else current_seq_len
                if pool_kernel > 1:
                    if pool_type == "max":
                        layers.append(nn.MaxPool1d(kernel_size=pool_kernel))
                    else:
                        layers.append(nn.AvgPool1d(kernel_size=pool_kernel))
                    current_seq_len = pool_output_length(current_seq_len, pool_kernel)
            current_channels = n_filters

        self.conv = nn.Sequential(*layers)
        
        # ----- Fully Connected Layers -----
        flattened_dim = current_channels * current_seq_len
        n_hidden = trial.suggest_int("n_hidden", 0, 2)
        fc_layers = []
        in_features = flattened_dim
        for j in range(n_hidden):
            n_units = trial.suggest_int(f"n_units_{j}", 16, 128, step=16)
            fc_layers.append(nn.Linear(in_features, n_units))
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(dropout_prob))
            in_features = n_units
        fc_layers.append(nn.Linear(in_features, 1))
        self.fc = nn.Sequential(*fc_layers)
    
    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

class DynamicCNNClassification(nn.Module):
    """
    Builds a dynamic CNN model for binary classification based on hyperparameters from an Optuna trial.
    Dropout is applied after each ReLU activation.
    """
    def __init__(self, input_channels, seq_len, trial):
        super(DynamicCNNClassification, self).__init__()
        layers = []
        dropout_prob = trial.suggest_float("dropout_prob", 0.1, 0.5)
        current_channels = input_channels
        current_seq_len = seq_len

        # ----- Convolutional Layers -----
        n_conv = trial.suggest_int("n_conv", 1, 3)
        for i in range(n_conv):
            n_filters = trial.suggest_int(f"n_filters_{i}", 8, 64, step=8)
            kernel_size = trial.suggest_int(f"kernel_size_{i}", 3, 7, step=2)
            dilation = trial.suggest_int(f"dilation_{i}", 1, 2)
            stride = trial.suggest_int(f"stride_{i}", 1, 2)
            padding = ((kernel_size - 1) // 2) * dilation
            layers.append(nn.Conv1d(in_channels=current_channels,
                                    out_channels=n_filters,
                                    kernel_size=kernel_size,
                                    stride=stride,
                                    dilation=dilation,
                                    padding=padding))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_prob))
            current_seq_len = conv_output_length(current_seq_len, kernel_size, stride, padding, dilation)
            
            use_pool = trial.suggest_categorical(f"use_pool_{i}", [True, False])
            if use_pool:
                pool_type = trial.suggest_categorical(f"pool_type_{i}", ["max", "avg"])
                suggested_pool_kernel = trial.suggest_int(f"pool_kernel_{i}", 2, 4)
                pool_kernel = suggested_pool_kernel if current_seq_len >= suggested_pool_kernel else current_seq_len
                if pool_kernel > 1:
                    if pool_type == "max":
                        layers.append(nn.MaxPool1d(kernel_size=pool_kernel))
                    else:
                        layers.append(nn.AvgPool1d(kernel_size=pool_kernel))
                    current_seq_len = pool_output_length(current_seq_len, pool_kernel)
            current_channels = n_filters

        self.conv = nn.Sequential(*layers)
        
        # ----- Fully Connected Layers -----
        flattened_dim = current_channels * current_seq_len
        n_hidden = trial.suggest_int("n_hidden", 0, 2)
        fc_layers = []
        in_features = flattened_dim
        for j in range(n_hidden):
            n_units = trial.suggest_int(f"n_units_{j}", 16, 128, step=16)
            fc_layers.append(nn.Linear(in_features, n_units))
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(dropout_prob))
            in_features = n_units
        fc_layers.append(nn.Linear(in_features, 1))  # final logit for binary classification
        self.fc = nn.Sequential(*fc_layers)
    
    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

### Objective Function with 5-Fold Stratified Cross Validation

In [5]:
def objective_regression(trial, data_dict, use_sample_weights, model_name):
    set_seed(42)
    # Process only the training portion from the dictionary.
    train_df, _ = create_subject_dataset(data_dict['train'], outcome_col="SI_mean")
    X = np.stack(train_df["X"].values, axis=0)  # shape: (n_subjects, n_features, 39)
    y = train_df["SI_mean"].values
    w = train_df["sample_weight"].values if use_sample_weights else np.ones_like(y, dtype=np.float32)
    n_subjects, input_channels, seq_len = X.shape

    # Get the stratified CV splits from the helper function.
    # Stratification is based on PatientID and the binned SI_mean (SI_mean_bin).
    cv_splits = get_stratified_cv_splits(train_df, subject_id="PatientID", target_var="SI_mean", n_splits=5)

    # Suggest hyperparameters once per trial.
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
    use_reg = trial.suggest_categorical("use_regularization", [True, False])
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3) if use_reg else 0.0
    batch_size = trial.suggest_int("batch_size", 16, 64, step=16)
    num_epochs = trial.suggest_int("num_epochs", 5, 10)

    rmse_list = []
    for cv_train_df, cv_val_df in cv_splits:
        X_train_fold = np.stack(cv_train_df["X"].values, axis=0)
        y_train_fold = cv_train_df["SI_mean"].values
        w_train_fold = cv_train_df["sample_weight"].values if use_sample_weights else np.ones_like(y_train_fold, dtype=np.float32)
        X_val_fold   = np.stack(cv_val_df["X"].values, axis=0)
        y_val_fold   = cv_val_df["SI_mean"].values

        # Build the dynamic CNN regression model with the trial’s architecture choices.
        model = DynamicCNNRegression(input_channels, seq_len, trial).to(device)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        loss_fn = nn.MSELoss(reduction="none")

        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train_fold, dtype=torch.float32),
            torch.tensor(y_train_fold, dtype=torch.float32).view(-1, 1),
            torch.tensor(w_train_fold, dtype=torch.float32)
        )
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
        
        model.train()
        for ep in range(num_epochs):
            for X_batch, y_batch, weight_batch in train_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)
                weight_batch = weight_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss_per_sample = loss_fn(outputs, y_batch).view(-1)
                loss = (loss_per_sample * weight_batch).mean() if use_sample_weights else loss_per_sample.mean()
                loss.backward()
                optimizer.step()

        model.eval()
        with torch.no_grad():
            X_val_tensor = torch.tensor(X_val_fold, dtype=torch.float32).to(device)
            preds = model(X_val_tensor).cpu().numpy()
        fold_rmse = np.sqrt(np.mean((preds - y_val_fold.reshape(-1, 1)) ** 2))
        rmse_list.append(fold_rmse)
    
    mean_rmse = np.mean(rmse_list)
    return mean_rmse


###  Run the Superficial Optuna Search & Save Hyperparameter Importance Visualization

In [6]:
def run_superficial_search_regression(data_dict, use_sample_weights, model_name, n_trials=5):
    optuna.logging.set_verbosity(optuna.logging.WARNING)
    def print_progress(study, trial):
        print(f"{model_name} trial {len(study.trials)}/{n_trials}\r", end="", flush=True)
    
    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: objective_regression(trial, data_dict, use_sample_weights, model_name),
                   n_trials=n_trials, callbacks=[print_progress])
    
    fig = optuna.visualization.plot_param_importances(study)
    plot_filename = f"search/{model_name}_hyperparam_importance.png"
    fig.write_image(plot_filename)
    
    rows = []
    for t in study.trials:
        row = {"model": model_name,
               "type": "weighted" if use_sample_weights else "not weighted",
               "overall_rmse": t.value,
               "config": t.params}
        rows.append(row)
    result_df = pd.DataFrame(rows)
    
    best_trial = study.best_trial
    optimal_configuration = {"value": best_trial.value,
                             "params": best_trial.params,
                             "user_attrs": best_trial.user_attrs}
    
    result_dict = {"results": result_df,
                   "optimal_configuration": optimal_configuration,
                   "importance_plot": plot_filename}
    return result_dict

### Run an Optuna study for a given dataset and weighting configuration

### Weighted

In [6]:
run_superficial_search_regression(comb_reg_dict, use_sample_weights=True, model_name="comb_reg_superficial", n_trials=20)

comb_reg_superficial trial 20/20

{'results':                    model      type  overall_rmse  \
 0   comb_reg_superficial  weighted      1.077803   
 1   comb_reg_superficial  weighted      0.805814   
 2   comb_reg_superficial  weighted      1.074576   
 3   comb_reg_superficial  weighted      1.396296   
 4   comb_reg_superficial  weighted      1.354603   
 5   comb_reg_superficial  weighted      0.997967   
 6   comb_reg_superficial  weighted      1.361521   
 7   comb_reg_superficial  weighted      1.525523   
 8   comb_reg_superficial  weighted      0.915472   
 9   comb_reg_superficial  weighted      1.140880   
 10  comb_reg_superficial  weighted      1.038099   
 11  comb_reg_superficial  weighted      1.090438   
 12  comb_reg_superficial  weighted      1.459692   
 13  comb_reg_superficial  weighted      1.303128   
 14  comb_reg_superficial  weighted      1.662313   
 15  comb_reg_superficial  weighted      1.588079   
 16  comb_reg_superficial  weighted      0.997922   
 17  comb_reg_superficial  weighted

In [7]:
run_superficial_search_regression(fitbit_reg_dict, use_sample_weights=True, model_name="fitbit_reg_superficial", n_trials=20)

fitbit_reg_superficial trial 20/20

{'results':                      model      type  overall_rmse  \
 0   fitbit_reg_superficial  weighted      1.157431   
 1   fitbit_reg_superficial  weighted      1.291520   
 2   fitbit_reg_superficial  weighted      0.725654   
 3   fitbit_reg_superficial  weighted      1.578346   
 4   fitbit_reg_superficial  weighted      1.322916   
 5   fitbit_reg_superficial  weighted      1.486152   
 6   fitbit_reg_superficial  weighted      1.365617   
 7   fitbit_reg_superficial  weighted      0.671264   
 8   fitbit_reg_superficial  weighted      1.366532   
 9   fitbit_reg_superficial  weighted      0.641342   
 10  fitbit_reg_superficial  weighted      0.809376   
 11  fitbit_reg_superficial  weighted      0.677573   
 12  fitbit_reg_superficial  weighted      0.806698   
 13  fitbit_reg_superficial  weighted      0.990587   
 14  fitbit_reg_superficial  weighted      0.953094   
 15  fitbit_reg_superficial  weighted      0.996520   
 16  fitbit_reg_superficial  weighted      0.718990   

### Not Weighted

In [7]:
run_superficial_search_regression(comb_reg_dict, use_sample_weights=False, model_name="comb_reg_superficial_nw", n_trials=20)

comb_reg_superficial_nw trial 20/20

{'results':                       model          type  overall_rmse  \
 0   comb_reg_superficial_nw  not weighted      0.652197   
 1   comb_reg_superficial_nw  not weighted      0.617605   
 2   comb_reg_superficial_nw  not weighted      0.661722   
 3   comb_reg_superficial_nw  not weighted      0.649703   
 4   comb_reg_superficial_nw  not weighted      0.629274   
 5   comb_reg_superficial_nw  not weighted      0.604076   
 6   comb_reg_superficial_nw  not weighted      0.598998   
 7   comb_reg_superficial_nw  not weighted      0.630121   
 8   comb_reg_superficial_nw  not weighted      0.835498   
 9   comb_reg_superficial_nw  not weighted      0.827271   
 10  comb_reg_superficial_nw  not weighted      0.629864   
 11  comb_reg_superficial_nw  not weighted      1.106040   
 12  comb_reg_superficial_nw  not weighted      0.601481   
 13  comb_reg_superficial_nw  not weighted      0.623442   
 14  comb_reg_superficial_nw  not weighted      0.600564   
 15  comb_reg_superficial_nw 

In [8]:
run_superficial_search_regression(fitbit_reg_dict, use_sample_weights=False, model_name="fitbit_reg_superficial_nw", n_trials=20)

fitbit_reg_superficial_nw trial 20/20

{'results':                         model          type  overall_rmse  \
 0   fitbit_reg_superficial_nw  not weighted      0.627845   
 1   fitbit_reg_superficial_nw  not weighted      0.652370   
 2   fitbit_reg_superficial_nw  not weighted      0.687655   
 3   fitbit_reg_superficial_nw  not weighted      0.621692   
 4   fitbit_reg_superficial_nw  not weighted      0.612285   
 5   fitbit_reg_superficial_nw  not weighted      0.615260   
 6   fitbit_reg_superficial_nw  not weighted      0.799828   
 7   fitbit_reg_superficial_nw  not weighted      0.652783   
 8   fitbit_reg_superficial_nw  not weighted      0.710526   
 9   fitbit_reg_superficial_nw  not weighted      0.637370   
 10  fitbit_reg_superficial_nw  not weighted      0.620302   
 11  fitbit_reg_superficial_nw  not weighted      0.604470   
 12  fitbit_reg_superficial_nw  not weighted      0.609234   
 13  fitbit_reg_superficial_nw  not weighted      0.617111   
 14  fitbit_reg_superficial_nw  not weighted      0.603432 

### Superficial search for classification

In [9]:
def objective_classification(trial, data_dict, use_sample_weights, model_name):
    set_seed(42)
    # Process only the training portion.
    train_df, _ = create_subject_dataset(data_dict['train'], outcome_col="is_SI")
    X = np.stack(train_df["X"].values, axis=0)
    y = train_df["is_SI"].values.astype(np.float32)
    w = train_df["sample_weight"].values if use_sample_weights else np.ones_like(y, dtype=np.float32)
    n_subjects, input_channels, seq_len = X.shape

    # Get stratified CV splits using the helper function.
    cv_splits = get_stratified_cv_splits(train_df, subject_id="PatientID", target_var="is_SI", n_splits=5)

    # Suggest hyperparameters once per trial.
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
    use_reg = trial.suggest_categorical("use_regularization", [True, False])
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3) if use_reg else 0.0
    batch_size = trial.suggest_int("batch_size", 16, 64, step=16)
    num_epochs = trial.suggest_int("num_epochs", 5, 10)

    acc_list = []
    sens_list = []
    spec_list = []
    for cv_train_df, cv_val_df in cv_splits:
        X_train_fold = np.stack(cv_train_df["X"].values, axis=0)
        y_train_fold = cv_train_df["is_SI"].values.astype(np.float32)
        w_train_fold = cv_train_df["sample_weight"].values if use_sample_weights else np.ones_like(y_train_fold, dtype=np.float32)
        X_val_fold = np.stack(cv_val_df["X"].values, axis=0)
        y_val_fold = cv_val_df["is_SI"].values.astype(np.float32)

        # Build the dynamic CNN classification model with trial's hyperparameters.
        model = DynamicCNNClassification(input_channels, seq_len, trial).to(device)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        loss_fn = nn.BCEWithLogitsLoss(reduction="none")
        
        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train_fold, dtype=torch.float32),
            torch.tensor(y_train_fold, dtype=torch.float32).view(-1, 1),
            torch.tensor(w_train_fold, dtype=torch.float32)
        )
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
        
        model.train()
        for ep in range(num_epochs):
            for X_batch, y_batch, weight_batch in train_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)
                weight_batch = weight_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss_per_sample = loss_fn(outputs, y_batch).view(-1)
                loss = (loss_per_sample * weight_batch).mean() if use_sample_weights else loss_per_sample.mean()
                loss.backward()
                optimizer.step()
        
        model.eval()
        with torch.no_grad():
            X_val_tensor = torch.tensor(X_val_fold, dtype=torch.float32).to(device)
            logits = model(X_val_tensor)
            probs = torch.sigmoid(logits).cpu().numpy().reshape(-1)
        preds = (probs >= 0.5).astype(np.float32)
        overall_acc = np.mean(preds == y_val_fold)
        TP = np.sum((preds == 1) & (y_val_fold == 1))
        FN = np.sum((preds == 0) & (y_val_fold == 1))
        sensitivity = TP / (TP + FN) if (TP + FN) > 0 else np.nan
        TN = np.sum((preds == 0) & (y_val_fold == 0))
        FP = np.sum((preds == 1) & (y_val_fold == 0))
        specificity = TN / (TN + FP) if (TN + FP) > 0 else np.nan
        
        acc_list.append(overall_acc)
        sens_list.append(sensitivity)
        spec_list.append(specificity)
    
    mean_acc = np.mean(acc_list)
    mean_sens = np.mean(sens_list)
    mean_spec = np.mean(spec_list)
    
    # For the objective function, we return 1 - mean accuracy (to minimize).
    return 1 - mean_acc


In [10]:
def run_superficial_search_classification(data_dict, use_sample_weights, model_name, n_trials=5):
    optuna.logging.set_verbosity(optuna.logging.WARNING)
    def print_progress(study, trial):
        print(f"{model_name} trial {len(study.trials)}/{n_trials}\r", end="", flush=True)
    
    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: objective_classification(trial, data_dict, use_sample_weights, model_name),
                   n_trials=n_trials, callbacks=[print_progress])
    
    fig = optuna.visualization.plot_param_importances(study)
    plot_filename = f"search/{model_name}_hyperparam_importance.png"
    fig.write_image(plot_filename)
    
    rows = []
    for t in study.trials:
        row = {"model": model_name,
               "type": "weighted" if use_sample_weights else "not weighted",
               "1 - accuracy": 1 - t.value,  # since objective is 1-accuracy
               "config": t.params}
        rows.append(row)
    result_df = pd.DataFrame(rows)
    
    best_trial = study.best_trial
    optimal_configuration = {"value": best_trial.value,
                             "params": best_trial.params,
                             "user_attrs": best_trial.user_attrs}
    
    result_dict = {"results": result_df,
                   "optimal_configuration": optimal_configuration,
                   "importance_plot": plot_filename}
    return result_dict

### Weighted

In [10]:
run_superficial_search_classification(comb_class_dict, use_sample_weights=True, model_name="comb_class_superficial", n_trials=20)

comb_class_superficial trial 20/20

{'results':                      model      type  1 - accuracy  \
 0   comb_class_superficial  weighted      0.238280   
 1   comb_class_superficial  weighted      0.238280   
 2   comb_class_superficial  weighted      0.447486   
 3   comb_class_superficial  weighted      0.656692   
 4   comb_class_superficial  weighted      0.238280   
 5   comb_class_superficial  weighted      0.238280   
 6   comb_class_superficial  weighted      0.238280   
 7   comb_class_superficial  weighted      0.240514   
 8   comb_class_superficial  weighted      0.247777   
 9   comb_class_superficial  weighted      0.238280   
 10  comb_class_superficial  weighted      0.253910   
 11  comb_class_superficial  weighted      0.551664   
 12  comb_class_superficial  weighted      0.344425   
 13  comb_class_superficial  weighted      0.343308   
 14  comb_class_superficial  weighted      0.238280   
 15  comb_class_superficial  weighted      0.238280   
 16  comb_class_superficial  weighted      0.447486   

In [11]:
run_superficial_search_classification(fitbit_class_dict, use_sample_weights=True, model_name="fitbit_class_superficial", n_trials=20)

fitbit_class_superficial trial 20/20

{'results':                        model      type  1 - accuracy  \
 0   fitbit_class_superficial  weighted      0.238280   
 1   fitbit_class_superficial  weighted      0.238280   
 2   fitbit_class_superficial  weighted      0.238280   
 3   fitbit_class_superficial  weighted      0.245524   
 4   fitbit_class_superficial  weighted      0.238280   
 5   fitbit_class_superficial  weighted      0.238280   
 6   fitbit_class_superficial  weighted      0.238280   
 7   fitbit_class_superficial  weighted      0.238280   
 8   fitbit_class_superficial  weighted      0.248320   
 9   fitbit_class_superficial  weighted      0.238280   
 10  fitbit_class_superficial  weighted      0.299110   
 11  fitbit_class_superficial  weighted      0.332586   
 12  fitbit_class_superficial  weighted      0.284045   
 13  fitbit_class_superficial  weighted      0.305252   
 14  fitbit_class_superficial  weighted      0.238280   
 15  fitbit_class_superficial  weighted      0.271734   
 16  fitbit_class_su

### Not Weighted

In [11]:
run_superficial_search_classification(comb_class_dict, use_sample_weights=False, model_name="comb_class_superficial_nw", n_trials=20)

comb_class_superficial_nw trial 20/20

{'results':                         model          type  1 - accuracy  \
 0   comb_class_superficial_nw  not weighted      0.762830   
 1   comb_class_superficial_nw  not weighted      0.761720   
 2   comb_class_superficial_nw  not weighted      0.761162   
 3   comb_class_superficial_nw  not weighted      0.761720   
 4   comb_class_superficial_nw  not weighted      0.448893   
 5   comb_class_superficial_nw  not weighted      0.342458   
 6   comb_class_superficial_nw  not weighted      0.732147   
 7   comb_class_superficial_nw  not weighted      0.701436   
 8   comb_class_superficial_nw  not weighted      0.761720   
 9   comb_class_superficial_nw  not weighted      0.736044   
 10  comb_class_superficial_nw  not weighted      0.761720   
 11  comb_class_superficial_nw  not weighted      0.760612   
 12  comb_class_superficial_nw  not weighted      0.737184   
 13  comb_class_superficial_nw  not weighted      0.761720   
 14  comb_class_superficial_nw  not weighted      0.761720 

In [12]:
run_superficial_search_classification(fitbit_class_dict, use_sample_weights=False, model_name="fitbit_class_superficial_nw", n_trials=20)

fitbit_class_superficial_nw trial 20/20

{'results':                           model          type  1 - accuracy  \
 0   fitbit_class_superficial_nw  not weighted      0.761720   
 1   fitbit_class_superficial_nw  not weighted      0.741633   
 2   fitbit_class_superficial_nw  not weighted      0.762279   
 3   fitbit_class_superficial_nw  not weighted      0.761720   
 4   fitbit_class_superficial_nw  not weighted      0.761720   
 5   fitbit_class_superficial_nw  not weighted      0.761720   
 6   fitbit_class_superficial_nw  not weighted      0.761720   
 7   fitbit_class_superficial_nw  not weighted      0.761720   
 8   fitbit_class_superficial_nw  not weighted      0.740496   
 9   fitbit_class_superficial_nw  not weighted      0.760046   
 10  fitbit_class_superficial_nw  not weighted      0.761720   
 11  fitbit_class_superficial_nw  not weighted      0.748873   
 12  fitbit_class_superficial_nw  not weighted      0.598084   
 13  fitbit_class_superficial_nw  not weighted      0.761720   
 14  fitbit_class_superficial