In [1]:
import os
import random
import numpy as np
import pandas as pd
import warnings
import pickle
warnings.filterwarnings("ignore")

from sklearn.model_selection import StratifiedKFold  # (Not used now, but kept if needed elsewhere)
from sklearn.metrics import mean_squared_error

# Set deterministic behavior for CUDA (set before torch imports)
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

import optuna
import optuna.visualization

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # for multi-GPU
    cudnn.deterministic = True
    cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)
    return seed

# Global device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device} with {torch.cuda.device_count()} GPU(s)")

# Data directory (adjust as needed)
DL_DIR = "../../data/deep_learning"


Using device: cuda with 5 GPU(s)


### Import Data

In [2]:
# Load the regression split dictionary.
with open(f'{DL_DIR}/comb_reg_dict.pkl', 'rb') as f:
    comb_reg_dict = pickle.load(f)

with open(f'{DL_DIR}/fitbit_reg_dict.pkl', 'rb') as f:
    fitbit_reg_dict = pickle.load(f)

# Load the classification split dictionary.
with open(f'{DL_DIR}/comb_class_dict.pkl', 'rb') as f:
    comb_class_dict = pickle.load(f)

with open(f'{DL_DIR}/fitbit_class_dict.pkl', 'rb') as f:
    fitbit_class_dict = pickle.load(f) 

### Utility Functions & Dynamic Model Definition 

In [3]:
def pool_output_length(L_in, pool_kernel):
    """Assuming stride == pool_kernel for pooling."""
    return L_in // pool_kernel

def create_subject_dataset(df, outcome_col="SI_mean"):
    """
    Aggregates records for each subject into a subject-level sample.
    Returns a DataFrame with:
      - 'X': predictors with shape (n_features, 39)
      - the outcome (given by outcome_col)
      - sample_weight (if provided)
      - a stratification column based on the outcome.
    
    This function assumes that each subject already has exactly 39 timepoints.
    For classification (when outcome_col=="is_SI"), if the SI_mean column is absent,
    it is not added.
    """
    exclude_cols = ["PatientID", "timepoints", "si_kde_weight", "SI_mean", "is_SI", "SI_level"]
    predictor_cols = [col for col in df.columns if col not in (exclude_cols + [outcome_col])]
    
    subject_data = []
    for pid, group in df.groupby("PatientID"):
        group_sorted = group.sort_values("timepoints")
        # Assume each subject has exactly 39 timepoints.
        X = group_sorted[predictor_cols].values.T  # shape: (n_features, 39)
        y = group_sorted[outcome_col].iloc[0]
        weight = group_sorted["si_kde_weight"].iloc[0] if "si_kde_weight" in group.columns else 1.0
        record = {"PatientID": pid, "X": X, outcome_col: y, "sample_weight": weight}
        if outcome_col == "is_SI" and "SI_mean" in group_sorted.columns:
            record["SI_mean"] = group_sorted["SI_mean"].iloc[0]
        subject_data.append(record)
    subj_df = pd.DataFrame(subject_data)
    subj_df[f"{outcome_col}_bin"] = np.round(subj_df[outcome_col]).astype(int)
    return subj_df, predictor_cols

def get_stratified_cv_splits(df, subject_id="PatientID", target_var="SI_mean", n_splits=5):
    """
    Performs stratified K-fold cross validation at the subject level.
    
    Parameters:
      df : pandas.DataFrame
          The original dataframe containing repeated measures.
      subject_id : str
          The column name for the subject ID (e.g., "PatientID").
      target_var : str
          The target variable; for regression use "SI_mean" and for classification use "is_SI".
      n_splits : int
          Number of folds for cross validation.
    
    Returns:
      splits : list of tuples
          A list where each element is a tuple (train_df, test_df) corresponding
          to one fold. Each dataframe contains all rows (i.e. repeated measures) for the patients in that fold.
    
    Behavior:
      - Isolates unique patient IDs and their target variable by dropping duplicates.
      - If target_var is "SI_mean", creates a new column "SI_mean_levels" (rounded SI_mean).
      - Uses the resulting column as the stratification column.
      - Performs stratified K-fold CV and then subsets the original dataframe based on the patient IDs.
    """
    # Create a subject-level dataframe (unique patient IDs with their target variable)
    subject_df = df[[subject_id, target_var]].drop_duplicates(subset=[subject_id]).copy()
    
    # For regression: create a new column with the rounded SI_mean values.
    if target_var == "SI_mean":
        subject_df["SI_mean_levels"] = subject_df[target_var].round().astype(int)
        strat_col = "SI_mean_levels"
    else:
        strat_col = target_var  # For classification, use the target directly.
    
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    splits = []
    
    # Get the subject IDs and stratification labels
    subjects = subject_df[subject_id].values
    strat_labels = subject_df[strat_col].values
    
    # For each fold, retrieve patient IDs and then subset the original dataframe.
    for train_idx, test_idx in skf.split(subjects, strat_labels):
        train_patient_ids = subject_df.iloc[train_idx][subject_id].values
        test_patient_ids  = subject_df.iloc[test_idx][subject_id].values
        train_split = df[df[subject_id].isin(train_patient_ids)]
        test_split  = df[df[subject_id].isin(test_patient_ids)]
        splits.append((train_split, test_split))
    
    return splits




## Updated Dynamic Models with Dropout Using LSTM

class DynamicLSTMRegression(nn.Module):
    """
    Builds a dynamic LSTM model for regression based on hyperparameters from an Optuna trial.
    The model searches over the number of LSTM layers, hidden size, bidirectionality, dropout, 
    and additional fully connected layers.
    """
    def __init__(self, input_size, seq_len, trial):
        super(DynamicLSTMRegression, self).__init__()
        # Hyperparameter search for LSTM architecture
        n_layers = trial.suggest_int("n_layers", 1, 3)
        hidden_size = trial.suggest_int("hidden_size", 16, 128, step=16)
        bidirectional = trial.suggest_categorical("bidirectional", [True, False])
        dropout = trial.suggest_float("dropout", 0.1, 0.5)
        
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            dropout=dropout if n_layers > 1 else 0.0,
            bidirectional=bidirectional,
            batch_first=True
        )
        
        # Determine the size of the LSTM output for the fully connected layers.
        fc_input_dim = hidden_size * 2 if bidirectional else hidden_size
        
        # Optionally add fully connected layers.
        n_fc_layers = trial.suggest_int("n_fc_layers", 0, 2)
        fc_layers = []
        in_features = fc_input_dim
        for i in range(n_fc_layers):
            fc_units = trial.suggest_int(f"fc_units_{i}", 16, 128, step=16)
            fc_layers.append(nn.Linear(in_features, fc_units))
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(dropout))
            in_features = fc_units
        fc_layers.append(nn.Linear(in_features, 1))
        self.fc = nn.Sequential(*fc_layers)
    
    def forward(self, x):
        # Input x shape: (batch_size, n_features, seq_len)
        # Transpose to (batch_size, seq_len, n_features) for LSTM
        x = x.transpose(1, 2)
        lstm_out, _ = self.lstm(x)
        # Use the output at the last time step for prediction.
        out = lstm_out[:, -1, :]
        return self.fc(out)

class DynamicLSTMClassification(nn.Module):
    """
    Builds a dynamic LSTM model for binary classification based on hyperparameters from an Optuna trial.
    The model searches over LSTM architecture parameters (layers, hidden size, bidirectionality, dropout)
    and over the fully connected layers following the LSTM.
    """
    def __init__(self, input_size, seq_len, trial):
        super(DynamicLSTMClassification, self).__init__()
        n_layers = trial.suggest_int("n_layers", 1, 3)
        hidden_size = trial.suggest_int("hidden_size", 16, 128, step=16)
        bidirectional = trial.suggest_categorical("bidirectional", [True, False])
        dropout = trial.suggest_float("dropout", 0.1, 0.5)
        
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            dropout=dropout if n_layers > 1 else 0.0,
            bidirectional=bidirectional,
            batch_first=True
        )
        
        fc_input_dim = hidden_size * 2 if bidirectional else hidden_size
        
        n_fc_layers = trial.suggest_int("n_fc_layers", 0, 2)
        fc_layers = []
        in_features = fc_input_dim
        for i in range(n_fc_layers):
            fc_units = trial.suggest_int(f"fc_units_{i}", 16, 128, step=16)
            fc_layers.append(nn.Linear(in_features, fc_units))
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(dropout))
            in_features = fc_units
        # Final layer produces one logit for binary classification.
        fc_layers.append(nn.Linear(in_features, 1))
        self.fc = nn.Sequential(*fc_layers)
    
    def forward(self, x):
        # Input x shape: (batch_size, n_features, seq_len)
        # Transpose to (batch_size, seq_len, n_features) for LSTM
        x = x.transpose(1, 2)
        lstm_out, _ = self.lstm(x)
        out = lstm_out[:, -1, :]
        return self.fc(out)

### Objective Function with 5-Fold Stratified Cross Validation

In [4]:
def objective_regression(trial, data_dict, use_sample_weights, model_name):
    set_seed(42)
    # Process only the training portion from the dictionary.
    train_df, _ = create_subject_dataset(data_dict['train'], outcome_col="SI_mean")
    X = np.stack(train_df["X"].values, axis=0)  # shape: (n_subjects, n_features, 39)
    y = train_df["SI_mean"].values
    w = train_df["sample_weight"].values if use_sample_weights else np.ones_like(y, dtype=np.float32)
    n_subjects, input_channels, seq_len = X.shape

    # Get the stratified CV splits from the helper function.
    cv_splits = get_stratified_cv_splits(train_df, subject_id="PatientID", target_var="SI_mean", n_splits=5)

    # Suggest hyperparameters common to training.
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
    use_reg = trial.suggest_categorical("use_regularization", [True, False])
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3) if use_reg else 0.0
    batch_size = trial.suggest_int("batch_size", 16, 64, step=16)
    num_epochs = trial.suggest_int("num_epochs", 5, 10)

    rmse_list = []
    for cv_train_df, cv_val_df in cv_splits:
        X_train_fold = np.stack(cv_train_df["X"].values, axis=0)
        y_train_fold = cv_train_df["SI_mean"].values
        w_train_fold = cv_train_df["sample_weight"].values if use_sample_weights else np.ones_like(y_train_fold, dtype=np.float32)
        X_val_fold   = np.stack(cv_val_df["X"].values, axis=0)
        y_val_fold   = cv_val_df["SI_mean"].values

        # Build the dynamic LSTM regression model with the trial’s hyperparameter choices.
        model = DynamicLSTMRegression(input_channels, seq_len, trial).to(device)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        loss_fn = nn.MSELoss(reduction="none")

        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train_fold, dtype=torch.float32),
            torch.tensor(y_train_fold, dtype=torch.float32).view(-1, 1),
            torch.tensor(w_train_fold, dtype=torch.float32)
        )
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
        
        model.train()
        for ep in range(num_epochs):
            for X_batch, y_batch, weight_batch in train_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)
                weight_batch = weight_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss_per_sample = loss_fn(outputs, y_batch).view(-1)
                loss = (loss_per_sample * weight_batch).mean() if use_sample_weights else loss_per_sample.mean()
                loss.backward()
                optimizer.step()

        model.eval()
        with torch.no_grad():
            X_val_tensor = torch.tensor(X_val_fold, dtype=torch.float32).to(device)
            preds = model(X_val_tensor).cpu().numpy()
        fold_rmse = np.sqrt(np.mean((preds - y_val_fold.reshape(-1, 1)) ** 2))
        rmse_list.append(fold_rmse)
    
    mean_rmse = np.mean(rmse_list)
    return mean_rmse

###  Run the Superficial Optuna Search & Save Hyperparameter Importance Visualization

In [5]:
def run_superficial_search_regression(data_dict, use_sample_weights, model_name, n_trials=5):
    optuna.logging.set_verbosity(optuna.logging.WARNING)
    def print_progress(study, trial):
        print(f"{model_name} trial {len(study.trials)}/{n_trials}\r", end="", flush=True)
    
    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: objective_regression(trial, data_dict, use_sample_weights, model_name),
                   n_trials=n_trials, callbacks=[print_progress])
    
    fig = optuna.visualization.plot_param_importances(study)
    plot_filename = f"search/{model_name}_hyperparam_importance.png"
    fig.write_image(plot_filename)
    
    rows = []
    for t in study.trials:
        row = {"model": model_name,
               "type": "weighted" if use_sample_weights else "not weighted",
               "overall_rmse": t.value,
               "config": t.params}
        rows.append(row)
    result_df = pd.DataFrame(rows)
    
    best_trial = study.best_trial
    optimal_configuration = {"value": best_trial.value,
                             "params": best_trial.params,
                             "user_attrs": best_trial.user_attrs}
    
    result_dict = {"results": result_df,
                   "optimal_configuration": optimal_configuration,
                   "importance_plot": plot_filename}
    return result_dict

### Run an Optuna study for a given dataset and weighting configuration

In [6]:
run_superficial_search_regression(comb_reg_dict, use_sample_weights=True, model_name="comb_reg_superficial", n_trials=20)

comb_reg_superficial trial 20/20

{'results':                    model      type  overall_rmse  \
 0   comb_reg_superficial  weighted      1.562145   
 1   comb_reg_superficial  weighted      1.671992   
 2   comb_reg_superficial  weighted      1.625948   
 3   comb_reg_superficial  weighted      1.324943   
 4   comb_reg_superficial  weighted      1.699000   
 5   comb_reg_superficial  weighted      1.716119   
 6   comb_reg_superficial  weighted      1.232757   
 7   comb_reg_superficial  weighted      1.717029   
 8   comb_reg_superficial  weighted      1.532924   
 9   comb_reg_superficial  weighted      1.574785   
 10  comb_reg_superficial  weighted      1.234916   
 11  comb_reg_superficial  weighted      1.304396   
 12  comb_reg_superficial  weighted      0.665981   
 13  comb_reg_superficial  weighted      1.705783   
 14  comb_reg_superficial  weighted      1.766074   
 15  comb_reg_superficial  weighted      1.405695   
 16  comb_reg_superficial  weighted      1.253953   
 17  comb_reg_superficial  weighted

In [7]:
run_superficial_search_regression(fitbit_reg_dict, use_sample_weights=True, model_name="fitbit_reg_superficial", n_trials=20)

fitbit_reg_superficial trial 20/20

{'results':                      model      type  overall_rmse  \
 0   fitbit_reg_superficial  weighted      1.607195   
 1   fitbit_reg_superficial  weighted      1.640821   
 2   fitbit_reg_superficial  weighted      1.315669   
 3   fitbit_reg_superficial  weighted      1.308216   
 4   fitbit_reg_superficial  weighted      1.715482   
 5   fitbit_reg_superficial  weighted      1.788118   
 6   fitbit_reg_superficial  weighted      1.180780   
 7   fitbit_reg_superficial  weighted      1.424092   
 8   fitbit_reg_superficial  weighted      1.634825   
 9   fitbit_reg_superficial  weighted      1.568106   
 10  fitbit_reg_superficial  weighted      1.766472   
 11  fitbit_reg_superficial  weighted      1.397064   
 12  fitbit_reg_superficial  weighted      1.563550   
 13  fitbit_reg_superficial  weighted      1.160402   
 14  fitbit_reg_superficial  weighted      1.268800   
 15  fitbit_reg_superficial  weighted      1.318844   
 16  fitbit_reg_superficial  weighted      1.668759   

#### No weights

In [7]:
run_superficial_search_regression(comb_reg_dict, use_sample_weights=False, model_name="comb_reg_superficial_nw", n_trials=20)

comb_reg_superficial_nw trial 20/20

{'results':                       model          type  overall_rmse  \
 0   comb_reg_superficial_nw  not weighted      0.620188   
 1   comb_reg_superficial_nw  not weighted      0.596405   
 2   comb_reg_superficial_nw  not weighted      1.193700   
 3   comb_reg_superficial_nw  not weighted      1.115172   
 4   comb_reg_superficial_nw  not weighted      0.596681   
 5   comb_reg_superficial_nw  not weighted      1.093058   
 6   comb_reg_superficial_nw  not weighted      0.629297   
 7   comb_reg_superficial_nw  not weighted      1.319175   
 8   comb_reg_superficial_nw  not weighted      0.627419   
 9   comb_reg_superficial_nw  not weighted      0.673436   
 10  comb_reg_superficial_nw  not weighted      0.607532   
 11  comb_reg_superficial_nw  not weighted      0.628660   
 12  comb_reg_superficial_nw  not weighted      0.594348   
 13  comb_reg_superficial_nw  not weighted      0.600590   
 14  comb_reg_superficial_nw  not weighted      0.601746   
 15  comb_reg_superficial_nw 

In [8]:
run_superficial_search_regression(fitbit_reg_dict, use_sample_weights=False, model_name="fitbit_reg_superficial_nw", n_trials=20)

fitbit_reg_superficial_nw trial 20/20

{'results':                         model          type  overall_rmse  \
 0   fitbit_reg_superficial_nw  not weighted      0.620028   
 1   fitbit_reg_superficial_nw  not weighted      0.605539   
 2   fitbit_reg_superficial_nw  not weighted      0.627281   
 3   fitbit_reg_superficial_nw  not weighted      0.598081   
 4   fitbit_reg_superficial_nw  not weighted      1.242475   
 5   fitbit_reg_superficial_nw  not weighted      1.257282   
 6   fitbit_reg_superficial_nw  not weighted      0.609245   
 7   fitbit_reg_superficial_nw  not weighted      1.305235   
 8   fitbit_reg_superficial_nw  not weighted      0.615307   
 9   fitbit_reg_superficial_nw  not weighted      0.625439   
 10  fitbit_reg_superficial_nw  not weighted      0.598953   
 11  fitbit_reg_superficial_nw  not weighted      0.598079   
 12  fitbit_reg_superficial_nw  not weighted      0.597776   
 13  fitbit_reg_superficial_nw  not weighted      0.597429   
 14  fitbit_reg_superficial_nw  not weighted      0.598615 

### Superficial search for classification

In [10]:
def objective_classification(trial, data_dict, use_sample_weights, model_name):
    set_seed(42)
    # Process only the training portion.
    train_df, _ = create_subject_dataset(data_dict['train'], outcome_col="is_SI")
    X = np.stack(train_df["X"].values, axis=0)
    y = train_df["is_SI"].values.astype(np.float32)
    w = train_df["sample_weight"].values if use_sample_weights else np.ones_like(y, dtype=np.float32)
    n_subjects, input_channels, seq_len = X.shape

    # Get stratified CV splits using the helper function.
    cv_splits = get_stratified_cv_splits(train_df, subject_id="PatientID", target_var="is_SI", n_splits=5)

    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
    use_reg = trial.suggest_categorical("use_regularization", [True, False])
    weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3) if use_reg else 0.0
    batch_size = trial.suggest_int("batch_size", 16, 64, step=16)
    num_epochs = trial.suggest_int("num_epochs", 5, 10)

    acc_list = []
    sens_list = []
    spec_list = []
    for cv_train_df, cv_val_df in cv_splits:
        X_train_fold = np.stack(cv_train_df["X"].values, axis=0)
        y_train_fold = cv_train_df["is_SI"].values.astype(np.float32)
        w_train_fold = cv_train_df["sample_weight"].values if use_sample_weights else np.ones_like(y_train_fold, dtype=np.float32)
        X_val_fold = np.stack(cv_val_df["X"].values, axis=0)
        y_val_fold = cv_val_df["is_SI"].values.astype(np.float32)

        # Build the dynamic LSTM classification model with the trial's hyperparameters.
        model = DynamicLSTMClassification(input_channels, seq_len, trial).to(device)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        loss_fn = nn.BCEWithLogitsLoss(reduction="none")
        
        train_dataset = torch.utils.data.TensorDataset(
            torch.tensor(X_train_fold, dtype=torch.float32),
            torch.tensor(y_train_fold, dtype=torch.float32).view(-1, 1),
            torch.tensor(w_train_fold, dtype=torch.float32)
        )
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
        
        model.train()
        for ep in range(num_epochs):
            for X_batch, y_batch, weight_batch in train_loader:
                X_batch = X_batch.to(device)
                y_batch = y_batch.to(device)
                weight_batch = weight_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss_per_sample = loss_fn(outputs, y_batch).view(-1)
                loss = (loss_per_sample * weight_batch).mean() if use_sample_weights else loss_per_sample.mean()
                loss.backward()
                optimizer.step()
        
        model.eval()
        with torch.no_grad():
            X_val_tensor = torch.tensor(X_val_fold, dtype=torch.float32).to(device)
            logits = model(X_val_tensor)
            probs = torch.sigmoid(logits).cpu().numpy().reshape(-1)
        preds = (probs >= 0.5).astype(np.float32)
        overall_acc = np.mean(preds == y_val_fold)
        TP = np.sum((preds == 1) & (y_val_fold == 1))
        FN = np.sum((preds == 0) & (y_val_fold == 1))
        sensitivity = TP / (TP + FN) if (TP + FN) > 0 else np.nan
        TN = np.sum((preds == 0) & (y_val_fold == 0))
        FP = np.sum((preds == 1) & (y_val_fold == 0))
        specificity = TN / (TN + FP) if (TN + FP) > 0 else np.nan
        
        acc_list.append(overall_acc)
        sens_list.append(sensitivity)
        spec_list.append(specificity)
    
    mean_acc = np.mean(acc_list)
    mean_sens = np.mean(sens_list)
    mean_spec = np.mean(spec_list)
    
    # For the objective function, we return 1 - mean accuracy (to minimize).
    return 1 - mean_acc


In [13]:
def run_superficial_search_classification(data_dict, use_sample_weights, model_name, n_trials=5):
    optuna.logging.set_verbosity(optuna.logging.WARNING)
    def print_progress(study, trial):
        print(f"{model_name} trial {len(study.trials)}/{n_trials}\r", end="", flush=True)
    
    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: objective_classification(trial, data_dict, use_sample_weights, model_name),
                   n_trials=n_trials, callbacks=[print_progress])
    
    fig = optuna.visualization.plot_param_importances(study)
    plot_filename = f"search/{model_name}_hyperparam_importance.png"
    fig.write_image(plot_filename)
    
    rows = []
    for t in study.trials:
        row = {"model": model_name,
               "type": "weighted" if use_sample_weights else "not weighted",
               "1 - accuracy": 1 - t.value,  # since objective is 1-accuracy
               "config": t.params}
        rows.append(row)
    result_df = pd.DataFrame(rows)
    
    best_trial = study.best_trial
    optimal_configuration = {"value": best_trial.value,
                             "params": best_trial.params,
                             "user_attrs": best_trial.user_attrs}
    
    result_dict = {"results": result_df,
                   "optimal_configuration": optimal_configuration,
                   "importance_plot": plot_filename}
    return result_dict

In [12]:
run_superficial_search_classification(comb_class_dict, use_sample_weights=True, model_name="comb_class_superficial", n_trials=20)

[W 2025-04-05 16:39:59,924] Trial 0 failed with parameters: {'lr': 0.008578525404781486, 'use_regularization': False, 'batch_size': 32, 'num_epochs': 6, 'n_layers': 3, 'hidden_size': 64, 'bidirectional': True, 'dropout': 0.15719888348738867, 'n_fc_layers': 2, 'fc_units_0': 32, 'fc_units_1': 48} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/jeffrw/BIOSTAT629/main_project/modeling/DL_py_env/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_3571150/2524403166.py", line 7, in <lambda>
    study.optimize(lambda trial: objective_classification(trial, data_dict, use_sample_weights, model_name),
  File "/tmp/ipykernel_3571150/1236772839.py", line 54, in objective_classification
    optimizer.step()
  File "/home/jeffrw/BIOSTAT629/main_project/modeling/DL_py_env/lib/python3.10/site-packages/torch/optim/optimizer.py", line 493, in wrapper
    out = func(

KeyboardInterrupt: 

In [None]:
run_superficial_search_classification(fitbit_class_dict, use_sample_weights=True, model_name="fitbit_class_superficial", n_trials=20)

### No weights

In [14]:
run_superficial_search_classification(comb_class_dict, use_sample_weights=False, model_name="comb_class_superficial_nw", n_trials=20)

comb_class_superficial_nw trial 20/20

{'results':                         model          type  1 - accuracy  \
 0   comb_class_superficial_nw  not weighted      0.737175   
 1   comb_class_superficial_nw  not weighted      0.657542   
 2   comb_class_superficial_nw  not weighted      0.753340   
 3   comb_class_superficial_nw  not weighted      0.713705   
 4   comb_class_superficial_nw  not weighted      0.737166   
 5   comb_class_superficial_nw  not weighted      0.752223   
 6   comb_class_superficial_nw  not weighted      0.761720   
 7   comb_class_superficial_nw  not weighted      0.761720   
 8   comb_class_superficial_nw  not weighted      0.749467   
 9   comb_class_superficial_nw  not weighted      0.761720   
 10  comb_class_superficial_nw  not weighted      0.762279   
 11  comb_class_superficial_nw  not weighted      0.762279   
 12  comb_class_superficial_nw  not weighted      0.761163   
 13  comb_class_superficial_nw  not weighted      0.762279   
 14  comb_class_superficial_nw  not weighted      0.761720 

In [15]:
run_superficial_search_classification(fitbit_class_dict, use_sample_weights=False, model_name="fitbit_class_superficial_nw", n_trials=20)

fitbit_class_superficial_nw trial 20/20

{'results':                           model          type  1 - accuracy  \
 0   fitbit_class_superficial_nw  not weighted      0.758370   
 1   fitbit_class_superficial_nw  not weighted      0.761720   
 2   fitbit_class_superficial_nw  not weighted      0.752805   
 3   fitbit_class_superficial_nw  not weighted      0.570830   
 4   fitbit_class_superficial_nw  not weighted      0.756144   
 5   fitbit_class_superficial_nw  not weighted      0.761163   
 6   fitbit_class_superficial_nw  not weighted      0.748325   
 7   fitbit_class_superficial_nw  not weighted      0.760044   
 8   fitbit_class_superficial_nw  not weighted      0.757810   
 9   fitbit_class_superficial_nw  not weighted      0.747217   
 10  fitbit_class_superficial_nw  not weighted      0.760604   
 11  fitbit_class_superficial_nw  not weighted      0.762837   
 12  fitbit_class_superficial_nw  not weighted      0.605731   
 13  fitbit_class_superficial_nw  not weighted      0.761720   
 14  fitbit_class_superficial