In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import random
import pickle

from sklearn.metrics import mean_squared_error, precision_recall_fscore_support, accuracy_score, roc_auc_score

os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # set before any torch imports

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from sklearn.model_selection import StratifiedKFold

# Set device (GPU if available)
# Models and batches in the cells below are moved to this device with `.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory holding the pickled split dictionaries (*_reg_dict.pkl / *_class_dict.pkl)
# loaded in the "Import Data" cell. The path is relative, so it resolves against the
# notebook's working directory.
DL_DIR = "../../data/deep_learning"


### Function for setting the seed

In [2]:
def set_seed(seed=42):
    """
    Seed every random number generator used in this notebook and switch
    PyTorch into deterministic mode.

    Seeds Python's `random`, NumPy's legacy global RNG, and PyTorch's CPU and
    CUDA generators, then disables cuDNN autotuning and requests deterministic
    algorithms globally.

    Parameters:
      seed : int
          Value passed to every seeding function.

    Returns:
      seed : int
          The seed that was applied (returned unchanged).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # cuDNN: no benchmark-driven kernel selection, deterministic kernels only.
    cudnn.benchmark = False
    cudnn.deterministic = True
    torch.use_deterministic_algorithms(True)

    return seed

### Import Data

In [3]:
def _load_split_dict(file_name):
    """Unpickle one split dictionary stored under DL_DIR and return it."""
    with open(f'{DL_DIR}/{file_name}', 'rb') as handle:
        return pickle.load(handle)


# NOTE(review): pickle.load can execute arbitrary code, so DL_DIR must only
# contain files produced by this project.

# Regression split dictionaries.
comb_reg_dict = _load_split_dict('comb_reg_dict.pkl')
fitbit_reg_dict = _load_split_dict('fitbit_reg_dict.pkl')

# Classification split dictionaries.
comb_class_dict = _load_split_dict('comb_class_dict.pkl')
fitbit_class_dict = _load_split_dict('fitbit_class_dict.pkl')

    



### Define Models

In [4]:
class LSTMRegression(nn.Module):
    """
    LSTM followed by dropout and a linear layer producing one regression value
    per sample.

    Parameters:
      input_size : int
          Number of features per timepoint (size of dim 1 of the input).
      seq_len : int
          Accepted for interface compatibility only; it is not used, because
          nn.LSTM does not need the sequence length at construction time.
      dropout_prob : float
          Dropout probability applied to the final hidden state.
      hidden_size : int
          Width of the LSTM hidden state (default 16, the previous fixed value).
      num_layers : int
          Number of stacked LSTM layers (default 1, the previous fixed value).
    """

    def __init__(self, input_size, seq_len, dropout_prob=0.5, hidden_size=16, num_layers=1):
        super().__init__()
        # Attribute names (lstm / dropout / fc) are unchanged, so state_dict keys are unchanged.
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.dropout = nn.Dropout(dropout_prob)
        # The head must match the LSTM width, so both read the same hidden_size.
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """
        x : tensor of shape [batch, input_size, seq_len].
        Returns a tensor of shape [batch, 1].
        """
        # batch_first LSTM expects [batch, seq_len, input_size].
        x = x.transpose(1, 2)
        _, (h_n, _) = self.lstm(x)
        # h_n[-1] is the final hidden state of the last layer: [batch, hidden_size].
        h_last = self.dropout(h_n[-1])
        return self.fc(h_last)

class LSTMClassification(nn.Module):
    """
    LSTM followed by dropout and a linear layer producing a single logit per
    sample for binary classification (no sigmoid is applied here).

    Parameters:
      input_size : int
          Number of features per timepoint (size of dim 1 of the input).
      seq_len : int
          Accepted for interface compatibility only; it is not used, because
          nn.LSTM does not need the sequence length at construction time.
      dropout_prob : float
          Dropout probability applied to the final hidden state.
      hidden_size : int
          Width of the LSTM hidden state (default 16, the previous fixed value).
      num_layers : int
          Number of stacked LSTM layers (default 1, the previous fixed value).
    """

    def __init__(self, input_size, seq_len, dropout_prob=0.5, hidden_size=16, num_layers=1):
        super().__init__()
        # Attribute names (lstm / dropout / fc) are unchanged, so state_dict keys are unchanged.
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.dropout = nn.Dropout(dropout_prob)
        # Single logit for binary classification; width follows the LSTM.
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """
        x : tensor of shape [batch, input_size, seq_len].
        Returns raw logits of shape [batch, 1].
        """
        # batch_first LSTM expects [batch, seq_len, input_size].
        x = x.transpose(1, 2)
        _, (h_n, _) = self.lstm(x)
        # h_n[-1] is the final hidden state of the last layer: [batch, hidden_size].
        h_last = self.dropout(h_n[-1])
        return self.fc(h_last)


### Helper Functions

In [5]:
def create_subject_dataset(df, outcome_col="SI_mean"):
    """
    Collapse a long-format table (one row per subject and timepoint) into one
    row per subject.

    Parameters:
      df : pandas.DataFrame
          Must contain "PatientID", "timepoints" and `outcome_col`. Every column
          not listed as a meta/outcome column below is treated as a predictor.
      outcome_col : str
          Outcome to carry over ("SI_mean" or "is_SI").

    Returns:
      subj_df : pandas.DataFrame
          Columns: PatientID, X, <outcome_col>, sample_weight, optionally
          SI_mean (only when outcome_col == "is_SI" and df has that column),
          and <outcome_col>_bin (the outcome rounded to int, for stratification).
          X is a numpy array of shape (n_features, n_timepoints) with the
          subject's rows ordered by "timepoints".
      predictor_cols : list
          Predictor column names, in the row order of X.

    Notes:
      - Outcome and weight are taken from the subject's first row after sorting.
      - sample_weight falls back to 1.0 when df has no "si_kde_weight" column.
      - No check is made that subjects share the same number of timepoints;
        callers that stack X rely on that (39 per the original notes).
    """
    meta_cols = ("PatientID", "timepoints", "si_kde_weight", "SI_mean", "is_SI", "SI_level")
    predictor_cols = [name for name in df.columns if name not in meta_cols]

    # Column presence is the same for every group, so decide once.
    has_weight = "si_kde_weight" in df.columns
    carry_si_mean = outcome_col == "is_SI" and "SI_mean" in df.columns

    def summarise(pid, rows):
        ordered = rows.sort_values("timepoints")
        entry = {
            "PatientID": pid,
            "X": ordered[predictor_cols].values.T,  # (n_features, n_timepoints)
            outcome_col: ordered[outcome_col].iloc[0],
            "sample_weight": ordered["si_kde_weight"].iloc[0] if has_weight else 1.0,
        }
        if carry_si_mean:
            entry["SI_mean"] = ordered["SI_mean"].iloc[0]
        return entry

    subj_df = pd.DataFrame([summarise(pid, rows) for pid, rows in df.groupby("PatientID")])
    subj_df[f"{outcome_col}_bin"] = np.round(subj_df[outcome_col]).astype(int)
    return subj_df, predictor_cols

def get_stratified_cv_splits(df, subject_id="PatientID", target_var="SI_mean", n_splits=5):
    """
    Build subject-level stratified K-fold splits of `df`.

    Parameters:
      df : pandas.DataFrame
          Table with one or more rows per subject.
      subject_id : str
          Name of the subject identifier column (e.g., "PatientID").
      target_var : str
          Target column; "SI_mean" for regression, "is_SI" for classification.
      n_splits : int
          Number of folds.

    Returns:
      splits : list of tuples
          One (train_df, test_df) pair per fold. Each frame holds every row of
          `df` belonging to the subjects assigned to that side of the fold.

    Behavior:
      - Subjects are identified by the first row seen for each `subject_id`.
      - For target_var == "SI_mean" the stratification label is the target
        rounded to the nearest integer; otherwise the target is used as is.
      - Folds come from StratifiedKFold(shuffle=True, random_state=42), so the
        assignment is the same on every call for the same input.
    """
    # One row per subject: ID plus the target value from its first row.
    per_subject = df[[subject_id, target_var]].drop_duplicates(subset=[subject_id])

    subject_ids = per_subject[subject_id].values
    if target_var == "SI_mean":
        strata = per_subject[target_var].round().astype(int).values
    else:
        strata = per_subject[target_var].values

    folder = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    def rows_for(ids):
        # All rows of the original frame belonging to the given subjects.
        return df[df[subject_id].isin(ids)]

    return [
        (rows_for(subject_ids[train_idx]), rows_for(subject_ids[test_idx]))
        for train_idx, test_idx in folder.split(subject_ids, strata)
    ]


### SI_mean regression LSTM base learner

In [6]:
def _fit_si_mean_lstm(X, y, w, input_size, seq_len, loss_fn, num_epochs, batch_size,
                      use_sample_weights, log_prefix=None):
    """Train a fresh LSTMRegression on (X, y, w) and return it in eval mode.

    When `log_prefix` is given, the sample-weighted mean batch loss is printed
    after every epoch; otherwise training is silent.
    """
    model = LSTMRegression(input_size, seq_len).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    dataset = torch.utils.data.TensorDataset(
        torch.tensor(X, dtype=torch.float32),
        torch.tensor(y, dtype=torch.float32).view(-1, 1),
        torch.tensor(w, dtype=torch.float32),
    )
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

    model.train()
    for ep in range(num_epochs):
        epoch_loss = 0.0
        for X_batch, y_batch, weight_batch in loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            weight_batch = weight_batch.to(device)
            optimizer.zero_grad()
            loss_per_sample = loss_fn(model(X_batch), y_batch).view(-1)
            # Weighted loss is mean(w * loss); it is not renormalised by sum(w).
            if use_sample_weights:
                loss = (loss_per_sample * weight_batch).mean()
            else:
                loss = loss_per_sample.mean()
            loss.backward()
            optimizer.step()
            if log_prefix is not None:
                epoch_loss += loss.item() * X_batch.size(0)
        if log_prefix is not None:
            print(f"{log_prefix} - Epoch {ep+1}/{num_epochs}, Loss: {epoch_loss/len(dataset):.4f}")

    model.eval()
    return model


def _predict_si_mean(model, X):
    """Run `model` on the whole array X without gradients; returns an (n, 1) numpy array."""
    with torch.no_grad():
        return model(torch.tensor(X, dtype=torch.float32).to(device)).cpu().numpy()


def _si_mean_rmse(preds, y):
    """Root mean squared error between (n, 1) predictions and a length-n target vector."""
    return np.sqrt(np.mean((preds - y.reshape(-1, 1)) ** 2))


def _si_mean_rmse_by_bin(preds, y):
    """RMSE within each rounded-target bin "1".."5"; NaN for bins with no samples."""
    levels = np.round(y).astype(int)
    per_bin = {}
    for b in range(1, 6):
        idx = np.where(levels == b)[0]
        per_bin[str(b)] = _si_mean_rmse(preds[idx], y[idx]) if len(idx) > 0 else np.nan
    return per_bin


def base_learner_SI_mean(data_dict, model_name, num_epochs, batch_size, use_sample_weights=False):
    """
    Trains an LSTM regression model to predict SI_mean.

    data_dict['train'] and data_dict['test'] are each passed through
    create_subject_dataset (outcome_col="SI_mean"), giving one "X" array of
    shape (n_features, timepoints) per subject.

    If use_sample_weights=True, each sample's squared error is multiplied by its
    "sample_weight" during training. Weights are never used for evaluation.

    The function then repeats the same training procedure inside a 5-fold
    stratified cross validation on the training subjects (folds from
    get_stratified_cv_splits, stratified on rounded SI_mean).

    Parameters:
      data_dict : dict
          Must provide 'train' and 'test' long-format DataFrames.
      model_name : str
          Label written to the "model" column of both result tables.
      num_epochs : int
          Training epochs for the full fit and for every CV fold.
      batch_size : int
          Mini-batch size.
      use_sample_weights : bool
          Whether to weight the training loss.

    Returns two DataFrames:
      1. metrics_df: columns model, data, "1", "2", "3", "4", "5", overall.
         One row for the training split and one for the test split; values are RMSE
         (per rounded-SI_mean bin and overall) of the model fitted on the full
         training set.
      2. cv_metrics_df: columns stat, model, "1", "2", "3", "4", "5", overall, mse.
         One row with the mean across folds (stat = "mean") and one with the
         standard deviation across folds (stat = "sd"). "mse" is the overall
         mean squared error on the validation fold.

    Side effects:
      Prints the training loss after each epoch of the full fit (CV fits are silent).
    """
    def to_arrays(subj_df):
        X = np.stack(subj_df["X"].values, axis=0)
        y = subj_df["SI_mean"].values
        if use_sample_weights:
            w = subj_df["sample_weight"].values
        else:
            w = np.ones_like(y, dtype=np.float32)
        return X, y, w

    # Process subject-level training and testing sets.
    train_df, _ = create_subject_dataset(data_dict['train'], outcome_col="SI_mean")
    test_df, _ = create_subject_dataset(data_dict['test'], outcome_col="SI_mean")

    X_train, y_train, w_train = to_arrays(train_df)
    X_test, y_test, _ = to_arrays(test_df)

    input_size = X_train.shape[1]  # number of features
    seq_len = X_train.shape[2]     # number of timepoints
    loss_fn = nn.MSELoss(reduction="none")  # per-sample losses so weights can be applied

    # --- Full Training on the Provided Training Set ---
    model = _fit_si_mean_lstm(X_train, y_train, w_train, input_size, seq_len, loss_fn,
                              num_epochs, batch_size, use_sample_weights, log_prefix="SI_mean")

    metric_rows = []
    for split_name, X_split, y_split in (("train", X_train, y_train), ("test", X_test, y_test)):
        preds = _predict_si_mean(model, X_split)
        metric_rows.append({
            "model": model_name,
            "data": split_name,
            **_si_mean_rmse_by_bin(preds, y_split),
            "overall": _si_mean_rmse(preds, y_split),
        })
    metrics_df = pd.DataFrame(metric_rows)

    # --- 5-fold Stratified Cross Validation on the Training Set ---
    cv_splits = get_stratified_cv_splits(train_df, subject_id="PatientID", target_var="SI_mean", n_splits=5)
    cv_results = []
    for cv_train_df, cv_val_df in cv_splits:
        X_cv_train, y_cv_train, w_cv_train = to_arrays(cv_train_df)
        X_cv_val, y_cv_val, _ = to_arrays(cv_val_df)

        model_cv = _fit_si_mean_lstm(X_cv_train, y_cv_train, w_cv_train, input_size, seq_len, loss_fn,
                                     num_epochs, batch_size, use_sample_weights)
        val_preds = _predict_si_mean(model_cv, X_cv_val)

        cv_results.append({
            "overall": _si_mean_rmse(val_preds, y_cv_val),
            "mse": np.mean((val_preds - y_cv_val.reshape(-1, 1)) ** 2),
            **_si_mean_rmse_by_bin(val_preds, y_cv_val),
        })

    cv_results_df = pd.DataFrame(cv_results)
    cv_mean = cv_results_df.mean()
    cv_std = cv_results_df.std()

    # Two rows: one for the mean ("stat" = "mean") and one for the sd ("stat" = "sd").
    stat_cols = ["1", "2", "3", "4", "5", "overall", "mse"]
    cv_metrics_df = pd.DataFrame([
        {"stat": "mean", "model": model_name, **{col: cv_mean[col] for col in stat_cols}},
        {"stat": "sd", "model": model_name, **{col: cv_std[col] for col in stat_cols}},
    ])

    return metrics_df, cv_metrics_df


In [7]:
# Re-seed before every run: training draws random initial weights, shuffles
# batches and applies dropout, so without this a fresh Restart & Run All
# produces different metrics each time. Seeding per run (rather than once)
# also makes each result independent of the order the runs are executed in.
set_seed(42)
comb_reg_base_w = base_learner_SI_mean(comb_reg_dict, num_epochs = 10, batch_size = 32, use_sample_weights=True, model_name="comb_base_weighted")
set_seed(42)
comb_reg_base_nw = base_learner_SI_mean(comb_reg_dict, num_epochs = 10, batch_size = 32, use_sample_weights=False, model_name="comb_base")

set_seed(42)
fitbit_reg_base_w = base_learner_SI_mean(fitbit_reg_dict, num_epochs = 10, batch_size = 32, use_sample_weights=True, model_name="fitbit_base_weighted")
set_seed(42)
fitbit_reg_base_nw = base_learner_SI_mean(fitbit_reg_dict, num_epochs = 10, batch_size = 32, use_sample_weights=False, model_name="fitbit_base")


SI_mean - Epoch 1/10, Loss: 10.0719
SI_mean - Epoch 2/10, Loss: 7.3971
SI_mean - Epoch 3/10, Loss: 4.9292
SI_mean - Epoch 4/10, Loss: 2.9679
SI_mean - Epoch 5/10, Loss: 2.0076
SI_mean - Epoch 6/10, Loss: 1.8832
SI_mean - Epoch 7/10, Loss: 1.6193
SI_mean - Epoch 8/10, Loss: 1.8980
SI_mean - Epoch 9/10, Loss: 1.9266
SI_mean - Epoch 10/10, Loss: 1.7592
SI_mean - Epoch 1/10, Loss: 1.0879
SI_mean - Epoch 2/10, Loss: 0.5766
SI_mean - Epoch 3/10, Loss: 0.4997
SI_mean - Epoch 4/10, Loss: 0.4726
SI_mean - Epoch 5/10, Loss: 0.4523
SI_mean - Epoch 6/10, Loss: 0.4501
SI_mean - Epoch 7/10, Loss: 0.4415
SI_mean - Epoch 8/10, Loss: 0.4362
SI_mean - Epoch 9/10, Loss: 0.4078
SI_mean - Epoch 10/10, Loss: 0.4112
SI_mean - Epoch 1/10, Loss: 9.6233
SI_mean - Epoch 2/10, Loss: 7.7155
SI_mean - Epoch 3/10, Loss: 6.0528
SI_mean - Epoch 4/10, Loss: 3.6338
SI_mean - Epoch 5/10, Loss: 2.0604
SI_mean - Epoch 6/10, Loss: 2.2771
SI_mean - Epoch 7/10, Loss: 1.7369
SI_mean - Epoch 8/10, Loss: 1.6627
SI_mean - Epoch 9

### Save base learner results

In [8]:
# Train/test RMSE tables (element 0 of each base-learner result), stacked row-wise.
reg_base_results = pd.concat([comb_reg_base_w[0], comb_reg_base_nw[0], fitbit_reg_base_w[0], fitbit_reg_base_nw[0]], axis=0)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("results", exist_ok=True)
reg_base_results.to_csv("results/reg_base_learner.tsv", sep="\t", index=False)

### Save regression base learner cross-validation results

In [8]:
# Cross-validation mean/sd tables (element 1 of each base-learner result), stacked row-wise.
reg_base_val_results = pd.concat([comb_reg_base_w[1], comb_reg_base_nw[1], fitbit_reg_base_w[1], fitbit_reg_base_nw[1]], axis=0)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("results", exist_ok=True)
reg_base_val_results.to_csv("results/reg_base_val_learner.tsv", sep="\t", index=False)

### Classification Model for base learner

In [10]:
def _fit_is_si_lstm(X, y, w, input_size, seq_len, loss_fn, num_epochs, batch_size,
                    use_sample_weights, log_prefix=None):
    """Train a fresh LSTMClassification on (X, y, w) and return it in eval mode.

    When `log_prefix` is given, the sample-weighted mean batch loss is printed
    after every epoch; otherwise training is silent.
    """
    model = LSTMClassification(input_size, seq_len).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    dataset = torch.utils.data.TensorDataset(
        torch.tensor(X, dtype=torch.float32),
        torch.tensor(y, dtype=torch.float32).view(-1, 1),
        torch.tensor(w, dtype=torch.float32),
    )
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=0)

    model.train()
    for ep in range(num_epochs):
        epoch_loss = 0.0
        for X_batch, y_batch, weight_batch in loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            weight_batch = weight_batch.to(device)
            optimizer.zero_grad()
            loss_per_sample = loss_fn(model(X_batch), y_batch).view(-1)
            # Weighted loss is mean(w * loss); it is not renormalised by sum(w).
            if use_sample_weights:
                loss = (loss_per_sample * weight_batch).mean()
            else:
                loss = loss_per_sample.mean()
            loss.backward()
            optimizer.step()
            if log_prefix is not None:
                epoch_loss += loss.item() * X_batch.size(0)
        if log_prefix is not None:
            print(f"{log_prefix} - Epoch {ep+1}/{num_epochs}, Loss: {epoch_loss/len(dataset):.4f}")

    model.eval()
    return model


def _is_si_logits(model, X):
    """Run `model` on the whole array X without gradients; returns (n, 1) logits on `device`."""
    with torch.no_grad():
        return model(torch.tensor(X, dtype=torch.float32).to(device))


def _is_si_metrics(probs, y):
    """Accuracy, sensitivity, specificity and AUC for (n, 1) probabilities vs. 0/1 labels.

    Predictions use a 0.5 threshold. Sensitivity/specificity are NaN when the
    corresponding class is absent; AUC is NaN when roc_auc_score raises ValueError.
    """
    y_col = y.reshape(-1, 1)
    preds = (probs >= 0.5).astype(np.float32)
    TP = np.sum((preds == 1) & (y_col == 1))
    FN = np.sum((preds == 0) & (y_col == 1))
    TN = np.sum((preds == 0) & (y_col == 0))
    FP = np.sum((preds == 1) & (y_col == 0))
    try:
        auc = roc_auc_score(y, probs.flatten())
    except ValueError:
        auc = np.nan
    return {
        "accuracy": np.mean(preds == y_col),
        "sensitivity": TP / (TP + FN) if (TP + FN) > 0 else np.nan,
        "specificity": TN / (TN + FP) if (TN + FP) > 0 else np.nan,
        "AUC": auc,
    }


def base_learner_is_SI(data_dict, model_name, num_epochs, batch_size, use_sample_weights=False):
    """
    Trains an LSTM classifier to predict is_SI.

    data_dict['train'] and data_dict['test'] are each passed through
    create_subject_dataset (outcome_col="is_SI"), giving one "X" array of
    shape (n_features, timepoints) per subject.

    If use_sample_weights=True, each sample's binary cross-entropy is multiplied
    by its "sample_weight" during training. Weights are never used for evaluation.

    The function then repeats the same training procedure inside a 5-fold
    stratified cross validation on the training subjects (folds from
    get_stratified_cv_splits, stratified on is_SI).

    Parameters:
      data_dict : dict
          Must provide 'train' and 'test' long-format DataFrames.
      model_name : str
          Label written to the "model" column of both result tables.
      num_epochs : int
          Training epochs for the full fit and for every CV fold.
      batch_size : int
          Mini-batch size.
      use_sample_weights : bool
          Whether to weight the training loss.

    Returns two DataFrames:
      1. metrics_df: columns model, data, accuracy, sensitivity, specificity, AUC.
         One row for the training split and one for the test split, from the model
         fitted on the full training set.
      2. cv_metrics_df: columns stat, model, accuracy, sensitivity, specificity, AUC, bse.
         One row with the mean across folds (stat = "mean") and one with the
         standard deviation across folds (stat = "sd"). The "bse" column holds the
         mean binary cross-entropy on the validation fold.

    Side effects:
      Prints the training loss after each epoch of the full fit (CV fits are silent).
    """
    def to_arrays(subj_df):
        X = np.stack(subj_df["X"].values, axis=0)
        y = subj_df["is_SI"].values.astype(np.float32)
        if use_sample_weights:
            w = subj_df["sample_weight"].values
        else:
            w = np.ones_like(y, dtype=np.float32)
        return X, y, w

    # --- Prepare the Data ---
    train_df, _ = create_subject_dataset(data_dict['train'], outcome_col="is_SI")
    test_df, _ = create_subject_dataset(data_dict['test'], outcome_col="is_SI")

    X_train, y_train, w_train = to_arrays(train_df)
    X_test, y_test, _ = to_arrays(test_df)

    input_size = X_train.shape[1]
    seq_len = X_train.shape[2]
    loss_fn = nn.BCEWithLogitsLoss(reduction="none")  # per-sample losses so weights can be applied

    # --- Full Training on the Provided Training Set ---
    model = _fit_is_si_lstm(X_train, y_train, w_train, input_size, seq_len, loss_fn,
                            num_epochs, batch_size, use_sample_weights, log_prefix="is_SI")

    metric_rows = []
    for split_name, X_split, y_split in (("train", X_train, y_train), ("test", X_test, y_test)):
        probs = torch.sigmoid(_is_si_logits(model, X_split)).cpu().numpy()
        metric_rows.append({"model": model_name, "data": split_name, **_is_si_metrics(probs, y_split)})
    metrics_df = pd.DataFrame(metric_rows)

    # --- 5-fold Stratified Cross Validation on the Training Set ---
    cv_splits = get_stratified_cv_splits(train_df, subject_id="PatientID", target_var="is_SI", n_splits=5)
    cv_fold_metrics = []
    for cv_train_df, cv_val_df in cv_splits:
        X_cv_train, y_cv_train, w_cv_train = to_arrays(cv_train_df)
        X_cv_val, y_cv_val, _ = to_arrays(cv_val_df)

        model_cv = _fit_is_si_lstm(X_cv_train, y_cv_train, w_cv_train, input_size, seq_len, loss_fn,
                                   num_epochs, batch_size, use_sample_weights)

        cv_logits = _is_si_logits(model_cv, X_cv_val)
        with torch.no_grad():
            # Unweighted mean binary cross-entropy on the validation fold.
            y_val_tensor = torch.tensor(y_cv_val, dtype=torch.float32).view(-1, 1).to(device)
            cv_bse = loss_fn(cv_logits, y_val_tensor).mean().item()
            cv_probs = torch.sigmoid(cv_logits).cpu().numpy()

        cv_fold_metrics.append({**_is_si_metrics(cv_probs, y_cv_val), "bse": cv_bse})

    cv_results_df = pd.DataFrame(cv_fold_metrics)
    cv_mean = cv_results_df.mean()
    cv_std = cv_results_df.std()

    # Two rows: one for the mean metrics and one for the standard deviation.
    stat_cols = ["accuracy", "sensitivity", "specificity", "AUC", "bse"]
    cv_metrics_df = pd.DataFrame([
        {"stat": "mean", "model": model_name, **{col: cv_mean[col] for col in stat_cols}},
        {"stat": "sd", "model": model_name, **{col: cv_std[col] for col in stat_cols}},
    ])

    return metrics_df, cv_metrics_df


In [11]:
# Re-seed before every run: training draws random initial weights, shuffles
# batches and applies dropout, so without this a fresh Restart & Run All
# produces different metrics each time. Seeding per run (rather than once)
# also makes each result independent of the order the runs are executed in.
set_seed(42)
comb_class_base_w = base_learner_is_SI(comb_class_dict, model_name="comb_base_weighted", num_epochs=10, batch_size=32, use_sample_weights=True)
set_seed(42)
comb_class_base_nw = base_learner_is_SI(comb_class_dict, model_name="comb_base", num_epochs = 10, batch_size = 32, use_sample_weights=False)

set_seed(42)
fitbit_class_base_w = base_learner_is_SI(fitbit_class_dict, model_name="fitbit_base_weighted", num_epochs=10, batch_size=32, use_sample_weights=True)
set_seed(42)
fitbit_class_base_nw = base_learner_is_SI(fitbit_class_dict, model_name="fitbit_base", num_epochs = 10, batch_size = 32, use_sample_weights=False)

is_SI - Epoch 1/10, Loss: 0.6720
is_SI - Epoch 2/10, Loss: 0.5074
is_SI - Epoch 3/10, Loss: 0.3732
is_SI - Epoch 4/10, Loss: 0.3023
is_SI - Epoch 5/10, Loss: 0.2839
is_SI - Epoch 6/10, Loss: 0.2682
is_SI - Epoch 7/10, Loss: 0.2588
is_SI - Epoch 8/10, Loss: 0.2731
is_SI - Epoch 9/10, Loss: 0.2638
is_SI - Epoch 10/10, Loss: 0.2611
is_SI - Epoch 1/10, Loss: 0.6111
is_SI - Epoch 2/10, Loss: 0.5508
is_SI - Epoch 3/10, Loss: 0.5332
is_SI - Epoch 4/10, Loss: 0.5321
is_SI - Epoch 5/10, Loss: 0.5224
is_SI - Epoch 6/10, Loss: 0.5106
is_SI - Epoch 7/10, Loss: 0.5126
is_SI - Epoch 8/10, Loss: 0.5059
is_SI - Epoch 9/10, Loss: 0.5038
is_SI - Epoch 10/10, Loss: 0.4927
is_SI - Epoch 1/10, Loss: 0.5862
is_SI - Epoch 2/10, Loss: 0.4545
is_SI - Epoch 3/10, Loss: 0.3214
is_SI - Epoch 4/10, Loss: 0.2866
is_SI - Epoch 5/10, Loss: 0.2758
is_SI - Epoch 6/10, Loss: 0.2760
is_SI - Epoch 7/10, Loss: 0.2722
is_SI - Epoch 8/10, Loss: 0.2624
is_SI - Epoch 9/10, Loss: 0.2615
is_SI - Epoch 10/10, Loss: 0.2604
is_SI -

### Save classification base learner results

In [17]:
# Train/test metric tables (element 0 of each base-learner result), stacked row-wise.
class_base_results = pd.concat([comb_class_base_w[0], comb_class_base_nw[0], fitbit_class_base_w[0], fitbit_class_base_nw[0]], axis=0)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("results", exist_ok=True)
class_base_results.to_csv("results/class_base_learner.tsv", sep="\t", index=False)

### Save classification base learner cross-validation results

In [12]:
# Cross-validation mean/sd tables (element 1 of each base-learner result), stacked row-wise.
class_base_val_results = pd.concat([comb_class_base_w[1], comb_class_base_nw[1], fitbit_class_base_w[1], fitbit_class_base_nw[1]], axis=0)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("results", exist_ok=True)
class_base_val_results.to_csv("results/class_base_val_learner.tsv", sep="\t", index=False)