In [1]:
!pip install optuna grande



In [2]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.metrics import accuracy_score, matthews_corrcoef, make_scorer
from scipy.stats import shapiro
import optuna
from GRANDE import GRANDE

In [3]:
def prepare_train_test_dataset(datasets, test_idx):
  """Split a list of fold DataFrames into train/test arrays for one CV round.

  Args:
    datasets: sequence of per-fold DataFrames.
    test_idx: position of the fold to hold out. Negative positions count
      from the end, as in ordinary sequence indexing.

  Returns:
    (X_train, X_test, y_train, y_test) as NumPy arrays. The features are
    every column except the first and the last; the label is the last column.

  Raises:
    IndexError: if test_idx does not address a fold in datasets.
  """
  n_folds = len(datasets)
  if not -n_folds <= test_idx < n_folds:
    raise IndexError(f"test_idx {test_idx} is out of range for {n_folds} folds")

  # Normalise negative positions. Without this, datasets[-1] would be used as
  # the test fold while the `i != test_idx` filter below would never match it,
  # so the held-out fold would also end up in the training data.
  test_idx %= n_folds

  test_df = datasets[test_idx]
  train_df = pd.concat([fold for i, fold in enumerate(datasets) if i != test_idx])

  # Column 0 is skipped and the last column is the target.
  X_train = train_df.iloc[:, 1:-1].values
  X_test = test_df.iloc[:, 1:-1].values
  y_train = train_df.iloc[:, -1].values
  y_test = test_df.iloc[:, -1].values

  return X_train, X_test, y_train, y_test

In [4]:
# Baseline GRANDE configuration (defaults based on the paper and GitHub repo).
params = dict(
    # Ensemble shape
    depth=5,                      # depth of each tree
    n_estimators=2048,            # how many trees / estimators

    # Separate learning rates per parameter group
    learning_rate_weights=0.005,  # leaf weights
    learning_rate_index=0.01,     # split indices
    learning_rate_values=0.01,    # split values
    learning_rate_leaf=0.01,      # leaf logits

    optimizer='adam',
    cosine_decay_steps=0,         # decay steps of the lr schedule (CosineDecayRestarts)

    # 'crossentropy' is the default for binary and multi-class classification, 'mse' for regression
    loss='crossentropy',
    focal_loss=False,             # whether to use focal loss
    temperature=0.0,              # temperature for stochastic re-weighted GD, range (0.0, 1.0)

    from_logits=True,             # whether logits are used for weighting
    use_class_weights=True,       # whether class weights are used for training

    # Dropout here randomly disables individual estimators of the ensemble during training
    dropout=0.0,

    selected_variables=0.8,       # fraction of features per subset, range (0.0, 1.0)
    data_subset_fraction=1.0,     # fraction of samples per subset, range (0.0, 1.0)
)

# Training settings passed to GRANDE alongside `params`.
args = dict(
    epochs=1000,                  # number of training epochs
    early_stopping_epochs=25,
    batch_size=32,
    cat_idx=[],                   # list of categorical feature indices
    objective='binary',           # task: 'binary', 'classification' or 'regression'

    random_seed=42,
    verbose=0,
)

In [5]:
import torch

def train_with_base_grande(datasets, key):
  """Cross-validate GRANDE with the module-level default `params`/`args`.

  Each fold of `datasets[key]` is held out once; a fresh model is trained on
  the remaining folds and scored on the held-out one.

  Args:
    datasets: mapping from dataset key to its list of per-fold DataFrames.
    key: which entry of `datasets` to evaluate.

  Returns:
    DataFrame with one row per fold and the columns 'Fold', 'Accuracy' and
    'MCC'. The per-fold scores and their mean / standard deviation are also
    printed.
  """
  print("Currently training GRANDE model with dataset key:", key)

  dataset_list = datasets[key]
  fold_rows = []

  # One round per fold, so the number of rounds follows the data instead of
  # being fixed at 10.
  for i in range(len(dataset_list)):
    X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, i)

    # NOTE(review): the held-out fold is also passed as the validation data.
    # If GRANDE uses it for early stopping (args sets 'early_stopping_epochs'),
    # the scores below are optimistically biased - confirm and consider a
    # separate validation split.
    model = GRANDE(params=params, args=args)
    model.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Reduce the prediction matrix to class labels once and reuse it.
    y_pred_labels = np.argmax(model.predict(X_test), axis=1)
    accuracy = accuracy_score(y_test, y_pred_labels)
    mcc = matthews_corrcoef(y_test, y_pred_labels)

    print("Fold", i+1, "Accuracy:", accuracy, "MCC:", mcc)
    fold_rows.append({'Fold': i+1, 'Accuracy': accuracy, 'MCC': mcc})

  # Build the frame once from the collected rows.
  results_df = pd.DataFrame(fold_rows, columns=['Fold', 'Accuracy', 'MCC'])

  print()

  print("Mean Accuracy:", results_df['Accuracy'].mean(), "Standard Deviation:", results_df['Accuracy'].std())
  print("Mean MCC:", results_df['MCC'].mean(), "Standard Deviation:", results_df['MCC'].std())

  # Callers assign the result to `<key>_results_df`, so hand the table back.
  return results_df

In [6]:
def create_objective_for_grande(X_train, X_test, y_train, y_test):
  """Build an Optuna objective that trains GRANDE on one fixed split.

  The returned callable samples a configuration from the trial, fits a GRANDE
  model on (X_train, y_train) with (X_test, y_test) passed as the validation
  data, and returns the Matthews correlation coefficient of its predictions
  on X_test.
  """
  def objective(trial):
    # Sampled hyperparameters, requested from the trial in a fixed order.
    depth = trial.suggest_int('depth', 3, 10)
    n_estimators = trial.suggest_int('n_estimators', 512, 2048, step=256)
    lr_weights = trial.suggest_float('learning_rate_weights', 1e-3, 1e-1, log=True)
    lr_leaf = trial.suggest_float('learning_rate_leaf', 1e-3, 1e-1, log=True)
    feature_fraction = trial.suggest_float('selected_variables', 0.5, 1.0)

    # Full model configuration: the sampled values plus the settings that
    # stay constant across trials.
    model_params = {
        'depth': depth,
        'n_estimators': n_estimators,

        'learning_rate_weights': lr_weights,
        'learning_rate_index': 0.01,
        'learning_rate_values': 0.01,
        'learning_rate_leaf': lr_leaf,

        'optimizer': 'adam',
        'cosine_decay_steps': 0,

        'loss': 'crossentropy',
        'focal_loss': False,
        'temperature': 0.0,

        'from_logits': True,
        'use_class_weights': True,

        'dropout': 0.0,

        'selected_variables': feature_fraction,
        'data_subset_fraction': 1.0,
    }

    # Training settings used for every trial.
    training_args = {
        'epochs': 100,
        'early_stopping_epochs': 10,
        'batch_size': 64,
        'cat_idx': [],
        'objective': 'binary',

        'random_seed': 42,
        'verbose': 0,
    }

    candidate = GRANDE(params=model_params, args=training_args)
    candidate.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Class labels are the argmax over axis 1 of the prediction matrix.
    predicted_labels = np.argmax(candidate.predict(X_test), axis=1)

    # The value Optuna optimises is the MCC on X_test.
    return matthews_corrcoef(y_test, predicted_labels)

  return objective

In [7]:
import time
def train_with_best_hyperparameters(datasets, key):
  """Tune GRANDE with Optuna, then cross-validate the best configuration.

  The search runs 10 trials on the split that holds out the first fold. The
  best trial's values are then used to train and score one model per fold.

  Args:
    datasets: mapping from dataset key to its list of per-fold DataFrames.
    key: which entry of `datasets` to evaluate.

  Returns:
    DataFrame with one row per fold and the columns 'Fold', 'Accuracy' and
    'MCC'. The per-fold scores, their mean / standard deviation and the total
    run time are also printed.
  """
  start_time = time.time()
  print("Currently training GRANDE model with dataset key:", key)

  dataset_list = datasets[key]

  # NOTE(review): the search is scored on the first fold's held-out data, and
  # that same fold is evaluated again in the cross-validation below, so its
  # score is optimistically biased - consider a separate tuning split.
  X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, 0)

  objective_function = create_objective_for_grande(X_train, X_test, y_train, y_test)

  study = optuna.create_study(direction='maximize')
  study.optimize(objective_function, n_trials=10, gc_after_trial=True)

  best_params = study.best_params
  print("Best hyperparameters:", best_params)

  # study.best_params only contains the values sampled through trial.suggest_*.
  # Layer them over the default `params` so the final models also receive the
  # fixed settings (optimizer, loss, remaining learning rates, ...) instead of
  # an incomplete configuration.
  final_params = {**params, **best_params}

  # NOTE(review): the final models are trained with the module-level `args`,
  # whereas the trials use the args defined inside create_objective_for_grande
  # - confirm that the difference is intended.
  fold_rows = []
  for i in range(len(dataset_list)):
    X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, i)

    # Train the model
    model = GRANDE(params=final_params, args=args)
    model.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Reduce the prediction matrix to class labels once and reuse it.
    y_pred_labels = np.argmax(model.predict(X_test), axis=1)
    accuracy = accuracy_score(y_test, y_pred_labels)
    mcc = matthews_corrcoef(y_test, y_pred_labels)

    print("Fold", i+1, "Accuracy:", accuracy, "MCC:", mcc)
    fold_rows.append({'Fold': i+1, 'Accuracy': accuracy, 'MCC': mcc})

  # Build the frame once from the collected rows.
  results_df = pd.DataFrame(fold_rows, columns=['Fold', 'Accuracy', 'MCC'])

  print()

  print("Mean Accuracy:", results_df['Accuracy'].mean(), "Standard Deviation:", results_df['Accuracy'].std())
  print("Mean MCC:", results_df['MCC'].mean(), "Standard Deviation:", results_df['MCC'].std())

  elapsed_time = time.time() - start_time
  print(f"Total execution time: {elapsed_time:.2f} seconds")

  # Callers assign the result to `<key>_results_df`, so hand the table back.
  return results_df

In [8]:
# Load the datasets

# Root folder holding the per-fold CSV files. Every file lives at
# <DATA_ROOT>/<subset>/cv/<variant>/fold_<n>.csv
DATA_ROOT = '/content/drive/MyDrive/OrderedDataset/II'
SUBSET_NAMES = ['all', '550', '450', '350']
VARIANT_NAMES = ['nopcc', 'pcc95', 'pcc75']
N_FOLDS = 10

# Maps a key such as 'II_all_nopcc' to the list of its fold DataFrames,
# ordered fold_1 .. fold_10.
datasets = {}

for subset_name in SUBSET_NAMES:
  for variant_name in VARIANT_NAMES:
    dataset_key = f'II_{subset_name}_{variant_name}'
    fold_frames = [
        pd.read_csv(f'{DATA_ROOT}/{subset_name}/cv/{variant_name}/fold_{fold_no}.csv')
        for fold_no in range(1, N_FOLDS + 1)
    ]
    datasets[dataset_key] = fold_frames

    # Keep the former per-fold globals (e.g. II_all_nopcc_1) bound, in case a
    # later cell still refers to them by name.
    globals().update({
        f'{dataset_key}_{fold_no}': fold_frame
        for fold_no, fold_frame in enumerate(fold_frames, start=1)
    })


In [9]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [10]:
# Dataset keys evaluated in this cell, in run order. 'II_350_pcc75' is not in
# this list; it is run on its own in the following cell.
BASE_RUN_KEYS = [
    'II_all_nopcc', 'II_all_pcc95', 'II_all_pcc75',
    'II_550_nopcc', 'II_550_pcc95', 'II_550_pcc75',
    'II_450_nopcc', 'II_450_pcc95', 'II_450_pcc75',
    'II_350_nopcc', 'II_350_pcc95',
]

# Result of each baseline run, keyed by dataset key.
base_results = {}

for dataset_key in BASE_RUN_KEYS:
  base_results[dataset_key] = train_with_base_grande(datasets, dataset_key)
  # Keep the former `<key>_results_df` globals bound as well, in case a later
  # cell still refers to them by name.
  globals()[f'{dataset_key}_results_df'] = base_results[dataset_key]
  print("\n---------------------------------------------------------------------\n")

Currently training GRANDE model with dataset key: II_all_nopcc
Fold 1 Accuracy: 0.9105691056910569 MCC: 0.8215727622950508
Fold 2 Accuracy: 0.8938775510204081 MCC: 0.7882000630560075




Fold 3 Accuracy: 0.9020408163265307 MCC: 0.8050834528552104
Fold 4 Accuracy: 0.8775510204081632 MCC: 0.7575464274320031
Fold 5 Accuracy: 0.8489795918367347 MCC: 0.6999352236366774
Fold 6 Accuracy: 0.8530612244897959 MCC: 0.7096454789053316
Fold 7 Accuracy: 0.9061224489795918 MCC: 0.8122625809441117
Fold 8 Accuracy: 0.8938775510204081 MCC: 0.7916409242608335
Fold 9 Accuracy: 0.8571428571428571 MCC: 0.7153838111123045
Fold 10 Accuracy: 0.8653061224489796 MCC: 0.7306231272473168

Mean Accuracy: 0.8808528289364526 Standard Deviation: 0.023363955351980385
Mean MCC: 0.7631893851744849 Standard Deviation: 0.046251426581778325

---------------------------------------------------------------------

Currently training GRANDE model with dataset key: II_all_pcc95
Fold 1 Accuracy: 0.8617886178861789 MCC: 0.7259805147281762
Fold 2 Accuracy: 0.8938775510204081 MCC: 0.7930419145550602
Fold 3 Accuracy: 0.9102040816326531 MCC: 0.8244391024030343
Fold 4 Accuracy: 0.8979591836734694 MCC: 0.795934690204752

In [11]:
# Baseline GRANDE cross-validation for 'II_350_pcc75', the one dataset key
# that the batch cell above does not run.
II_350_pcc75_results_df = train_with_base_grande(datasets, 'II_350_pcc75')

Currently training GRANDE model with dataset key: II_350_pcc75
Fold 1 Accuracy: 0.9571428571428572 MCC: 0.9146591207600471
Fold 2 Accuracy: 0.8428571428571429 MCC: 0.6882472016116853
Fold 3 Accuracy: 0.9428571428571428 MCC: 0.8871639008998208
Fold 4 Accuracy: 0.9 MCC: 0.8003267306650412
Fold 5 Accuracy: 0.8857142857142857 MCC: 0.7726911394933923
Fold 6 Accuracy: 0.9285714285714286 MCC: 0.8574929257125441
Fold 7 Accuracy: 0.8571428571428571 MCC: 0.7154547587901781
Fold 8 Accuracy: 0.9285714285714286 MCC: 0.8603090020146066
Fold 9 Accuracy: 0.9571428571428572 MCC: 0.9146591207600471
Fold 10 Accuracy: 0.927536231884058 MCC: 0.8581191265318578

Mean Accuracy: 0.9127536231884058 Standard Deviation: 0.040052489825552394
Mean MCC: 0.8269123027239221 Standard Deviation: 0.0798270106485287


In [12]:
# Optuna search followed by cross-validation with the best trial's
# hyperparameters, for 'II_all_nopcc'. This rebinds the name used by the
# baseline run above.
II_all_nopcc_results_df = train_with_best_hyperparameters(datasets, 'II_all_nopcc')

[I 2025-01-02 15:46:50,703] A new study created in memory with name: no-name-5aad1fcf-ed7e-4540-beb1-3c740738a5eb


Currently training GRANDE model with dataset key: II_all_nopcc


[I 2025-01-02 15:47:08,341] Trial 0 finished with value: 0.7485862381397372 and parameters: {'depth': 6, 'n_estimators': 1792, 'learning_rate_weights': 0.03850303501184741, 'learning_rate_leaf': 0.029751027378032504, 'selected_variables': 0.5342705216865247}. Best is trial 0 with value: 0.7485862381397372.
[I 2025-01-02 15:47:43,861] Trial 1 finished with value: 0.7805136005849247 and parameters: {'depth': 8, 'n_estimators': 768, 'learning_rate_weights': 0.007147121711434555, 'learning_rate_leaf': 0.012666172992233559, 'selected_variables': 0.9395924663414132}. Best is trial 1 with value: 0.7805136005849247.
[I 2025-01-02 15:48:01,164] Trial 2 finished with value: 0.6599339193905432 and parameters: {'depth': 6, 'n_estimators': 512, 'learning_rate_weights': 0.0525027625180064, 'learning_rate_leaf': 0.0038263565981313335, 'selected_variables': 0.6008814408835395}. Best is trial 1 with value: 0.7805136005849247.
[I 2025-01-02 15:48:18,552] Trial 3 finished with value: 0.7817548103813475 a

Best hyperparameters: {'depth': 6, 'n_estimators': 1024, 'learning_rate_weights': 0.00420683701059734, 'learning_rate_leaf': 0.002971042201509535, 'selected_variables': 0.9896602740311217}
Fold 1 Accuracy: 0.8780487804878049 MCC: 0.7561975342090198
Fold 2 Accuracy: 0.8775510204081632 MCC: 0.7555333937760073
Fold 3 Accuracy: 0.8734693877551021 MCC: 0.7475995260511742
Fold 4 Accuracy: 0.8734693877551021 MCC: 0.7480985164268196
Fold 5 Accuracy: 0.8244897959183674 MCC: 0.6490070638411302
Fold 6 Accuracy: 0.8448979591836735 MCC: 0.6944815819209769
Fold 7 Accuracy: 0.8938775510204081 MCC: 0.7877515660402505
Fold 8 Accuracy: 0.8734693877551021 MCC: 0.7512830316251354
Fold 9 Accuracy: 0.8612244897959184 MCC: 0.7224443555910969
Fold 10 Accuracy: 0.8408163265306122 MCC: 0.683379870897112

Mean Accuracy: 0.8641314086610252 Standard Deviation: 0.021117744080987124
Mean MCC: 0.7295776440378723 Standard Deviation: 0.04191781577030156


In [13]:
# Optuna search followed by cross-validation with the best trial's
# hyperparameters, for 'II_all_pcc95'.
II_all_pcc95_results_df = train_with_best_hyperparameters(datasets, 'II_all_pcc95')

[I 2025-01-02 15:59:07,498] A new study created in memory with name: no-name-1d22081c-7d1a-482a-916a-f9e0e4df90a4


Currently training GRANDE model with dataset key: II_all_pcc95


[I 2025-01-02 16:00:14,633] Trial 0 finished with value: 0.7988894471393801 and parameters: {'depth': 8, 'n_estimators': 1792, 'learning_rate_weights': 0.0072624536373121825, 'learning_rate_leaf': 0.0026561588580164934, 'selected_variables': 0.5647446639876308}. Best is trial 0 with value: 0.7988894471393801.
[I 2025-01-02 16:00:35,447] Trial 1 finished with value: 0.7407192072666816 and parameters: {'depth': 7, 'n_estimators': 1536, 'learning_rate_weights': 0.05199037684166296, 'learning_rate_leaf': 0.03449193270135265, 'selected_variables': 0.8875622516220596}. Best is trial 0 with value: 0.7988894471393801.
[I 2025-01-02 16:00:56,776] Trial 2 finished with value: 0.7564976920142548 and parameters: {'depth': 6, 'n_estimators': 2048, 'learning_rate_weights': 0.0013527046632995307, 'learning_rate_leaf': 0.0487268725946014, 'selected_variables': 0.8187460860911719}. Best is trial 0 with value: 0.7988894471393801.
[I 2025-01-02 16:01:18,722] Trial 3 finished with value: 0.616392090901922

Best hyperparameters: {'depth': 8, 'n_estimators': 1792, 'learning_rate_weights': 0.0072624536373121825, 'learning_rate_leaf': 0.0026561588580164934, 'selected_variables': 0.5647446639876308}
Fold 1 Accuracy: 0.8983739837398373 MCC: 0.7988894471393801
Fold 2 Accuracy: 0.8857142857142857 MCC: 0.7740827743407135
Fold 3 Accuracy: 0.8857142857142857 MCC: 0.7752418351897331
Fold 4 Accuracy: 0.8693877551020408 MCC: 0.7388870392949923
Fold 5 Accuracy: 0.8693877551020408 MCC: 0.7391305991215069
Fold 6 Accuracy: 0.8816326530612245 MCC: 0.7635139791380653
Fold 7 Accuracy: 0.8653061224489796 MCC: 0.7306410769025723
Fold 8 Accuracy: 0.8653061224489796 MCC: 0.7336682643918102
Fold 9 Accuracy: 0.8408163265306122 MCC: 0.6816394550292398
Fold 10 Accuracy: 0.8816326530612245 MCC: 0.7638586973771658

Mean Accuracy: 0.8743271942923512 Standard Deviation: 0.015912444787590282
Mean MCC: 0.7499553167925179 Standard Deviation: 0.03255470410757685


In [9]:
# Optuna search followed by cross-validation with the best trial's
# hyperparameters, for 'II_all_pcc75'.
II_all_pcc75_results_df = train_with_best_hyperparameters(datasets, 'II_all_pcc75')

[I 2025-01-02 16:27:55,320] A new study created in memory with name: no-name-0cebaae3-2119-4273-968d-8f6e40725a33


Currently training GRANDE model with dataset key: II_all_pcc75


[I 2025-01-02 16:28:17,041] Trial 0 finished with value: 0.6748859713908456 and parameters: {'depth': 5, 'n_estimators': 2048, 'learning_rate_weights': 0.02123458512228993, 'learning_rate_leaf': 0.005630249040385152, 'selected_variables': 0.6484599493997552}. Best is trial 0 with value: 0.6748859713908456.
[I 2025-01-02 16:28:34,316] Trial 1 finished with value: 0.5217237205808197 and parameters: {'depth': 3, 'n_estimators': 2048, 'learning_rate_weights': 0.004473109173839802, 'learning_rate_leaf': 0.0012115250732951165, 'selected_variables': 0.5615279638510954}. Best is trial 0 with value: 0.6748859713908456.
[I 2025-01-02 16:29:31,708] Trial 2 finished with value: 0.6137446661558168 and parameters: {'depth': 9, 'n_estimators': 1024, 'learning_rate_weights': 0.09870628824378372, 'learning_rate_leaf': 0.0053339487303748255, 'selected_variables': 0.985020400181644}. Best is trial 0 with value: 0.6748859713908456.
[I 2025-01-02 16:29:50,774] Trial 3 finished with value: 0.691880578216131

Best hyperparameters: {'depth': 6, 'n_estimators': 2048, 'learning_rate_weights': 0.0034191358578967096, 'learning_rate_leaf': 0.020417560809504052, 'selected_variables': 0.5179090448342483}
Fold 1 Accuracy: 0.8658536585365854 MCC: 0.7317315005483669
Fold 2 Accuracy: 0.8938775510204081 MCC: 0.7904587591186666
Fold 3 Accuracy: 0.9020408163265307 MCC: 0.8080400133319339
Fold 4 Accuracy: 0.9020408163265307 MCC: 0.804078368652539
Fold 5 Accuracy: 0.8693877551020408 MCC: 0.7391305991215069
Fold 6 Accuracy: 0.8612244897959184 MCC: 0.7227929054382907
Fold 7 Accuracy: 0.8775510204081632 MCC: 0.7554682928047233
Fold 8 Accuracy: 0.9102040816326531 MCC: 0.8205264929917873
Fold 9 Accuracy: 0.8857142857142857 MCC: 0.7715123796179054
Fold 10 Accuracy: 0.8571428571428571 MCC: 0.7349374211514327

Mean Accuracy: 0.8825037332005975 Standard Deviation: 0.018974074074123444
Mean MCC: 0.7678676732777152 Standard Deviation: 0.035936899869378994


In [10]:
# Optuna search followed by cross-validation with the best trial's
# hyperparameters, for 'II_550_nopcc'.
II_550_nopcc_results_df = train_with_best_hyperparameters(datasets, 'II_550_nopcc')

[I 2025-01-02 16:42:35,966] A new study created in memory with name: no-name-708e7623-fd29-488f-b8ce-641801370d9b


Currently training GRANDE model with dataset key: II_550_nopcc


[I 2025-01-02 16:43:16,078] Trial 0 finished with value: 0.6191034846198453 and parameters: {'depth': 9, 'n_estimators': 1280, 'learning_rate_weights': 0.09673435738718378, 'learning_rate_leaf': 0.013610069768492425, 'selected_variables': 0.8297344388528999}. Best is trial 0 with value: 0.6191034846198453.
[I 2025-01-02 16:43:30,508] Trial 1 finished with value: 0.7823355995993853 and parameters: {'depth': 3, 'n_estimators': 1792, 'learning_rate_weights': 0.0013671848322744514, 'learning_rate_leaf': 0.0044269733189952086, 'selected_variables': 0.5796352159666108}. Best is trial 1 with value: 0.7823355995993853.
[I 2025-01-02 16:43:43,127] Trial 2 finished with value: 0.7051561380739453 and parameters: {'depth': 3, 'n_estimators': 512, 'learning_rate_weights': 0.0024471051264676837, 'learning_rate_leaf': 0.001575796177818471, 'selected_variables': 0.7951543773386223}. Best is trial 1 with value: 0.7823355995993853.
[I 2025-01-02 16:44:07,754] Trial 3 finished with value: 0.8187233019063

Best hyperparameters: {'depth': 8, 'n_estimators': 1536, 'learning_rate_weights': 0.0038103063923061397, 'learning_rate_leaf': 0.053326949165286124, 'selected_variables': 0.8030232376999475}
Fold 1 Accuracy: 0.9090909090909091 MCC: 0.820354226434844
Fold 2 Accuracy: 0.9454545454545454 MCC: 0.8914987065202297
Fold 3 Accuracy: 0.9363636363636364 MCC: 0.8728715609439694
Fold 4 Accuracy: 0.8727272727272727 MCC: 0.7474338507517468
Fold 5 Accuracy: 0.9272727272727272 MCC: 0.8551110042132816
Fold 6 Accuracy: 0.9090909090909091 MCC: 0.8181818181818182
Fold 7 Accuracy: 0.9545454545454546 MCC: 0.9092412093166349
Fold 8 Accuracy: 0.9181818181818182 MCC: 0.8398412548412546
Fold 9 Accuracy: 0.9090909090909091 MCC: 0.8187233019063334
Fold 10 Accuracy: 0.9357798165137615 MCC: 0.8716740137406188

Mean Accuracy: 0.9217597998331943 Standard Deviation: 0.023511364741492125
Mean MCC: 0.844493094685073 Standard Deviation: 0.04647569556150737


In [11]:
# Same tuning + 10-fold evaluation run, this time for the 'II_550_pcc95' dataset key.
# Return value presumably holds the per-fold metrics -- TODO confirm against
# train_with_best_hyperparameters.
II_550_pcc95_results_df = train_with_best_hyperparameters(datasets, 'II_550_pcc95')

[I 2025-01-02 16:54:11,103] A new study created in memory with name: no-name-2d2cbe19-3859-4024-97c0-f88a8c2d00e2


Currently training GRANDE model with dataset key: II_550_pcc95


[I 2025-01-02 16:54:24,091] Trial 0 finished with value: 0.7454545454545455 and parameters: {'depth': 3, 'n_estimators': 1536, 'learning_rate_weights': 0.010328055134735987, 'learning_rate_leaf': 0.00902779631981669, 'selected_variables': 0.5847782555415619}. Best is trial 0 with value: 0.7454545454545455.
[I 2025-01-02 16:54:38,674] Trial 1 finished with value: 0.7031374768163903 and parameters: {'depth': 4, 'n_estimators': 1280, 'learning_rate_weights': 0.0017040162399029248, 'learning_rate_leaf': 0.0010079484345450914, 'selected_variables': 0.7347883795703385}. Best is trial 0 with value: 0.7454545454545455.
[I 2025-01-02 16:54:53,244] Trial 2 finished with value: 0.6036024339693667 and parameters: {'depth': 5, 'n_estimators': 2048, 'learning_rate_weights': 0.028205993235973576, 'learning_rate_leaf': 0.001621980908006015, 'selected_variables': 0.6967772285923286}. Best is trial 0 with value: 0.7454545454545455.
[I 2025-01-02 16:55:28,999] Trial 3 finished with value: 0.8001322641986

Best hyperparameters: {'depth': 9, 'n_estimators': 512, 'learning_rate_weights': 0.0019529285970434462, 'learning_rate_leaf': 0.0011310178170669605, 'selected_variables': 0.9235562001597348}
Fold 1 Accuracy: 0.8545454545454545 MCC: 0.7109736629101981
Fold 2 Accuracy: 0.9545454545454546 MCC: 0.9104463009115371
Fold 3 Accuracy: 0.9 MCC: 0.8033264176742436
Fold 4 Accuracy: 0.8363636363636363 MCC: 0.6745134750686496
Fold 5 Accuracy: 0.9181818181818182 MCC: 0.8376105968386142
Fold 6 Accuracy: 0.9090909090909091 MCC: 0.8187233019063334
Fold 7 Accuracy: 0.9363636363636364 MCC: 0.8728715609439694
Fold 8 Accuracy: 0.8727272727272727 MCC: 0.745947897292437
Fold 9 Accuracy: 0.8545454545454545 MCC: 0.7109736629101981
Fold 10 Accuracy: 0.9174311926605505 MCC: 0.8360341015976291

Mean Accuracy: 0.8953794829024186 Standard Deviation: 0.039053692856320654
Mean MCC: 0.792142097805381 Standard Deviation: 0.07780723988698977


In [10]:
# Tuning + 10-fold evaluation of GRANDE for the 'II_550_pcc75' dataset key.
# NOTE(review): the execution count drops back to In [10] here and the log date moves
# from 2025-01-02 to 2025-01-04, so this cell appears to come from a later kernel
# session -- verify the notebook still reproduces under Restart Kernel -> Run All.
II_550_pcc75_results_df = train_with_best_hyperparameters(datasets, 'II_550_pcc75')

[I 2025-01-04 08:16:00,023] A new study created in memory with name: no-name-fa246ac5-0185-4c3f-b1ac-665a9dda8cb4


Currently training GRANDE model with dataset key: II_550_pcc75


[I 2025-01-04 08:16:14,657] Trial 0 finished with value: 0.7167132003459626 and parameters: {'depth': 4, 'n_estimators': 1792, 'learning_rate_weights': 0.09109608678304235, 'learning_rate_leaf': 0.02091775978366091, 'selected_variables': 0.7284488851029985}. Best is trial 0 with value: 0.7167132003459626.
[I 2025-01-04 08:18:10,875] Trial 1 finished with value: 0.7109736629101981 and parameters: {'depth': 10, 'n_estimators': 1536, 'learning_rate_weights': 0.03297593856902169, 'learning_rate_leaf': 0.004367843846576722, 'selected_variables': 0.8600222116567133}. Best is trial 0 with value: 0.7167132003459626.
[I 2025-01-04 08:19:50,639] Trial 2 finished with value: 0.7823355995993853 and parameters: {'depth': 10, 'n_estimators': 1536, 'learning_rate_weights': 0.02072383386975284, 'learning_rate_leaf': 0.023098347061368575, 'selected_variables': 0.9582111982939574}. Best is trial 2 with value: 0.7823355995993853.
[I 2025-01-04 08:20:03,056] Trial 3 finished with value: 0.6367847903715926

Best hyperparameters: {'depth': 6, 'n_estimators': 768, 'learning_rate_weights': 0.03911778433727795, 'learning_rate_leaf': 0.014089253109638366, 'selected_variables': 0.5899028977315837}
Fold 1 Accuracy: 0.8636363636363636 MCC: 0.7302967433402214
Fold 2 Accuracy: 0.8636363636363636 MCC: 0.7372097807744856
Fold 3 Accuracy: 0.8909090909090909 MCC: 0.7838940385932954
Fold 4 Accuracy: 0.8636363636363636 MCC: 0.7302967433402214
Fold 5 Accuracy: 0.8636363636363636 MCC: 0.7283570407292297
Fold 6 Accuracy: 0.8454545454545455 MCC: 0.6910233190806425
Fold 7 Accuracy: 0.9363636363636364 MCC: 0.8763560920082657
Fold 8 Accuracy: 0.8454545454545455 MCC: 0.6910233190806425
Fold 9 Accuracy: 0.9 MCC: 0.8033264176742436
Fold 10 Accuracy: 0.8807339449541285 MCC: 0.7615359138591727

Mean Accuracy: 0.8753461217681402 Standard Deviation: 0.027764229299767004
Mean MCC: 0.7533319408480421 Standard Deviation: 0.05612028302030608


In [11]:
# Tuning + 10-fold evaluation of GRANDE for the 'II_450_nopcc' dataset key.
# Return value presumably holds the per-fold metrics -- TODO confirm against
# train_with_best_hyperparameters.
II_450_nopcc_results_df = train_with_best_hyperparameters(datasets, 'II_450_nopcc')

[I 2025-01-04 08:24:37,138] A new study created in memory with name: no-name-90f094d3-96a5-40c1-9a98-872405c0743b


Currently training GRANDE model with dataset key: II_450_nopcc


[I 2025-01-04 08:24:52,316] Trial 0 finished with value: 0.8446530265013353 and parameters: {'depth': 5, 'n_estimators': 1792, 'learning_rate_weights': 0.00520197928213617, 'learning_rate_leaf': 0.010347943099474193, 'selected_variables': 0.7872487426461782}. Best is trial 0 with value: 0.8446530265013353.
[I 2025-01-04 08:25:07,171] Trial 1 finished with value: 0.8230354986052387 and parameters: {'depth': 4, 'n_estimators': 2048, 'learning_rate_weights': 0.0017860894600375892, 'learning_rate_leaf': 0.015291671820498539, 'selected_variables': 0.6700778606782103}. Best is trial 0 with value: 0.8446530265013353.
[I 2025-01-04 08:28:09,644] Trial 2 finished with value: 0.8230354986052387 and parameters: {'depth': 10, 'n_estimators': 2048, 'learning_rate_weights': 0.009662285516855367, 'learning_rate_leaf': 0.007330744371899261, 'selected_variables': 0.7947526563006211}. Best is trial 0 with value: 0.8446530265013353.
[I 2025-01-04 08:28:21,544] Trial 3 finished with value: 0.7362477346177

Best hyperparameters: {'depth': 5, 'n_estimators': 1792, 'learning_rate_weights': 0.00520197928213617, 'learning_rate_leaf': 0.010347943099474193, 'selected_variables': 0.7872487426461782}
Fold 1 Accuracy: 0.9111111111111111 MCC: 0.8230354986052387
Fold 2 Accuracy: 0.8888888888888888 MCC: 0.7785470932752259
Fold 3 Accuracy: 0.9222222222222223 MCC: 0.8446530265013353
Fold 4 Accuracy: 0.8888888888888888 MCC: 0.7808688094430304
Fold 5 Accuracy: 0.8666666666666667 MCC: 0.7362477346177143
Fold 6 Accuracy: 0.8666666666666667 MCC: 0.7362477346177143
Fold 7 Accuracy: 0.9111111111111111 MCC: 0.8222222222222222
Fold 8 Accuracy: 0.8777777777777778 MCC: 0.755742181606458
Fold 9 Accuracy: 0.9555555555555556 MCC: 0.9120123092652646
Fold 10 Accuracy: 0.9101123595505618 MCC: 0.8280139471090427

Mean Accuracy: 0.899900124843945 Standard Deviation: 0.02767325929886908
Mean MCC: 0.8017590557263248 Standard Deviation: 0.05500771345110363


In [12]:
# Tuning + 10-fold evaluation of GRANDE for the 'II_450_pcc95' dataset key.
# Return value presumably holds the per-fold metrics -- TODO confirm against
# train_with_best_hyperparameters.
II_450_pcc95_results_df = train_with_best_hyperparameters(datasets, 'II_450_pcc95')

[I 2025-01-04 08:32:59,723] A new study created in memory with name: no-name-b2396b94-7b13-4e7c-8a50-bab9fd8b8248


Currently training GRANDE model with dataset key: II_450_pcc95


[I 2025-01-04 08:33:12,264] Trial 0 finished with value: 0.8446530265013353 and parameters: {'depth': 5, 'n_estimators': 512, 'learning_rate_weights': 0.016019935743517063, 'learning_rate_leaf': 0.008274854925644127, 'selected_variables': 0.5988860369655855}. Best is trial 0 with value: 0.8446530265013353.
[I 2025-01-04 08:33:24,816] Trial 1 finished with value: 0.8446530265013353 and parameters: {'depth': 3, 'n_estimators': 768, 'learning_rate_weights': 0.0022684087762366257, 'learning_rate_leaf': 0.04873167189774093, 'selected_variables': 0.8186950576821259}. Best is trial 0 with value: 0.8446530265013353.
[I 2025-01-04 08:34:18,007] Trial 2 finished with value: 0.8666666666666667 and parameters: {'depth': 9, 'n_estimators': 768, 'learning_rate_weights': 0.0021323009316630845, 'learning_rate_leaf': 0.005981148499389056, 'selected_variables': 0.823137736506266}. Best is trial 2 with value: 0.8666666666666667.
[I 2025-01-04 08:34:30,086] Trial 3 finished with value: 0.6681531047810609 

Best hyperparameters: {'depth': 9, 'n_estimators': 768, 'learning_rate_weights': 0.0021323009316630845, 'learning_rate_leaf': 0.005981148499389056, 'selected_variables': 0.823137736506266}
Fold 1 Accuracy: 0.9 MCC: 0.8001976040538966
Fold 2 Accuracy: 0.9 MCC: 0.8001976040538966
Fold 3 Accuracy: 0.9222222222222223 MCC: 0.8463272660560106
Fold 4 Accuracy: 0.9111111111111111 MCC: 0.8296297792621188
Fold 5 Accuracy: 0.9 MCC: 0.8017837257372732
Fold 6 Accuracy: 0.8666666666666667 MCC: 0.7399400733959437
Fold 7 Accuracy: 0.9222222222222223 MCC: 0.8446530265013353
Fold 8 Accuracy: 0.9 MCC: 0.8001976040538966
Fold 9 Accuracy: 0.9333333333333333 MCC: 0.8675239039352517
Fold 10 Accuracy: 0.9438202247191011 MCC: 0.8897329414213228

Mean Accuracy: 0.9099375780274658 Standard Deviation: 0.02177151772703187
Mean MCC: 0.8220183528470946 Standard Deviation: 0.04267698512747966


In [14]:
# Tuning + 10-fold evaluation of GRANDE for the 'II_450_pcc75' dataset key.
# NOTE(review): the execution count skips from In [12] to In [14], and this is the
# first run whose output ends with "Total execution time"; the training function
# was presumably redefined in between -- confirm all results come from one version.
II_450_pcc75_results_df = train_with_best_hyperparameters(datasets, 'II_450_pcc75')

[I 2025-01-04 08:50:24,221] A new study created in memory with name: no-name-887a6bd9-2676-463f-ae4d-ee24815f3344


Currently training GRANDE model with dataset key: II_450_pcc75


[I 2025-01-04 08:50:40,479] Trial 0 finished with value: 0.8463272660560106 and parameters: {'depth': 4, 'n_estimators': 1280, 'learning_rate_weights': 0.0014824382511823947, 'learning_rate_leaf': 0.0027946616418149817, 'selected_variables': 0.636133413792692}. Best is trial 0 with value: 0.8463272660560106.
[I 2025-01-04 08:51:04,457] Trial 1 finished with value: 0.8908708063747479 and parameters: {'depth': 8, 'n_estimators': 1280, 'learning_rate_weights': 0.03628028875673967, 'learning_rate_leaf': 0.02767481534988566, 'selected_variables': 0.7459271750823792}. Best is trial 1 with value: 0.8908708063747479.
[I 2025-01-04 08:53:20,056] Trial 2 finished with value: 0.8230354986052387 and parameters: {'depth': 10, 'n_estimators': 1280, 'learning_rate_weights': 0.011863976610245104, 'learning_rate_leaf': 0.0032050475815278523, 'selected_variables': 0.531352694136811}. Best is trial 1 with value: 0.8908708063747479.
[I 2025-01-04 08:54:50,657] Trial 3 finished with value: 0.86752390393525

Best hyperparameters: {'depth': 8, 'n_estimators': 1280, 'learning_rate_weights': 0.03628028875673967, 'learning_rate_leaf': 0.02767481534988566, 'selected_variables': 0.7459271750823792}
Fold 1 Accuracy: 0.9222222222222223 MCC: 0.8446530265013353
Fold 2 Accuracy: 0.8888888888888888 MCC: 0.7785470932752259
Fold 3 Accuracy: 0.9222222222222223 MCC: 0.8446530265013353
Fold 4 Accuracy: 0.8777777777777778 MCC: 0.755742181606458
Fold 5 Accuracy: 0.8555555555555555 MCC: 0.7112867591590193
Fold 6 Accuracy: 0.8333333333333334 MCC: 0.6804138174397717
Fold 7 Accuracy: 0.9333333333333333 MCC: 0.8744746321952063
Fold 8 Accuracy: 0.8666666666666667 MCC: 0.7333333333333333
Fold 9 Accuracy: 0.9444444444444444 MCC: 0.889108448948774
Fold 10 Accuracy: 0.8876404494382022 MCC: 0.7827200065480686

Mean Accuracy: 0.8932084893882646 Standard Deviation: 0.03638335173066984
Mean MCC: 0.7894932325508529 Standard Deviation: 0.07126131014925888
Total execution time: 699.40 seconds


In [10]:
# Tuning + 10-fold evaluation of GRANDE for the 'II_350_nopcc' dataset key.
# NOTE(review): the execution count is In [10] again, which suggests another kernel
# restart; `datasets` and train_with_best_hyperparameters must be re-created before
# this cell runs -- verify with Restart Kernel -> Run All.
II_350_nopcc_results_df = train_with_best_hyperparameters(datasets, 'II_350_nopcc')

[I 2025-01-04 09:10:03,506] A new study created in memory with name: no-name-efa625a2-6cd8-43c9-8796-e4b1d81f5214


Currently training GRANDE model with dataset key: II_350_nopcc


[I 2025-01-04 09:10:20,089] Trial 0 finished with value: 0.7714285714285715 and parameters: {'depth': 5, 'n_estimators': 1792, 'learning_rate_weights': 0.026872059805274143, 'learning_rate_leaf': 0.005185169163582623, 'selected_variables': 0.7891053342381755}. Best is trial 0 with value: 0.7714285714285715.
[I 2025-01-04 09:10:35,804] Trial 1 finished with value: 0.8574929257125441 and parameters: {'depth': 7, 'n_estimators': 1536, 'learning_rate_weights': 0.03654655566362076, 'learning_rate_leaf': 0.07059985791188207, 'selected_variables': 0.9384402224122463}. Best is trial 1 with value: 0.8574929257125441.
[I 2025-01-04 09:10:50,155] Trial 2 finished with value: 0.8029550685469662 and parameters: {'depth': 6, 'n_estimators': 2048, 'learning_rate_weights': 0.009887998304734833, 'learning_rate_leaf': 0.07624188098284114, 'selected_variables': 0.7202348517093407}. Best is trial 1 with value: 0.8574929257125441.
[I 2025-01-04 09:11:03,532] Trial 3 finished with value: 0.725018488207177 a

Best hyperparameters: {'depth': 6, 'n_estimators': 1280, 'learning_rate_weights': 0.01933337234208883, 'learning_rate_leaf': 0.026229583732619216, 'selected_variables': 0.7556556798212072}
Fold 1 Accuracy: 0.9285714285714286 MCC: 0.8574929257125441
Fold 2 Accuracy: 0.8857142857142857 MCC: 0.7726911394933923
Fold 3 Accuracy: 0.9714285714285714 MCC: 0.944400281603035
Fold 4 Accuracy: 0.9142857142857143 MCC: 0.8299275201966065
Fold 5 Accuracy: 0.8857142857142857 MCC: 0.7765163665331185
Fold 6 Accuracy: 0.9285714285714286 MCC: 0.8603090020146066
Fold 7 Accuracy: 0.9 MCC: 0.8003267306650412
Fold 8 Accuracy: 0.9285714285714286 MCC: 0.8574929257125441
Fold 9 Accuracy: 0.9714285714285714 MCC: 0.944400281603035
Fold 10 Accuracy: 0.8985507246376812 MCC: 0.7973089447313457

Mean Accuracy: 0.9212836438923396 Standard Deviation: 0.031158147762959183
Mean MCC: 0.8440866118265269 Standard Deviation: 0.06210644792799784
Total execution time: 435.24 seconds


In [11]:
# Tuning + 10-fold evaluation of GRANDE for the 'II_350_pcc95' dataset key.
# Return value presumably holds the per-fold metrics -- TODO confirm against
# train_with_best_hyperparameters.
II_350_pcc95_results_df = train_with_best_hyperparameters(datasets, 'II_350_pcc95')

[I 2025-01-04 09:18:02,197] A new study created in memory with name: no-name-c9f0b56e-2a45-4f62-a443-7634d0719152


Currently training GRANDE model with dataset key: II_350_pcc95


[I 2025-01-04 09:19:36,687] Trial 0 finished with value: 0.7726911394933923 and parameters: {'depth': 10, 'n_estimators': 1536, 'learning_rate_weights': 0.02696600978169259, 'learning_rate_leaf': 0.006022230884779744, 'selected_variables': 0.7326299213303933}. Best is trial 0 with value: 0.7726911394933923.
[I 2025-01-04 09:19:54,787] Trial 1 finished with value: 0.8029550685469662 and parameters: {'depth': 7, 'n_estimators': 512, 'learning_rate_weights': 0.0011014544071759821, 'learning_rate_leaf': 0.010375649064176467, 'selected_variables': 0.9518413899686005}. Best is trial 1 with value: 0.8029550685469662.
[I 2025-01-04 09:20:16,955] Trial 2 finished with value: 0.8603090020146066 and parameters: {'depth': 7, 'n_estimators': 1536, 'learning_rate_weights': 0.014043329220671275, 'learning_rate_leaf': 0.005580489627514283, 'selected_variables': 0.568732338099351}. Best is trial 2 with value: 0.8603090020146066.
[I 2025-01-04 09:20:53,603] Trial 3 finished with value: 0.772691139493392

Best hyperparameters: {'depth': 7, 'n_estimators': 1536, 'learning_rate_weights': 0.014043329220671275, 'learning_rate_leaf': 0.005580489627514283, 'selected_variables': 0.568732338099351}
Fold 1 Accuracy: 0.9428571428571428 MCC: 0.8871639008998208
Fold 2 Accuracy: 0.9285714285714286 MCC: 0.8603090020146066
Fold 3 Accuracy: 0.9285714285714286 MCC: 0.8603090020146066
Fold 4 Accuracy: 0.8857142857142857 MCC: 0.7726911394933923
Fold 5 Accuracy: 0.8571428571428571 MCC: 0.7189966356788134
Fold 6 Accuracy: 0.9142857142857143 MCC: 0.8340360973874236
Fold 7 Accuracy: 0.8714285714285714 MCC: 0.7431605356175383
Fold 8 Accuracy: 0.9428571428571428 MCC: 0.8857142857142857
Fold 9 Accuracy: 0.9 MCC: 0.808290376865476
Fold 10 Accuracy: 0.927536231884058 MCC: 0.8553409248858425

Mean Accuracy: 0.9098964803312629 Standard Deviation: 0.030085411332991027
Mean MCC: 0.8226011900571806 Standard Deviation: 0.059571195285357016
Total execution time: 638.71 seconds


In [12]:
# Tuning + 10-fold evaluation of GRANDE for the 'II_350_pcc75' dataset key.
# Return value presumably holds the per-fold metrics -- TODO confirm against
# train_with_best_hyperparameters.
II_350_pcc75_results_df = train_with_best_hyperparameters(datasets, 'II_350_pcc75')

[I 2025-01-04 09:29:38,688] A new study created in memory with name: no-name-edf05d32-d522-4385-8be9-9f6295e44d25


Currently training GRANDE model with dataset key: II_350_pcc75


[I 2025-01-04 09:29:53,060] Trial 0 finished with value: 0.8603090020146066 and parameters: {'depth': 6, 'n_estimators': 1792, 'learning_rate_weights': 0.014653777691044862, 'learning_rate_leaf': 0.0340485387382563, 'selected_variables': 0.8704094958989985}. Best is trial 0 with value: 0.8603090020146066.
[I 2025-01-04 09:30:04,994] Trial 1 finished with value: 0.8285714285714286 and parameters: {'depth': 4, 'n_estimators': 1536, 'learning_rate_weights': 0.05139320709505699, 'learning_rate_leaf': 0.007424470881676524, 'selected_variables': 0.545356264155285}. Best is trial 0 with value: 0.8603090020146066.
[I 2025-01-04 09:31:24,197] Trial 2 finished with value: 0.7581753965757456 and parameters: {'depth': 10, 'n_estimators': 512, 'learning_rate_weights': 0.001209124245170635, 'learning_rate_leaf': 0.0011339157055720794, 'selected_variables': 0.974287312188722}. Best is trial 0 with value: 0.8603090020146066.
[I 2025-01-04 09:31:41,622] Trial 3 finished with value: 0.8574929257125441 a

Best hyperparameters: {'depth': 8, 'n_estimators': 1280, 'learning_rate_weights': 0.00834871674202739, 'learning_rate_leaf': 0.0092003442550684, 'selected_variables': 0.8540119757953765}
Fold 1 Accuracy: 0.9285714285714286 MCC: 0.8574929257125441
Fold 2 Accuracy: 0.8428571428571429 MCC: 0.6882472016116853
Fold 3 Accuracy: 0.9142857142857143 MCC: 0.8299275201966065
Fold 4 Accuracy: 0.9 MCC: 0.8003267306650412
Fold 5 Accuracy: 0.8428571428571429 MCC: 0.6882472016116853
Fold 6 Accuracy: 0.9142857142857143 MCC: 0.8299275201966065
Fold 7 Accuracy: 0.8714285714285714 MCC: 0.7456011350793257
Fold 8 Accuracy: 0.8857142857142857 MCC: 0.7726911394933923
Fold 9 Accuracy: 0.9714285714285714 MCC: 0.944400281603035
Fold 10 Accuracy: 0.927536231884058 MCC: 0.8581191265318578

Mean Accuracy: 0.899896480331263 Standard Deviation: 0.04032601575983565
Mean MCC: 0.801498078270178 Standard Deviation: 0.08026765936750759
Total execution time: 575.21 seconds
