In [1]:
!pip install optuna grande



In [1]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.metrics import accuracy_score, matthews_corrcoef, make_scorer
from scipy.stats import shapiro
import optuna
from GRANDE import GRANDE

In [2]:
# Builds the train/test arrays for one round of k-fold cross validation
def prepare_train_test_dataset(datasets, test_idx):
  """Hold out one fold as the test set and merge the others for training.

  Args:
    datasets: List of pandas DataFrames, one per fold. In every frame the
      first column is skipped, the last column is the target and all columns
      in between are features. (Presumably the first column is a row
      identifier -- confirm against the CSV files.)
    test_idx: Position of the fold to hold out. Negative values count from
      the end, as with normal list indexing.

  Returns:
    Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.

  Raises:
    IndexError: If ``test_idx`` does not address a fold in ``datasets``.
  """
  n_folds = len(datasets)
  if not -n_folds <= test_idx < n_folds:
    raise IndexError(f"test_idx {test_idx} is out of range for {n_folds} folds")

  # Map negative indices onto their positive position. Without this the
  # `i != test_idx` filter below never matches a negative index, so the
  # held-out fold would silently be included in the training data too.
  test_idx %= n_folds

  test_df = datasets[test_idx]
  train_df = pd.concat([fold for i, fold in enumerate(datasets) if i != test_idx])

  # Column 0 is dropped, the last column is the label, the rest are features.
  X_train = train_df.iloc[:, 1:-1].values
  X_test = test_df.iloc[:, 1:-1].values
  y_train = train_df.iloc[:, -1].values
  y_test = test_df.iloc[:, -1].values

  return X_train, X_test, y_train, y_test

In [3]:
# GRANDE model hyperparameters: defaults taken from the paper and GitHub repo
params = dict(
    depth=5,  # depth of each tree
    n_estimators=2048,  # size of the ensemble (number of trees)

    # Learning rates, one per group of trainable parameters
    learning_rate_weights=0.005,  # leaf weights
    learning_rate_index=0.01,  # split indices
    learning_rate_values=0.01,  # split values
    learning_rate_leaf=0.01,  # leaves (logits)

    optimizer='adam',
    cosine_decay_steps=0,  # decay steps of the lr schedule (CosineDecayRestarts)

    loss='crossentropy',  # 'crossentropy' for binary / multi-class classification, 'mse' for regression
    focal_loss=False,  # whether to use focal loss
    temperature=0.0,  # temperature for stochastic re-weighted GD (0.0, 1.0)

    from_logits=True,  # whether logits are used for weighting
    use_class_weights=True,  # whether class weights are used during training

    dropout=0.0,  # rate at which individual estimators are randomly disabled during training

    selected_variables=0.8,  # fraction of features in each subset (0.0, 1.0)
    data_subset_fraction=1.0,  # fraction of the data in each subset (0.0, 1.0)
)

# Training / run settings handed to GRANDE next to the hyperparameters
args = dict(
    epochs=1000,  # number of training epochs
    early_stopping_epochs=25,
    batch_size=32,
    cat_idx=[],  # indices of the categorical features (none listed here)
    objective='binary',  # task type: one of 'binary', 'classification', 'regression'

    random_seed=42,
    verbose=0,
)

In [4]:
import torch
import time

def train_with_base_grande(datasets, key):
  """Cross-validate GRANDE with the default `params` / `args` on one dataset.

  Every fold of ``datasets[key]`` is held out once; a fresh model is trained
  on the remaining folds and scored on the held-out fold. Per-fold scores and
  their mean / standard deviation are printed along with the elapsed time.

  Args:
    datasets: Mapping from dataset key to the list of per-fold DataFrames.
    key: Entry of ``datasets`` to evaluate.

  Returns:
    DataFrame with one row per fold and the columns 'Fold', 'Accuracy', 'MCC'.
  """
  start_time = time.time()
  print("Currently training GRANDE model with dataset key:", key)

  dataset_list = datasets[key]

  # Collect one record per fold and build the results frame once at the end.
  fold_rows = []

  # One round per available fold (10 for the datasets loaded in this notebook).
  for i in range(len(dataset_list)):
    X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, i)

    # Train the model
    # NOTE(review): the held-out fold is also passed as the validation set.
    # If GRANDE uses X_val / y_val for early stopping (presumably, given
    # 'early_stopping_epochs' in `args`), the scores below are optimistic --
    # consider carving a validation split out of the training folds instead.
    model = GRANDE(params=params, args=args)
    model.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Evaluate the model: predict() output is reduced to labels via argmax
    # over axis 1.
    y_pred_labels = np.argmax(model.predict(X_test), axis=1)
    accuracy = accuracy_score(y_test, y_pred_labels)
    mcc = matthews_corrcoef(y_test, y_pred_labels)

    print("Fold", i+1, "Accuracy:", accuracy, "MCC:", mcc)
    fold_rows.append({'Fold': i+1, 'Accuracy': accuracy, 'MCC': mcc})

  results_df = pd.DataFrame(fold_rows, columns=['Fold', 'Accuracy', 'MCC'])

  print()

  print("Mean Accuracy:", results_df['Accuracy'].mean(), "Standard Deviation:", results_df['Accuracy'].std())
  print("Mean MCC:", results_df['MCC'].mean(), "Standard Deviation:", results_df['MCC'].std())

  elapsed_time = time.time() - start_time
  print(f"Time taken: {elapsed_time:.2f} seconds")

  # Callers assign the result (`..._results_df = train_with_base_grande(...)`),
  # so hand the per-fold scores back instead of implicitly returning None.
  return results_df

In [5]:
def create_objective_for_grande(X_train, X_test, y_train, y_test):
  """Create an Optuna objective that tunes GRANDE on one fixed data split.

  The returned callable samples a GRANDE configuration from the trial, fits a
  model on ``X_train`` / ``y_train`` (with ``X_test`` / ``y_test`` passed as
  the validation data) and returns the Matthews correlation coefficient of
  the predictions on ``X_test``.

  Args:
    X_train, y_train: Training features and labels.
    X_test, y_test: Features and labels used for validation and scoring.

  Returns:
    A function ``objective(trial) -> float`` suitable for ``study.optimize``.
  """
  def objective(trial):
    # Tunable hyperparameters, sampled in the order they occur in the config.
    depth = trial.suggest_int('depth', 3, 10)
    n_estimators = trial.suggest_int('n_estimators', 512, 2048, step=256)
    lr_weights = trial.suggest_float('learning_rate_weights', 1e-3, 1e-1, log=True)
    lr_leaf = trial.suggest_float('learning_rate_leaf', 1e-3, 1e-1, log=True)
    feature_fraction = trial.suggest_float('selected_variables', 0.5, 1.0)

    # Full GRANDE configuration: sampled values plus the settings kept fixed.
    trial_params = {
      'depth': depth,
      'n_estimators': n_estimators,

      'learning_rate_weights': lr_weights,
      'learning_rate_index': 0.01,
      'learning_rate_values': 0.01,
      'learning_rate_leaf': lr_leaf,

      'optimizer': 'adam',
      'cosine_decay_steps': 0,

      'loss': 'crossentropy',
      'focal_loss': False,
      'temperature': 0.0,

      'from_logits': True,
      'use_class_weights': True,

      'dropout': 0.0,

      'selected_variables': feature_fraction,
      'data_subset_fraction': 1.0,
    }

    # Run settings used for every trial of the search.
    trial_args = {
      'epochs': 100,
      'early_stopping_epochs': 10,
      'batch_size': 64,
      'cat_idx': [],
      'objective': 'binary',

      'random_seed': 42,
      'verbose': 0,
    }

    model = GRANDE(params=trial_params, args=trial_args)
    model.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Turn the prediction matrix into class labels and score them with MCC,
    # which is the value the study optimises.
    predicted_labels = np.argmax(model.predict(X_test), axis=1)
    return matthews_corrcoef(y_test, predicted_labels)

  return objective

In [6]:
def train_with_best_hyperparameters(datasets, key, n_trials=10, seed=None):
  """Tune GRANDE with Optuna on one dataset, then cross-validate the result.

  The hyperparameter search runs on the split that holds out the first fold.
  The best configuration found is then evaluated with one round per fold.
  Per-fold scores, their mean / standard deviation and the elapsed time are
  printed.

  Args:
    datasets: Mapping from dataset key to the list of per-fold DataFrames.
    key: Entry of ``datasets`` to evaluate.
    n_trials: Number of Optuna trials to run (default 10).
    seed: Optional seed for the Optuna sampler, to make the search
      repeatable. With ``None`` (default) Optuna's default sampler is used.

  Returns:
    DataFrame with one row per fold and the columns 'Fold', 'Accuracy', 'MCC'.
  """
  start_time = time.time()
  print("Currently training GRANDE model with dataset key:", key)

  dataset_list = datasets[key]

  # Use the first fold as the held-out split for the HPO.
  # NOTE(review): that fold is scored again in the cross validation below, so
  # the hyperparameters were selected on data that is part of the reported
  # result. A separate tuning split would avoid this.
  X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, 0)

  objective_function = create_objective_for_grande(X_train, X_test, y_train, y_test)

  # Only build an explicit sampler when a seed is requested, so the default
  # call behaves exactly as before.
  sampler = optuna.samplers.TPESampler(seed=seed) if seed is not None else None
  study = optuna.create_study(direction='maximize', sampler=sampler)
  study.optimize(objective_function, n_trials=n_trials, gc_after_trial=True)

  best_params = study.best_params
  print("Best hyperparameters:", best_params)

  # Collect one record per fold and build the results frame once at the end.
  fold_rows = []

  # Use best hyperparams to conduct the cross validation, one round per fold.
  for i in range(len(dataset_list)):
    X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, i)

    # `study.best_params` only contains the values sampled by the trial. Layer
    # them over the module-level `params` so the settings that were held fixed
    # during the search are passed explicitly rather than left to whatever
    # GRANDE does with missing keys. A fresh dict is built for every model.
    # NOTE(review): the search trains with its own run settings (100 epochs,
    # batch size 64), while the final models use the module-level `args`.
    fold_params = {**params, **best_params}

    # Train the model
    model = GRANDE(params=fold_params, args=args)
    model.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Evaluate the model: predict() output is reduced to labels via argmax
    # over axis 1.
    y_pred_labels = np.argmax(model.predict(X_test), axis=1)
    accuracy = accuracy_score(y_test, y_pred_labels)
    mcc = matthews_corrcoef(y_test, y_pred_labels)

    print("Fold", i+1, "Accuracy:", accuracy, "MCC:", mcc)
    fold_rows.append({'Fold': i+1, 'Accuracy': accuracy, 'MCC': mcc})

  results_df = pd.DataFrame(fold_rows, columns=['Fold', 'Accuracy', 'MCC'])

  print()

  print("Mean Accuracy:", results_df['Accuracy'].mean(), "Standard Deviation:", results_df['Accuracy'].std())
  print("Mean MCC:", results_df['MCC'].mean(), "Standard Deviation:", results_df['MCC'].std())

  elapsed_time = time.time() - start_time
  print(f"Time taken: {elapsed_time:.2f} seconds")

  # Callers assign the result, so return the per-fold scores instead of None.
  return results_df

In [7]:
# Load the validation datasets
#
# Every dataset variant is stored as pre-split fold files laid out as
#   <DATA_ROOT>/<subset>/cv/<variant>/fold_<n>.csv
# `datasets` maps 'XII_<subset>_<variant>' to the list of that variant's fold
# DataFrames, ordered fold_1 .. fold_<N_FOLDS>.

DATA_ROOT = '/content/drive/MyDrive/OrderedDataset/XII'
SUBSETS = ['all', '200', '150', '100']
VARIANTS = ['nopcc', 'pcc95', 'pcc75']
N_FOLDS = 10

datasets = {}

for subset in SUBSETS:
  for variant in VARIANTS:
    key = f'XII_{subset}_{variant}'
    folds = [
        pd.read_csv(f'{DATA_ROOT}/{subset}/cv/{variant}/fold_{fold_no}.csv')
        for fold_no in range(1, N_FOLDS + 1)
    ]
    datasets[key] = folds

    # Backward compatibility: keep the former per-fold module-level names
    # (e.g. XII_all_nopcc_1) bound in case another cell still refers to them.
    # This loop can be dropped once no cell uses those names.
    for fold_no, fold_df in enumerate(folds, start=1):
      globals()[f'{key}_{fold_no}'] = fold_df

In [8]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [9]:
# Baseline: cross-validate the default GRANDE configuration on every dataset
# variant, printing a divider between consecutive runs.
SEPARATOR = "\n---------------------------------------------------------------------\n"

# All
XII_all_nopcc_results_df = train_with_base_grande(datasets, 'XII_all_nopcc')
print(SEPARATOR)
XII_all_pcc95_results_df = train_with_base_grande(datasets, 'XII_all_pcc95')
print(SEPARATOR)
XII_all_pcc75_results_df = train_with_base_grande(datasets, 'XII_all_pcc75')
print(SEPARATOR)

# 200
XII_200_nopcc_results_df = train_with_base_grande(datasets, 'XII_200_nopcc')
print(SEPARATOR)
XII_200_pcc95_results_df = train_with_base_grande(datasets, 'XII_200_pcc95')
print(SEPARATOR)
XII_200_pcc75_results_df = train_with_base_grande(datasets, 'XII_200_pcc75')
print(SEPARATOR)

# 150
XII_150_nopcc_results_df = train_with_base_grande(datasets, 'XII_150_nopcc')
print(SEPARATOR)
XII_150_pcc95_results_df = train_with_base_grande(datasets, 'XII_150_pcc95')
print(SEPARATOR)
XII_150_pcc75_results_df = train_with_base_grande(datasets, 'XII_150_pcc75')
print(SEPARATOR)

# 100
XII_100_nopcc_results_df = train_with_base_grande(datasets, 'XII_100_nopcc')
print(SEPARATOR)
XII_100_pcc95_results_df = train_with_base_grande(datasets, 'XII_100_pcc95')
print(SEPARATOR)
XII_100_pcc75_results_df = train_with_base_grande(datasets, 'XII_100_pcc75')

Currently training GRANDE model with dataset key: XII_all_nopcc
Fold 1 Accuracy: 0.8979591836734694 MCC: 0.7963283217283071
Fold 2 Accuracy: 0.8775510204081632 MCC: 0.755
Fold 3 Accuracy: 1.0 MCC: 1.0
Fold 4 Accuracy: 0.8979591836734694 MCC: 0.8023551891819376




Fold 5 Accuracy: 0.9387755102040817 MCC: 0.8783333333333333




Fold 6 Accuracy: 0.8979591836734694 MCC: 0.8013876853447538
Fold 7 Accuracy: 0.8367346938775511 MCC: 0.6733333333333333
Fold 8 Accuracy: 0.9166666666666666 MCC: 0.8333333333333334
Fold 9 Accuracy: 0.7708333333333334 MCC: 0.5421374765483944
Fold 10 Accuracy: 0.8541666666666666 MCC: 0.7089490077940542

Mean Accuracy: 0.8888605442176871 Standard Deviation: 0.061322567784936846
Mean MCC: 0.7791157680597447 Standard Deviation: 0.12281850802431543
Time taken: 127.51 seconds

---------------------------------------------------------------------

Currently training GRANDE model with dataset key: XII_all_pcc95
Fold 1 Accuracy: 0.9183673469387755 MCC: 0.8366666666666667
Fold 2 Accuracy: 0.8367346938775511 MCC: 0.6761278203353551
Fold 3 Accuracy: 1.0 MCC: 1.0
Fold 4 Accuracy: 0.9183673469387755 MCC: 0.8397340583424286
Fold 5 Accuracy: 0.9387755102040817 MCC: 0.8781314407318439
Fold 6 Accuracy: 0.8979591836734694 MCC: 0.8013876853447538
Fold 7 Accuracy: 0.8775510204081632 MCC: 0.7571284874952731
F

In [10]:
# Tuned runs on the 'all' variants. The results get their own `*_tuned_*`
# names: reusing the `XII_all_*_results_df` names assigned by the baseline
# cell would overwrite the baseline results in the kernel.
XII_all_nopcc_tuned_results_df = train_with_best_hyperparameters(datasets, 'XII_all_nopcc')
print("\n---------------------------------------------------------------------\n")
XII_all_pcc95_tuned_results_df = train_with_best_hyperparameters(datasets, 'XII_all_pcc95')
print("\n---------------------------------------------------------------------\n")
XII_all_pcc75_tuned_results_df = train_with_best_hyperparameters(datasets, 'XII_all_pcc75')

[I 2025-01-04 10:21:13,202] A new study created in memory with name: no-name-7e429a75-3647-4eef-a1ce-01a0c8f5a578


Currently training GRANDE model with dataset key: XII_all_nopcc


[I 2025-01-04 10:22:34,503] Trial 0 finished with value: 0.6761278203353551 and parameters: {'depth': 10, 'n_estimators': 768, 'learning_rate_weights': 0.0013384655807644327, 'learning_rate_leaf': 0.0017517653238667008, 'selected_variables': 0.5248709093306623}. Best is trial 0 with value: 0.6761278203353551.
[I 2025-01-04 10:22:44,341] Trial 1 finished with value: 0.6733333333333333 and parameters: {'depth': 4, 'n_estimators': 1792, 'learning_rate_weights': 0.03219307496656572, 'learning_rate_leaf': 0.004106319473017303, 'selected_variables': 0.9224292419431999}. Best is trial 0 with value: 0.6761278203353551.
[I 2025-01-04 10:22:55,120] Trial 2 finished with value: 0.6761278203353551 and parameters: {'depth': 4, 'n_estimators': 1280, 'learning_rate_weights': 0.008345390287494472, 'learning_rate_leaf': 0.0010380040883317097, 'selected_variables': 0.8699831181248788}. Best is trial 0 with value: 0.6761278203353551.
[I 2025-01-04 10:23:05,683] Trial 3 finished with value: 0.550918964717

Best hyperparameters: {'depth': 8, 'n_estimators': 2048, 'learning_rate_weights': 0.003174236637933003, 'learning_rate_leaf': 0.007961678839491209, 'selected_variables': 0.6540162403919783}
Fold 1 Accuracy: 0.8979591836734694 MCC: 0.7966666666666666
Fold 2 Accuracy: 0.8571428571428571 MCC: 0.7145252027247703
Fold 3 Accuracy: 0.9795918367346939 MCC: 0.96
Fold 4 Accuracy: 0.9183673469387755 MCC: 0.8397340583424286
Fold 5 Accuracy: 0.9183673469387755 MCC: 0.8366666666666667
Fold 6 Accuracy: 0.8979591836734694 MCC: 0.8013876853447538
Fold 7 Accuracy: 0.8979591836734694 MCC: 0.7963283217283071
Fold 8 Accuracy: 0.9375 MCC: 0.8757605390397141
Fold 9 Accuracy: 0.8333333333333334 MCC: 0.6689936080056726


[I 2025-01-04 10:33:10,372] A new study created in memory with name: no-name-69b83b39-4b5d-4b0b-bc21-b50187e6d6e8


Fold 10 Accuracy: 0.8541666666666666 MCC: 0.7089490077940542

Mean Accuracy: 0.8992346938775511 Standard Deviation: 0.0432105192747381
Mean MCC: 0.7999011756313034 Standard Deviation: 0.08631223574080822
Time taken: 717.17 seconds

---------------------------------------------------------------------

Currently training GRANDE model with dataset key: XII_all_pcc95


[I 2025-01-04 10:33:48,171] Trial 0 finished with value: 0.715 and parameters: {'depth': 9, 'n_estimators': 768, 'learning_rate_weights': 0.003450987594044972, 'learning_rate_leaf': 0.005765089041693128, 'selected_variables': 0.9049284481510143}. Best is trial 0 with value: 0.715.
[I 2025-01-04 10:34:05,179] Trial 1 finished with value: 0.7966666666666666 and parameters: {'depth': 8, 'n_estimators': 2048, 'learning_rate_weights': 0.09338990803330428, 'learning_rate_leaf': 0.060024459535108474, 'selected_variables': 0.6170835155850722}. Best is trial 1 with value: 0.7966666666666666.
[I 2025-01-04 10:34:32,193] Trial 2 finished with value: 0.7963283217283071 and parameters: {'depth': 9, 'n_estimators': 512, 'learning_rate_weights': 0.06291518684884573, 'learning_rate_leaf': 0.006597033223043242, 'selected_variables': 0.9355716339217041}. Best is trial 1 with value: 0.7966666666666666.
[I 2025-01-04 10:34:55,965] Trial 3 finished with value: 0.675050399249104 and parameters: {'depth': 8,

Best hyperparameters: {'depth': 5, 'n_estimators': 512, 'learning_rate_weights': 0.006066383458545886, 'learning_rate_leaf': 0.011658909851800826, 'selected_variables': 0.8877414218273969}
Fold 1 Accuracy: 0.8775510204081632 MCC: 0.7571284874952731
Fold 2 Accuracy: 0.8571428571428571 MCC: 0.7202771009357686
Fold 3 Accuracy: 0.9795918367346939 MCC: 0.96
Fold 4 Accuracy: 0.9387755102040817 MCC: 0.8781314407318439
Fold 5 Accuracy: 0.9183673469387755 MCC: 0.839206575741442
Fold 6 Accuracy: 0.8775510204081632 MCC: 0.7571284874952731
Fold 7 Accuracy: 0.8775510204081632 MCC: 0.755
Fold 8 Accuracy: 0.9583333333333334 MCC: 0.9166666666666666
Fold 9 Accuracy: 0.7708333333333334 MCC: 0.5459486832355505


[I 2025-01-04 10:37:52,945] A new study created in memory with name: no-name-efa10be2-d358-48a2-91d7-c187f4e83f66


Fold 10 Accuracy: 0.8333333333333334 MCC: 0.6666666666666666

Mean Accuracy: 0.8889030612244898 Standard Deviation: 0.06220413914961839
Mean MCC: 0.7796154108968485 Standard Deviation: 0.1234116855289603
Time taken: 282.57 seconds

---------------------------------------------------------------------

Currently training GRANDE model with dataset key: XII_all_pcc75


[I 2025-01-04 10:38:05,791] Trial 0 finished with value: 0.6363961030678927 and parameters: {'depth': 6, 'n_estimators': 1024, 'learning_rate_weights': 0.01564340806273413, 'learning_rate_leaf': 0.0015192195603645879, 'selected_variables': 0.8705621378175294}. Best is trial 0 with value: 0.6363961030678927.
[I 2025-01-04 10:38:19,645] Trial 1 finished with value: 0.7571284874952731 and parameters: {'depth': 6, 'n_estimators': 2048, 'learning_rate_weights': 0.008375732930890618, 'learning_rate_leaf': 0.01684771466511989, 'selected_variables': 0.8824284334388937}. Best is trial 1 with value: 0.7571284874952731.
[I 2025-01-04 10:39:22,997] Trial 2 finished with value: 0.715 and parameters: {'depth': 9, 'n_estimators': 2048, 'learning_rate_weights': 0.009615809999998856, 'learning_rate_leaf': 0.0010793296053417556, 'selected_variables': 0.6666577670288265}. Best is trial 1 with value: 0.7571284874952731.
[I 2025-01-04 10:39:33,725] Trial 3 finished with value: 0.7571284874952731 and parame

Best hyperparameters: {'depth': 6, 'n_estimators': 512, 'learning_rate_weights': 0.0279132995764493, 'learning_rate_leaf': 0.043099047767134926, 'selected_variables': 0.9743491756276828}
Fold 1 Accuracy: 0.8775510204081632 MCC: 0.7571284874952731
Fold 2 Accuracy: 0.8775510204081632 MCC: 0.7579309393388919
Fold 3 Accuracy: 0.9183673469387755 MCC: 0.8397340583424286
Fold 4 Accuracy: 0.9387755102040817 MCC: 0.8783333333333333
Fold 5 Accuracy: 0.9795918367346939 MCC: 0.96
Fold 6 Accuracy: 0.8979591836734694 MCC: 0.7963283217283071
Fold 7 Accuracy: 0.8775510204081632 MCC: 0.7579309393388919
Fold 8 Accuracy: 0.9166666666666666 MCC: 0.8333333333333334
Fold 9 Accuracy: 0.8333333333333334 MCC: 0.6666666666666666
Fold 10 Accuracy: 0.8333333333333334 MCC: 0.6689936080056726

Mean Accuracy: 0.8950680272108844 Standard Deviation: 0.04547919939210547
Mean MCC: 0.7916379687582799 Standard Deviation: 0.09071217853215471
Time taken: 365.30 seconds


In [10]:
# Tune and evaluate GRANDE on the 'XII_200_nopcc' dataset key.
# Per the captured output, the call runs an Optuna study, prints the best
# hyperparameters, then reports accuracy and MCC for 10 folds together with
# their mean / standard deviation and the elapsed time.
# The return value is bound to XII_200_nopcc_results_df (presumably a results
# DataFrame, going by the name -- confirm against the function definition).
# The dashed rules printed before and after delimit this run in the output.
print("\n---------------------------------------------------------------------\n")
XII_200_nopcc_results_df = train_with_best_hyperparameters(datasets, 'XII_200_nopcc')
print("\n---------------------------------------------------------------------\n")

[I 2025-01-04 10:46:09,751] A new study created in memory with name: no-name-d0ff74f9-9403-4616-b0ba-3e2575a11f95



---------------------------------------------------------------------

Currently training GRANDE model with dataset key: XII_200_nopcc


[I 2025-01-04 10:46:25,456] Trial 0 finished with value: 0.8510644963469901 and parameters: {'depth': 5, 'n_estimators': 2048, 'learning_rate_weights': 0.02418920414533056, 'learning_rate_leaf': 0.004399836655115275, 'selected_variables': 0.6001305793222371}. Best is trial 0 with value: 0.8510644963469901.
[I 2025-01-04 10:46:37,768] Trial 1 finished with value: 0.6508140266182866 and parameters: {'depth': 6, 'n_estimators': 768, 'learning_rate_weights': 0.03502601204981815, 'learning_rate_leaf': 0.0011790052194417607, 'selected_variables': 0.9255944115558584}. Best is trial 0 with value: 0.8510644963469901.
[I 2025-01-04 10:46:47,872] Trial 2 finished with value: 0.8510644963469901 and parameters: {'depth': 3, 'n_estimators': 512, 'learning_rate_weights': 0.012008525198309491, 'learning_rate_leaf': 0.013533531297649014, 'selected_variables': 0.7131446915356912}. Best is trial 0 with value: 0.8510644963469901.
[I 2025-01-04 10:47:17,431] Trial 3 finished with value: 0.7509392614826383 

Best hyperparameters: {'depth': 5, 'n_estimators': 2048, 'learning_rate_weights': 0.02418920414533056, 'learning_rate_leaf': 0.004399836655115275, 'selected_variables': 0.6001305793222371}
Fold 1 Accuracy: 0.875 MCC: 0.7585826061362605
Fold 2 Accuracy: 0.925 MCC: 0.8510644963469901
Fold 3 Accuracy: 0.875 MCC: 0.7509392614826383
Fold 4 Accuracy: 0.9 MCC: 0.8040302522073697
Fold 5 Accuracy: 0.9 MCC: 0.8
Fold 6 Accuracy: 0.95 MCC: 0.9045340337332909
Fold 7 Accuracy: 0.775 MCC: 0.556293911166591
Fold 8 Accuracy: 0.875 MCC: 0.7585826061362605
Fold 9 Accuracy: 0.9 MCC: 0.8040302522073697
Fold 10 Accuracy: 0.7948717948717948 MCC: 0.6224436442165379

Mean Accuracy: 0.8769871794871795 Standard Deviation: 0.054140804527459334
Mean MCC: 0.7610501063633308 Standard Deviation: 0.10282302218684848
Time taken: 321.42 seconds

---------------------------------------------------------------------



In [11]:
# Tune and evaluate GRANDE on the 'XII_200_pcc95' dataset key.
# Per the captured output, the call runs an Optuna study, prints the best
# hyperparameters, then reports per-fold accuracy / MCC for 10 folds plus
# their mean, standard deviation and the elapsed time.
# The return value is bound to XII_200_pcc95_results_df (presumably a results
# DataFrame, going by the name -- confirm against the function definition).
XII_200_pcc95_results_df = train_with_best_hyperparameters(datasets, 'XII_200_pcc95')

[I 2025-01-04 10:52:05,818] A new study created in memory with name: no-name-5a508727-c427-49ff-8cbc-4585d7cb34da


Currently training GRANDE model with dataset key: XII_200_pcc95


[I 2025-01-04 10:52:17,521] Trial 0 finished with value: 0.9045340337332909 and parameters: {'depth': 5, 'n_estimators': 1536, 'learning_rate_weights': 0.010638726380738893, 'learning_rate_leaf': 0.009062367148569112, 'selected_variables': 0.6949842130850761}. Best is trial 0 with value: 0.9045340337332909.
[I 2025-01-04 10:52:29,320] Trial 1 finished with value: 0.9 and parameters: {'depth': 5, 'n_estimators': 2048, 'learning_rate_weights': 0.00687040874901193, 'learning_rate_leaf': 0.013197619852913766, 'selected_variables': 0.9273126748436367}. Best is trial 0 with value: 0.9045340337332909.
[I 2025-01-04 10:52:47,992] Trial 2 finished with value: 0.9 and parameters: {'depth': 7, 'n_estimators': 1792, 'learning_rate_weights': 0.0026495243874631266, 'learning_rate_leaf': 0.011713187556763548, 'selected_variables': 0.9773436798865216}. Best is trial 0 with value: 0.9045340337332909.
[I 2025-01-04 10:53:01,858] Trial 3 finished with value: 0.7 and parameters: {'depth': 7, 'n_estimators

Best hyperparameters: {'depth': 5, 'n_estimators': 1536, 'learning_rate_weights': 0.010638726380738893, 'learning_rate_leaf': 0.009062367148569112, 'selected_variables': 0.6949842130850761}
Fold 1 Accuracy: 0.925 MCC: 0.8597269536210952
Fold 2 Accuracy: 0.9 MCC: 0.8040302522073697
Fold 3 Accuracy: 0.925 MCC: 0.8510644963469901
Fold 4 Accuracy: 0.875 MCC: 0.7585826061362605
Fold 5 Accuracy: 0.9 MCC: 0.8
Fold 6 Accuracy: 0.875 MCC: 0.7509392614826383
Fold 7 Accuracy: 0.8 MCC: 0.6030226891555273
Fold 8 Accuracy: 0.9 MCC: 0.8
Fold 9 Accuracy: 0.95 MCC: 0.9
Fold 10 Accuracy: 0.9743589743589743 MCC: 0.95

Mean Accuracy: 0.9024358974358974 Standard Deviation: 0.04769104400494741
Mean MCC: 0.8077366258949882 Standard Deviation: 0.09487711260850726
Time taken: 267.72 seconds


In [12]:
# Tune and evaluate GRANDE on the 'XII_200_pcc75' dataset key.
# Per the captured output, the call runs an Optuna study, prints the best
# hyperparameters, then reports per-fold accuracy / MCC for 10 folds plus
# their mean, standard deviation and the elapsed time.
# The return value is bound to XII_200_pcc75_results_df (presumably a results
# DataFrame, going by the name -- confirm against the function definition).
XII_200_pcc75_results_df = train_with_best_hyperparameters(datasets, 'XII_200_pcc75')

[I 2025-01-04 10:56:55,931] A new study created in memory with name: no-name-22baa732-bd80-414a-ac1b-cffebded7f4f


Currently training GRANDE model with dataset key: XII_200_pcc75


[I 2025-01-04 10:57:11,388] Trial 0 finished with value: 0.9 and parameters: {'depth': 7, 'n_estimators': 1280, 'learning_rate_weights': 0.001648006264673391, 'learning_rate_leaf': 0.010028468490654458, 'selected_variables': 0.6198973954905778}. Best is trial 0 with value: 0.9.
[I 2025-01-04 10:57:25,727] Trial 1 finished with value: 0.6574382586514258 and parameters: {'depth': 6, 'n_estimators': 768, 'learning_rate_weights': 0.026200185155736223, 'learning_rate_leaf': 0.006586470310384929, 'selected_variables': 0.6561162982109532}. Best is trial 0 with value: 0.9.
[I 2025-01-04 10:57:57,881] Trial 2 finished with value: 0.8510644963469901 and parameters: {'depth': 9, 'n_estimators': 512, 'learning_rate_weights': 0.0017497679438638692, 'learning_rate_leaf': 0.0035662539326905404, 'selected_variables': 0.6549873269313392}. Best is trial 0 with value: 0.9.
[I 2025-01-04 10:58:09,836] Trial 3 finished with value: 0.8510644963469901 and parameters: {'depth': 5, 'n_estimators': 1280, 'learn

Best hyperparameters: {'depth': 7, 'n_estimators': 1280, 'learning_rate_weights': 0.001648006264673391, 'learning_rate_leaf': 0.010028468490654458, 'selected_variables': 0.6198973954905778}
Fold 1 Accuracy: 0.925 MCC: 0.8510644963469901
Fold 2 Accuracy: 0.925 MCC: 0.8510644963469901
Fold 3 Accuracy: 0.9 MCC: 0.8040302522073697
Fold 4 Accuracy: 0.925 MCC: 0.8510644963469901
Fold 5 Accuracy: 0.875 MCC: 0.7509392614826383
Fold 6 Accuracy: 0.95 MCC: 0.9045340337332909
Fold 7 Accuracy: 0.85 MCC: 0.7
Fold 8 Accuracy: 0.95 MCC: 0.9
Fold 9 Accuracy: 0.975 MCC: 0.9511897312113419
Fold 10 Accuracy: 0.9487179487179487 MCC: 0.8973684210526316

Mean Accuracy: 0.9223717948717949 Standard Deviation: 0.03799629582739759
Mean MCC: 0.8461255188728243 Standard Deviation: 0.07618444059281376
Time taken: 466.22 seconds


In [13]:
# Tune and evaluate GRANDE on two dataset keys back to back:
# 'XII_150_nopcc' first, then 'XII_150_pcc95'. Each call runs its own Optuna
# study and prints best hyperparameters, per-fold accuracy / MCC for 10 folds,
# summary statistics and elapsed time (see the captured output).
# Each return value is bound to its own *_results_df name (presumably a
# results DataFrame -- confirm against the function definition).
# NOTE(review): in the captured output the second study's "A new study
# created" log line appears before the first run's "Fold 10" line. This looks
# like stream interleaving between the logger and print output rather than a
# wrong execution order -- confirm before relying on the output ordering.
XII_150_nopcc_results_df = train_with_best_hyperparameters(datasets, 'XII_150_nopcc')
# Dashed rule separating the two runs in the output.
print("\n---------------------------------------------------------------------\n")
XII_150_pcc95_results_df = train_with_best_hyperparameters(datasets, 'XII_150_pcc95')
# Closing dashed rule after the second run.
print("\n---------------------------------------------------------------------\n")


[I 2025-01-04 11:05:10,567] A new study created in memory with name: no-name-1583fa11-2a97-4669-825c-1a50e7481b49


Currently training GRANDE model with dataset key: XII_150_nopcc


[I 2025-01-04 11:05:33,027] Trial 0 finished with value: 0.8017837257372731 and parameters: {'depth': 8, 'n_estimators': 1536, 'learning_rate_weights': 0.03641154062501343, 'learning_rate_leaf': 0.0025447276325962927, 'selected_variables': 0.8021378651548279}. Best is trial 0 with value: 0.8017837257372731.
[I 2025-01-04 11:05:43,865] Trial 1 finished with value: 0.8017837257372731 and parameters: {'depth': 4, 'n_estimators': 1792, 'learning_rate_weights': 0.005358988212549344, 'learning_rate_leaf': 0.06864695109169924, 'selected_variables': 0.7622641835502852}. Best is trial 0 with value: 0.8017837257372731.
[I 2025-01-04 11:06:39,974] Trial 2 finished with value: 0.7333333333333333 and parameters: {'depth': 10, 'n_estimators': 1024, 'learning_rate_weights': 0.0012742516417254114, 'learning_rate_leaf': 0.05286738084620215, 'selected_variables': 0.9687630168377961}. Best is trial 0 with value: 0.8017837257372731.
[I 2025-01-04 11:06:52,981] Trial 3 finished with value: 0.80178372573727

Best hyperparameters: {'depth': 8, 'n_estimators': 1536, 'learning_rate_weights': 0.03641154062501343, 'learning_rate_leaf': 0.0025447276325962927, 'selected_variables': 0.8021378651548279}
Fold 1 Accuracy: 0.8666666666666667 MCC: 0.760885910252682
Fold 2 Accuracy: 0.9 MCC: 0.8017837257372731
Fold 3 Accuracy: 0.9333333333333333 MCC: 0.8666666666666667
Fold 4 Accuracy: 1.0 MCC: 1.0
Fold 5 Accuracy: 0.8 MCC: 0.6546536707079772
Fold 6 Accuracy: 0.9 MCC: 0.8017837257372731
Fold 7 Accuracy: 0.9333333333333333 MCC: 0.8744746321952062
Fold 8 Accuracy: 0.9333333333333333 MCC: 0.8666666666666667
Fold 9 Accuracy: 0.9666666666666667 MCC: 0.9354143466934853


[I 2025-01-04 11:13:27,026] A new study created in memory with name: no-name-38612a8c-ea8f-45f9-8ffd-784d69b5385d


Fold 10 Accuracy: 0.8620689655172413 MCC: 0.7238095238095238

Mean Accuracy: 0.9095402298850577 Standard Deviation: 0.05717176109780817
Mean MCC: 0.8286138868466754 Standard Deviation: 0.10168124446728918
Time taken: 496.46 seconds

---------------------------------------------------------------------

Currently training GRANDE model with dataset key: XII_150_pcc95


[I 2025-01-04 11:13:36,422] Trial 0 finished with value: 0.8017837257372731 and parameters: {'depth': 4, 'n_estimators': 1280, 'learning_rate_weights': 0.005636269567710802, 'learning_rate_leaf': 0.094994284071767, 'selected_variables': 0.9279332049061061}. Best is trial 0 with value: 0.8017837257372731.
[I 2025-01-04 11:13:47,068] Trial 1 finished with value: 0.6225430174794672 and parameters: {'depth': 3, 'n_estimators': 1792, 'learning_rate_weights': 0.07861087967436546, 'learning_rate_leaf': 0.0011598897158337149, 'selected_variables': 0.9108638060557938}. Best is trial 0 with value: 0.8017837257372731.
[I 2025-01-04 11:14:07,006] Trial 2 finished with value: 0.7333333333333333 and parameters: {'depth': 8, 'n_estimators': 1280, 'learning_rate_weights': 0.007311956403557162, 'learning_rate_leaf': 0.04090953828762781, 'selected_variables': 0.9116797512368758}. Best is trial 0 with value: 0.8017837257372731.
[I 2025-01-04 11:14:18,198] Trial 3 finished with value: 0.7333333333333333 a

Best hyperparameters: {'depth': 5, 'n_estimators': 1024, 'learning_rate_weights': 0.025657683610181725, 'learning_rate_leaf': 0.010059664034469232, 'selected_variables': 0.6071796904627234}
Fold 1 Accuracy: 0.9333333333333333 MCC: 0.8666666666666667
Fold 2 Accuracy: 0.9666666666666667 MCC: 0.9354143466934853
Fold 3 Accuracy: 0.9333333333333333 MCC: 0.8744746321952062
Fold 4 Accuracy: 0.9333333333333333 MCC: 0.8744746321952062
Fold 5 Accuracy: 0.9 MCC: 0.8017837257372731
Fold 6 Accuracy: 0.9 MCC: 0.8017837257372731
Fold 7 Accuracy: 0.9 MCC: 0.8017837257372731
Fold 8 Accuracy: 0.8666666666666667 MCC: 0.7333333333333333
Fold 9 Accuracy: 0.9666666666666667 MCC: 0.9354143466934853
Fold 10 Accuracy: 0.896551724137931 MCC: 0.7942674649391135

Mean Accuracy: 0.9196551724137929 Standard Deviation: 0.032458462196740215
Mean MCC: 0.8419396599928316 Standard Deviation: 0.06591996606875881
Time taken: 324.63 seconds

---------------------------------------------------------------------



In [9]:
# Tune and evaluate GRANDE on the 'XII_150_pcc75' dataset key.
# Per the captured output, the call runs an Optuna study, prints the best
# hyperparameters, then reports per-fold accuracy / MCC for 10 folds plus
# their mean, standard deviation and the elapsed time.
# The return value is bound to XII_150_pcc75_results_df (presumably a results
# DataFrame, going by the name -- confirm against the function definition).
XII_150_pcc75_results_df = train_with_best_hyperparameters(datasets, 'XII_150_pcc75')
# Closing dashed rule marking the end of this run in the output.
print("\n---------------------------------------------------------------------\n")

[I 2025-01-04 11:20:35,311] A new study created in memory with name: no-name-7a3a05d6-3d55-46a9-8b84-6b1594de3734


Currently training GRANDE model with dataset key: XII_150_pcc75


[I 2025-01-04 11:22:12,386] Trial 0 finished with value: 0.8017837257372731 and parameters: {'depth': 10, 'n_estimators': 1792, 'learning_rate_weights': 0.003952347128644093, 'learning_rate_leaf': 0.015938093383217396, 'selected_variables': 0.6347120598602136}. Best is trial 0 with value: 0.8017837257372731.
[I 2025-01-04 11:23:32,312] Trial 1 finished with value: 0.8017837257372731 and parameters: {'depth': 10, 'n_estimators': 1536, 'learning_rate_weights': 0.004468615565997734, 'learning_rate_leaf': 0.006660164935235105, 'selected_variables': 0.7690532084057644}. Best is trial 0 with value: 0.8017837257372731.
[I 2025-01-04 11:24:53,061] Trial 2 finished with value: 0.8017837257372731 and parameters: {'depth': 10, 'n_estimators': 1536, 'learning_rate_weights': 0.014251130036321271, 'learning_rate_leaf': 0.006568185042007302, 'selected_variables': 0.6723304965556665}. Best is trial 0 with value: 0.8017837257372731.
[I 2025-01-04 11:25:54,191] Trial 3 finished with value: 0.73333333333

Best hyperparameters: {'depth': 10, 'n_estimators': 1792, 'learning_rate_weights': 0.003952347128644093, 'learning_rate_leaf': 0.015938093383217396, 'selected_variables': 0.6347120598602136}
Fold 1 Accuracy: 0.8666666666666667 MCC: 0.7333333333333333
Fold 2 Accuracy: 0.9333333333333333 MCC: 0.8744746321952062
Fold 3 Accuracy: 0.9666666666666667 MCC: 0.9354143466934853
Fold 4 Accuracy: 0.9 MCC: 0.8017837257372731
Fold 5 Accuracy: 0.8666666666666667 MCC: 0.7399400733959437
Fold 6 Accuracy: 0.9666666666666667 MCC: 0.9354143466934853
Fold 7 Accuracy: 0.9666666666666667 MCC: 0.9354143466934853
Fold 8 Accuracy: 1.0 MCC: 1.0
Fold 9 Accuracy: 0.9666666666666667 MCC: 0.9354143466934853
Fold 10 Accuracy: 0.8620689655172413 MCC: 0.7295452198626643

Mean Accuracy: 0.9295402298850576 Standard Deviation: 0.05144967624498386
Mean MCC: 0.8620734371298362 Standard Deviation: 0.10186168889508021
Time taken: 1545.41 seconds

---------------------------------------------------------------------



In [9]:
# Tune and evaluate GRANDE on the 'XII_100_nopcc' dataset key.
# Per the captured output, the call runs an Optuna study, prints the best
# hyperparameters, then reports per-fold accuracy / MCC for 10 folds plus
# their mean, standard deviation and the elapsed time.
# The return value is bound to XII_100_nopcc_results_df (presumably a results
# DataFrame, going by the name -- confirm against the function definition).
XII_100_nopcc_results_df = train_with_best_hyperparameters(datasets, 'XII_100_nopcc')
# Closing dashed rule marking the end of this run in the output.
print("\n---------------------------------------------------------------------\n")


[I 2025-01-04 11:47:41,115] A new study created in memory with name: no-name-62576378-ac3e-4a32-8996-f0ef6b1b0f0f


Currently training GRANDE model with dataset key: XII_100_nopcc


[I 2025-01-04 11:47:54,833] Trial 0 finished with value: 0.9045340337332909 and parameters: {'depth': 4, 'n_estimators': 1792, 'learning_rate_weights': 0.0067653595573735755, 'learning_rate_leaf': 0.006536911954226054, 'selected_variables': 0.7764641845029736}. Best is trial 0 with value: 0.9045340337332909.
[I 2025-01-04 11:48:36,890] Trial 1 finished with value: 1.0 and parameters: {'depth': 9, 'n_estimators': 2048, 'learning_rate_weights': 0.0020601051571885872, 'learning_rate_leaf': 0.019402332189587583, 'selected_variables': 0.7492769262739525}. Best is trial 1 with value: 1.0.
[I 2025-01-04 11:49:30,009] Trial 2 finished with value: 0.8 and parameters: {'depth': 10, 'n_estimators': 1024, 'learning_rate_weights': 0.0740006761300733, 'learning_rate_leaf': 0.006785781731833649, 'selected_variables': 0.9887253967261466}. Best is trial 1 with value: 1.0.
[I 2025-01-04 11:49:44,491] Trial 3 finished with value: 1.0 and parameters: {'depth': 7, 'n_estimators': 512, 'learning_rate_weight

Best hyperparameters: {'depth': 9, 'n_estimators': 2048, 'learning_rate_weights': 0.0020601051571885872, 'learning_rate_leaf': 0.019402332189587583, 'selected_variables': 0.7492769262739525}
Fold 1 Accuracy: 1.0 MCC: 1.0
Fold 2 Accuracy: 0.95 MCC: 0.9045340337332909
Fold 3 Accuracy: 1.0 MCC: 1.0
Fold 4 Accuracy: 0.9 MCC: 0.8
Fold 5 Accuracy: 0.95 MCC: 0.9045340337332909
Fold 6 Accuracy: 0.95 MCC: 0.9045340337332909
Fold 7 Accuracy: 0.95 MCC: 0.9045340337332909
Fold 8 Accuracy: 0.85 MCC: 0.7035264706814485
Fold 9 Accuracy: 0.85 MCC: 0.7035264706814485
Fold 10 Accuracy: 0.8947368421052632 MCC: 0.7888888888888889

Mean Accuracy: 0.9294736842105262 Standard Deviation: 0.05409941488860695
Mean MCC: 0.8614077965184949 Standard Deviation: 0.10787751418989588
Time taken: 828.64 seconds

---------------------------------------------------------------------



In [9]:
# Tune and evaluate GRANDE on the 'XII_100_pcc95' dataset key.
# Per the captured output, the call runs an Optuna study, prints the best
# hyperparameters, then reports per-fold accuracy / MCC for 10 folds plus
# their mean, standard deviation and the elapsed time.
# The return value is bound to XII_100_pcc95_results_df (presumably a results
# DataFrame, going by the name -- confirm against the function definition).
XII_100_pcc95_results_df = train_with_best_hyperparameters(datasets, 'XII_100_pcc95')
# Closing dashed rule marking the end of this run in the output.
print("\n---------------------------------------------------------------------\n")


[I 2025-01-04 12:11:47,058] A new study created in memory with name: no-name-75ba8c83-745e-407b-a0d4-03bfd312baef


Currently training GRANDE model with dataset key: XII_100_pcc95


[I 2025-01-04 12:12:11,027] Trial 0 finished with value: 1.0 and parameters: {'depth': 8, 'n_estimators': 1280, 'learning_rate_weights': 0.02780497651694946, 'learning_rate_leaf': 0.014153914292320035, 'selected_variables': 0.6218035536533584}. Best is trial 0 with value: 1.0.
[I 2025-01-04 12:12:22,476] Trial 1 finished with value: 0.9045340337332909 and parameters: {'depth': 5, 'n_estimators': 1536, 'learning_rate_weights': 0.02654714668635002, 'learning_rate_leaf': 0.002969804747337797, 'selected_variables': 0.7474310217775102}. Best is trial 0 with value: 1.0.
[I 2025-01-04 12:13:40,099] Trial 2 finished with value: 1.0 and parameters: {'depth': 10, 'n_estimators': 1792, 'learning_rate_weights': 0.001255548862013414, 'learning_rate_leaf': 0.028736527481854058, 'selected_variables': 0.7158853862820346}. Best is trial 0 with value: 1.0.
[I 2025-01-04 12:14:15,388] Trial 3 finished with value: 0.7035264706814485 and parameters: {'depth': 9, 'n_estimators': 1280, 'learning_rate_weights

Best hyperparameters: {'depth': 8, 'n_estimators': 1280, 'learning_rate_weights': 0.02780497651694946, 'learning_rate_leaf': 0.014153914292320035, 'selected_variables': 0.6218035536533584}
Fold 1 Accuracy: 1.0 MCC: 1.0
Fold 2 Accuracy: 0.95 MCC: 0.9045340337332909
Fold 3 Accuracy: 1.0 MCC: 1.0
Fold 4 Accuracy: 0.9 MCC: 0.816496580927726
Fold 5 Accuracy: 0.9 MCC: 0.8
Fold 6 Accuracy: 1.0 MCC: 1.0
Fold 7 Accuracy: 0.8 MCC: 0.6
Fold 8 Accuracy: 0.85 MCC: 0.7035264706814485
Fold 9 Accuracy: 0.85 MCC: 0.7035264706814485
Fold 10 Accuracy: 0.7894736842105263 MCC: 0.5865557254410011

Mean Accuracy: 0.9039473684210526 Standard Deviation: 0.0813498953816709
Mean MCC: 0.8114639281464916 Standard Deviation: 0.16126514639139783
Time taken: 425.94 seconds

---------------------------------------------------------------------



In [10]:
# Tune and evaluate GRANDE on the 'XII_100_pcc75' dataset key.
# Per the captured output, the call runs an Optuna study, prints the best
# hyperparameters, then reports per-fold accuracy / MCC for 10 folds plus
# their mean, standard deviation and the elapsed time.
# The return value is bound to XII_100_pcc75_results_df (presumably a results
# DataFrame, going by the name -- confirm against the function definition).
XII_100_pcc75_results_df = train_with_best_hyperparameters(datasets, 'XII_100_pcc75')

[I 2025-01-04 12:18:53,005] A new study created in memory with name: no-name-9f337778-e22d-4ce3-b934-fdeafda47e8a


Currently training GRANDE model with dataset key: XII_100_pcc75


[I 2025-01-04 12:19:26,808] Trial 0 finished with value: 0.9045340337332909 and parameters: {'depth': 9, 'n_estimators': 1280, 'learning_rate_weights': 0.00827805506458484, 'learning_rate_leaf': 0.0021529772050879015, 'selected_variables': 0.6509255159601288}. Best is trial 0 with value: 0.9045340337332909.
[I 2025-01-04 12:19:37,601] Trial 1 finished with value: 1.0 and parameters: {'depth': 5, 'n_estimators': 1536, 'learning_rate_weights': 0.03231224325556342, 'learning_rate_leaf': 0.05335994701356162, 'selected_variables': 0.9136795360354677}. Best is trial 1 with value: 1.0.
[I 2025-01-04 12:19:48,810] Trial 2 finished with value: 0.816496580927726 and parameters: {'depth': 5, 'n_estimators': 1024, 'learning_rate_weights': 0.033342355658693146, 'learning_rate_leaf': 0.002701559347753451, 'selected_variables': 0.7774805190586438}. Best is trial 1 with value: 1.0.
[I 2025-01-04 12:20:06,295] Trial 3 finished with value: 1.0 and parameters: {'depth': 7, 'n_estimators': 1792, 'learning

Best hyperparameters: {'depth': 5, 'n_estimators': 1536, 'learning_rate_weights': 0.03231224325556342, 'learning_rate_leaf': 0.05335994701356162, 'selected_variables': 0.9136795360354677}
Fold 1 Accuracy: 1.0 MCC: 1.0
Fold 2 Accuracy: 0.95 MCC: 0.9045340337332909
Fold 3 Accuracy: 1.0 MCC: 1.0
Fold 4 Accuracy: 0.8 MCC: 0.6123724356957946
Fold 5 Accuracy: 0.9 MCC: 0.816496580927726
Fold 6 Accuracy: 0.9 MCC: 0.816496580927726
Fold 7 Accuracy: 0.9 MCC: 0.816496580927726
Fold 8 Accuracy: 0.8 MCC: 0.6546536707079772
Fold 9 Accuracy: 0.85 MCC: 0.7337993857053428
Fold 10 Accuracy: 1.0 MCC: 1.0

Mean Accuracy: 0.9099999999999999 Standard Deviation: 0.07745966692414832
Mean MCC: 0.8354849268625584 Standard Deviation: 0.1413679812362995
Time taken: 307.99 seconds
