In [1]:
!pip install optuna grande



In [1]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.metrics import accuracy_score, matthews_corrcoef, make_scorer
from scipy.stats import shapiro
import optuna
from GRANDE import GRANDE

In [2]:
# Build the train/test arrays for one round of k-fold cross validation
def prepare_train_test_dataset(datasets, test_idx):
  """Split a list of pre-computed folds into train and test arrays.

  The fold at ``test_idx`` becomes the test set; all remaining folds are
  concatenated into the training set.  In every fold the first column is
  dropped, the last column is the label and everything in between is used
  as features.

  Args:
    datasets: sequence of per-fold pandas DataFrames sharing one column layout.
    test_idx: position of the held-out fold.  Negative positions are accepted
      and count from the end, like regular sequence indexing.

  Returns:
    Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.

  Raises:
    IndexError: if ``test_idx`` does not address one of the folds.
  """
  n_folds = len(datasets)
  if not -n_folds <= test_idx < n_folds:
    raise IndexError(f"test_idx {test_idx} is out of range for {n_folds} folds")

  # Normalise negative positions.  Without this, ``i != test_idx`` below would
  # never match and the held-out fold would also end up in the training data.
  test_idx = test_idx % n_folds

  test_df = datasets[test_idx]
  train_df = pd.concat([fold for i, fold in enumerate(datasets) if i != test_idx])

  X_train = train_df.iloc[:, 1:-1].values
  X_test = test_df.iloc[:, 1:-1].values
  y_train = train_df.iloc[:, -1].values
  y_test = test_df.iloc[:, -1].values

  return X_train, X_test, y_train, y_test

In [3]:
# Baseline GRANDE configuration; the values follow the paper and the GitHub repo.
# `params` holds the model hyperparameters, `args` the training/run settings.
params = dict(
    # --- ensemble shape ---
    depth=5,  # depth of each tree
    n_estimators=2048,  # number of trees in the ensemble

    # --- per-component learning rates ---
    learning_rate_weights=0.005,  # leaf weights
    learning_rate_index=0.01,  # split indices
    learning_rate_values=0.01,  # split values
    learning_rate_leaf=0.01,  # leaf logits

    # --- optimisation ---
    optimizer='adam',
    cosine_decay_steps=0,  # decay steps for the lr schedule (CosineDecayRestarts)

    # --- loss ---
    loss='crossentropy',  # 'crossentropy' for binary/multi-class classification, 'mse' for regression
    focal_loss=False,  # use focal loss {True, False}
    temperature=0.0,  # temperature for stochastic re-weighted GD (0.0, 1.0)

    # --- weighting ---
    from_logits=True,  # use logits for weighting {True, False}
    use_class_weights=True,  # use class weights for training {True, False}

    # --- regularisation ---
    dropout=0.0,  # dropout rate; randomly disables individual estimators during training

    # --- subsampling ---
    selected_variables=0.8,  # feature subset percentage (0.0, 1.0)
    data_subset_fraction=1.0,  # data subset percentage (0.0, 1.0)
)

args = dict(
    epochs=1_000,  # number of training epochs
    early_stopping_epochs=25,
    batch_size=32,
    cat_idx=[],  # list of categorical feature indices
    objective='binary',  # task: {'binary', 'classification', 'regression'}

    random_seed=42,
    verbose=0,
)

In [4]:
import torch

def train_with_base_grande(datasets, key):
  """Cross-validate GRANDE with the baseline ``params``/``args`` on one dataset.

  Every fold in ``datasets[key]`` is held out once while the remaining folds
  are used for training.  Accuracy and MCC are printed per fold, followed by
  their mean and standard deviation over all folds.

  Args:
    datasets: mapping from dataset key to a list of per-fold DataFrames.
    key: entry of ``datasets`` to evaluate.

  Returns:
    DataFrame with the columns ``Fold``, ``Accuracy`` and ``MCC`` and one row
    per fold, so callers can keep the results instead of only reading the log.
  """
  print("Currently training GRANDE model with dataset key:", key)

  dataset_list = datasets[key]

  # Collect one record per fold and build the DataFrame once at the end.
  fold_records = []

  # One round per available fold.
  for i in range(len(dataset_list)):
    X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, i)

    # Train the model.  Copies are passed so the shared module-level dicts stay
    # untouched whatever the constructor does with its arguments.
    # NOTE(review): the held-out fold is also handed to fit() as validation
    # data, so the reported scores are not independent of model selection.
    model = GRANDE(params=dict(params), args=dict(args))
    model.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Evaluate the model: predict() output is reduced to class labels by argmax.
    y_pred_labels = np.argmax(model.predict(X_test), axis=1)
    accuracy = accuracy_score(y_test, y_pred_labels)
    mcc = matthews_corrcoef(y_test, y_pred_labels)

    print("Fold", i+1, "Accuracy:", accuracy, "MCC:", mcc)
    fold_records.append((i+1, accuracy, mcc))

  results_df = pd.DataFrame(fold_records, columns=['Fold', 'Accuracy', 'MCC'])

  print()

  print("Mean Accuracy:", results_df['Accuracy'].mean(), "Standard Deviation:", results_df['Accuracy'].std())
  print("Mean MCC:", results_df['MCC'].mean(), "Standard Deviation:", results_df['MCC'].std())

  return results_df

In [5]:
def create_objective_for_grande(X_train, X_test, y_train, y_test):
  """Build an Optuna objective that scores one GRANDE configuration.

  The returned callable samples ``depth``, ``n_estimators`` and
  ``selected_variables`` from the trial, keeps every other hyperparameter
  fixed, fits a model on the training arrays (with the test arrays passed as
  validation data) and returns the Matthews correlation coefficient of its
  predictions on ``X_test``.

  Args:
    X_train, y_train: features and labels used for fitting.
    X_test, y_test: features and labels used for validation and scoring.

  Returns:
    A function ``objective(trial) -> float`` suitable for ``study.optimize``.
  """
  def objective(trial):
    # Sampled values, requested in a fixed order: depth, trees, feature fraction.
    tuned_depth = trial.suggest_int('depth', 3, 10)
    tuned_n_estimators = trial.suggest_int('n_estimators', 512, 2048)
    tuned_feature_fraction = trial.suggest_float('selected_variables', 0.5, 1.0)

    search_params = {
        'depth': tuned_depth,
        'n_estimators': tuned_n_estimators,

        # Everything below stays constant during the search.
        'learning_rate_weights': 0.005,
        'learning_rate_index': 0.01,
        'learning_rate_values': 0.01,
        'learning_rate_leaf': 0.01,

        'optimizer': 'adam',
        'cosine_decay_steps': 0,

        'loss': 'crossentropy',
        'focal_loss': False,
        'temperature': 0.0,

        'from_logits': True,
        'use_class_weights': True,

        'dropout': 0.0,

        'selected_variables': tuned_feature_fraction,
        'data_subset_fraction': 1.0,
    }

    # Training settings used only while searching.
    search_args = {
        'epochs': 100,
        'early_stopping_epochs': 10,
        'batch_size': 64,
        'cat_idx': [],
        'objective': 'binary',

        'random_seed': 42,
        'verbose': 0,
    }

    candidate = GRANDE(params=search_params, args=search_args)
    candidate.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Reduce the prediction matrix to class labels and score them with MCC.
    predicted_labels = np.argmax(candidate.predict(X_test), axis=1)
    return matthews_corrcoef(y_test, predicted_labels)

  return objective

In [6]:
import time
def train_with_best_hyperparameters(datasets, key):
  """Tune GRANDE with Optuna, then cross-validate the best configuration.

  A 10-trial Optuna study (maximising the objective built by
  ``create_objective_for_grande``) is run with fold 0 as the held-out fold.
  The best trial's values are then used to train and score one model per fold.
  Per-fold accuracy and MCC, their mean and standard deviation, and the total
  wall-clock time are printed.

  Args:
    datasets: mapping from dataset key to a list of per-fold DataFrames.
    key: entry of ``datasets`` to evaluate.

  Returns:
    DataFrame with the columns ``Fold``, ``Accuracy`` and ``MCC`` and one row
    per fold.
  """
  start_time = time.time()
  print("Currently training GRANDE model with dataset key:", key)

  dataset_list = datasets[key]

  # Use the first fold as the test data for the HPO.
  # NOTE(review): this fold is scored again in the loop below, so its result
  # is optimistic; consider tuning on data that is not part of the evaluation.
  X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, 0)

  objective_function = create_objective_for_grande(X_train, X_test, y_train, y_test)

  study = optuna.create_study(direction='maximize')
  study.optimize(objective_function, n_trials=10, gc_after_trial=True)

  best_params = study.best_params
  print("Best hyperparameters:", best_params)

  # Optuna only reports the tuned values.  Layer them over the baseline
  # `params` so the final models use the same fixed settings as the search
  # instead of whatever the library falls back to for missing keys.
  model_params = {**params, **best_params}

  # Collect one record per fold and build the DataFrame once at the end.
  fold_records = []

  # Use the best hyperparameters for a full cross validation, one round per fold.
  for i in range(len(dataset_list)):
    X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, i)

    # Train the model (copies keep the shared dicts untouched between folds).
    # NOTE(review): the held-out fold is also handed to fit() as validation data.
    model = GRANDE(params=dict(model_params), args=dict(args))
    model.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Evaluate the model: predict() output is reduced to class labels by argmax.
    y_pred_labels = np.argmax(model.predict(X_test), axis=1)
    accuracy = accuracy_score(y_test, y_pred_labels)
    mcc = matthews_corrcoef(y_test, y_pred_labels)

    print("Fold", i+1, "Accuracy:", accuracy, "MCC:", mcc)
    fold_records.append((i+1, accuracy, mcc))

  results_df = pd.DataFrame(fold_records, columns=['Fold', 'Accuracy', 'MCC'])

  print()

  print("Mean Accuracy:", results_df['Accuracy'].mean(), "Standard Deviation:", results_df['Accuracy'].std())
  print("Mean MCC:", results_df['MCC'].mean(), "Standard Deviation:", results_df['MCC'].std())
  elapsed_time = time.time() - start_time
  print(f"Time taken: {elapsed_time:.2f}seconds")

  return results_df

In [7]:
def manual_train_with_best_hyperparameters(datasets, key, best_params):
  """Cross-validate GRANDE with caller-supplied hyperparameters.

  Works like the tuned run but skips the Optuna search: ``best_params`` is
  applied directly.  Accuracy and MCC are printed per fold, followed by their
  mean and standard deviation.

  Args:
    datasets: mapping from dataset key to a list of per-fold DataFrames.
    key: entry of ``datasets`` to evaluate.
    best_params: GRANDE hyperparameters to use.  It may contain only the tuned
      keys; anything missing is taken from the baseline ``params``.  The dict
      itself is not modified.

  Returns:
    DataFrame with the columns ``Fold``, ``Accuracy`` and ``MCC`` and one row
    per fold.
  """
  print("Currently training GRANDE model with dataset key:", key)

  dataset_list = datasets[key]

  # Supplied values win; the baseline fills in every key the caller left out.
  model_params = {**params, **best_params}

  # Collect one record per fold and build the DataFrame once at the end.
  fold_records = []

  # Use the given hyperparameters for a full cross validation, one round per fold.
  for i in range(len(dataset_list)):
    X_train, X_test, y_train, y_test = prepare_train_test_dataset(dataset_list, i)

    # Train the model (copies keep the shared dicts untouched between folds).
    # NOTE(review): the held-out fold is also handed to fit() as validation data.
    model = GRANDE(params=dict(model_params), args=dict(args))
    model.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test)

    # Evaluate the model: predict() output is reduced to class labels by argmax.
    y_pred_labels = np.argmax(model.predict(X_test), axis=1)
    accuracy = accuracy_score(y_test, y_pred_labels)
    mcc = matthews_corrcoef(y_test, y_pred_labels)

    print("Fold", i+1, "Accuracy:", accuracy, "MCC:", mcc)
    fold_records.append((i+1, accuracy, mcc))

  results_df = pd.DataFrame(fold_records, columns=['Fold', 'Accuracy', 'MCC'])

  print()

  print("Mean Accuracy:", results_df['Accuracy'].mean(), "Standard Deviation:", results_df['Accuracy'].std())
  print("Mean MCC:", results_df['MCC'].mean(), "Standard Deviation:", results_df['MCC'].std())

  return results_df

In [8]:
# Load the validation datasets
#
# Files are laid out as <root>/<subset>/cv/<variant>/fold_<n>.csv.  Every
# (subset, variant) pair becomes one entry `datasets['IX_<subset>_<variant>']`
# holding its folds as a list of DataFrames ordered fold_1 .. fold_10.

def _load_cv_folds(root, subsets, variants, n_folds=10):
  """Read all cross-validation fold files below ``root``.

  Args:
    root: directory that contains one sub-directory per subset.
    subsets: subset directory names, in the order their keys should be created.
    variants: variant directory names, iterated inside each subset.
    n_folds: number of ``fold_<n>.csv`` files per (subset, variant) pair.

  Returns:
    Dict mapping ``'IX_<subset>_<variant>'`` to a list of ``n_folds`` DataFrames.
  """
  loaded = {}
  for subset in subsets:
    for variant in variants:
      loaded[f'IX_{subset}_{variant}'] = [
          pd.read_csv(f'{root}/{subset}/cv/{variant}/fold_{fold}.csv')
          for fold in range(1, n_folds + 1)
      ]
  return loaded


datasets = _load_cv_folds(
    root='/content/drive/MyDrive/OrderedDataset/IX',
    subsets=('all', '500', '400', '300'),
    variants=('nopcc', 'pcc95', 'pcc75'),
)


In [9]:
# Silence FutureWarning messages for the rest of the session so they do not
# drown out the per-fold metric output of the training cells.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [11]:
# Baseline experiment: run train_with_base_grande (module-level params/args)
# on every dataset variant, in the order all -> 500 -> 400 -> 300 and, within
# each, nopcc -> pcc95 -> pcc75.  The helper prints per-fold and summary
# metrics; a dashed separator line is printed between consecutive runs.
IX_all_nopcc_results_df = train_with_base_grande(datasets, 'IX_all_nopcc')
print("\n---------------------------------------------------------------------\n")
IX_all_pcc95_results_df = train_with_base_grande(datasets, 'IX_all_pcc95')
print("\n---------------------------------------------------------------------\n")
IX_all_pcc75_results_df = train_with_base_grande(datasets, 'IX_all_pcc75')
print("\n---------------------------------------------------------------------\n")
IX_500_nopcc_results_df = train_with_base_grande(datasets, 'IX_500_nopcc')
print("\n---------------------------------------------------------------------\n")
IX_500_pcc95_results_df = train_with_base_grande(datasets, 'IX_500_pcc95')
print("\n---------------------------------------------------------------------\n")
IX_500_pcc75_results_df = train_with_base_grande(datasets, 'IX_500_pcc75')
print("\n---------------------------------------------------------------------\n")
IX_400_nopcc_results_df = train_with_base_grande(datasets, 'IX_400_nopcc')
print("\n---------------------------------------------------------------------\n")
IX_400_pcc95_results_df = train_with_base_grande(datasets, 'IX_400_pcc95')
print("\n---------------------------------------------------------------------\n")
IX_400_pcc75_results_df = train_with_base_grande(datasets, 'IX_400_pcc75')
print("\n---------------------------------------------------------------------\n")
IX_300_nopcc_results_df = train_with_base_grande(datasets, 'IX_300_nopcc')
print("\n---------------------------------------------------------------------\n")
IX_300_pcc95_results_df = train_with_base_grande(datasets, 'IX_300_pcc95')
print("\n---------------------------------------------------------------------\n")
IX_300_pcc75_results_df = train_with_base_grande(datasets, 'IX_300_pcc75')

Currently training GRANDE model with dataset key: IX_all_nopcc
Fold 1 Accuracy: 0.8482758620689655 MCC: 0.696537290715373
Fold 2 Accuracy: 0.8689655172413793 MCC: 0.738013698630137




Fold 3 Accuracy: 0.8551724137931035 MCC: 0.7121629408467265
Fold 4 Accuracy: 0.8055555555555556 MCC: 0.6170919847985359
Fold 5 Accuracy: 0.875 MCC: 0.7502895194085833
Fold 6 Accuracy: 0.875 MCC: 0.7526178090063818
Fold 7 Accuracy: 0.8263888888888888 MCC: 0.6543575143161157
Fold 8 Accuracy: 0.8263888888888888 MCC: 0.652840747707773
Fold 9 Accuracy: 0.8055555555555556 MCC: 0.6113470158144012
Fold 10 Accuracy: 0.875 MCC: 0.75

Mean Accuracy: 0.8461302681992336 Standard Deviation: 0.028245462837103674
Mean MCC: 0.6935258521244027 Standard Deviation: 0.055801049984968695

---------------------------------------------------------------------

Currently training GRANDE model with dataset key: IX_all_pcc95
Fold 1 Accuracy: 0.7862068965517242 MCC: 0.5747978383994542
Fold 2 Accuracy: 0.8689655172413793 MCC: 0.7386256677605554
Fold 3 Accuracy: 0.8758620689655172 MCC: 0.7529528726768665
Fold 4 Accuracy: 0.8472222222222222 MCC: 0.6955186050933215
Fold 5 Accuracy: 0.8333333333333334 MCC: 0.667697860

In [13]:
# Optuna search followed by cross validation on the IX_all_nopcc folds;
# the helper prints the best hyperparameters, per-fold metrics and run time.
IX_all_nopcc_results_df = train_with_best_hyperparameters(datasets, 'IX_all_nopcc')

[I 2025-01-04 09:02:24,833] A new study created in memory with name: no-name-11f43d16-3690-4ce7-ad85-aae02880622a


Currently training GRANDE model with dataset key: IX_all_nopcc


[I 2025-01-04 09:02:40,103] Trial 0 finished with value: 0.655619475778845 and parameters: {'depth': 5, 'n_estimators': 1351, 'selected_variables': 0.6692079681289866}. Best is trial 0 with value: 0.655619475778845.
[I 2025-01-04 09:05:27,898] Trial 1 finished with value: 0.7395565493010662 and parameters: {'depth': 10, 'n_estimators': 1162, 'selected_variables': 0.7186160080086665}. Best is trial 1 with value: 0.7395565493010662.
[I 2025-01-04 09:06:04,071] Trial 2 finished with value: 0.6551855494460996 and parameters: {'depth': 8, 'n_estimators': 1061, 'selected_variables': 0.6426393968634428}. Best is trial 1 with value: 0.7395565493010662.
[I 2025-01-04 09:07:57,464] Trial 3 finished with value: 0.7541607957287141 and parameters: {'depth': 9, 'n_estimators': 1951, 'selected_variables': 0.8251458323226535}. Best is trial 3 with value: 0.7541607957287141.
[I 2025-01-04 09:08:15,037] Trial 4 finished with value: 0.6012236034178687 and parameters: {'depth': 5, 'n_estimators': 1083, 's

Best hyperparameters: {'depth': 9, 'n_estimators': 1951, 'selected_variables': 0.8251458323226535}
Fold 1 Accuracy: 0.8344827586206897 MCC: 0.6691415521078228
Fold 2 Accuracy: 0.8689655172413793 MCC: 0.738013698630137
Fold 3 Accuracy: 0.8758620689655172 MCC: 0.7520456844063276
Fold 4 Accuracy: 0.8402777777777778 MCC: 0.6837948979863563
Fold 5 Accuracy: 0.8680555555555556 MCC: 0.7367509296683308
Fold 6 Accuracy: 0.8819444444444444 MCC: 0.7657375167529014
Fold 7 Accuracy: 0.7986111111111112 MCC: 0.5972798330092391
Fold 8 Accuracy: 0.8055555555555556 MCC: 0.612056372482123
Fold 9 Accuracy: 0.8194444444444444 MCC: 0.6388888888888888
Fold 10 Accuracy: 0.875 MCC: 0.75

Mean Accuracy: 0.8468199233716476 Standard Deviation: 0.031240881518635518
Mean MCC: 0.6943709373932127 Standard Deviation: 0.06257787539210952
Time taken: 1682.41seconds


In [10]:
# Optuna search followed by cross validation on the IX_all_pcc95 folds;
# the helper prints the best hyperparameters, per-fold metrics and run time.
IX_all_pcc95_results_df = train_with_best_hyperparameters(datasets, 'IX_all_pcc95')

[I 2025-01-04 09:31:33,149] A new study created in memory with name: no-name-be7e7e22-7f4c-408c-970d-7ff33ae4011a


Currently training GRANDE model with dataset key: IX_all_pcc95


[I 2025-01-04 09:31:52,347] Trial 0 finished with value: 0.6565567817711477 and parameters: {'depth': 6, 'n_estimators': 1154, 'selected_variables': 0.9288659972409392}. Best is trial 0 with value: 0.6565567817711477.
[I 2025-01-04 09:32:52,666] Trial 1 finished with value: 0.6003813444668563 and parameters: {'depth': 9, 'n_estimators': 1112, 'selected_variables': 0.6804150839560245}. Best is trial 0 with value: 0.6565567817711477.
[I 2025-01-04 09:33:08,403] Trial 2 finished with value: 0.6156687549730612 and parameters: {'depth': 6, 'n_estimators': 759, 'selected_variables': 0.7084133078982656}. Best is trial 0 with value: 0.6565567817711477.
[I 2025-01-04 09:33:21,927] Trial 3 finished with value: 0.6193386107599298 and parameters: {'depth': 4, 'n_estimators': 877, 'selected_variables': 0.5885907723872139}. Best is trial 0 with value: 0.6565567817711477.
[I 2025-01-04 09:33:56,107] Trial 4 finished with value: 0.5602719386708 and parameters: {'depth': 8, 'n_estimators': 1176, 'selec

Best hyperparameters: {'depth': 6, 'n_estimators': 1154, 'selected_variables': 0.9288659972409392}
Fold 1 Accuracy: 0.8206896551724138 MCC: 0.6422047504584104
Fold 2 Accuracy: 0.8482758620689655 MCC: 0.6968601458238795
Fold 3 Accuracy: 0.8827586206896552 MCC: 0.7674432547775484
Fold 4 Accuracy: 0.8611111111111112 MCC: 0.7247430753394787
Fold 5 Accuracy: 0.8194444444444444 MCC: 0.6398771166858558
Fold 6 Accuracy: 0.8333333333333334 MCC: 0.6669240172520741
Fold 7 Accuracy: 0.8263888888888888 MCC: 0.652840747707773
Fold 8 Accuracy: 0.7916666666666666 MCC: 0.5869678440936947
Fold 9 Accuracy: 0.8472222222222222 MCC: 0.6947125179709105
Fold 10 Accuracy: 0.875 MCC: 0.7502895194085833

Mean Accuracy: 0.84058908045977 Standard Deviation: 0.027843605025512592
Mean MCC: 0.6822862989518208 Standard Deviation: 0.05541685283649373
Time taken: 409.39seconds


In [11]:
# Optuna search followed by cross validation on the IX_all_pcc75 folds;
# the helper prints the best hyperparameters, per-fold metrics and run time.
IX_all_pcc75_results_df = train_with_best_hyperparameters(datasets, 'IX_all_pcc75')

[I 2025-01-04 09:38:48,322] A new study created in memory with name: no-name-5da5d289-e10d-4a1d-b5f5-9e980fd5c6ef


Currently training GRANDE model with dataset key: IX_all_pcc75


[I 2025-01-04 09:39:00,130] Trial 0 finished with value: 0.5061423050202445 and parameters: {'depth': 3, 'n_estimators': 1016, 'selected_variables': 0.939224500998366}. Best is trial 0 with value: 0.5061423050202445.
[I 2025-01-04 09:40:05,696] Trial 1 finished with value: 0.6025335078064162 and parameters: {'depth': 9, 'n_estimators': 1279, 'selected_variables': 0.6490422854532198}. Best is trial 1 with value: 0.6025335078064162.
[I 2025-01-04 09:42:43,657] Trial 2 finished with value: 0.6145645934929995 and parameters: {'depth': 10, 'n_estimators': 1506, 'selected_variables': 0.585739655178795}. Best is trial 2 with value: 0.6145645934929995.
[I 2025-01-04 09:43:38,243] Trial 3 finished with value: 0.5879703468219306 and parameters: {'depth': 9, 'n_estimators': 1195, 'selected_variables': 0.9550737240334277}. Best is trial 2 with value: 0.6145645934929995.
[I 2025-01-04 09:43:51,324] Trial 4 finished with value: 0.5532320485015881 and parameters: {'depth': 4, 'n_estimators': 753, 'se

Best hyperparameters: {'depth': 9, 'n_estimators': 1923, 'selected_variables': 0.5727613236865996}
Fold 1 Accuracy: 0.8 MCC: 0.6025335078064162
Fold 2 Accuracy: 0.8551724137931035 MCC: 0.7121629408467265
Fold 3 Accuracy: 0.8758620689655172 MCC: 0.7544392662374653
Fold 4 Accuracy: 0.8472222222222222 MCC: 0.6944444444444444
Fold 5 Accuracy: 0.8333333333333334 MCC: 0.6669240172520741
Fold 6 Accuracy: 0.8263888888888888 MCC: 0.6558849021501785
Fold 7 Accuracy: 0.8333333333333334 MCC: 0.6669240172520741
Fold 8 Accuracy: 0.8055555555555556 MCC: 0.6113470158144012
Fold 9 Accuracy: 0.8472222222222222 MCC: 0.6955186050933215
Fold 10 Accuracy: 0.875 MCC: 0.7502895194085833

Mean Accuracy: 0.8399090038314178 Standard Deviation: 0.02561705010139913
Mean MCC: 0.6810468236305686 Standard Deviation: 0.05119958396869717
Time taken: 1541.61seconds


In [11]:
# Cross validation on the IX_500_nopcc folds with hand-entered hyperparameters
# (no Optuna search in this cell).
# NOTE(review): presumably copied from an earlier tuning run whose output is
# not kept in this notebook; record where these values came from.
best_params_IX_500_nopcc = {'depth': 10, 'n_estimators': 1803, 'selected_variables': 0.5835230447278554}
IX_500_nopcc_results_df = manual_train_with_best_hyperparameters(datasets, 'IX_500_nopcc', best_params_IX_500_nopcc)

Currently training GRANDE model with dataset key: IX_500_nopcc
Fold 1 Accuracy: 0.83 MCC: 0.6611912172532103
Fold 2 Accuracy: 0.88 MCC: 0.7606087305741639




Fold 3 Accuracy: 0.86 MCC: 0.7223151185146152
Fold 4 Accuracy: 0.87 MCC: 0.7413356072232964
Fold 5 Accuracy: 0.91 MCC: 0.8241310085125538
Fold 6 Accuracy: 0.88 MCC: 0.7624437362098715
Fold 7 Accuracy: 0.85 MCC: 0.7069625178409298
Fold 8 Accuracy: 0.8 MCC: 0.6180642325727469
Fold 9 Accuracy: 0.85 MCC: 0.7012634122382534
Fold 10 Accuracy: 0.9393939393939394 MCC: 0.879444878335872

Mean Accuracy: 0.8669393939393938 Standard Deviation: 0.039330290461434936
Mean MCC: 0.7377760459275513 Standard Deviation: 0.07556275904906536


In [10]:
# Run the tuning + training helper for dataset key 'IX_500_pcc95' and keep its return value
# (presumably a DataFrame of per-fold results, going by the variable name — confirm against the helper).
# NOTE(review): the execution counter drops back to In [10] here after In [11] above, which suggests
# the kernel was restarted between runs — confirm the notebook still works with Restart & Run All.
IX_500_pcc95_results_df = train_with_best_hyperparameters(datasets, 'IX_500_pcc95')

[I 2025-01-04 11:10:56,947] A new study created in memory with name: no-name-554e9f54-a09f-4f77-a2ad-004d3ebdf57d


Currently training GRANDE model with dataset key: IX_500_pcc95


[I 2025-01-04 11:11:13,295] Trial 0 finished with value: 0.7205766921228921 and parameters: {'depth': 3, 'n_estimators': 1283, 'selected_variables': 0.6640472934642174}. Best is trial 0 with value: 0.7205766921228921.
[I 2025-01-04 11:11:27,618] Trial 1 finished with value: 0.64 and parameters: {'depth': 4, 'n_estimators': 1648, 'selected_variables': 0.5987054046981366}. Best is trial 0 with value: 0.7205766921228921.
[I 2025-01-04 11:11:40,881] Trial 2 finished with value: 0.72 and parameters: {'depth': 4, 'n_estimators': 1225, 'selected_variables': 0.6720380341161167}. Best is trial 0 with value: 0.7205766921228921.
[I 2025-01-04 11:12:48,945] Trial 3 finished with value: 0.6420578831241024 and parameters: {'depth': 9, 'n_estimators': 1346, 'selected_variables': 0.9842928410757917}. Best is trial 0 with value: 0.7205766921228921.
[I 2025-01-04 11:13:02,477] Trial 4 finished with value: 0.7205766921228921 and parameters: {'depth': 4, 'n_estimators': 1298, 'selected_variables': 0.94411

Best hyperparameters: {'depth': 3, 'n_estimators': 1283, 'selected_variables': 0.6640472934642174}
Fold 1 Accuracy: 0.85 MCC: 0.7001400420140049
Fold 2 Accuracy: 0.87 MCC: 0.7473603760032685
Fold 3 Accuracy: 0.84 MCC: 0.6805446536716203
Fold 4 Accuracy: 0.78 MCC: 0.56
Fold 5 Accuracy: 0.89 MCC: 0.7801560468156056
Fold 6 Accuracy: 0.88 MCC: 0.7606087305741639
Fold 7 Accuracy: 0.87 MCC: 0.7413356072232964
Fold 8 Accuracy: 0.79 MCC: 0.5896306675376274
Fold 9 Accuracy: 0.85 MCC: 0.7069625178409298
Fold 10 Accuracy: 0.8181818181818182 MCC: 0.6369947448150328

Mean Accuracy: 0.8438181818181818 Standard Deviation: 0.037312863337347714
Mean MCC: 0.690373338649555 Standard Deviation: 0.07401304698149579
Time taken: 421.22seconds


In [11]:
# Run the tuning + training helper for dataset key 'IX_500_pcc75' and keep its return value
# (presumably a DataFrame of per-fold results, going by the variable name — confirm against the helper).
IX_500_pcc75_results_df = train_with_best_hyperparameters(datasets, 'IX_500_pcc75')

[I 2025-01-04 11:18:25,866] A new study created in memory with name: no-name-bcce6672-9edd-47f5-bce8-1eb045561339


Currently training GRANDE model with dataset key: IX_500_pcc75


[I 2025-01-04 11:19:30,317] Trial 0 finished with value: 0.6420578831241024 and parameters: {'depth': 9, 'n_estimators': 1419, 'selected_variables': 0.7989229920011405}. Best is trial 0 with value: 0.6420578831241024.
[I 2025-01-04 11:20:21,864] Trial 1 finished with value: 0.7001400420140049 and parameters: {'depth': 9, 'n_estimators': 1079, 'selected_variables': 0.5014773624616171}. Best is trial 1 with value: 0.7001400420140049.
[I 2025-01-04 11:20:36,836] Trial 2 finished with value: 0.6601320396132047 and parameters: {'depth': 5, 'n_estimators': 1200, 'selected_variables': 0.9100835007011836}. Best is trial 1 with value: 0.7001400420140049.
[I 2025-01-04 11:21:31,045] Trial 3 finished with value: 0.6201240372124044 and parameters: {'depth': 9, 'n_estimators': 1127, 'selected_variables': 0.8124325868572058}. Best is trial 1 with value: 0.7001400420140049.
[I 2025-01-04 11:23:14,244] Trial 4 finished with value: 0.6405126152203485 and parameters: {'depth': 10, 'n_estimators': 959, '

Best hyperparameters: {'depth': 9, 'n_estimators': 1079, 'selected_variables': 0.5014773624616171}
Fold 1 Accuracy: 0.82 MCC: 0.6420578831241024
Fold 2 Accuracy: 0.88 MCC: 0.7655318158241113
Fold 3 Accuracy: 0.88 MCC: 0.7624437362098715
Fold 4 Accuracy: 0.85 MCC: 0.7035264706814484
Fold 5 Accuracy: 0.86 MCC: 0.72
Fold 6 Accuracy: 0.89 MCC: 0.7801560468156056
Fold 7 Accuracy: 0.84 MCC: 0.6849495194215732
Fold 8 Accuracy: 0.82 MCC: 0.6446583712203042
Fold 9 Accuracy: 0.79 MCC: 0.582921932850343
Fold 10 Accuracy: 0.8787878787878788 MCC: 0.7575510204081632

Mean Accuracy: 0.8508787878787878 Standard Deviation: 0.03303368530546855
Mean MCC: 0.7043796796555523 Standard Deviation: 0.06540909423856492
Time taken: 1004.90seconds


In [10]:
# Run the tuning + training helper for dataset key 'IX_400_nopcc' and keep its return value
# (presumably a DataFrame of per-fold results, going by the variable name — confirm against the helper).
# NOTE(review): the execution counter drops back to In [10] here after In [11] above, which suggests
# the kernel was restarted between runs — confirm the notebook still works with Restart & Run All.
IX_400_nopcc_results_df = train_with_best_hyperparameters(datasets, 'IX_400_nopcc')

[I 2025-01-04 11:39:37,059] A new study created in memory with name: no-name-8d5f210b-6233-4734-8f67-1746de699030


Currently training GRANDE model with dataset key: IX_400_nopcc


[I 2025-01-04 11:40:01,985] Trial 0 finished with value: 0.6508140266182866 and parameters: {'depth': 8, 'n_estimators': 620, 'selected_variables': 0.5849970225994227}. Best is trial 0 with value: 0.6508140266182866.
[I 2025-01-04 11:40:19,643] Trial 1 finished with value: 0.6508140266182866 and parameters: {'depth': 6, 'n_estimators': 1815, 'selected_variables': 0.6446093517848402}. Best is trial 0 with value: 0.6508140266182866.
[I 2025-01-04 11:40:33,042] Trial 2 finished with value: 0.65 and parameters: {'depth': 4, 'n_estimators': 1928, 'selected_variables': 0.6498622214801657}. Best is trial 0 with value: 0.6508140266182866.
[I 2025-01-04 11:41:40,270] Trial 3 finished with value: 0.7252266687565153 and parameters: {'depth': 9, 'n_estimators': 1613, 'selected_variables': 0.787153709877101}. Best is trial 3 with value: 0.7252266687565153.
[I 2025-01-04 11:41:57,860] Trial 4 finished with value: 0.7008766440504625 and parameters: {'depth': 6, 'n_estimators': 1910, 'selected_variabl

Best hyperparameters: {'depth': 9, 'n_estimators': 1613, 'selected_variables': 0.787153709877101}
Fold 1 Accuracy: 0.8625 MCC: 0.7252266687565153
Fold 2 Accuracy: 0.9 MCC: 0.8040302522073697
Fold 3 Accuracy: 0.8375 MCC: 0.6752110364284797
Fold 4 Accuracy: 0.8625 MCC: 0.736363238372587
Fold 5 Accuracy: 0.8625 MCC: 0.7252266687565153
Fold 6 Accuracy: 0.8625 MCC: 0.7270477053176553
Fold 7 Accuracy: 0.9 MCC: 0.8
Fold 8 Accuracy: 0.9375 MCC: 0.8774713684868253
Fold 9 Accuracy: 0.9 MCC: 0.8010018789148142
Fold 10 Accuracy: 0.8734177215189873 MCC: 0.7475929098748589

Mean Accuracy: 0.8798417721518987 Standard Deviation: 0.0290221678290587
Mean MCC: 0.7619171727115621 Standard Deviation: 0.058107761646028544
Time taken: 1208.24seconds


In [11]:
# Run the tuning + training helper for dataset key 'IX_400_pcc95' and keep its return value
# (presumably a DataFrame of per-fold results, going by the variable name — confirm against the helper).
IX_400_pcc95_results_df = train_with_best_hyperparameters(datasets, 'IX_400_pcc95')

[I 2025-01-04 11:59:45,305] A new study created in memory with name: no-name-8cf9ad04-69fb-4b25-bc17-8ecdf3e64111


Currently training GRANDE model with dataset key: IX_400_pcc95


[I 2025-01-04 12:00:21,938] Trial 0 finished with value: 0.8010018789148142 and parameters: {'depth': 8, 'n_estimators': 1562, 'selected_variables': 0.5008310460166607}. Best is trial 0 with value: 0.8010018789148142.
[I 2025-01-04 12:02:47,321] Trial 1 finished with value: 0.7509392614826383 and parameters: {'depth': 10, 'n_estimators': 1558, 'selected_variables': 0.8766278181961528}. Best is trial 0 with value: 0.8010018789148142.
[I 2025-01-04 12:03:02,420] Trial 2 finished with value: 0.7252266687565153 and parameters: {'depth': 6, 'n_estimators': 630, 'selected_variables': 0.7313644261717323}. Best is trial 0 with value: 0.8010018789148142.
[I 2025-01-04 12:03:15,706] Trial 3 finished with value: 0.7252266687565153 and parameters: {'depth': 3, 'n_estimators': 1362, 'selected_variables': 0.9106658271980332}. Best is trial 0 with value: 0.8010018789148142.
[I 2025-01-04 12:04:10,255] Trial 4 finished with value: 0.8010018789148142 and parameters: {'depth': 9, 'n_estimators': 1176, '

Best hyperparameters: {'depth': 6, 'n_estimators': 543, 'selected_variables': 0.9787113120182229}
Fold 1 Accuracy: 0.8875 MCC: 0.7752423010845508
Fold 2 Accuracy: 0.8625 MCC: 0.736363238372587
Fold 3 Accuracy: 0.8375 MCC: 0.6752110364284797
Fold 4 Accuracy: 0.925 MCC: 0.8510644963469901
Fold 5 Accuracy: 0.8375 MCC: 0.6769064842612652
Fold 6 Accuracy: 0.8875 MCC: 0.7752423010845508
Fold 7 Accuracy: 0.8375 MCC: 0.6752110364284797
Fold 8 Accuracy: 0.9 MCC: 0.8
Fold 9 Accuracy: 0.8375 MCC: 0.6752110364284797
Fold 10 Accuracy: 0.8227848101265823 MCC: 0.6455128205128206

Mean Accuracy: 0.8635284810126583 Standard Deviation: 0.03436603390097656
Mean MCC: 0.7285964750948204 Standard Deviation: 0.06882637767157682
Time taken: 579.63seconds


In [10]:
# Run the tuning + training helper for dataset key 'IX_400_pcc75' and keep its return value
# (presumably a DataFrame of per-fold results, going by the variable name — confirm against the helper).
# NOTE(review): the execution counter drops back to In [10] here after In [11] above, which suggests
# the kernel was restarted between runs — confirm the notebook still works with Restart & Run All.
IX_400_pcc75_results_df = train_with_best_hyperparameters(datasets, 'IX_400_pcc75')

[I 2025-01-04 12:11:07,117] A new study created in memory with name: no-name-feb27e9e-8992-43e8-af7e-69403c68993a


Currently training GRANDE model with dataset key: IX_400_pcc75


[I 2025-01-04 12:11:59,651] Trial 0 finished with value: 0.7252266687565153 and parameters: {'depth': 9, 'n_estimators': 961, 'selected_variables': 0.7013872101244221}. Best is trial 0 with value: 0.7252266687565153.
[I 2025-01-04 12:12:16,439] Trial 1 finished with value: 0.6251954041004442 and parameters: {'depth': 7, 'n_estimators': 580, 'selected_variables': 0.7467480202591192}. Best is trial 0 with value: 0.7252266687565153.
[I 2025-01-04 12:12:38,862] Trial 2 finished with value: 0.6769064842612652 and parameters: {'depth': 8, 'n_estimators': 613, 'selected_variables': 0.6232961846713215}. Best is trial 0 with value: 0.7252266687565153.
[I 2025-01-04 12:12:56,895] Trial 3 finished with value: 0.6752110364284797 and parameters: {'depth': 7, 'n_estimators': 610, 'selected_variables': 0.6220767550393362}. Best is trial 0 with value: 0.7252266687565153.
[I 2025-01-04 12:13:30,875] Trial 4 finished with value: 0.6769064842612652 and parameters: {'depth': 8, 'n_estimators': 1972, 'sele

Best hyperparameters: {'depth': 9, 'n_estimators': 961, 'selected_variables': 0.7013872101244221}
Fold 1 Accuracy: 0.8375 MCC: 0.6752110364284797
Fold 2 Accuracy: 0.875 MCC: 0.753778361444409
Fold 3 Accuracy: 0.875 MCC: 0.75
Fold 4 Accuracy: 0.8875 MCC: 0.7752423010845508
Fold 5 Accuracy: 0.825 MCC: 0.65
Fold 6 Accuracy: 0.8625 MCC: 0.7270477053176553
Fold 7 Accuracy: 0.85 MCC: 0.7008766440504625
Fold 8 Accuracy: 0.9125 MCC: 0.8252579334125864
Fold 9 Accuracy: 0.8625 MCC: 0.7270477053176553
Fold 10 Accuracy: 0.8227848101265823 MCC: 0.6484005912037146

Mean Accuracy: 0.8610284810126583 Standard Deviation: 0.02824622418302733
Mean MCC: 0.7232862278259514 Standard Deviation: 0.05633382522969016
Time taken: 817.17seconds


In [11]:
# Run the tuning + training helper for dataset key 'IX_300_nopcc' and keep its return value
# (presumably a DataFrame of per-fold results, going by the variable name — confirm against the helper).
IX_300_nopcc_results_df = train_with_best_hyperparameters(datasets, 'IX_300_nopcc')

[I 2025-01-04 12:25:06,563] A new study created in memory with name: no-name-9d6fa2ba-2cb5-49ea-9a26-5fffa84b558d


Currently training GRANDE model with dataset key: IX_300_nopcc


[I 2025-01-04 12:25:18,201] Trial 0 finished with value: 0.7333333333333333 and parameters: {'depth': 4, 'n_estimators': 1384, 'selected_variables': 0.545517642947843}. Best is trial 0 with value: 0.7333333333333333.
[I 2025-01-04 12:25:32,107] Trial 1 finished with value: 0.734968415259167 and parameters: {'depth': 5, 'n_estimators': 1613, 'selected_variables': 0.74968887821423}. Best is trial 1 with value: 0.734968415259167.
[I 2025-01-04 12:27:44,041] Trial 2 finished with value: 0.6666666666666666 and parameters: {'depth': 10, 'n_estimators': 1685, 'selected_variables': 0.6922135198076043}. Best is trial 1 with value: 0.734968415259167.
[I 2025-01-04 12:28:01,202] Trial 3 finished with value: 0.734968415259167 and parameters: {'depth': 7, 'n_estimators': 839, 'selected_variables': 0.8751836286561181}. Best is trial 1 with value: 0.734968415259167.
[I 2025-01-04 12:28:20,712] Trial 4 finished with value: 0.7003892132633236 and parameters: {'depth': 7, 'n_estimators': 1488, 'selected

Best hyperparameters: {'depth': 5, 'n_estimators': 1613, 'selected_variables': 0.74968887821423}
Fold 1 Accuracy: 0.85 MCC: 0.7003892132633236
Fold 2 Accuracy: 0.9333333333333333 MCC: 0.8685990362153791
Fold 3 Accuracy: 0.8166666666666667 MCC: 0.6639137299238815
Fold 4 Accuracy: 0.7833333333333333 MCC: 0.5669817440703095
Fold 5 Accuracy: 0.75 MCC: 0.502518907629606
Fold 6 Accuracy: 0.85 MCC: 0.7035264706814485
Fold 7 Accuracy: 0.9 MCC: 0.8
Fold 8 Accuracy: 0.85 MCC: 0.7003892132633236
Fold 9 Accuracy: 0.95 MCC: 0.9005004170528446
Fold 10 Accuracy: 0.9152542372881356 MCC: 0.8310344827586207

Mean Accuracy: 0.8598587570621469 Standard Deviation: 0.06522283561925905
Mean MCC: 0.7237853214858737 Standard Deviation: 0.1282992460586049
Time taken: 540.15seconds


In [12]:
# Run the tuning + training helper for dataset key 'IX_300_pcc95' and keep its return value
# (presumably a DataFrame of per-fold results, going by the variable name — confirm against the helper).
IX_300_pcc95_results_df = train_with_best_hyperparameters(datasets, 'IX_300_pcc95')

[I 2025-01-04 12:34:42,889] A new study created in memory with name: no-name-39f61368-2d13-4821-b074-eb9bebd49164


Currently training GRANDE model with dataset key: IX_300_pcc95


[I 2025-01-04 12:34:54,356] Trial 0 finished with value: 0.734968415259167 and parameters: {'depth': 3, 'n_estimators': 1954, 'selected_variables': 0.575996830242852}. Best is trial 0 with value: 0.734968415259167.
[I 2025-01-04 12:35:16,410] Trial 1 finished with value: 0.8 and parameters: {'depth': 7, 'n_estimators': 1714, 'selected_variables': 0.5372039643561586}. Best is trial 1 with value: 0.8.
[I 2025-01-04 12:35:58,860] Trial 2 finished with value: 0.7670929478598306 and parameters: {'depth': 9, 'n_estimators': 750, 'selected_variables': 0.8950029204800203}. Best is trial 1 with value: 0.8.
[I 2025-01-04 12:38:17,894] Trial 3 finished with value: 0.7670929478598306 and parameters: {'depth': 10, 'n_estimators': 1561, 'selected_variables': 0.597846338799497}. Best is trial 1 with value: 0.8.
[I 2025-01-04 12:38:30,265] Trial 4 finished with value: 0.734968415259167 and parameters: {'depth': 3, 'n_estimators': 1889, 'selected_variables': 0.9625950058079595}. Best is trial 1 with va

Best hyperparameters: {'depth': 9, 'n_estimators': 1729, 'selected_variables': 0.6580911721567515}
Fold 1 Accuracy: 0.8666666666666667 MCC: 0.7333333333333333
Fold 2 Accuracy: 0.9333333333333333 MCC: 0.8685990362153791
Fold 3 Accuracy: 0.7833333333333333 MCC: 0.5827524946624019
Fold 4 Accuracy: 0.8666666666666667 MCC: 0.734968415259167
Fold 5 Accuracy: 0.7833333333333333 MCC: 0.5669817440703095
Fold 6 Accuracy: 0.9 MCC: 0.8017837257372731
Fold 7 Accuracy: 0.85 MCC: 0.7035264706814485
Fold 8 Accuracy: 0.9166666666666666 MCC: 0.8337966824563376
Fold 9 Accuracy: 0.8666666666666667 MCC: 0.734968415259167
Fold 10 Accuracy: 0.9152542372881356 MCC: 0.830840596138354

Mean Accuracy: 0.8681920903954803 Standard Deviation: 0.05223845440285684
Mean MCC: 0.7391550913813171 Standard Deviation: 0.10191779095460778
Time taken: 1292.19seconds


In [10]:
# Run the tuning + training helper for dataset key 'IX_300_pcc75' and keep its return value
# (presumably a DataFrame of per-fold results, going by the variable name — confirm against the helper).
# NOTE(review): the execution counter drops back to In [10] here after In [12] above, which suggests
# the kernel was restarted between runs — confirm the notebook still works with Restart & Run All.
IX_300_pcc75_results_df = train_with_best_hyperparameters(datasets, 'IX_300_pcc75')

[I 2025-01-04 12:58:42,838] A new study created in memory with name: no-name-6078ac46-65ed-4b26-a702-db32ebcf6db4


Currently training GRANDE model with dataset key: IX_300_pcc75


[I 2025-01-04 12:59:11,676] Trial 0 finished with value: 0.5747048932153912 and parameters: {'depth': 8, 'n_estimators': 1193, 'selected_variables': 0.5647714398971422}. Best is trial 0 with value: 0.5747048932153912.
[I 2025-01-04 13:00:11,081] Trial 1 finished with value: 0.6365239496641676 and parameters: {'depth': 9, 'n_estimators': 1636, 'selected_variables': 0.9646080329192355}. Best is trial 1 with value: 0.6365239496641676.
[I 2025-01-04 13:00:24,452] Trial 2 finished with value: 0.6681531047810609 and parameters: {'depth': 6, 'n_estimators': 629, 'selected_variables': 0.592256331163878}. Best is trial 2 with value: 0.6681531047810609.
[I 2025-01-04 13:02:09,090] Trial 3 finished with value: 0.6365239496641676 and parameters: {'depth': 10, 'n_estimators': 1279, 'selected_variables': 0.8289297617154163}. Best is trial 2 with value: 0.6681531047810609.
[I 2025-01-04 13:02:22,424] Trial 4 finished with value: 0.6365239496641676 and parameters: {'depth': 6, 'n_estimators': 737, 'se

Best hyperparameters: {'depth': 4, 'n_estimators': 1591, 'selected_variables': 0.8257538602485242}
Fold 1 Accuracy: 0.8333333333333334 MCC: 0.6666666666666666
Fold 2 Accuracy: 0.9333333333333333 MCC: 0.8685990362153791
Fold 3 Accuracy: 0.8 MCC: 0.6225430174794672
Fold 4 Accuracy: 0.8333333333333334 MCC: 0.6681531047810609
Fold 5 Accuracy: 0.7166666666666667 MCC: 0.43357427487729555
Fold 6 Accuracy: 0.9166666666666666 MCC: 0.83753151271601
Fold 7 Accuracy: 0.8333333333333334 MCC: 0.6666666666666666
Fold 8 Accuracy: 0.8833333333333333 MCC: 0.7670929478598306
Fold 9 Accuracy: 0.8833333333333333 MCC: 0.7705289916987292
Fold 10 Accuracy: 0.847457627118644 MCC: 0.6979334958986935

Mean Accuracy: 0.8480790960451976 Standard Deviation: 0.062085771461793365
Mean MCC: 0.6999289714859799 Standard Deviation: 0.12339605270997489
Time taken: 538.50seconds
