In [1]:
#!pip install torch --quiet
#!pip install ray --quiet
#!pip install pydantic --quiet

## Model definition 

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import matthews_corrcoef
import numpy as np
# Define the model
class SP_MLP(nn.Module): #eredita da nn.module alcune caratteristiche come la capacità di memorizzare i pesi
    def __init__(self, input_size, hidden_sizes, output_size, dropout_p=0.5):
        super(SP_MLP, self).__init__()
        #  lista che contiene  tutti i layer
        layers = []
        current_input_size = input_size
        # iteri sulle dimensioni dei layer nascosti. E' utile per permettere al modello di avere un numero dinamico di hidden layers, che viene cambiato in base all'iperparametro num_layers.
        for i, hidden_size in enumerate(hidden_sizes):
            # layer lineare
            layers.append(nn.Linear(current_input_size, hidden_size))
            # layer RELU (è una funzione di attivazione, se un numero in input è negativo lo trasforma in 0, altrimenti lo lascia invariato. Permette di disegnare pattern piu complessi di semplici combinazioni di regressioni lineari. Il fatto di trattare i dati negativi come 0 non è una perdita di dati, poichè 0 significa nessuna ricorrenza in quei dati, non ti interessa sapere "quanto non è presente il pattern in quel dato".)
            layers.append(nn.ReLU())
            # dropout che 
            layers.append(nn.Dropout(p=dropout_p))#è una tecnica di regolarizzazione e previene l'overfitting, spegnendo casualmnente una percentuale (che in questo caso è dropout_p) dei neuroni di quel layer.
            # aggiorna la dimensione di input per il prossimo layer
            current_input_size = hidden_size
            # aggiungi il layer di output finale
        layers.append(nn.Linear(current_input_size, output_size))
        layers.append(nn.Sigmoid())
        #  nn.Sequential fa quello che ha fatto il prof ma in una volta sola, praticamente è il forward, mette i layer in una sequenza ordinata dove l'output di un layer diventa l'input per il prossimo
        self.net = nn.Sequential(*layers)
    def forward(self, x):
        return self.net(x) # chiamata semplice al contenitore Sequential


# Define a custom dataset
class HelixDataset(Dataset): #praticamente si tratta di tradurre i dati grezzi , ovvero le matrici numpy in formato che dataloader può capire e usare
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32) #converte la matrice x in un tensore di pytorch , ovvero la versione pytorch degli array numpy. I tensori sono oggetti particolari che possono essere spostati sulla GPU e essere usati per calcolare i gradienti.
        self.y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1) #trasforma la matrice y di forma (4809,) in (4809,1) per renderla compatibile con l'output del modello che adotta questa forma.
    def __len__(self):
        return len(self.X) #risponde alla domanda "quanti campioni ci sono nel dataset?" e usa questo numero per sapere quanti batch può creare quando un'epoca è finita (ovvero quando ha guardato tutti i dati , ovvero ha terminato un epoca). Semplicemente divide la lunghezza totale del dataset per la dimensione del batch, per calcolarsi quanti batch completi corrispondono a un epoca. In ogni modo questa funzione fornsice la lunghezza del dataset per questo calcolo.

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx] #raccoglie la i-esima riga nella matrice x e y, a ogni chiamata del dataloader (quando è in modalità shuffle, e quindi li pesca a caso è molto importante, perche grazie a questa funzione lui va a pescare il numero chiamato dallo shuffle, per riga di campione e classe).


#def train_val è la funzione che gestisce l'intero processo di addestramento e validazione. 
def train_val(model, #è il modello da addestrare
              train_loader, #i dati da studiare, diviso in batch
              val_loader, #il test da fare a fine  di ogni studio
              optimizer, #metodo di studio (adam, RMSprop ecc.. dice al modlelo come aggiornare le sue consocenze).
              criterion, #il correttore, che dice al modello di quanto ha sbagliato
              epochs, #quante volte il modello rileggerà i dati per impararne
              patience, #quante volte il modello può fare un esame di prova peggiore del precedente prima di interrompere le epoche in anticipo
              scorer = matthews_corrcoef,
              init_best_score = -1,
              output_transform = lambda x: (x > 0.5).float()): #come tradurre la probabilità del modello, praticamente trasforma i valori in 1 e 0
  best_val_score = init_best_score #inizializza il miglior punteggio
  epochs_without_improvement = 0 #contatore della patience utile per vedere quante volte di fila non migliora
  best_model_state_dict = None #prepara il cassetto dove inserirci il modello che ha performato meglio

  for epoch in range(epochs): #ripeti il processo per epoche volte. 
      # Training
      model.train()  #inizializzi il modello vuoto da allenare
      loss = 0 #inizializzi la variabile per la loss
      for batch_X, batch_y in train_loader: #questo for itera su tutti i batches
          batch_X, batch_y = batch_X.to(device), batch_y.to(device) #sposta eventualmente i dati del batch sulla gpu se disponibile per fare i calcoli piu velocemente
          optimizer.zero_grad() #azzera l'optimizer che era stato utilizzato per il batch precedente
          outputs = model(batch_X) #il modello legge il batch x e produce le risposte
          loss = criterion(outputs, batch_y) # il correttore calcola il singolo numero di errore confrontando le risposte date dal modello con quelle del batch y
          loss.backward() #funzione di pytorch che  si guarda quanto ogni peso ha contribuito a quell'errore  tramite il calcolo del gradiente quindi dice di quanto un peso deve scendere o salire.
          optimizer.step() #prende i calcoli della backward e aggiorna fisicamente i pesi del cervello per ridurre l'errore

      # Validation
      model.eval() #è cruciale perche mette il modello in fase di valutazione, spegnendo il dropout, ovvero quello che spegneva neuroni a caso per evitare overfitting
      val_preds = []
      val_labels = []
      with torch.no_grad(): #dice a pytorch di non calcolare gradienti, poiche siamo in fase di valutazione, rendendo il tutto piu veloce e consumando meno memoria
          for batch_X, batch_y in val_loader: #itera su tutti i batch del validation
              batch_X, batch_y = batch_X.to(device), batch_y.to(device)
              outputs = model(batch_X)
              #preds = (outputs > 0.5).float() #qui invece utilizzi direttamente questo modo per trasformare gli output in 0 e 1
              preds = output_transform(outputs) #utilizza il metodo di traformazione conenuto in output _transform permettendolo di variare a piacimento
              val_preds.extend(preds.cpu().numpy().flatten()) #aggiunge le risposte alle liste
              val_labels.extend(batch_y.cpu().numpy().flatten())
      val_score = scorer(val_labels, val_preds) #calcola il punteggio MCC alla fine di ogni test

      if val_score > best_val_score:
          best_val_score = val_score
          epochs_without_improvement = 0
          best_model_state_dict = model.state_dict()
          print('Validation score improved to {:.4f}'.format(best_val_score))
      else:
          epochs_without_improvement += 1
          if epochs_without_improvement >= patience:
              print('Early stopping at epoch {}'.format(epoch+1))
              break

      print('Epoch [{}/{}], Loss: {:.4f}, Val score: {:.4f}'.format(epoch+1, epochs, loss.item(), val_score))
  return best_model_state_dict

def test(model, test_loader, scorer = matthews_corrcoef, output_transform = lambda x: (x > 0.5).float()):
  model.eval()
  all_preds = []
  all_labels = []
  with torch.no_grad():
      for batch_X, batch_y in test_loader:
          batch_X, batch_y = batch_X.to(device), batch_y.to(device)
          outputs = model(batch_X)
          preds = output_transform(outputs)
          all_preds.extend(preds.cpu().numpy().flatten())
          all_labels.extend(batch_y.cpu().numpy().flatten())

  score = scorer(all_labels, all_preds)
  return score


if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU is not available")

GPU is not available


## Finding the best Hyperparameter

In [3]:
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
import os
import random


In [None]:
def generate_hidden_layer_size(config):
    pool = [1024, 512, 256, 128, 64, 32] #pool di dimensioni disponibili. modifica qui se vuoi cambiare le dimensioni possibili di ogni layer
    num_layers = config["num_layers"] 
    #imposta che se il numero di layer viene settato dall'utente maggiore dei pool, lo imposti come lunghezza massima il numero di pool (per evitare che due layer abbiano le stesse dimensioni)
    if num_layers > len(pool):
        num_layers = len(pool)
    chosen_dims = random.sample(pool, num_layers) 
    #permette di scegliere randomicamente se usare la struttura a imbuto (grande, piccolo, grande) o in ordine decrescente(come il prof, dal piu grande al piu piccolo)
    if random.choice([True, False]): # 50% di possibilità di essere True
        # SÌ, FUNNEL method (Stabile): Ordina dalla più grande alla più piccola
        final_dims = sorted(chosen_dims, reverse=True)
        # Questo testerà l'ipotesi di stabilità
    else:
        # NO, CLESSIDRA/FLESSIBILE: Mescola l'ordine
        random.shuffle(chosen_dims)
        final_dims = chosen_dims
    return final_dims

In [5]:
#set the configuration on the random search must work
config = {
    "base_dir": os.path.abspath("../Feature_Selection/"),
    "num_layers":tune.choice([2,3,4,5,6]), #sceglie randomicamente il numero di layers
    "hidden_sizes": tune.sample_from(generate_hidden_layer_size), #richiama la funzione per scegliere randomicamente le dimensioni di ciascuno dei layer
    "dropout": tune.uniform(0.1, 0.5), #dropout percentage
    "lr": tune.loguniform(1e-4, 1e-2), #learning rate
    "batch_size": tune.choice([64, 128, 256]) #batch size
}

In [6]:
#this function makes the 5-CV to evaluate the mean mcc for that configuration of hyperparameter
def test_config(config):
    # Esempio: usa i tuoi 5 split .npz per fare MCC medio
    mcc_scores = []
    base_dir = config["base_dir"]
    for i in range(1, 6):
        #this line define the folder where npz(s) files are contained and the respective path
        train_path = os.path.join(base_dir, f"training_features_{i}.npz")
        val_path   = os.path.join(base_dir, f"validation_features_{i}.npz")
        test_path  = os.path.join(base_dir, f"testing_features_{i}.npz")
    # load feature matrices and label vector
        #load train
        loaded_data_train = np.load(train_path)
        x_train = loaded_data_train['matrix']
        y_train = loaded_data_train['target']
        
        # load test
        loaded_data_test = np.load(test_path)
        x_test = loaded_data_test['matrix']
        y_test = loaded_data_test['target']
        
        # load validation
        loaded_data_validation = np.load(val_path)
        x_val = loaded_data_validation['matrix']
        y_val = loaded_data_validation['target']
    
            # Split the dataset into training, validation and test sets
        train_dataset = HelixDataset(x_train, y_train)
        val_dataset = HelixDataset(x_val, y_val)
        test_dataset = HelixDataset(x_test, y_test)

                # Create data loaders divided in batches
        batch_size = config["batch_size"]
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)
        test_loader = DataLoader(test_dataset, batch_size=batch_size)
        
        # Initialize the model
        input_size = x_train.shape[1]
        
        model = SP_MLP(
            input_size,
            config["hidden_sizes"],
            output_size=1,
            dropout_p=config["dropout"]).to(device)

        optimizer = optim.Adam(model.parameters(), lr=config["lr"]) #use adam optimized with learning rate chosen by random search
        criterion = nn.BCELoss()  #loss function for binary classification

        best_state = train_val(model, train_loader, val_loader,
                               optimizer, criterion,
                               epochs=100, patience=20)
        model.load_state_dict(best_state) #best state(with optimized weight) with chosen parameter 
        
        # calcoli MCC sul test di quel fold 
        mcc = test(model, test_loader)
        mcc_scores.append(mcc)

    mean_mcc = np.mean(mcc_scores)

    # raytune vuole che gli riporti un dizionario di metriche
    tune.report({"mcc": mean_mcc, "num_layers":config["num_layers"], "hidden_layers_size": config["hidden_sizes"]})


In [7]:
#run the hyperparameter tuning for 20 different random configuration combinations
result = tune.run(
    test_config,
    config=config,
    num_samples=50,      # how many combinations you try 
)

best_trial = result.get_best_trial("mcc", "max", "last") #search all trials (combination of configurations) and select the best one. It compares the last reported mcc from each trial and returns the trial that achieved the maximum mcc.
print("Best trial config:", best_trial.config) #take the best configuration
print("Best CV MCC:", best_trial.last_result["mcc"]) #take the best mcc


2025-11-13 19:22:44,718	INFO worker.py:2012 -- Started a local Ray instance.
2025-11-13 19:22:45,605	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2025-11-13 19:22:45,610	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
2025-11-13 19:22:45,636	INFO tensorboardx.py:193 -- pip install "ray[tune]" to see TensorBoard files.


0,1
Current time:,2025-11-13 20:58:30
Running for:,01:35:44.88
Memory:,1.4/3.5 GiB

Trial name,status,loc,batch_size,dropout,lr,num_layers,iter,total time (s),mcc,num_layers.1
test_config_c0509_00000,TERMINATED,172.19.13.131:10932,256,0.463379,0.00176439,2,1,167.234,0.772369,2
test_config_c0509_00001,TERMINATED,172.19.13.131:10931,256,0.194235,0.000774364,3,1,104.249,0.78775,3
test_config_c0509_00002,TERMINATED,172.19.13.131:10933,128,0.162939,0.00171434,6,1,319.312,0.780113,6
test_config_c0509_00003,TERMINATED,172.19.13.131:10934,64,0.40765,0.000103826,3,1,116.213,0.726027,3
test_config_c0509_00004,TERMINATED,172.19.13.131:11177,64,0.299569,0.000729672,2,1,659.353,0.788412,2
test_config_c0509_00005,TERMINATED,172.19.13.131:11251,64,0.356195,0.00257997,3,1,684.36,0.788221,3
test_config_c0509_00006,TERMINATED,172.19.13.131:11342,256,0.271383,0.000215205,2,1,94.0805,0.742543,2
test_config_c0509_00007,TERMINATED,172.19.13.131:11440,128,0.133325,0.00456997,3,1,168.931,0.791227,3
test_config_c0509_00008,TERMINATED,172.19.13.131:11531,128,0.362217,0.000392601,6,1,406.293,0.78067,6
test_config_c0509_00009,TERMINATED,172.19.13.131:11641,256,0.481908,0.00129302,2,1,165.289,0.762488,2


[36m(test_config pid=10931)[0m Validation score improved to 0.0000
[36m(test_config pid=10931)[0m Epoch [1/100], Loss: 0.3742, Val score: 0.0000
[36m(test_config pid=10932)[0m Validation score improved to 0.7233[32m [repeated 25x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m
[36m(test_config pid=10932)[0m Epoch [15/100], Loss: 0.1302, Val score: 0.7233[32m [repeated 58x across cluster][0m
[36m(test_config pid=10934)[0m Validation score improved to 0.7030[32m [repeated 11x across cluster][0m
[36m(test_config pid=10934)[0m Epoch [30/100], Loss: 0.0678, Val score: 0.6865[32m [repeated 57x across cluster][0m
[36m(test_config pid=10931)[0m Validation score improved to 0.7851[32m [repeated 12x across cluster][0m
[36m(test_config pid=10931)[0m Epoch [67/100], Loss: 0.1158, Val score: 

[36m(pid=gcs_server)[0m [2025-11-13 19:23:13,567 E 10609 10609] (gcs_server) gcs_server.cc:302: Failed to establish connection to the event+metrics exporter agent. Events and metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[33m(raylet)[0m [2025-11-13 19:23:14,723 E 10684 10684] (raylet) main.cc:975: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[2025-11-13 19:23:15,634 E 10585 10728] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[36m(test_config pid=10932)[0m [2025-11-13 19:23:17,315 E 10932 11013] core_worker_process.cc:825: Failed to establish connection to the metrics exporter

[36m(test_config pid=10934)[0m Validation score improved to 0.7228[32m [repeated 14x across cluster][0m
[36m(test_config pid=10934)[0m Epoch [74/100], Loss: 0.0451, Val score: 0.7228[32m [repeated 56x across cluster][0m
[36m(test_config pid=10932)[0m Early stopping at epoch 84
[36m(test_config pid=10931)[0m Validation score improved to 0.7841[32m [repeated 12x across cluster][0m
[36m(test_config pid=10934)[0m Epoch [91/100], Loss: 0.2219, Val score: 0.7177[32m [repeated 65x across cluster][0m
[36m(test_config pid=10934)[0m Early stopping at epoch 95
[36m(test_config pid=10934)[0m Validation score improved to 0.7029[32m [repeated 12x across cluster][0m
[36m(test_config pid=10934)[0m Epoch [9/100], Loss: 0.0507, Val score: 0.7029[32m [repeated 51x across cluster][0m
[36m(test_config pid=10931)[0m Early stopping at epoch 59
[36m(test_config pid=10932)[0m Validation score improved to 0.7698[32m [repeated 13x across cluster][0m
[36m(test_config pid=10934)

Trial name,hidden_layers_size,mcc,num_layers
test_config_c0509_00000,"[128, 1024]",0.772369,2
test_config_c0509_00001,"[512, 128, 32]",0.78775,3
test_config_c0509_00002,"[64, 512, 256, 128, 1024, 32]",0.780113,6
test_config_c0509_00003,"[256, 64, 32]",0.726027,3
test_config_c0509_00004,"[512, 1024]",0.788412,2
test_config_c0509_00005,"[1024, 512, 128]",0.788221,3
test_config_c0509_00006,"[64, 512]",0.742543,2
test_config_c0509_00007,"[512, 128, 64]",0.791227,3
test_config_c0509_00008,"[512, 128, 1024, 64, 256, 32]",0.78067,6
test_config_c0509_00009,"[1024, 32]",0.762488,2


[36m(test_config pid=10934)[0m Validation score improved to 0.7253[32m [repeated 5x across cluster][0m
[36m(test_config pid=10934)[0m Epoch [76/100], Loss: 0.0429, Val score: 0.7168[32m [repeated 37x across cluster][0m
[36m(test_config pid=10932)[0m Validation score improved to 0.7587[32m [repeated 4x across cluster][0m
[36m(test_config pid=10934)[0m Epoch [91/100], Loss: 0.0066, Val score: 0.7226[32m [repeated 36x across cluster][0m
[36m(test_config pid=10934)[0m Early stopping at epoch 97
[36m(test_config pid=10933)[0m Validation score improved to 0.8151[32m [repeated 2x across cluster][0m
[36m(test_config pid=10932)[0m Epoch [77/100], Loss: 0.1065, Val score: 0.7188[32m [repeated 28x across cluster][0m
[36m(test_config pid=11177)[0m Validation score improved to 0.6926[32m [repeated 3x across cluster][0m
[36m(test_config pid=10932)[0m Epoch [93/100], Loss: 0.1020, Val score: 0.7340[32m [repeated 25x across cluster][0m
[36m(test_config pid=11251)[0m

[36m(test_config pid=11177)[0m [2025-11-13 19:25:10,775 E 11177 11204] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14[32m [repeated 4x across cluster][0m


[36m(test_config pid=11251)[0m Validation score improved to 0.7120[32m [repeated 9x across cluster][0m
[36m(test_config pid=11251)[0m Epoch [9/100], Loss: 0.1308, Val score: 0.7120[32m [repeated 23x across cluster][0m
[36m(test_config pid=11251)[0m Validation score improved to 0.7404[32m [repeated 8x across cluster][0m
[36m(test_config pid=11251)[0m Epoch [12/100], Loss: 0.0525, Val score: 0.7404[32m [repeated 24x across cluster][0m


[36m(test_config pid=11251)[0m [2025-11-13 19:25:19,683 E 11251 11277] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=10932)[0m Validation score improved to 0.7715[32m [repeated 9x across cluster][0m
[36m(test_config pid=11251)[0m Epoch [15/100], Loss: 0.1163, Val score: 0.7359[32m [repeated 25x across cluster][0m
[36m(test_config pid=10932)[0m Validation score improved to 0.7777[32m [repeated 4x across cluster][0m
[36m(test_config pid=10932)[0m Epoch [65/100], Loss: 0.1160, Val score: 0.7458[32m [repeated 25x across cluster][0m
[36m(test_config pid=10932)[0m Validation score improved to 0.7822[32m [repeated 4x across cluster][0m
[36m(test_config pid=10932)[0m Epoch [78/100], Loss: 0.1289, Val score: 0.7386[32m [repeated 26x across cluster][0m
[36m(test_config pid=11177)[0m Validation score improved to 0.7655[32m [repeated 4x across cluster][0m
[36m(test_config pid=10932)[0m Epoch [90/100], Loss: 0.0981, Val score: 0.7383[32m [repeated 25x across cluster][0m
[36m(test_config pid=10932)[0m Early stopping at epoch 94
[36m(test_config pid=10933)[0m

[36m(test_config pid=11342)[0m [2025-11-13 19:26:11,185 E 11342 11369] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=11342)[0m Epoch [75/100], Loss: 0.1381, Val score: 0.7134[32m [repeated 37x across cluster][0m
[36m(test_config pid=11342)[0m Validation score improved to 0.7476[32m [repeated 6x across cluster][0m
[36m(test_config pid=11342)[0m Epoch [99/100], Loss: 0.1102, Val score: 0.7632[32m [repeated 36x across cluster][0m
[36m(test_config pid=11251)[0m Validation score improved to 0.7860[32m [repeated 6x across cluster][0m
[36m(test_config pid=11342)[0m Epoch [22/100], Loss: 0.0967, Val score: 0.7158[32m [repeated 36x across cluster][0m
[36m(test_config pid=11342)[0m Validation score improved to 0.7183[32m [repeated 13x across cluster][0m
[36m(test_config pid=11342)[0m Epoch [46/100], Loss: 0.1543, Val score: 0.7094[32m [repeated 37x across cluster][0m
[36m(test_config pid=11342)[0m Validation score improved to 0.7313[32m [repeated 7x across cluster][0m
[36m(test_config pid=10933)[0m Epoch [54/100], Loss: 0.1652, Val score: 0.7333[32m [repe

[36m(test_config pid=11440)[0m [2025-11-13 19:27:54,191 E 11440 11467] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=11440)[0m Epoch [66/100], Loss: 0.0862, Val score: 0.8012[32m [repeated 27x across cluster][0m
[36m(test_config pid=11251)[0m Validation score improved to 0.7954[32m [repeated 6x across cluster][0m
[36m(test_config pid=11440)[0m Epoch [80/100], Loss: 0.1926, Val score: 0.8087[32m [repeated 27x across cluster][0m
[36m(test_config pid=11440)[0m Validation score improved to 0.8212
[36m(test_config pid=11177)[0m Validation score improved to 0.7963
[36m(test_config pid=11440)[0m Epoch [96/100], Loss: 0.2090, Val score: 0.8004[32m [repeated 30x across cluster][0m
[36m(test_config pid=11440)[0m Validation score improved to 0.7525[32m [repeated 6x across cluster][0m
[36m(test_config pid=10933)[0m Early stopping at epoch 70
[36m(test_config pid=11177)[0m Epoch [45/100], Loss: 0.0148, Val score: 0.8010[32m [repeated 25x across cluster][0m
[36m(test_config pid=11440)[0m Validation score improved to 0.7992[32m [repeated 7x across cluster][0m


[36m(test_config pid=11531)[0m [2025-11-13 19:28:43,094 E 11531 11557] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=11531)[0m Validation score improved to 0.7233[32m [repeated 5x across cluster][0m
[36m(test_config pid=11440)[0m Epoch [9/100], Loss: 0.0711, Val score: 0.6662[32m [repeated 26x across cluster][0m
[36m(test_config pid=11440)[0m Validation score improved to 0.7982[32m [repeated 5x across cluster][0m
[36m(test_config pid=11440)[0m Epoch [23/100], Loss: 0.0866, Val score: 0.7932[32m [repeated 27x across cluster][0m
[36m(test_config pid=11177)[0m Early stopping at epoch 78
[36m(test_config pid=11440)[0m Validation score improved to 0.8022[32m [repeated 2x across cluster][0m
[36m(test_config pid=11531)[0m Epoch [43/100], Loss: 0.1619, Val score: 0.7111[32m [repeated 27x across cluster][0m
[36m(test_config pid=11531)[0m Validation score improved to 0.7434[32m [repeated 6x across cluster][0m
[36m(test_config pid=11177)[0m Epoch [5/100], Loss: 0.0170, Val score: 0.6761[32m [repeated 27x across cluster][0m
[36m(test_config pid=11177)[0m V

[36m(test_config pid=11641)[0m [2025-11-13 19:30:52,365 E 11641 11668] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=11531)[0m Validation score improved to 0.7018[32m [repeated 5x across cluster][0m
[36m(test_config pid=11177)[0m Epoch [89/100], Loss: 0.0958, Val score: 0.8056[32m [repeated 26x across cluster][0m
[36m(test_config pid=11641)[0m Validation score improved to 0.7803[32m [repeated 6x across cluster][0m
[36m(test_config pid=11531)[0m Epoch [14/100], Loss: 0.0949, Val score: 0.7189[32m [repeated 28x across cluster][0m
[36m(test_config pid=11531)[0m Validation score improved to 0.7368[32m [repeated 4x across cluster][0m
[36m(test_config pid=11531)[0m Epoch [20/100], Loss: 0.1200, Val score: 0.7567[32m [repeated 27x across cluster][0m
[36m(test_config pid=11641)[0m Validation score improved to 0.7080[32m [repeated 7x across cluster][0m
[36m(test_config pid=11531)[0m Epoch [26/100], Loss: 0.1493, Val score: 0.7604[32m [repeated 27x across cluster][0m
[36m(test_config pid=11177)[0m Validation score improved to 0.6762[32m [repeated 5x acros

[36m(test_config pid=11759)[0m [2025-11-13 19:33:46,431 E 11759 11785] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=11759)[0m Validation score improved to 0.7220[32m [repeated 5x across cluster][0m
[36m(test_config pid=11531)[0m Epoch [15/100], Loss: 0.1327, Val score: 0.7099[32m [repeated 16x across cluster][0m
[36m(test_config pid=11177)[0m Validation score improved to 0.7480[32m [repeated 7x across cluster][0m
[36m(test_config pid=11177)[0m Epoch [20/100], Loss: 0.0515, Val score: 0.7452[32m [repeated 17x across cluster][0m
[36m(test_config pid=11759)[0m Validation score improved to 0.7323[32m [repeated 2x across cluster][0m
[36m(test_config pid=11759)[0m Epoch [19/100], Loss: 0.2301, Val score: 0.6915[32m [repeated 15x across cluster][0m
[36m(test_config pid=11759)[0m Validation score improved to 0.7475
[36m(test_config pid=11177)[0m Validation score improved to 0.7769
[36m(test_config pid=11251)[0m Epoch [77/100], Loss: 0.1452, Val score: 0.7886[32m [repeated 16x across cluster][0m
[36m(test_config pid=11531)[0m 
[36m(test_config pid=11531

[36m(test_config pid=11865)[0m [2025-11-13 19:35:38,510 E 11865 11892] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=11177)[0m Validation score improved to 0.7990[32m [repeated 4x across cluster][0m
[36m(test_config pid=11865)[0m Epoch [50/100], Loss: 0.1142, Val score: 0.5926[32m [repeated 19x across cluster][0m
[36m(test_config pid=11177)[0m Validation score improved to 0.8096[32m [repeated 2x across cluster][0m
[36m(test_config pid=11865)[0m Early stopping at epoch 56
[36m(test_config pid=11865)[0m Epoch [3/100], Loss: 0.2147, Val score: 0.0000[32m [repeated 18x across cluster][0m
[36m(test_config pid=11865)[0m Validation score improved to 0.7664[32m [repeated 4x across cluster][0m
[36m(test_config pid=11759)[0m Epoch [75/100], Loss: 0.3508, Val score: 0.7819[32m [repeated 16x across cluster][0m
[36m(test_config pid=11865)[0m Validation score improved to 0.7691
[36m(test_config pid=11865)[0m Validation score improved to 0.7725
[36m(test_config pid=11865)[0m Validation score improved to 0.7756
[36m(test_config pid=11865)[0m Epoch [23/100], Loss

[36m(test_config pid=11955)[0m [2025-11-13 19:36:24,388 E 11955 11982] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=11865)[0m Epoch [22/100], Loss: 0.1405, Val score: 0.7216[32m [repeated 18x across cluster][0m
[36m(test_config pid=11865)[0m Validation score improved to 0.7664[32m [repeated 2x across cluster][0m
[36m(test_config pid=12035)[0m Epoch [6/100], Loss: 0.1473, Val score: 0.6830[32m [repeated 26x across cluster][0m
[36m(test_config pid=11865)[0m Validation score improved to 0.7795[32m [repeated 10x across cluster][0m
[36m(test_config pid=11759)[0m Early stopping at epoch 99
[36m(test_config pid=11865)[0m Epoch [43/100], Loss: 0.1466, Val score: 0.7500[32m [repeated 27x across cluster][0m
[36m(test_config pid=11759)[0m Validation score improved to 0.0000[32m [repeated 6x across cluster][0m
[36m(test_config pid=12035)[0m Epoch [29/100], Loss: 0.1171, Val score: 0.7371[32m [repeated 27x across cluster][0m
[36m(test_config pid=11865)[0m Early stopping at epoch 58
[36m(test_config pid=11759)[0m Validation score improved to 0.7236[32m [repe

[36m(test_config pid=12035)[0m [2025-11-13 19:36:52,586 E 12035 12062] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=11865)[0m Validation score improved to 0.7058[32m [repeated 11x across cluster][0m
[36m(test_config pid=11759)[0m Epoch [11/100], Loss: 0.1461, Val score: 0.7240[32m [repeated 28x across cluster][0m
[36m(test_config pid=11865)[0m Validation score improved to 0.7103[32m [repeated 3x across cluster][0m
[36m(test_config pid=12035)[0m Epoch [77/100], Loss: 0.1158, Val score: 0.7252[32m [repeated 33x across cluster][0m
[36m(test_config pid=12035)[0m Validation score improved to 0.7953[32m [repeated 7x across cluster][0m
[36m(test_config pid=11759)[0m Epoch [17/100], Loss: 0.0505, Val score: 0.7137[32m [repeated 28x across cluster][0m
[36m(test_config pid=11955)[0m Validation score improved to 0.7516[32m [repeated 2x across cluster][0m
[36m(test_config pid=11865)[0m Epoch [55/100], Loss: 0.3769, Val score: 0.6965[32m [repeated 29x across cluster][0m
[36m(test_config pid=11759)[0m Validation score improved to 0.7819[32m [repeated 11x acr

[36m(test_config pid=12136)[0m [2025-11-13 19:38:38,261 E 12136 12163] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12136)[0m Epoch [31/100], Loss: 0.0955, Val score: 0.7368[32m [repeated 23x across cluster][0m
[36m(test_config pid=12136)[0m Validation score improved to 0.7368[32m [repeated 9x across cluster][0m
[36m(test_config pid=12035)[0m Epoch [65/100], Loss: 0.0712, Val score: 0.7570[32m [repeated 24x across cluster][0m
[36m(test_config pid=11955)[0m Validation score improved to 0.7721[32m [repeated 7x across cluster][0m
[36m(test_config pid=12035)[0m Epoch [77/100], Loss: 0.0933, Val score: 0.7681[32m [repeated 25x across cluster][0m
[36m(test_config pid=12035)[0m Validation score improved to 0.7852[32m [repeated 8x across cluster][0m
[36m(test_config pid=12035)[0m Epoch [89/100], Loss: 0.1064, Val score: 0.7411[32m [repeated 25x across cluster][0m
[36m(test_config pid=12136)[0m Validation score improved to 0.7667[32m [repeated 2x across cluster][0m
[36m(test_config pid=12035)[0m Epoch [100/100], Loss: 0.0733, Val score: 0.7911[32m [repe

[36m(test_config pid=12244)[0m [2025-11-13 19:40:41,531 E 12244 12271] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12244)[0m Epoch [17/100], Loss: 0.2153, Val score: 0.6988[32m [repeated 14x across cluster][0m
[36m(test_config pid=12136)[0m Validation score improved to 0.7690
[36m(test_config pid=11955)[0m Validation score improved to 0.7505
[36m(test_config pid=12244)[0m Epoch [21/100], Loss: 0.1131, Val score: 0.7080[32m [repeated 13x across cluster][0m
[36m(test_config pid=11955)[0m Validation score improved to 0.7620
[36m(test_config pid=12136)[0m Epoch [33/100], Loss: 0.1141, Val score: 0.7265[32m [repeated 12x across cluster][0m
[36m(test_config pid=12244)[0m Validation score improved to 0.7168[32m [repeated 2x across cluster][0m
[36m(test_config pid=11955)[0m Epoch [31/100], Loss: 0.1947, Val score: 0.7190[32m [repeated 12x across cluster][0m
[36m(test_config pid=12136)[0m Validation score improved to 0.7831
[36m(test_config pid=12244)[0m Validation score improved to 0.7193
[36m(test_config pid=12244)[0m Epoch [32/100], Loss: 0.1517, Val s

[36m(test_config pid=12465)[0m [2025-11-13 19:50:10,991 E 12465 12492] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12465)[0m Epoch [26/100], Loss: 0.1623, Val score: 0.7804[32m [repeated 13x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.7831[32m [repeated 7x across cluster][0m
[36m(test_config pid=11759)[0m Epoch [98/100], Loss: 0.0328, Val score: 0.7786[32m [repeated 14x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.7869[32m [repeated 2x across cluster][0m
[36m(test_config pid=11759)[0m Epoch [100/100], Loss: 0.0011, Val score: 0.6832[32m [repeated 14x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.7917[32m [repeated 5x across cluster][0m
[36m(test_config pid=12465)[0m Epoch [48/100], Loss: 0.0942, Val score: 0.7817[32m [repeated 14x across cluster][0m
[36m(test_config pid=12244)[0m Validation score improved to 0.7592
[36m(test_config pid=12545)[0m Validation score improved to 0.7191
[36m(test_config pid=12244)[0m Epoch [57/100], Loss: 0.084

[36m(test_config pid=12545)[0m [2025-11-13 19:50:37,952 E 12545 12571] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12621)[0m Validation score improved to 0.7032[32m [repeated 4x across cluster][0m
[36m(test_config pid=12621)[0m Epoch [6/100], Loss: 0.3560, Val score: 0.0000[32m [repeated 19x across cluster][0m
[36m(test_config pid=12244)[0m Validation score improved to 0.7689[32m [repeated 2x across cluster][0m
[36m(test_config pid=12465)[0m Epoch [78/100], Loss: 0.1198, Val score: 0.7807[32m [repeated 22x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.8075
[36m(test_config pid=12244)[0m Epoch [72/100], Loss: 0.1336, Val score: 0.7527[32m [repeated 19x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.8112


[36m(test_config pid=12621)[0m [2025-11-13 19:50:55,989 E 12621 12648] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12621)[0m Epoch [22/100], Loss: 0.2102, Val score: 0.0000[32m [repeated 19x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.8241[32m [repeated 3x across cluster][0m
[36m(test_config pid=12621)[0m Early stopping at epoch 25
[36m(test_config pid=12545)[0m Epoch [26/100], Loss: 0.1214, Val score: 0.7357[32m [repeated 16x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.0000[32m [repeated 3x across cluster][0m
[36m(test_config pid=12545)[0m Epoch [29/100], Loss: 0.1066, Val score: 0.7210[32m [repeated 19x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.7689[32m [repeated 6x across cluster][0m
[36m(test_config pid=12465)[0m Epoch [14/100], Loss: 0.1941, Val score: 0.7406[32m [repeated 18x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.7845[32m [repeated 5x across cluster][0m
[36m(test_config pid=12244)[0m

[36m(test_config pid=12716)[0m [2025-11-13 19:52:07,476 E 12716 12743] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12465)[0m Validation score improved to 0.8405[32m [repeated 2x across cluster][0m
[36m(test_config pid=12716)[0m Epoch [57/100], Loss: 0.1730, Val score: 0.7150[32m [repeated 28x across cluster][0m
[36m(test_config pid=12465)[0m Validation score improved to 0.6654
[36m(test_config pid=12465)[0m Epoch [3/100], Loss: 0.0733, Val score: 0.6264[32m [repeated 28x across cluster][0m
[36m(test_config pid=12716)[0m Validation score improved to 0.7330
[36m(test_config pid=12545)[0m Early stopping at epoch 70
[36m(test_config pid=12716)[0m Epoch [84/100], Loss: 0.1363, Val score: 0.7130[32m [repeated 30x across cluster][0m
[36m(test_config pid=12545)[0m Validation score improved to 0.5684[32m [repeated 6x across cluster][0m
[36m(test_config pid=12621)[0m Early stopping at epoch 81
[36m(test_config pid=12465)[0m Epoch [18/100], Loss: 0.1449, Val score: 0.7003[32m [repeated 27x across cluster][0m
[36m(test_config pid=12716)[0m Validation score

[36m(test_config pid=12843)[0m [2025-11-13 19:54:48,499 E 12843 12869] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12843)[0m Epoch [48/100], Loss: 0.0970, Val score: 0.7710[32m [repeated 22x across cluster][0m
[36m(test_config pid=12545)[0m Validation score improved to 0.7717[32m [repeated 4x across cluster][0m
[36m(test_config pid=12545)[0m Epoch [34/100], Loss: 0.1061, Val score: 0.7525[32m [repeated 22x across cluster][0m
[36m(test_config pid=12843)[0m Validation score improved to 0.7819[32m [repeated 3x across cluster][0m
[36m(test_config pid=12843)[0m Epoch [73/100], Loss: 0.0804, Val score: 0.7774[32m [repeated 25x across cluster][0m
[36m(test_config pid=12843)[0m Validation score improved to 0.7969[32m [repeated 7x across cluster][0m
[36m(test_config pid=12465)[0m Epoch [64/100], Loss: 0.0763, Val score: 0.7417[32m [repeated 24x across cluster][0m
[36m(test_config pid=12843)[0m Validation score improved to 0.7983[32m [repeated 3x across cluster][0m
[36m(test_config pid=12843)[0m Epoch [96/100], Loss: 0.0611, Val score: 0.7580[32m [repea

[36m(test_config pid=12922)[0m [2025-11-13 19:55:19,086 E 12922 12949] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12465)[0m Epoch [85/100], Loss: 0.0956, Val score: 0.7814[32m [repeated 23x across cluster][0m
[36m(test_config pid=12843)[0m Validation score improved to 0.7708[32m [repeated 2x across cluster][0m
[36m(test_config pid=12465)[0m Epoch [92/100], Loss: 0.0945, Val score: 0.7731[32m [repeated 23x across cluster][0m
[36m(test_config pid=12465)[0m Early stopping at epoch 94
[36m(test_config pid=12843)[0m Validation score improved to 0.7899[32m [repeated 4x across cluster][0m
[36m(test_config pid=12843)[0m Epoch [41/100], Loss: 0.0636, Val score: 0.7851[32m [repeated 19x across cluster][0m
[36m(test_config pid=12843)[0m Epoch [51/100], Loss: 0.0920, Val score: 0.7774[32m [repeated 14x across cluster][0m
[36m(test_config pid=12545)[0m Early stopping at epoch 58
[36m(test_config pid=12843)[0m Validation score improved to 0.6312
[36m(test_config pid=12843)[0m Validation score improved to 0.6635
[36m(test_config pid=12843)[0m Epoch [8/100], 

[36m(test_config pid=13004)[0m [2025-11-13 19:56:00,916 E 13004 13031] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12843)[0m Epoch [54/100], Loss: 0.1032, Val score: 0.7530[32m [repeated 26x across cluster][0m
[36m(test_config pid=13004)[0m Validation score improved to 0.7216[32m [repeated 5x across cluster][0m
[36m(test_config pid=12843)[0m Epoch [65/100], Loss: 0.0961, Val score: 0.7880[32m [repeated 21x across cluster][0m
[36m(test_config pid=13004)[0m Validation score improved to 0.7421[32m [repeated 6x across cluster][0m
[36m(test_config pid=12545)[0m Epoch [21/100], Loss: 0.1479, Val score: 0.5940[32m [repeated 21x across cluster][0m
[36m(test_config pid=12843)[0m Validation score improved to 0.8142[32m [repeated 2x across cluster][0m
[36m(test_config pid=12545)[0m Epoch [24/100], Loss: 0.1253, Val score: 0.6842[32m [repeated 22x across cluster][0m
[36m(test_config pid=12843)[0m Early stopping at epoch 91
[36m(test_config pid=12545)[0m Validation score improved to 0.6979
[36m(test_config pid=13004)[0m Validation score improved to 0.7583


[36m(test_config pid=13114)[0m [2025-11-13 19:58:16,233 E 13114 13142] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=13114)[0m Validation score improved to 0.7028
[36m(test_config pid=12922)[0m Epoch [79/100], Loss: 0.0026, Val score: 0.7475[32m [repeated 14x across cluster][0m
[36m(test_config pid=12545)[0m Validation score improved to 0.7256
[36m(test_config pid=13004)[0m Epoch [85/100], Loss: 0.0704, Val score: 0.7680[32m [repeated 12x across cluster][0m
[36m(test_config pid=12545)[0m Validation score improved to 0.7261
[36m(test_config pid=13004)[0m Validation score improved to 0.8084
[36m(test_config pid=13004)[0m Validation score improved to 0.8123
[36m(test_config pid=12545)[0m Epoch [19/100], Loss: 0.2338, Val score: 0.7153[32m [repeated 13x across cluster][0m
[36m(test_config pid=13004)[0m Validation score improved to 0.8132[32m [repeated 2x across cluster][0m
[36m(test_config pid=12545)[0m Epoch [22/100], Loss: 0.1303, Val score: 0.7114[32m [repeated 14x across cluster][0m
[36m(test_config pid=13004)[0m Validation score improved to 0.8144

[36m(test_config pid=13223)[0m [2025-11-13 20:00:37,857 E 13223 13250] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12922)[0m Validation score improved to 0.6924[32m [repeated 4x across cluster][0m
[36m(test_config pid=13114)[0m Epoch [86/100], Loss: 0.1943, Val score: 0.7211[32m [repeated 13x across cluster][0m
[36m(test_config pid=12922)[0m Validation score improved to 0.7047[32m [repeated 7x across cluster][0m
[36m(test_config pid=13114)[0m Epoch [89/100], Loss: 0.1277, Val score: 0.7330[32m [repeated 14x across cluster][0m
[36m(test_config pid=12922)[0m Validation score improved to 0.7162[32m [repeated 3x across cluster][0m
[36m(test_config pid=13223)[0m Epoch [21/100], Loss: 0.0137, Val score: 0.7738[32m [repeated 14x across cluster][0m
[36m(test_config pid=12922)[0m Validation score improved to 0.7163[32m [repeated 2x across cluster][0m
[36m(test_config pid=12922)[0m Epoch [10/100], Loss: 0.0924, Val score: 0.7101[32m [repeated 15x across cluster][0m
[36m(test_config pid=13004)[0m Validation score improved to 0.7690[32m [repeated 4x acros

[36m(test_config pid=13331)[0m [2025-11-13 20:03:04,054 E 13331 13358] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=12922)[0m Validation score improved to 0.7692
[36m(test_config pid=12922)[0m Epoch [60/100], Loss: 0.1015, Val score: 0.7538[32m [repeated 11x across cluster][0m
[36m(test_config pid=13223)[0m Validation score improved to 0.8047
[36m(test_config pid=12922)[0m Epoch [62/100], Loss: 0.0276, Val score: 0.7505[32m [repeated 12x across cluster][0m
[36m(test_config pid=13223)[0m Validation score improved to 0.8084[32m [repeated 3x across cluster][0m
[36m(test_config pid=12922)[0m Epoch [64/100], Loss: 0.1221, Val score: 0.7487[32m [repeated 10x across cluster][0m
[36m(test_config pid=13114)[0m Validation score improved to 0.7645
[36m(test_config pid=13331)[0m Validation score improved to 0.7226
[36m(test_config pid=12922)[0m Epoch [66/100], Loss: 0.3020, Val score: 0.7617[32m [repeated 12x across cluster][0m
[36m(test_config pid=13331)[0m Validation score improved to 0.7357
[36m(test_config pid=13331)[0m Validation score improved to 0.7475

[36m(test_config pid=13499)[0m [2025-11-13 20:09:38,666 E 13499 13527] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=13499)[0m Epoch [33/100], Loss: 0.0222, Val score: 0.7700[32m [repeated 18x across cluster][0m
[36m(test_config pid=13499)[0m Validation score improved to 0.7833[32m [repeated 2x across cluster][0m
[36m(test_config pid=13499)[0m Epoch [39/100], Loss: 0.0597, Val score: 0.7786[32m [repeated 12x across cluster][0m
[36m(test_config pid=13223)[0m Validation score improved to 0.6761[32m [repeated 3x across cluster][0m
[36m(test_config pid=13499)[0m Epoch [48/100], Loss: 0.1011, Val score: 0.7756[32m [repeated 18x across cluster][0m
[36m(test_config pid=13499)[0m Validation score improved to 0.8044[32m [repeated 2x across cluster][0m
[36m(test_config pid=13499)[0m Epoch [56/100], Loss: 0.0055, Val score: 0.7457[32m [repeated 16x across cluster][0m
[36m(test_config pid=13223)[0m Validation score improved to 0.7312[32m [repeated 3x across cluster][0m
[36m(test_config pid=12922)[0m Epoch [16/100], Loss: 0.0821, Val score: 0.7072[32m [repea

[36m(test_config pid=13625)[0m [2025-11-13 20:13:00,610 E 13625 13651] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=13625)[0m Validation score improved to 0.7211
[36m(test_config pid=13625)[0m Epoch [20/100], Loss: 0.1049, Val score: 0.7211[32m [repeated 21x across cluster][0m
[36m(test_config pid=13499)[0m Validation score improved to 0.7110[32m [repeated 5x across cluster][0m
[36m(test_config pid=13331)[0m Epoch [40/100], Loss: 0.1092, Val score: 0.7473[32m [repeated 19x across cluster][0m
[36m(test_config pid=13625)[0m Validation score improved to 0.7421[32m [repeated 4x across cluster][0m
[36m(test_config pid=13499)[0m Epoch [16/100], Loss: 0.0569, Val score: 0.7318[32m [repeated 19x across cluster][0m
[36m(test_config pid=13625)[0m Validation score improved to 0.7589[32m [repeated 3x across cluster][0m
[36m(test_config pid=13499)[0m Epoch [24/100], Loss: 0.0520, Val score: 0.7434[32m [repeated 20x across cluster][0m
[36m(test_config pid=13499)[0m Validation score improved to 0.7653[32m [repeated 4x across cluster][0m
[36m(test_config pid=1

[36m(test_config pid=14036)[0m [2025-11-13 20:14:05,598 E 14036 14063] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=14036)[0m Validation score improved to 0.7937[32m [repeated 11x across cluster][0m
[36m(test_config pid=14036)[0m Epoch [76/100], Loss: 0.1130, Val score: 0.7928[32m [repeated 43x across cluster][0m
[36m(test_config pid=13331)[0m Early stopping at epoch 86
[36m(test_config pid=14036)[0m Validation score improved to 0.8133[32m [repeated 9x across cluster][0m
[36m(test_config pid=14036)[0m Epoch [94/100], Loss: 0.0572, Val score: 0.8103[32m [repeated 41x across cluster][0m
[36m(test_config pid=13625)[0m Validation score improved to 0.8033[32m [repeated 11x across cluster][0m
[36m(test_config pid=14036)[0m Epoch [12/100], Loss: 0.1116, Val score: 0.7233[32m [repeated 39x across cluster][0m
[36m(test_config pid=13625)[0m Validation score improved to 0.7347[32m [repeated 12x across cluster][0m


[36m(test_config pid=14118)[0m [2025-11-13 20:14:22,602 E 14118 14145] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=13625)[0m Epoch [6/100], Loss: 0.1519, Val score: 0.7029[32m [repeated 33x across cluster][0m
[36m(test_config pid=14036)[0m Validation score improved to 0.7766[32m [repeated 7x across cluster][0m
[36m(test_config pid=14036)[0m Epoch [49/100], Loss: 0.1641, Val score: 0.7954[32m [repeated 48x across cluster][0m
[36m(test_config pid=14118)[0m Validation score improved to 0.7972[32m [repeated 3x across cluster][0m
[36m(test_config pid=14036)[0m Epoch [50/100], Loss: 0.0920, Val score: 0.7632[32m [repeated 2x across cluster][0m
[36m(test_config pid=14118)[0m Epoch [81/100], Loss: 0.1082, Val score: 0.7804
[36m(test_config pid=14036)[0m Epoch [51/100], Loss: 0.0605, Val score: 0.7822
[36m(test_config pid=14036)[0m 
[36m(test_config pid=14197)[0m Validation score improved to 0.6879
[36m(test_config pid=14036)[0m Epoch [59/100], Loss: 0.1853, Val score: 0.7587[32m [repeated 18x across cluster][0m
[36m(test_config pid=13625)[0m Validation

[36m(test_config pid=14197)[0m [2025-11-13 20:16:00,589 E 14197 14224] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=14036)[0m Epoch [77/100], Loss: 0.1208, Val score: 0.8150[32m [repeated 24x across cluster][0m
[36m(test_config pid=14118)[0m Validation score improved to 0.7553[32m [repeated 10x across cluster][0m
[36m(test_config pid=13625)[0m Epoch [28/100], Loss: 0.1292, Val score: 0.7103[32m [repeated 49x across cluster][0m
[36m(test_config pid=14036)[0m Validation score improved to 0.7090[32m [repeated 14x across cluster][0m
[36m(test_config pid=14036)[0m Epoch [20/100], Loss: 0.1714, Val score: 0.7393[32m [repeated 49x across cluster][0m
[36m(test_config pid=14036)[0m Validation score improved to 0.7641[32m [repeated 13x across cluster][0m
[36m(test_config pid=14118)[0m Epoch [55/100], Loss: 0.1461, Val score: 0.8004[32m [repeated 44x across cluster][0m
[36m(test_config pid=14036)[0m Validation score improved to 0.8006[32m [repeated 6x across cluster][0m
[36m(test_config pid=14036)[0m Epoch [55/100], Loss: 0.1040, Val score: 0.7894[32m [re

[36m(test_config pid=14317)[0m [2025-11-13 20:18:07,673 E 14317 14346] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=13625)[0m Validation score improved to 0.7943
[36m(test_config pid=14317)[0m Validation score improved to 0.0000
[36m(test_config pid=13625)[0m Epoch [67/100], Loss: 0.1355, Val score: 0.8038[32m [repeated 9x across cluster][0m
[36m(test_config pid=14317)[0m Validation score improved to 0.7023[32m [repeated 3x across cluster][0m
[36m(test_config pid=13625)[0m Epoch [73/100], Loss: 0.0929, Val score: 0.8000[32m [repeated 10x across cluster][0m
[36m(test_config pid=13625)[0m Validation score improved to 0.8092
[36m(test_config pid=13625)[0m Epoch [80/100], Loss: 0.1084, Val score: 0.7860[32m [repeated 12x across cluster][0m
[36m(test_config pid=14398)[0m Validation score improved to 0.0000
[36m(test_config pid=14398)[0m Epoch [3/100], Loss: 0.2150, Val score: 0.0000[32m [repeated 13x across cluster][0m
[36m(test_config pid=14398)[0m Validation score improved to 0.4014
[36m(test_config pid=13625)[0m Epoch [94/100], Loss: 0.1402, Val sco

[36m(test_config pid=14398)[0m [2025-11-13 20:18:37,821 E 14398 14428] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=13625)[0m Epoch [9/100], Loss: 0.0926, Val score: 0.6774[32m [repeated 18x across cluster][0m
[36m(test_config pid=14317)[0m Validation score improved to 0.7361[32m [repeated 4x across cluster][0m
[36m(test_config pid=14317)[0m Epoch [18/100], Loss: 0.3732, Val score: 0.0000[32m [repeated 17x across cluster][0m
[36m(test_config pid=13625)[0m Validation score improved to 0.6827[32m [repeated 3x across cluster][0m
[36m(test_config pid=14197)[0m Epoch [83/100], Loss: 0.0915, Val score: 0.7701[32m [repeated 16x across cluster][0m
[36m(test_config pid=14398)[0m Validation score improved to 0.7453[32m [repeated 5x across cluster][0m
[36m(test_config pid=13625)[0m Epoch [31/100], Loss: 0.0789, Val score: 0.6938[32m [repeated 17x across cluster][0m
[36m(test_config pid=13625)[0m Validation score improved to 0.7104[32m [repeated 4x across cluster][0m
[36m(test_config pid=13625)[0m Epoch [39/100], Loss: 0.1065, Val score: 0.7276[32m [repeat

[36m(test_config pid=14532)[0m [2025-11-13 20:22:15,082 E 14532 14560] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=14532)[0m Validation score improved to 0.7022[32m [repeated 13x across cluster][0m
[36m(test_config pid=14532)[0m Epoch [42/100], Loss: 0.1369, Val score: 0.6917[32m [repeated 34x across cluster][0m
[36m(test_config pid=14532)[0m Early stopping at epoch 51
[36m(test_config pid=14532)[0m Validation score improved to 0.3607[32m [repeated 5x across cluster][0m
[36m(test_config pid=14532)[0m Epoch [18/100], Loss: 0.2031, Val score: 0.3607[32m [repeated 35x across cluster][0m
[36m(test_config pid=14532)[0m Validation score improved to 0.6897[32m [repeated 16x across cluster][0m
[36m(test_config pid=14398)[0m Epoch [83/100], Loss: 0.1522, Val score: 0.7747[32m [repeated 32x across cluster][0m
[36m(test_config pid=14532)[0m Validation score improved to 0.7319[32m [repeated 8x across cluster][0m
[36m(test_config pid=14532)[0m Epoch [70/100], Loss: 0.0903, Val score: 0.7120[32m [repeated 35x across cluster][0m
[36m(test_config pid=14532)[

[36m(test_config pid=14633)[0m [2025-11-13 20:24:00,295 E 14633 14660] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=14197)[0m Validation score improved to 0.7589
[36m(test_config pid=14633)[0m Epoch [9/100], Loss: 0.2473, Val score: 0.6905[32m [repeated 8x across cluster][0m
[36m(test_config pid=14633)[0m Epoch [11/100], Loss: 0.2527, Val score: 0.6813[32m [repeated 9x across cluster][0m
[36m(test_config pid=14197)[0m Validation score improved to 0.7687
[36m(test_config pid=14398)[0m Epoch [57/100], Loss: 0.1134, Val score: 0.6472[32m [repeated 10x across cluster][0m
[36m(test_config pid=14317)[0m Early stopping at epoch 31
[36m(test_config pid=14398)[0m Validation score improved to 0.7921
[36m(test_config pid=14317)[0m Validation score improved to 0.0000
[36m(test_config pid=14317)[0m Epoch [1/100], Loss: 0.1344, Val score: 0.0000[32m [repeated 8x across cluster][0m
[36m(test_config pid=14317)[0m Validation score improved to 0.7124[32m [repeated 3x across cluster][0m
[36m(test_config pid=14317)[0m Epoch [3/100], Loss: 0.1954, Val score: 0.7124[3

[36m(test_config pid=14746)[0m [2025-11-13 20:26:27,736 E 14746 14772] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=14746)[0m Epoch [31/100], Loss: 0.1408, Val score: 0.7526[32m [repeated 24x across cluster][0m
[36m(test_config pid=14746)[0m Validation score improved to 0.7526[32m [repeated 3x across cluster][0m
[36m(test_config pid=14746)[0m Epoch [46/100], Loss: 0.1668, Val score: 0.7421[32m [repeated 22x across cluster][0m
[36m(test_config pid=14746)[0m Validation score improved to 0.7784[32m [repeated 2x across cluster][0m
[36m(test_config pid=14398)[0m Epoch [49/100], Loss: 0.1170, Val score: 0.7359[32m [repeated 22x across cluster][0m
[36m(test_config pid=14746)[0m Early stopping at epoch 62
[36m(test_config pid=14746)[0m Validation score improved to 0.0000
[36m(test_config pid=14746)[0m Validation score improved to 0.7012
[36m(test_config pid=14746)[0m Validation score improved to 0.7453
[36m(test_config pid=14746)[0m Validation score improved to 0.7527
[36m(test_config pid=14746)[0m Epoch [11/100], Loss: 0.1914, Val score: 0.7469[32m [rep

[36m(test_config pid=14855)[0m [2025-11-13 20:28:55,582 E 14855 14883] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=14633)[0m Validation score improved to 0.7694
[36m(test_config pid=14855)[0m Epoch [10/100], Loss: 0.0878, Val score: 0.7160[32m [repeated 11x across cluster][0m
[36m(test_config pid=14855)[0m Validation score improved to 0.7160[32m [repeated 3x across cluster][0m
[36m(test_config pid=14633)[0m Epoch [40/100], Loss: 0.2984, Val score: 0.7398[32m [repeated 11x across cluster][0m
[36m(test_config pid=14398)[0m Validation score improved to 0.7631[32m [repeated 2x across cluster][0m
[36m(test_config pid=14197)[0m Epoch [43/100], Loss: 0.0047, Val score: 0.7271[32m [repeated 10x across cluster][0m
[36m(test_config pid=14197)[0m Validation score improved to 0.7271[32m [repeated 2x across cluster][0m
[36m(test_config pid=14633)[0m Epoch [45/100], Loss: 0.2893, Val score: 0.7311[32m [repeated 11x across cluster][0m
[36m(test_config pid=14855)[0m Validation score improved to 0.7390[32m [repeated 2x across cluster][0m
[36m(test_config pid=1

[36m(test_config pid=14959)[0m [2025-11-13 20:30:56,001 E 14959 14985] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=14959)[0m Validation score improved to 0.7383[32m [repeated 2x across cluster][0m
[36m(test_config pid=14959)[0m Epoch [14/100], Loss: 0.2746, Val score: 0.7404[32m [repeated 9x across cluster][0m
[36m(test_config pid=14959)[0m Validation score improved to 0.7516[32m [repeated 3x across cluster][0m
[36m(test_config pid=14959)[0m Epoch [19/100], Loss: 0.0437, Val score: 0.7029[32m [repeated 11x across cluster][0m
[36m(test_config pid=14633)[0m Epoch [93/100], Loss: 0.0741, Val score: 0.7822[32m [repeated 11x across cluster][0m
[36m(test_config pid=14959)[0m Validation score improved to 0.7617
[36m(test_config pid=14959)[0m Validation score improved to 0.7645
[36m(test_config pid=14959)[0m Validation score improved to 0.7709
[36m(test_config pid=14959)[0m Epoch [29/100], Loss: 0.1250, Val score: 0.7666[32m [repeated 12x across cluster][0m
[36m(test_config pid=14959)[0m Validation score improved to 0.7803
[36m(test_config pid=14633)[0

[36m(test_config pid=15083)[0m [2025-11-13 20:35:30,689 E 15083 15109] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=14959)[0m Epoch [48/100], Loss: 0.0484, Val score: 0.7406[32m [repeated 15x across cluster][0m
[36m(test_config pid=14633)[0m Validation score improved to 0.6556[32m [repeated 7x across cluster][0m
[36m(test_config pid=14633)[0m Epoch [7/100], Loss: 0.1239, Val score: 0.6693[32m [repeated 16x across cluster][0m
[36m(test_config pid=14633)[0m Validation score improved to 0.6693[32m [repeated 4x across cluster][0m
[36m(test_config pid=14959)[0m Epoch [61/100], Loss: 0.2746, Val score: 0.7386[32m [repeated 14x across cluster][0m
[36m(test_config pid=15083)[0m Validation score improved to 0.7290[32m [repeated 2x across cluster][0m
[36m(test_config pid=14959)[0m Epoch [68/100], Loss: 0.0286, Val score: 0.7728[32m [repeated 17x across cluster][0m
[36m(test_config pid=14959)[0m Validation score improved to 0.7784[32m [repeated 2x across cluster][0m
[36m(test_config pid=14959)[0m Epoch [75/100], Loss: 0.0035, Val score: 0.7773[32m [repeat

[36m(test_config pid=15186)[0m [2025-11-13 20:38:03,830 E 15186 15212] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15186)[0m Epoch [5/100], Loss: 0.1771, Val score: 0.1013[32m [repeated 34x across cluster][0m
[36m(test_config pid=15186)[0m Validation score improved to 0.7222[32m [repeated 13x across cluster][0m
[36m(test_config pid=14633)[0m Epoch [93/100], Loss: 0.0566, Val score: 0.7184[32m [repeated 33x across cluster][0m
[36m(test_config pid=14633)[0m Validation score improved to 0.7254[32m [repeated 3x across cluster][0m
[36m(test_config pid=15186)[0m Early stopping at epoch 44
[36m(test_config pid=15186)[0m Epoch [9/100], Loss: 0.1651, Val score: 0.6839[32m [repeated 34x across cluster][0m
[36m(test_config pid=15186)[0m Validation score improved to 0.7295[32m [repeated 12x across cluster][0m
[36m(test_config pid=15186)[0m Epoch [33/100], Loss: 0.1965, Val score: 0.7359[32m [repeated 34x across cluster][0m
[36m(test_config pid=15186)[0m Validation score improved to 0.7359[32m [repeated 4x across cluster][0m
[36m(test_config pid=15186)[0m

[36m(test_config pid=15285)[0m [2025-11-13 20:39:37,863 E 15285 15311] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15285)[0m Validation score improved to 0.7555[32m [repeated 3x across cluster][0m
[36m(test_config pid=15285)[0m Epoch [40/100], Loss: 0.1603, Val score: 0.7639[32m [repeated 16x across cluster][0m
[36m(test_config pid=15083)[0m Early stopping at epoch 100
[36m(test_config pid=15285)[0m Validation score improved to 0.7717[32m [repeated 2x across cluster][0m
[36m(test_config pid=15285)[0m Epoch [47/100], Loss: 0.0834, Val score: 0.7286[32m [repeated 15x across cluster][0m
[36m(test_config pid=15285)[0m Validation score improved to 0.7770[32m [repeated 6x across cluster][0m
[36m(test_config pid=14855)[0m Epoch [85/100], Loss: 0.1099, Val score: 0.7148[32m [repeated 17x across cluster][0m
[36m(test_config pid=14633)[0m Validation score improved to 0.7476[32m [repeated 4x across cluster][0m
[36m(test_config pid=15285)[0m Epoch [62/100], Loss: 0.0497, Val score: 0.7508[32m [repeated 17x across cluster][0m
[36m(test_config pid=14855)[0

[36m(test_config pid=15394)[0m [2025-11-13 20:41:46,988 E 15394 15421] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15285)[0m Validation score improved to 0.7541[32m [repeated 5x across cluster][0m
[36m(test_config pid=14855)[0m Epoch [43/100], Loss: 0.0857, Val score: 0.6827[32m [repeated 20x across cluster][0m
[36m(test_config pid=15285)[0m Validation score improved to 0.7700
[36m(test_config pid=15285)[0m Validation score improved to 0.7721
[36m(test_config pid=15285)[0m Validation score improved to 0.7869
[36m(test_config pid=15394)[0m Epoch [36/100], Loss: 0.1477, Val score: 0.7231[32m [repeated 21x across cluster][0m
[36m(test_config pid=15083)[0m Validation score improved to 0.0000
[36m(test_config pid=15083)[0m Validation score improved to 0.6423
[36m(test_config pid=15083)[0m Epoch [4/100], Loss: 0.1637, Val score: 0.6620[32m [repeated 22x across cluster][0m
[36m(test_config pid=14855)[0m Early stopping at epoch 49
[36m(test_config pid=15083)[0m Validation score improved to 0.6693[32m [repeated 5x across cluster][0m
[36m(test_config pid=

[36m(test_config pid=15480)[0m [2025-11-13 20:42:34,955 E 15480 15506] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15083)[0m Epoch [35/100], Loss: 0.1657, Val score: 0.6567[32m [repeated 29x across cluster][0m
[36m(test_config pid=15285)[0m Validation score improved to 0.6986[32m [repeated 5x across cluster][0m
[36m(test_config pid=15480)[0m Epoch [53/100], Loss: 0.1727, Val score: 0.7831[32m [repeated 29x across cluster][0m
[36m(test_config pid=15285)[0m Validation score improved to 0.7106[32m [repeated 7x across cluster][0m
[36m(test_config pid=15285)[0m Epoch [30/100], Loss: 0.1278, Val score: 0.7193[32m [repeated 28x across cluster][0m
[36m(test_config pid=15394)[0m Validation score improved to 0.7265[32m [repeated 4x across cluster][0m
[36m(test_config pid=15394)[0m Epoch [9/100], Loss: 0.2032, Val score: 0.7460[32m [repeated 29x across cluster][0m
[36m(test_config pid=15480)[0m Validation score improved to 0.7974[32m [repeated 6x across cluster][0m
[36m(test_config pid=15285)[0m Epoch [47/100], Loss: 0.0810, Val score: 0.7294[32m [repeat

[36m(test_config pid=15591)[0m [2025-11-13 20:45:03,807 E 15591 15618] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15083)[0m Epoch [63/100], Loss: 0.0435, Val score: 0.7623[32m [repeated 46x across cluster][0m
[36m(test_config pid=15591)[0m Validation score improved to 0.8112[32m [repeated 4x across cluster][0m
[36m(test_config pid=15394)[0m Epoch [23/100], Loss: 0.1133, Val score: 0.6917[32m [repeated 48x across cluster][0m
[36m(test_config pid=15591)[0m Validation score improved to 0.8259[32m [repeated 5x across cluster][0m
[36m(test_config pid=15591)[0m Epoch [9/100], Loss: 0.1181, Val score: 0.7650[32m [repeated 49x across cluster][0m
[36m(test_config pid=15083)[0m Early stopping at epoch 74
[36m(test_config pid=15394)[0m Validation score improved to 0.7030[32m [repeated 12x across cluster][0m
[36m(test_config pid=15591)[0m Epoch [36/100], Loss: 0.0821, Val score: 0.7879[32m [repeated 46x across cluster][0m
[36m(test_config pid=15591)[0m Validation score improved to 0.8047[32m [repeated 6x across cluster][0m
[36m(test_config pid=15591)[0m

[36m(test_config pid=15681)[0m [2025-11-13 20:45:51,661 E 15681 15707] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15394)[0m Epoch [81/100], Loss: 0.0825, Val score: 0.6968[32m [repeated 58x across cluster][0m
[36m(test_config pid=15480)[0m Early stopping at epoch 55
[36m(test_config pid=15591)[0m Validation score improved to 0.7271[32m [repeated 14x across cluster][0m
[36m(test_config pid=15591)[0m Epoch [26/100], Loss: 0.1986, Val score: 0.7330[32m [repeated 48x across cluster][0m
[36m(test_config pid=15394)[0m Validation score improved to 0.7690[32m [repeated 12x across cluster][0m
[36m(test_config pid=15591)[0m Epoch [51/100], Loss: 0.1059, Val score: 0.7604[32m [repeated 43x across cluster][0m
[36m(test_config pid=15763)[0m Validation score improved to 0.0000[32m [repeated 6x across cluster][0m
[36m(test_config pid=15591)[0m Epoch [77/100], Loss: 0.0422, Val score: 0.7798[32m [repeated 49x across cluster][0m
[36m(test_config pid=15681)[0m Validation score improved to 0.8341[32m [repeated 12x across cluster][0m
[36m(test_config pid=15591)

[36m(test_config pid=15763)[0m [2025-11-13 20:46:26,762 E 15763 15789] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15841)[0m Epoch [12/100], Loss: 0.0037, Val score: 0.6872[32m [repeated 33x across cluster][0m
[36m(test_config pid=15681)[0m Validation score improved to 0.7974[32m [repeated 2x across cluster][0m
[36m(test_config pid=15681)[0m Epoch [44/100], Loss: 0.0834, Val score: 0.7944[32m [repeated 32x across cluster][0m
[36m(test_config pid=15394)[0m Validation score improved to 0.7382[32m [repeated 4x across cluster][0m
[36m(test_config pid=15763)[0m Early stopping at epoch 52
[36m(test_config pid=15681)[0m Epoch [54/100], Loss: 0.0382, Val score: 0.7769[32m [repeated 30x across cluster][0m
[36m(test_config pid=15763)[0m Validation score improved to 0.6314[32m [repeated 6x across cluster][0m


[36m(test_config pid=15841)[0m [2025-11-13 20:46:43,861 E 15841 15868] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15394)[0m Epoch [50/100], Loss: 0.1320, Val score: 0.7348[32m [repeated 31x across cluster][0m
[36m(test_config pid=15394)[0m Validation score improved to 0.7544[32m [repeated 5x across cluster][0m
[36m(test_config pid=15681)[0m Epoch [75/100], Loss: 0.1436, Val score: 0.8006[32m [repeated 32x across cluster][0m
[36m(test_config pid=15681)[0m Validation score improved to 0.8272[32m [repeated 5x across cluster][0m
[36m(test_config pid=15841)[0m Epoch [48/100], Loss: 0.0412, Val score: 0.7268[32m [repeated 32x across cluster][0m
[36m(test_config pid=15394)[0m Validation score improved to 0.7634[32m [repeated 4x across cluster][0m
[36m(test_config pid=15681)[0m Epoch [97/100], Loss: 0.0951, Val score: 0.8155[32m [repeated 34x across cluster][0m
[36m(test_config pid=15394)[0m Validation score improved to 0.7721[32m [repeated 4x across cluster][0m
[36m(test_config pid=15394)[0m Epoch [77/100], Loss: 0.0794, Val score: 0.7786[32m [repea

[36m(test_config pid=15955)[0m [2025-11-13 20:47:56,852 E 15955 15982] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15955)[0m Validation score improved to 0.7954[32m [repeated 11x across cluster][0m
[36m(test_config pid=15841)[0m Early stopping at epoch 41
[36m(test_config pid=15681)[0m Epoch [42/100], Loss: 0.1157, Val score: 0.6977[32m [repeated 46x across cluster][0m
[36m(test_config pid=15681)[0m Validation score improved to 0.7795[32m [repeated 6x across cluster][0m
[36m(test_config pid=15955)[0m Early stopping at epoch 62
[36m(test_config pid=15681)[0m Epoch [52/100], Loss: 0.0509, Val score: 0.7476[32m [repeated 44x across cluster][0m
[36m(test_config pid=15955)[0m Validation score improved to 0.7097[32m [repeated 6x across cluster][0m
[36m(test_config pid=15681)[0m Epoch [63/100], Loss: 0.0344, Val score: 0.7840[32m [repeated 48x across cluster][0m
[36m(test_config pid=15841)[0m Validation score improved to 0.7259[32m [repeated 12x across cluster][0m
[36m(test_config pid=15841)[0m Epoch [23/100], Loss: 0.1758, Val score: 0.7107[32m [rep

[36m(test_config pid=16057)[0m [2025-11-13 20:49:33,188 E 16057 16085] core_worker_process.cc:825: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14


[36m(test_config pid=15841)[0m Epoch [34/100], Loss: 0.0113, Val score: 0.6791[32m [repeated 28x across cluster][0m
[36m(test_config pid=16057)[0m Validation score improved to 0.7173
[36m(test_config pid=15841)[0m Validation score improved to 0.6946
[36m(test_config pid=15841)[0m Epoch [41/100], Loss: 0.1180, Val score: 0.6841[32m [repeated 27x across cluster][0m
[36m(test_config pid=15841)[0m Validation score improved to 0.6979[32m [repeated 5x across cluster][0m
[36m(test_config pid=15763)[0m Epoch [71/100], Loss: 0.1761, Val score: 0.7065[32m [repeated 27x across cluster][0m
[36m(test_config pid=16061)[0m Validation score improved to 0.7383[32m [repeated 4x across cluster][0m
[36m(test_config pid=15841)[0m Epoch [54/100], Loss: 0.1193, Val score: 0.7026[32m [repeated 23x across cluster][0m
[36m(test_config pid=15763)[0m Validation score improved to 0.7128[32m [repeated 4x across cluster][0m
[36m(test_config pid=15841)[0m Epoch [61/100], Loss: 0.0940

2025-11-13 20:58:30,528	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/markus/ray_results/test_config_2025-11-13_19-22-45' in 0.0169s.
2025-11-13 20:58:30,540	INFO tune.py:1041 -- Total run time: 5744.93 seconds (5744.86 seconds for the tuning loop).


Best trial config: {'base_dir': '/mnt/c/Users/chari/OneDrive - Alma Mater Studiorum Università di Bologna/Scuola/Università/MAGISTRALE/GitHub/LB2_project_Group_3/Feature_Selection', 'num_layers': 3, 'hidden_sizes': [256, 128, 64], 'dropout': 0.16480384453338065, 'lr': 0.0009281499336016161, 'batch_size': 64}
Best CV MCC: 0.7949216758601616


## Benchmark set and final evaluation

In [8]:
print(best_trial.config)

{'base_dir': '/mnt/c/Users/chari/OneDrive - Alma Mater Studiorum Università di Bologna/Scuola/Università/MAGISTRALE/GitHub/LB2_project_Group_3/Feature_Selection', 'num_layers': 3, 'hidden_sizes': [256, 128, 64], 'dropout': 0.16480384453338065, 'lr': 0.0009281499336016161, 'batch_size': 64}


In [15]:
from importnb import Notebook
import pandas as pd
import sys, io
# add the folder vonheijine that contains our vonheijine functions
sys.path.append('../Feature_Selection/')
with Notebook():
    import custom_features


In [16]:
def update_vonheijne(sets, matrix): 
    seq_features=[]
    for seq in sets:
        seq=seq.replace("X" , "")
        seq=seq.replace("U" , "C")
        vonhejine=custom_features.vonheijne_feature(matrix, seq) #get the von heijne feature for that sequence
        seq_features.append(vonhejine)
    hejine_col = np.array(seq_features) #transform the list that contains all the features in an array
    return hejine_col

In [17]:
#Load the npz files of training, testing and validation sets for each iteration
dataset = pd.read_csv("../Data_Preparation/train_bench.tsv", sep = "\t")
# load training
# 5th iteration was: validation set 1 , training set 2,3,4 , testing set 5
loaded_data_train = np.load('../Feature_Selection/training_features_5.npz')
x_train = loaded_data_train['matrix']
y_train = loaded_data_train['target']

# load test
loaded_data_test = np.load('../Feature_Selection/testing_features_5.npz')
x_test = loaded_data_test['matrix']
y_test = loaded_data_test['target']

# load validation
loaded_data_validation = np.load('../Feature_Selection/validation_features_5.npz')
x_validation = loaded_data_validation['matrix']
y_validation = loaded_data_validation['target']

#concatenation of the training, and test portion in one unique set, and update the von heijine feature for the new training set, and the validation set and encode the benchmark set.
#concatenate the matrices in the correct order
x_training_conc = np.concatenate((x_train , x_test), axis=0) #order is maintained: 1,2,3,4,5
y_training_conc = np.concatenate((y_train, y_test), axis=0)
#replace the old VonHejine feature with the new VonHejine basing on the updated PSWM
training= dataset.query(" Set=='2' or Set=='3' or Set=='4' or Set=='5'")
validation=dataset.query("Set=='1'")
matrix_training=custom_features.get_pswm(training , 13 , 2)
x_training_conc[:, 17] = update_vonheijne(training["Sequence"], matrix_training)
x_validation[:, 17]=update_vonheijne(validation["Sequence"],matrix_training)
#Load the benchmark set and encode it
benchmark=dataset.query("Set=='Benchmark'")
feature_set_benchmark , feature_order_training = custom_features.get_all_features(benchmark["Sequence"] , matrix_training, 15 )
vector_neg_pos = benchmark["Class"]
vector_proper = vector_neg_pos.map({"Positive": 1, "Negative": 0})
target_benchmark_vector = vector_proper.to_numpy()


In [18]:
#Save the sets for eventually future purposes and analysis
np.savez('benchmark_features.npz', matrix=feature_set_benchmark, target=target_benchmark_vector)
np.savez('training_features.npz', matrix=x_training_conc, target=y_training_conc)
np.savez('validation_features.npz', matrix=x_validation, target=y_validation)

In [20]:
# Split the dataset into training, validation and test sets
train_dataset = HelixDataset(x_training_conc, y_training_conc)
val_dataset = HelixDataset(x_validation, y_validation)
test_dataset = HelixDataset(feature_set_benchmark, target_benchmark_vector)

        # Create data loaders divided in batches
batch_size = best_trial.config["batch_size"]
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Initialize the model
input_size = x_training_conc.shape[1]

model = SP_MLP(
    input_size,
    best_trial.config["hidden_sizes"],
    output_size=1,
    dropout_p=best_trial.config["dropout"]).to(device)

optimizer = optim.Adam(model.parameters(), lr=best_trial.config["lr"]) #use adam optimized with learning rate chosen by random search
criterion = nn.BCELoss()  #loss function for binary classification

best_state = train_val(model, train_loader, val_loader,
                       optimizer, criterion,
                       epochs=100, patience=20)
model.load_state_dict(best_state) #best state(with optimized weight) with chosen parameter 

# calcoli MCC sul test di quel fold 
mcc = test(model, test_loader)

Validation score improved to 0.3940
Epoch [1/100], Loss: 0.0948, Val score: 0.3940
Validation score improved to 0.7054
Epoch [2/100], Loss: 0.1864, Val score: 0.7054
Epoch [3/100], Loss: 0.3578, Val score: 0.5944
Epoch [4/100], Loss: 0.2893, Val score: 0.6379
Validation score improved to 0.7189
Epoch [5/100], Loss: 0.4151, Val score: 0.7189
Validation score improved to 0.7309
Epoch [6/100], Loss: 0.2808, Val score: 0.7309
Epoch [7/100], Loss: 0.0223, Val score: 0.5919
Validation score improved to 0.7333
Epoch [8/100], Loss: 0.2762, Val score: 0.7333
Validation score improved to 0.7401
Epoch [9/100], Loss: 0.0598, Val score: 0.7401
Validation score improved to 0.7523
Epoch [10/100], Loss: 0.0446, Val score: 0.7523
Validation score improved to 0.7545
Epoch [11/100], Loss: 0.1370, Val score: 0.7545
Epoch [12/100], Loss: 0.0966, Val score: 0.7422
Epoch [13/100], Loss: 0.0397, Val score: 0.7436
Epoch [14/100], Loss: 0.0599, Val score: 0.7200
Epoch [15/100], Loss: 0.1932, Val score: 0.7352
V

In [22]:
print("MCC on benchmark set:", mcc)

MCC on benchmark set: 0.7464940969320493
