In [None]:
# install packages for colab
!pip install optuna
!pip install sentence_transformers

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.5 alembic-1.13.2 colorlog-6.8.2 optuna-3.6.1
Collecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━

In [None]:
# load the autoreload extension; mode 0 keeps automatic reloading switched OFF
# (switch to `%autoreload 2` if edited local modules should be re-imported before each cell)
%load_ext autoreload
%autoreload 0

In [None]:
# colab specific, remove if running locally
# mounts Google Drive at /content/drive so the Drive-based paths used by later cells resolve
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# edit paths
# both directory strings keep their trailing slash: later cells build file names from them
# by plain string concatenation
path_to_data = '/content/drive/MyDrive/civol/iceccme/'
filepath_data = f'{path_to_data}sample_labeled_by_qwen2_10_runs_FINAL.csv'
modelpath = f'{path_to_data}models_qwen/'

In [None]:
# helper function for bringing data into required structure
def get_dataset_from_df(col_text_a, col_text_b, label_col, df):
    """Turn a labelled DataFrame into symmetric ``(text_a, text_b, label)`` tuples.

    Every row contributes two consecutive entries: the pair in its original
    order, followed by the same pair with the two texts swapped, both carrying
    the row's label.

    Args:
        col_text_a: Name of the column holding the first text.
        col_text_b: Name of the column holding the second text.
        label_col: Name of the column holding the label.
        df: DataFrame containing the three columns.

    Returns:
        A list of ``2 * len(df)`` tuples, in row order.

    Raises:
        KeyError: If one of the named columns is missing from ``df``.
    """
    # Walk the three columns in lockstep instead of using ``iterrows``: no Series is
    # built per row, and each value keeps its own column's dtype rather than the
    # per-row common dtype.
    dataset = []
    for text_a, text_b, label in zip(df[col_text_a], df[col_text_b], df[label_col]):
        dataset.append((text_a, text_b, label))
        dataset.append((text_b, text_a, label))
    return dataset


In [None]:
# import modules
import logging
from tqdm.autonotebook import tqdm, trange
from sentence_transformers import CrossEncoder, InputExample
from torch.utils.data import DataLoader
from sentence_transformers.cross_encoder.evaluation import CEBinaryClassificationEvaluator
from sklearn.model_selection import train_test_split
import optuna
import pandas as pd
import torch

tqdm

  from tqdm.autonotebook import tqdm, trange


In [None]:
# load data
# the CSV is ';'-separated; 'title_description' is "<title>: <description>" for every row
df_base = pd.read_csv(filepath_data, sep=';').assign(
    title_description=lambda frame: frame['title'] + ': ' + frame['description']
)

In [None]:
# set logging: INFO level, "<timestamp> - <level> - <message>" format on the root logger
# force=True removes handlers already attached to the root logger so this configuration
# takes effect even if logging was configured earlier in the session
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

In [None]:
# get data in structure: (goal, title_description, final_label) tuples;
# each row of df_base is emitted twice, once per text order
dataset = get_dataset_from_df('goal', 'title_description', 'final_label', df_base)  #TODO

In [None]:
# callback handed to `model.fit`: remembers and logs the value it is called with
current_loss = None


def simple_callback(loss, epoch, steps):
    """Store the reported value in the module-level ``current_loss`` and log it.

    NOTE(review): in the recorded run the logged value equals the evaluator's
    Average Precision, so "loss" looks like a misnomer -- confirm.
    """
    global current_loss
    current_loss = loss
    logging.info(f"Loss: {loss}, epoch: {epoch}, steps: {steps}")

In [None]:
# check for gpu: prefer Apple MPS, then CUDA, otherwise fall back to the CPU
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
def objective(trial):
    """Optuna objective: fine-tune one CrossEncoder and return its validation score.

    Samples learning rate, batch size, epoch count, warmup steps, weight decay
    and classifier dropout from ``trial``, trains
    ``cross-encoder/stsb-roberta-base`` on a 90/10 split of the module-level
    ``dataset`` and scores the final weights with a
    ``CEBinaryClassificationEvaluator`` on the held-out part. After every
    end-of-epoch evaluation the score is reported to Optuna so the study's
    pruner can stop unpromising trials early.

    Args:
        trial: The Optuna trial to sample hyperparameters from and report to.

    Returns:
        The value returned by the evaluator after training.

    Raises:
        optuna.TrialPruned: If the pruner stops the trial after an epoch.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    # Bound before the ``try`` so the ``finally`` clause cannot hit an unbound
    # name when model construction itself fails.
    model = None
    try:
        # Log the start of a trial with the trial number
        logging.info(f"Trial {trial.number}")

        # Hyperparameters are sampled before the model is built because the dropout
        # value has to be passed to the constructor. Names, ranges and sampling order
        # are unchanged so the persisted study stays compatible.
        # NOTE(review): the learning-rate upper bound (1e-1) is very high for
        # fine-tuning; several recorded trials end at an identical score -- consider
        # narrowing the range in a fresh study.
        learning_rate = trial.suggest_float("learning_rate", 5e-5, 1e-1, log=True)
        batch_size = trial.suggest_int("batch_size", 2, 16)
        num_epochs = trial.suggest_int("num_epochs", 1, 20)
        warmup_steps = trial.suggest_int("warmup_steps", 0, 1000)
        weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-1, log=True)
        dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5)

        # Pass the dropout through the constructor: assigning ``model.dropout_rate``
        # afterwards only sets an unused attribute and leaves the network unchanged.
        model = CrossEncoder('cross-encoder/stsb-roberta-base', num_labels=1, device=device,
                             classifier_dropout=dropout_rate)
        logging.info("Model created")

        # Log all set parameters for debugging purposes
        logging.info(
            f"Learning rate: {learning_rate}, batch size: {batch_size}, num epochs: {num_epochs}, warmup steps: {warmup_steps}, weight decay: {weight_decay}, dropout rate: {dropout_rate}")

        # Fixed-seed 90/10 split so every trial sees the same validation set.
        # NOTE(review): ``dataset`` holds each pair in both text orders, and this split
        # works on single items, so a mirrored copy of a validation pair can end up in
        # the training set -- TODO: split per source row to avoid leakage.
        train_data, val_data = train_test_split(dataset, test_size=0.1, random_state=347657)

        # Prepare DataLoader for training data
        train_examples = [InputExample(texts=[t[0], t[1]], label=t[2]) for t in train_data]
        train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=batch_size)

        # Prepare validation data for the evaluator
        val_sentence_pairs = [[t[0], t[1]] for t in val_data]
        labels = [t[2] for t in val_data]
        evaluator = CEBinaryClassificationEvaluator(sentence_pairs=val_sentence_pairs, labels=labels, name='goal_eval',
                                                    write_csv=True, show_progress_bar=True)

        output_dir = modelpath + 'model_trial_2_' + str(trial.number)

        def report_epoch(score, epoch, steps):
            """Log the evaluation score and let Optuna decide whether to prune."""
            # Keep the notebook-level logging and ``current_loss`` bookkeeping.
            simple_callback(score, epoch, steps)
            # In the recorded logs ``steps == -1`` marks the end-of-epoch evaluation;
            # only those are reported so each epoch maps to exactly one Optuna step.
            if steps == -1:
                trial.report(score, step=epoch)
                if trial.should_prune():
                    logging.info("Trial pruned")
                    raise optuna.TrialPruned()

        # Fit the model with training data, training configurations, and evaluation settings
        model.fit(train_dataloader=train_dataloader,
                  epochs=num_epochs,
                  warmup_steps=warmup_steps,
                  output_path=output_dir,
                  optimizer_params={'lr': learning_rate, 'weight_decay': weight_decay},
                  show_progress_bar=True,
                  evaluator=evaluator,
                  evaluation_steps=1000,
                  save_best_model=True,
                  callback=report_epoch
                  )
        logging.info("Model fitted")

        # Score the weights as they are after the last epoch.
        # NOTE(review): in the recorded run this value equals the logged Average
        # Precision rather than the accuracy, and the best checkpoint written by
        # ``save_best_model`` may differ from these final weights -- confirm which
        # number the study is meant to optimise.
        accuracy = evaluator(model, output_path=output_dir)
        logging.info(f"Accuracy: {accuracy}")
        # Always return the score: a legitimate 0.0 must not turn into ``None``.
        return accuracy
    except optuna.TrialPruned:
        # Pruning is a normal outcome, not an error; let Optuna record it.
        raise
    except Exception as e:
        # Log any exceptions that occur during the trial
        logging.error(f"Error: {e}")
        raise
    finally:
        # Drop the model reference and release cached accelerator memory after each trial
        del model
        if device == "cuda":
            torch.cuda.empty_cache()
        elif device == "mps":
            torch.mps.empty_cache()


TODO: Edit the SQLite path so that it points to Drive.


In [None]:
# SQLite URL of the Optuna study database, stored next to the trial models
storage_path = f'sqlite:///{modelpath}db_max_qwen.sqlite3'

In [None]:
# create the study in the SQLite storage, or resume it if a study with this name already
# exists there (load_if_exists=True); larger objective values count as better
study = optuna.create_study(storage=storage_path, study_name="qwen_data_hpo_max_dc_d", load_if_exists=True, direction="maximize")
# run 12 trials of `objective` in this session
study.optimize(objective, n_trials=12)

[I 2024-06-27 07:34:48,326] A new study created in RDB with name: qwen_data_hpo_max_dc_d
2024-06-27 07:34:48,376 - INFO - Trial 0
2024-06-27 07:34:48,378 - INFO - Model created
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

2024-06-27 07:34:55,610 - INFO - Learning rate: 0.00037381017805452395, batch size: 10, num epochs: 15, warmup steps: 814, weight decay: 0.0022725472851924233, dropout rate: 0.1937256725211224


Epoch:   0%|          | 0/15 [00:00<?, ?it/s]

Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:36:35,074 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:36:39,741 - INFO - Accuracy:           77.69	(Threshold: 0.4021)
2024-06-27 07:36:39,743 - INFO - F1:                 86.01	(Threshold: 0.4021)
2024-06-27 07:36:39,745 - INFO - Precision:          76.85
2024-06-27 07:36:39,746 - INFO - Recall:             97.65
2024-06-27 07:36:39,748 - INFO - Average Precision:  89.60

2024-06-27 07:36:39,757 - INFO - Loss: 0.896007252147228, epoch: 0, steps: -1
2024-06-27 07:36:39,759 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_0


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:38:23,378 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 1:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:38:27,883 - INFO - Accuracy:           85.12	(Threshold: 0.3377)
2024-06-27 07:38:27,884 - INFO - F1:                 89.77	(Threshold: 0.2842)
2024-06-27 07:38:27,886 - INFO - Precision:          86.81
2024-06-27 07:38:27,888 - INFO - Recall:             92.94
2024-06-27 07:38:27,889 - INFO - Average Precision:  90.24

2024-06-27 07:38:27,898 - INFO - Loss: 0.9024303692339843, epoch: 1, steps: -1
2024-06-27 07:38:27,899 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_0


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:40:12,937 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 2:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:40:17,375 - INFO - Accuracy:           70.25	(Threshold: 0.6769)
2024-06-27 07:40:17,376 - INFO - F1:                 82.35	(Threshold: 0.6769)
2024-06-27 07:40:17,381 - INFO - Precision:          70.59
2024-06-27 07:40:17,382 - INFO - Recall:             98.82
2024-06-27 07:40:17,385 - INFO - Average Precision:  71.23

2024-06-27 07:40:17,392 - INFO - Loss: 0.7123197257435528, epoch: 2, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:41:57,997 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 3:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:42:02,439 - INFO - Accuracy:           69.42	(Threshold: 0.6167)
2024-06-27 07:42:02,441 - INFO - F1:                 81.95	(Threshold: 0.6167)
2024-06-27 07:42:02,443 - INFO - Precision:          70.00
2024-06-27 07:42:02,445 - INFO - Recall:             98.82
2024-06-27 07:42:02,446 - INFO - Average Precision:  66.07

2024-06-27 07:42:02,453 - INFO - Loss: 0.6606949971009006, epoch: 3, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:43:43,134 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 4:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:43:47,576 - INFO - Accuracy:           72.73	(Threshold: 0.7204)
2024-06-27 07:43:47,578 - INFO - F1:                 83.42	(Threshold: 0.7204)
2024-06-27 07:43:47,581 - INFO - Precision:          72.81
2024-06-27 07:43:47,583 - INFO - Recall:             97.65
2024-06-27 07:43:47,585 - INFO - Average Precision:  75.02

2024-06-27 07:43:47,593 - INFO - Loss: 0.7502138691816406, epoch: 4, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:45:28,118 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 5:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:45:32,563 - INFO - Accuracy:           72.73	(Threshold: 0.7172)
2024-06-27 07:45:32,565 - INFO - F1:                 83.58	(Threshold: 0.7172)
2024-06-27 07:45:32,568 - INFO - Precision:          72.41
2024-06-27 07:45:32,569 - INFO - Recall:             98.82
2024-06-27 07:45:32,571 - INFO - Average Precision:  76.07

2024-06-27 07:45:32,585 - INFO - Loss: 0.7606837441887078, epoch: 5, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:47:13,119 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 6:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:47:17,553 - INFO - Accuracy:           71.07	(Threshold: 0.6723)
2024-06-27 07:47:17,555 - INFO - F1:                 82.93	(Threshold: 0.6723)
2024-06-27 07:47:17,557 - INFO - Precision:          70.83
2024-06-27 07:47:17,559 - INFO - Recall:             100.00
2024-06-27 07:47:17,560 - INFO - Average Precision:  74.60

2024-06-27 07:47:17,568 - INFO - Loss: 0.7460322438407446, epoch: 6, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:48:58,130 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 7:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:49:02,566 - INFO - Accuracy:           72.73	(Threshold: 0.6356)
2024-06-27 07:49:02,567 - INFO - F1:                 83.74	(Threshold: 0.6356)
2024-06-27 07:49:02,570 - INFO - Precision:          72.03
2024-06-27 07:49:02,571 - INFO - Recall:             100.00
2024-06-27 07:49:02,574 - INFO - Average Precision:  75.27

2024-06-27 07:49:02,583 - INFO - Loss: 0.7526852911735211, epoch: 7, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:50:43,154 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 8:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:50:47,601 - INFO - Accuracy:           70.25	(Threshold: 0.7233)
2024-06-27 07:50:47,603 - INFO - F1:                 82.00	(Threshold: 0.7233)
2024-06-27 07:50:47,606 - INFO - Precision:          71.30
2024-06-27 07:50:47,609 - INFO - Recall:             96.47
2024-06-27 07:50:47,612 - INFO - Average Precision:  69.24

2024-06-27 07:50:47,621 - INFO - Loss: 0.692361543981658, epoch: 8, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:52:28,169 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 9:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:52:32,615 - INFO - Accuracy:           71.90	(Threshold: 0.7468)
2024-06-27 07:52:32,616 - INFO - F1:                 83.17	(Threshold: 0.7468)
2024-06-27 07:52:32,619 - INFO - Precision:          71.79
2024-06-27 07:52:32,620 - INFO - Recall:             98.82
2024-06-27 07:52:32,622 - INFO - Average Precision:  74.08

2024-06-27 07:52:32,630 - INFO - Loss: 0.7408436483302794, epoch: 9, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:54:13,159 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 10:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:54:17,591 - INFO - Accuracy:           72.73	(Threshold: 0.6114)
2024-06-27 07:54:17,594 - INFO - F1:                 83.17	(Threshold: 0.6114)
2024-06-27 07:54:17,596 - INFO - Precision:          71.79
2024-06-27 07:54:17,597 - INFO - Recall:             98.82
2024-06-27 07:54:17,599 - INFO - Average Precision:  76.60

2024-06-27 07:54:17,609 - INFO - Loss: 0.765994335064869, epoch: 10, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:55:58,231 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 11:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:56:02,606 - INFO - Accuracy:           71.07	(Threshold: 0.6288)
2024-06-27 07:56:02,607 - INFO - F1:                 82.00	(Threshold: 0.6288)
2024-06-27 07:56:02,610 - INFO - Precision:          71.30
2024-06-27 07:56:02,612 - INFO - Recall:             96.47
2024-06-27 07:56:02,613 - INFO - Average Precision:  79.20

2024-06-27 07:56:02,621 - INFO - Loss: 0.7919661552119119, epoch: 11, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:57:43,183 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 12:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:57:47,615 - INFO - Accuracy:           72.73	(Threshold: 0.6216)
2024-06-27 07:57:47,617 - INFO - F1:                 83.58	(Threshold: 0.6216)
2024-06-27 07:57:47,619 - INFO - Precision:          72.41
2024-06-27 07:57:47,620 - INFO - Recall:             98.82
2024-06-27 07:57:47,623 - INFO - Average Precision:  74.51

2024-06-27 07:57:47,640 - INFO - Loss: 0.7450722394220847, epoch: 12, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 07:59:28,114 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 13:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 07:59:32,543 - INFO - Accuracy:           73.55	(Threshold: 0.6578)
2024-06-27 07:59:32,545 - INFO - F1:                 83.84	(Threshold: 0.6578)
2024-06-27 07:59:32,547 - INFO - Precision:          73.45
2024-06-27 07:59:32,550 - INFO - Recall:             97.65
2024-06-27 07:59:32,552 - INFO - Average Precision:  77.57

2024-06-27 07:59:32,561 - INFO - Loss: 0.7756700168514143, epoch: 13, steps: -1


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

2024-06-27 08:01:13,160 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 14:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:01:17,584 - INFO - Accuracy:           75.21	(Threshold: 0.6464)
2024-06-27 08:01:17,586 - INFO - F1:                 83.52	(Threshold: 0.6464)
2024-06-27 08:01:17,588 - INFO - Precision:          78.35
2024-06-27 08:01:17,590 - INFO - Recall:             89.41
2024-06-27 08:01:17,592 - INFO - Average Precision:  79.46

2024-06-27 08:01:17,600 - INFO - Loss: 0.7946215400760855, epoch: 14, steps: -1
2024-06-27 08:01:17,611 - INFO - Model fitted
2024-06-27 08:01:17,613 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:01:21,429 - INFO - Accuracy:           75.21	(Threshold: 0.6464)
2024-06-27 08:01:21,430 - INFO - F1:                 83.52	(Threshold: 0.6464)
2024-06-27 08:01:21,432 - INFO - Precision:          78.35
2024-06-27 08:01:21,435 - INFO - Recall:             89.41
2024-06-27 08:01:21,437 - INFO - Average Precision:  79.46

2024-06-27 08:01:21,444 - INFO - Accuracy: 0.7946215400760855
2024-06-27 08:01:21,465 - INFO - Trial pruned
[I 2024-06-27 08:01:21,660] Trial 0 finished with value: 0.7946215400760855 and parameters: {'learning_rate': 0.00037381017805452395, 'batch_size': 10, 'num_epochs': 15, 'warmup_steps': 814, 'weight_decay': 0.0022725472851924233, 'dropout_rate': 0.1937256725211224}. Best is trial 0 with value: 0.7946215400760855.
2024-06-27 08:01:21,697 - INFO - Trial 1
2024-06-27 08:01:21,701 - INFO - Model created
2024-06-27 08:01:22,966 - INFO - Learning rate: 0.021975276676503706, batch size: 13, num epochs: 3, warmup steps: 785, weight decay: 0.00778851169695488

Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/84 [00:00<?, ?it/s]

2024-06-27 08:03:02,985 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:03:07,463 - INFO - Accuracy:           71.07	(Threshold: 0.5232)
2024-06-27 08:03:07,465 - INFO - F1:                 82.93	(Threshold: 0.5232)
2024-06-27 08:03:07,467 - INFO - Precision:          70.83
2024-06-27 08:03:07,468 - INFO - Recall:             100.00
2024-06-27 08:03:07,471 - INFO - Average Precision:  70.25

2024-06-27 08:03:07,481 - INFO - Loss: 0.7024793388429752, epoch: 0, steps: -1
2024-06-27 08:03:07,483 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_1


Iteration:   0%|          | 0/84 [00:00<?, ?it/s]

2024-06-27 08:04:54,386 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 1:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:04:58,867 - INFO - Accuracy:           71.07	(Threshold: 0.6056)
2024-06-27 08:04:58,868 - INFO - F1:                 82.93	(Threshold: 0.6056)
2024-06-27 08:04:58,872 - INFO - Precision:          70.83
2024-06-27 08:04:58,874 - INFO - Recall:             100.00
2024-06-27 08:04:58,875 - INFO - Average Precision:  70.25

2024-06-27 08:04:58,882 - INFO - Loss: 0.7024793388429752, epoch: 1, steps: -1


Iteration:   0%|          | 0/84 [00:00<?, ?it/s]

2024-06-27 08:06:38,467 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 2:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:06:42,950 - INFO - Accuracy:           71.07	(Threshold: 0.4354)
2024-06-27 08:06:42,951 - INFO - F1:                 82.93	(Threshold: 0.4354)
2024-06-27 08:06:42,954 - INFO - Precision:          70.83
2024-06-27 08:06:42,955 - INFO - Recall:             100.00
2024-06-27 08:06:42,957 - INFO - Average Precision:  71.86

2024-06-27 08:06:42,966 - INFO - Loss: 0.7185652705240106, epoch: 2, steps: -1
2024-06-27 08:06:42,967 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_1
2024-06-27 08:06:49,310 - INFO - Model fitted
2024-06-27 08:06:49,315 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:06:53,103 - INFO - Accuracy:           71.07	(Threshold: 0.4354)
2024-06-27 08:06:53,105 - INFO - F1:                 82.93	(Threshold: 0.4354)
2024-06-27 08:06:53,106 - INFO - Precision:          70.83
2024-06-27 08:06:53,108 - INFO - Recall:             100.00
2024-06-27 08:06:53,109 - INFO - Average Precision:  71.86

2024-06-27 08:06:53,123 - INFO - Accuracy: 0.7185652705240106
2024-06-27 08:06:53,147 - INFO - Trial pruned
[I 2024-06-27 08:06:53,388] Trial 1 finished with value: 0.7185652705240106 and parameters: {'learning_rate': 0.021975276676503706, 'batch_size': 13, 'num_epochs': 3, 'warmup_steps': 785, 'weight_decay': 0.007788511696954886, 'dropout_rate': 0.17100022434781958}. Best is trial 0 with value: 0.7946215400760855.
2024-06-27 08:06:53,426 - INFO - Trial 2
2024-06-27 08:06:53,428 - INFO - Model created
2024-06-27 08:06:54,649 - INFO - Learning rate: 0.09408022153265705, batch size: 13, num epochs: 1, warmup steps: 142, weight decay: 0.0001398909047598677,

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/84 [00:00<?, ?it/s]

2024-06-27 08:08:34,325 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:08:38,799 - INFO - Accuracy:           71.07	(Threshold: 0.9952)
2024-06-27 08:08:38,800 - INFO - F1:                 82.93	(Threshold: 0.9952)
2024-06-27 08:08:38,803 - INFO - Precision:          70.83
2024-06-27 08:08:38,805 - INFO - Recall:             100.00
2024-06-27 08:08:38,808 - INFO - Average Precision:  70.25

2024-06-27 08:08:38,827 - INFO - Loss: 0.7024793388429752, epoch: 0, steps: -1
2024-06-27 08:08:38,829 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_2
2024-06-27 08:08:40,965 - INFO - Model fitted
2024-06-27 08:08:40,968 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:08:44,808 - INFO - Accuracy:           71.07	(Threshold: 0.9952)
2024-06-27 08:08:44,812 - INFO - F1:                 82.93	(Threshold: 0.9952)
2024-06-27 08:08:44,815 - INFO - Precision:          70.83
2024-06-27 08:08:44,818 - INFO - Recall:             100.00
2024-06-27 08:08:44,820 - INFO - Average Precision:  70.25

2024-06-27 08:08:44,830 - INFO - Accuracy: 0.7024793388429752
2024-06-27 08:08:44,850 - INFO - Trial pruned
[I 2024-06-27 08:08:45,064] Trial 2 finished with value: 0.7024793388429752 and parameters: {'learning_rate': 0.09408022153265705, 'batch_size': 13, 'num_epochs': 1, 'warmup_steps': 142, 'weight_decay': 0.0001398909047598677, 'dropout_rate': 0.4300701769825903}. Best is trial 0 with value: 0.7946215400760855.
2024-06-27 08:08:45,099 - INFO - Trial 3
2024-06-27 08:08:45,102 - INFO - Model created
2024-06-27 08:08:46,305 - INFO - Learning rate: 0.002656394238621821, batch size: 7, num epochs: 11, warmup steps: 302, weight decay: 2.7844009162882434e-05

Epoch:   0%|          | 0/11 [00:00<?, ?it/s]

Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:10:33,872 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:10:37,988 - INFO - Accuracy:           71.07	(Threshold: 0.6387)
2024-06-27 08:10:37,990 - INFO - F1:                 82.93	(Threshold: 0.6387)
2024-06-27 08:10:37,995 - INFO - Precision:          70.83
2024-06-27 08:10:37,997 - INFO - Recall:             100.00
2024-06-27 08:10:37,998 - INFO - Average Precision:  70.25

2024-06-27 08:10:38,007 - INFO - Loss: 0.7024793388429752, epoch: 0, steps: -1
2024-06-27 08:10:38,008 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_3


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:12:27,783 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 1:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:12:31,941 - INFO - Accuracy:           71.07	(Threshold: 0.6243)
2024-06-27 08:12:31,943 - INFO - F1:                 82.93	(Threshold: 0.6243)
2024-06-27 08:12:31,945 - INFO - Precision:          70.83
2024-06-27 08:12:31,947 - INFO - Recall:             100.00
2024-06-27 08:12:31,949 - INFO - Average Precision:  70.25

2024-06-27 08:12:31,958 - INFO - Loss: 0.7024793388429752, epoch: 1, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:14:19,088 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 2:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:14:23,250 - INFO - Accuracy:           71.90	(Threshold: 0.4109)
2024-06-27 08:14:23,251 - INFO - F1:                 83.17	(Threshold: 0.4109)
2024-06-27 08:14:23,254 - INFO - Precision:          71.79
2024-06-27 08:14:23,255 - INFO - Recall:             98.82
2024-06-27 08:14:23,257 - INFO - Average Precision:  72.33

2024-06-27 08:14:23,266 - INFO - Loss: 0.7233057851239669, epoch: 2, steps: -1
2024-06-27 08:14:23,267 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_3


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:16:15,083 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 3:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:16:19,220 - INFO - Accuracy:           71.07	(Threshold: 0.6528)
2024-06-27 08:16:19,221 - INFO - F1:                 82.93	(Threshold: 0.6528)
2024-06-27 08:16:19,224 - INFO - Precision:          70.83
2024-06-27 08:16:19,226 - INFO - Recall:             100.00
2024-06-27 08:16:19,228 - INFO - Average Precision:  70.25

2024-06-27 08:16:19,240 - INFO - Loss: 0.7024793388429752, epoch: 3, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:18:06,606 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 4:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:18:10,768 - INFO - Accuracy:           69.42	(Threshold: 0.5788)
2024-06-27 08:18:10,769 - INFO - F1:                 81.95	(Threshold: 0.5788)
2024-06-27 08:18:10,772 - INFO - Precision:          70.00
2024-06-27 08:18:10,774 - INFO - Recall:             98.82
2024-06-27 08:18:10,776 - INFO - Average Precision:  70.97

2024-06-27 08:18:10,786 - INFO - Loss: 0.7096722782151969, epoch: 4, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:19:57,783 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 5:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:20:01,945 - INFO - Accuracy:           71.90	(Threshold: 0.5029)
2024-06-27 08:20:01,947 - INFO - F1:                 83.17	(Threshold: 0.5029)
2024-06-27 08:20:01,949 - INFO - Precision:          71.79
2024-06-27 08:20:01,951 - INFO - Recall:             98.82
2024-06-27 08:20:01,953 - INFO - Average Precision:  70.85

2024-06-27 08:20:01,961 - INFO - Loss: 0.7085183384648626, epoch: 5, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:21:49,207 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 6:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:21:53,373 - INFO - Accuracy:           71.07	(Threshold: 0.7480)
2024-06-27 08:21:53,375 - INFO - F1:                 82.93	(Threshold: 0.7480)
2024-06-27 08:21:53,377 - INFO - Precision:          70.83
2024-06-27 08:21:53,379 - INFO - Recall:             100.00
2024-06-27 08:21:53,381 - INFO - Average Precision:  70.97

2024-06-27 08:21:53,390 - INFO - Loss: 0.7096722782151969, epoch: 6, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:23:40,667 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 7:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:23:44,819 - INFO - Accuracy:           69.42	(Threshold: 0.6511)
2024-06-27 08:23:44,821 - INFO - F1:                 81.95	(Threshold: 0.6511)
2024-06-27 08:23:44,826 - INFO - Precision:          70.00
2024-06-27 08:23:44,827 - INFO - Recall:             98.82
2024-06-27 08:23:44,829 - INFO - Average Precision:  68.77

2024-06-27 08:23:44,841 - INFO - Loss: 0.6876558888018016, epoch: 7, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:25:31,909 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 8:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:25:36,081 - INFO - Accuracy:           71.07	(Threshold: 0.6681)
2024-06-27 08:25:36,083 - INFO - F1:                 82.93	(Threshold: 0.6681)
2024-06-27 08:25:36,085 - INFO - Precision:          70.83
2024-06-27 08:25:36,086 - INFO - Recall:             100.00
2024-06-27 08:25:36,087 - INFO - Average Precision:  70.25

2024-06-27 08:25:36,098 - INFO - Loss: 0.7024793388429752, epoch: 8, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:27:23,185 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 9:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:27:27,315 - INFO - Accuracy:           69.42	(Threshold: 0.5721)
2024-06-27 08:27:27,316 - INFO - F1:                 81.95	(Threshold: 0.5721)
2024-06-27 08:27:27,318 - INFO - Precision:          70.00
2024-06-27 08:27:27,320 - INFO - Recall:             98.82
2024-06-27 08:27:27,321 - INFO - Average Precision:  71.48

2024-06-27 08:27:27,332 - INFO - Loss: 0.7147517960352184, epoch: 9, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:29:14,324 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 10:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:29:18,453 - INFO - Accuracy:           71.07	(Threshold: 0.6499)
2024-06-27 08:29:18,454 - INFO - F1:                 82.93	(Threshold: 0.6499)
2024-06-27 08:29:18,456 - INFO - Precision:          70.83
2024-06-27 08:29:18,458 - INFO - Recall:             100.00
2024-06-27 08:29:18,459 - INFO - Average Precision:  72.61

2024-06-27 08:29:18,470 - INFO - Loss: 0.7261147301895965, epoch: 10, steps: -1
2024-06-27 08:29:18,471 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_3
2024-06-27 08:29:22,590 - INFO - Model fitted
2024-06-27 08:29:22,593 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:29:26,475 - INFO - Accuracy:           71.07	(Threshold: 0.6499)
2024-06-27 08:29:26,481 - INFO - F1:                 82.93	(Threshold: 0.6499)
2024-06-27 08:29:26,484 - INFO - Precision:          70.83
2024-06-27 08:29:26,485 - INFO - Recall:             100.00
2024-06-27 08:29:26,487 - INFO - Average Precision:  72.61

2024-06-27 08:29:26,498 - INFO - Accuracy: 0.7261147301895965
2024-06-27 08:29:26,524 - INFO - Trial pruned
[I 2024-06-27 08:29:26,725] Trial 3 finished with value: 0.7261147301895965 and parameters: {'learning_rate': 0.002656394238621821, 'batch_size': 7, 'num_epochs': 11, 'warmup_steps': 302, 'weight_decay': 2.7844009162882434e-05, 'dropout_rate': 0.13027091281787084}. Best is trial 0 with value: 0.7946215400760855.
2024-06-27 08:29:26,758 - INFO - Trial 4
2024-06-27 08:29:26,763 - INFO - Model created
2024-06-27 08:29:27,982 - INFO - Learning rate: 0.002388253570161409, batch size: 7, num epochs: 11, warmup steps: 445, weight decay: 0.00013273613527069

Epoch:   0%|          | 0/11 [00:00<?, ?it/s]

Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:31:14,945 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:31:19,094 - INFO - Accuracy:           71.07	(Threshold: 0.5462)
2024-06-27 08:31:19,095 - INFO - F1:                 82.93	(Threshold: 0.5462)
2024-06-27 08:31:19,097 - INFO - Precision:          70.83
2024-06-27 08:31:19,099 - INFO - Recall:             100.00
2024-06-27 08:31:19,100 - INFO - Average Precision:  71.59

2024-06-27 08:31:19,112 - INFO - Loss: 0.7158830023277446, epoch: 0, steps: -1
2024-06-27 08:31:19,113 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_4


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:33:08,037 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 1:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:33:12,131 - INFO - Accuracy:           71.07	(Threshold: 0.7267)
2024-06-27 08:33:12,132 - INFO - F1:                 82.76	(Threshold: 0.7267)
2024-06-27 08:33:12,134 - INFO - Precision:          71.19
2024-06-27 08:33:12,136 - INFO - Recall:             98.82
2024-06-27 08:33:12,137 - INFO - Average Precision:  70.54

2024-06-27 08:33:12,150 - INFO - Loss: 0.7053726646179885, epoch: 1, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:34:58,985 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 2:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:35:03,131 - INFO - Accuracy:           69.42	(Threshold: 0.5853)
2024-06-27 08:35:03,133 - INFO - F1:                 81.95	(Threshold: 0.5853)
2024-06-27 08:35:03,136 - INFO - Precision:          70.00
2024-06-27 08:35:03,139 - INFO - Recall:             98.82
2024-06-27 08:35:03,141 - INFO - Average Precision:  72.01

2024-06-27 08:35:03,149 - INFO - Loss: 0.7201383639744448, epoch: 2, steps: -1
2024-06-27 08:35:03,150 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_4


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:36:56,701 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 3:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:37:00,841 - INFO - Accuracy:           71.07	(Threshold: 0.7646)
2024-06-27 08:37:00,844 - INFO - F1:                 82.93	(Threshold: 0.7646)
2024-06-27 08:37:00,845 - INFO - Precision:          70.83
2024-06-27 08:37:00,846 - INFO - Recall:             100.00
2024-06-27 08:37:00,850 - INFO - Average Precision:  70.77

2024-06-27 08:37:00,859 - INFO - Loss: 0.7077331440988499, epoch: 3, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:38:47,390 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 4:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:38:51,507 - INFO - Accuracy:           69.42	(Threshold: 0.6356)
2024-06-27 08:38:51,513 - INFO - F1:                 81.95	(Threshold: 0.6356)
2024-06-27 08:38:51,514 - INFO - Precision:          70.00
2024-06-27 08:38:51,516 - INFO - Recall:             98.82
2024-06-27 08:38:51,520 - INFO - Average Precision:  68.85

2024-06-27 08:38:51,530 - INFO - Loss: 0.688474749578885, epoch: 4, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:40:38,116 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 5:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:40:42,231 - INFO - Accuracy:           70.25	(Threshold: 0.6182)
2024-06-27 08:40:42,232 - INFO - F1:                 82.35	(Threshold: 0.6182)
2024-06-27 08:40:42,236 - INFO - Precision:          70.59
2024-06-27 08:40:42,238 - INFO - Recall:             98.82
2024-06-27 08:40:42,240 - INFO - Average Precision:  70.23

2024-06-27 08:40:42,250 - INFO - Loss: 0.7023259983939443, epoch: 5, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:42:28,767 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 6:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:42:32,881 - INFO - Accuracy:           69.42	(Threshold: 0.6472)
2024-06-27 08:42:32,882 - INFO - F1:                 81.95	(Threshold: 0.6472)
2024-06-27 08:42:32,884 - INFO - Precision:          70.00
2024-06-27 08:42:32,887 - INFO - Recall:             98.82
2024-06-27 08:42:32,889 - INFO - Average Precision:  71.69

2024-06-27 08:42:32,899 - INFO - Loss: 0.7169245662038036, epoch: 6, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:44:19,887 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 7:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:44:24,014 - INFO - Accuracy:           69.42	(Threshold: 0.7634)
2024-06-27 08:44:24,015 - INFO - F1:                 81.95	(Threshold: 0.7634)
2024-06-27 08:44:24,017 - INFO - Precision:          70.00
2024-06-27 08:44:24,018 - INFO - Recall:             98.82
2024-06-27 08:44:24,020 - INFO - Average Precision:  70.38

2024-06-27 08:44:24,029 - INFO - Loss: 0.703811097855211, epoch: 7, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:46:10,665 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 8:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:46:14,787 - INFO - Accuracy:           71.07	(Threshold: 0.6185)
2024-06-27 08:46:14,789 - INFO - F1:                 82.93	(Threshold: 0.6185)
2024-06-27 08:46:14,791 - INFO - Precision:          70.83
2024-06-27 08:46:14,793 - INFO - Recall:             100.00
2024-06-27 08:46:14,794 - INFO - Average Precision:  72.01

2024-06-27 08:46:14,801 - INFO - Loss: 0.7200906474279772, epoch: 8, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:48:01,550 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 9:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:48:05,626 - INFO - Accuracy:           71.90	(Threshold: 0.5758)
2024-06-27 08:48:05,627 - INFO - F1:                 83.17	(Threshold: 0.5758)
2024-06-27 08:48:05,629 - INFO - Precision:          71.79
2024-06-27 08:48:05,631 - INFO - Recall:             98.82
2024-06-27 08:48:05,632 - INFO - Average Precision:  71.25

2024-06-27 08:48:05,642 - INFO - Loss: 0.7125466925917593, epoch: 9, steps: -1


Iteration:   0%|          | 0/156 [00:00<?, ?it/s]

2024-06-27 08:49:52,359 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 10:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:49:56,482 - INFO - Accuracy:           71.90	(Threshold: 0.5936)
2024-06-27 08:49:56,483 - INFO - F1:                 83.17	(Threshold: 0.5936)
2024-06-27 08:49:56,486 - INFO - Precision:          71.79
2024-06-27 08:49:56,489 - INFO - Recall:             98.82
2024-06-27 08:49:56,491 - INFO - Average Precision:  71.09

2024-06-27 08:49:56,500 - INFO - Loss: 0.7108509137920903, epoch: 10, steps: -1
2024-06-27 08:49:56,508 - INFO - Model fitted
2024-06-27 08:49:56,513 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:50:00,348 - INFO - Accuracy:           71.90	(Threshold: 0.5936)
2024-06-27 08:50:00,349 - INFO - F1:                 83.17	(Threshold: 0.5936)
2024-06-27 08:50:00,354 - INFO - Precision:          71.79
2024-06-27 08:50:00,356 - INFO - Recall:             98.82
2024-06-27 08:50:00,358 - INFO - Average Precision:  71.09

2024-06-27 08:50:00,365 - INFO - Accuracy: 0.7108509137920903
2024-06-27 08:50:00,384 - INFO - Trial pruned
[I 2024-06-27 08:50:00,551] Trial 4 finished with value: 0.7108509137920903 and parameters: {'learning_rate': 0.002388253570161409, 'batch_size': 7, 'num_epochs': 11, 'warmup_steps': 445, 'weight_decay': 0.00013273613527069305, 'dropout_rate': 0.12465654510142805}. Best is trial 0 with value: 0.7946215400760855.
2024-06-27 08:50:00,582 - INFO - Trial 5
2024-06-27 08:50:00,583 - INFO - Model created
2024-06-27 08:50:01,635 - INFO - Learning rate: 0.0002570707166954449, batch size: 15, num epochs: 6, warmup steps: 909, weight decay: 0.03516699667276541

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Iteration:   0%|          | 0/73 [00:00<?, ?it/s]

2024-06-27 08:51:40,962 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:51:45,478 - INFO - Accuracy:           76.86	(Threshold: 0.4444)
2024-06-27 08:51:45,480 - INFO - F1:                 84.97	(Threshold: 0.3674)
2024-06-27 08:51:45,483 - INFO - Precision:          75.93
2024-06-27 08:51:45,485 - INFO - Recall:             96.47
2024-06-27 08:51:45,488 - INFO - Average Precision:  88.30

2024-06-27 08:51:45,499 - INFO - Loss: 0.8830096985248443, epoch: 0, steps: -1
2024-06-27 08:51:45,500 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_5


Iteration:   0%|          | 0/73 [00:00<?, ?it/s]

2024-06-27 08:53:27,377 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 1:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:53:31,934 - INFO - Accuracy:           85.12	(Threshold: 0.1416)
2024-06-27 08:53:31,935 - INFO - F1:                 90.00	(Threshold: 0.1362)
2024-06-27 08:53:31,937 - INFO - Precision:          85.26
2024-06-27 08:53:31,939 - INFO - Recall:             95.29
2024-06-27 08:53:31,941 - INFO - Average Precision:  96.48

2024-06-27 08:53:31,951 - INFO - Loss: 0.9647595132745537, epoch: 1, steps: -1
2024-06-27 08:53:31,952 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_5


Iteration:   0%|          | 0/73 [00:00<?, ?it/s]

2024-06-27 08:55:13,396 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 2:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:55:17,919 - INFO - Accuracy:           90.91	(Threshold: 0.5179)
2024-06-27 08:55:17,920 - INFO - F1:                 93.71	(Threshold: 0.5179)
2024-06-27 08:55:17,923 - INFO - Precision:          91.11
2024-06-27 08:55:17,925 - INFO - Recall:             96.47
2024-06-27 08:55:17,928 - INFO - Average Precision:  96.31

2024-06-27 08:55:17,938 - INFO - Loss: 0.9630768432312187, epoch: 2, steps: -1


Iteration:   0%|          | 0/73 [00:00<?, ?it/s]

2024-06-27 08:56:57,069 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 3:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:57:01,582 - INFO - Accuracy:           85.12	(Threshold: 0.7590)
2024-06-27 08:57:01,584 - INFO - F1:                 89.29	(Threshold: 0.0582)
2024-06-27 08:57:01,586 - INFO - Precision:          90.36
2024-06-27 08:57:01,588 - INFO - Recall:             88.24
2024-06-27 08:57:01,590 - INFO - Average Precision:  96.10

2024-06-27 08:57:01,604 - INFO - Loss: 0.960957617290817, epoch: 3, steps: -1


Iteration:   0%|          | 0/73 [00:00<?, ?it/s]

2024-06-27 08:58:40,671 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 4:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 08:58:45,185 - INFO - Accuracy:           85.95	(Threshold: 0.1917)
2024-06-27 08:58:45,186 - INFO - F1:                 89.57	(Threshold: 0.1586)
2024-06-27 08:58:45,188 - INFO - Precision:          93.59
2024-06-27 08:58:45,190 - INFO - Recall:             85.88
2024-06-27 08:58:45,191 - INFO - Average Precision:  96.28

2024-06-27 08:58:45,201 - INFO - Loss: 0.9627774640473461, epoch: 4, steps: -1


Iteration:   0%|          | 0/73 [00:00<?, ?it/s]

2024-06-27 09:00:24,280 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 5:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:00:28,802 - INFO - Accuracy:           84.30	(Threshold: 0.1904)
2024-06-27 09:00:28,805 - INFO - F1:                 88.62	(Threshold: 0.1904)
2024-06-27 09:00:28,807 - INFO - Precision:          90.24
2024-06-27 09:00:28,809 - INFO - Recall:             87.06
2024-06-27 09:00:28,814 - INFO - Average Precision:  89.47

2024-06-27 09:00:28,821 - INFO - Loss: 0.8947331337996255, epoch: 5, steps: -1
2024-06-27 09:00:28,828 - INFO - Model fitted
2024-06-27 09:00:28,832 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:00:32,755 - INFO - Accuracy:           84.30	(Threshold: 0.1904)
2024-06-27 09:00:32,757 - INFO - F1:                 88.62	(Threshold: 0.1904)
2024-06-27 09:00:32,761 - INFO - Precision:          90.24
2024-06-27 09:00:32,764 - INFO - Recall:             87.06
2024-06-27 09:00:32,766 - INFO - Average Precision:  89.47

2024-06-27 09:00:32,773 - INFO - Accuracy: 0.8947331337996255
2024-06-27 09:00:32,806 - INFO - Trial pruned
[I 2024-06-27 09:00:33,024] Trial 5 finished with value: 0.8947331337996255 and parameters: {'learning_rate': 0.0002570707166954449, 'batch_size': 15, 'num_epochs': 6, 'warmup_steps': 909, 'weight_decay': 0.03516699667276541, 'dropout_rate': 0.17491498077496714}. Best is trial 5 with value: 0.8947331337996255.
2024-06-27 09:00:33,056 - INFO - Trial 6
2024-06-27 09:00:33,057 - INFO - Model created
2024-06-27 09:00:34,108 - INFO - Learning rate: 0.0007539713352954273, batch size: 4, num epochs: 15, warmup steps: 970, weight decay: 0.0002886303928657176

Epoch:   0%|          | 0/15 [00:00<?, ?it/s]

Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:02:24,637 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:02:28,496 - INFO - Accuracy:           69.42	(Threshold: 0.6254)
2024-06-27 09:02:28,497 - INFO - F1:                 81.95	(Threshold: 0.6254)
2024-06-27 09:02:28,499 - INFO - Precision:          70.00
2024-06-27 09:02:28,501 - INFO - Recall:             98.82
2024-06-27 09:02:28,502 - INFO - Average Precision:  71.85

2024-06-27 09:02:28,512 - INFO - Loss: 0.7185340798627502, epoch: 0, steps: -1
2024-06-27 09:02:28,515 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_6


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:04:21,351 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 1:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:04:25,208 - INFO - Accuracy:           69.42	(Threshold: 0.6600)
2024-06-27 09:04:25,209 - INFO - F1:                 81.95	(Threshold: 0.6600)
2024-06-27 09:04:25,212 - INFO - Precision:          70.00
2024-06-27 09:04:25,213 - INFO - Recall:             98.82
2024-06-27 09:04:25,215 - INFO - Average Precision:  72.01

2024-06-27 09:04:25,222 - INFO - Loss: 0.7200690451336464, epoch: 1, steps: -1
2024-06-27 09:04:25,223 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_6


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:06:18,459 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 2:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:06:22,308 - INFO - Accuracy:           71.07	(Threshold: 0.7879)
2024-06-27 09:06:22,310 - INFO - F1:                 82.93	(Threshold: 0.7879)
2024-06-27 09:06:22,312 - INFO - Precision:          70.83
2024-06-27 09:06:22,313 - INFO - Recall:             100.00
2024-06-27 09:06:22,315 - INFO - Average Precision:  70.02

2024-06-27 09:06:22,328 - INFO - Loss: 0.700153648418733, epoch: 2, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:08:12,558 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 3:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:08:16,403 - INFO - Accuracy:           71.07	(Threshold: 0.7334)
2024-06-27 09:08:16,404 - INFO - F1:                 82.93	(Threshold: 0.7334)
2024-06-27 09:08:16,406 - INFO - Precision:          70.83
2024-06-27 09:08:16,408 - INFO - Recall:             100.00
2024-06-27 09:08:16,409 - INFO - Average Precision:  71.92

2024-06-27 09:08:16,415 - INFO - Loss: 0.7191651695160823, epoch: 3, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:10:07,010 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 4:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:10:10,851 - INFO - Accuracy:           71.07	(Threshold: 0.7715)
2024-06-27 09:10:10,853 - INFO - F1:                 82.93	(Threshold: 0.7715)
2024-06-27 09:10:10,856 - INFO - Precision:          70.83
2024-06-27 09:10:10,858 - INFO - Recall:             100.00
2024-06-27 09:10:10,860 - INFO - Average Precision:  70.18

2024-06-27 09:10:10,871 - INFO - Loss: 0.7018400206985596, epoch: 4, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:12:00,884 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 5:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:12:04,700 - INFO - Accuracy:           71.07	(Threshold: 0.5782)
2024-06-27 09:12:04,702 - INFO - F1:                 82.76	(Threshold: 0.5782)
2024-06-27 09:12:04,708 - INFO - Precision:          71.19
2024-06-27 09:12:04,710 - INFO - Recall:             98.82
2024-06-27 09:12:04,711 - INFO - Average Precision:  71.38

2024-06-27 09:12:04,721 - INFO - Loss: 0.7137978898897024, epoch: 5, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:13:55,034 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 6:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:13:58,875 - INFO - Accuracy:           71.07	(Threshold: 0.6875)
2024-06-27 09:13:58,876 - INFO - F1:                 82.93	(Threshold: 0.6875)
2024-06-27 09:13:58,877 - INFO - Precision:          70.83
2024-06-27 09:13:58,881 - INFO - Recall:             100.00
2024-06-27 09:13:58,882 - INFO - Average Precision:  71.01

2024-06-27 09:13:58,896 - INFO - Loss: 0.7101224946707148, epoch: 6, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:15:49,224 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 7:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:15:53,051 - INFO - Accuracy:           71.07	(Threshold: 0.6850)
2024-06-27 09:15:53,052 - INFO - F1:                 82.93	(Threshold: 0.6850)
2024-06-27 09:15:53,054 - INFO - Precision:          70.83
2024-06-27 09:15:53,056 - INFO - Recall:             100.00
2024-06-27 09:15:53,057 - INFO - Average Precision:  73.15

2024-06-27 09:15:53,067 - INFO - Loss: 0.73150695324108, epoch: 7, steps: -1
2024-06-27 09:15:53,070 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_6


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:17:46,141 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 8:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:17:49,993 - INFO - Accuracy:           71.07	(Threshold: 0.5737)
2024-06-27 09:17:49,994 - INFO - F1:                 82.93	(Threshold: 0.5737)
2024-06-27 09:17:49,998 - INFO - Precision:          70.83
2024-06-27 09:17:50,000 - INFO - Recall:             100.00
2024-06-27 09:17:50,002 - INFO - Average Precision:  73.98

2024-06-27 09:17:50,011 - INFO - Loss: 0.7397751358667425, epoch: 8, steps: -1
2024-06-27 09:17:50,013 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_6


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:19:43,034 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 9:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:19:46,896 - INFO - Accuracy:           71.07	(Threshold: 0.6972)
2024-06-27 09:19:46,902 - INFO - F1:                 82.93	(Threshold: 0.6972)
2024-06-27 09:19:46,904 - INFO - Precision:          70.83
2024-06-27 09:19:46,906 - INFO - Recall:             100.00
2024-06-27 09:19:46,907 - INFO - Average Precision:  70.09

2024-06-27 09:19:46,917 - INFO - Loss: 0.7009262052698606, epoch: 9, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:21:37,998 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 10:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:21:41,842 - INFO - Accuracy:           71.90	(Threshold: 0.6146)
2024-06-27 09:21:41,843 - INFO - F1:                 83.33	(Threshold: 0.6146)
2024-06-27 09:21:41,845 - INFO - Precision:          71.43
2024-06-27 09:21:41,847 - INFO - Recall:             100.00
2024-06-27 09:21:41,849 - INFO - Average Precision:  69.89

2024-06-27 09:21:41,857 - INFO - Loss: 0.6988670710828349, epoch: 10, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:23:32,180 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 11:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:23:36,017 - INFO - Accuracy:           71.07	(Threshold: 0.6268)
2024-06-27 09:23:36,018 - INFO - F1:                 82.93	(Threshold: 0.6268)
2024-06-27 09:23:36,021 - INFO - Precision:          70.83
2024-06-27 09:23:36,022 - INFO - Recall:             100.00
2024-06-27 09:23:36,024 - INFO - Average Precision:  70.61

2024-06-27 09:23:36,032 - INFO - Loss: 0.7060509426304451, epoch: 11, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:25:26,782 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 12:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:25:30,672 - INFO - Accuracy:           69.42	(Threshold: 0.6616)
2024-06-27 09:25:30,674 - INFO - F1:                 81.95	(Threshold: 0.6616)
2024-06-27 09:25:30,678 - INFO - Precision:          70.00
2024-06-27 09:25:30,682 - INFO - Recall:             98.82
2024-06-27 09:25:30,685 - INFO - Average Precision:  68.82

2024-06-27 09:25:30,696 - INFO - Loss: 0.6881615832481756, epoch: 12, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:27:21,193 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 13:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:27:25,013 - INFO - Accuracy:           70.25	(Threshold: 0.6732)
2024-06-27 09:27:25,014 - INFO - F1:                 82.18	(Threshold: 0.6732)
2024-06-27 09:27:25,017 - INFO - Precision:          70.94
2024-06-27 09:27:25,018 - INFO - Recall:             97.65
2024-06-27 09:27:25,020 - INFO - Average Precision:  72.08

2024-06-27 09:27:25,027 - INFO - Loss: 0.7208008179915876, epoch: 13, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:29:14,960 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 14:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:29:18,781 - INFO - Accuracy:           71.07	(Threshold: 0.6583)
2024-06-27 09:29:18,783 - INFO - F1:                 82.93	(Threshold: 0.6583)
2024-06-27 09:29:18,784 - INFO - Precision:          70.83
2024-06-27 09:29:18,786 - INFO - Recall:             100.00
2024-06-27 09:29:18,787 - INFO - Average Precision:  71.28

2024-06-27 09:29:18,799 - INFO - Loss: 0.7127986318494339, epoch: 14, steps: -1
2024-06-27 09:29:18,809 - INFO - Model fitted
2024-06-27 09:29:18,811 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:29:22,641 - INFO - Accuracy:           71.07	(Threshold: 0.6583)
2024-06-27 09:29:22,643 - INFO - F1:                 82.93	(Threshold: 0.6583)
2024-06-27 09:29:22,645 - INFO - Precision:          70.83
2024-06-27 09:29:22,646 - INFO - Recall:             100.00
2024-06-27 09:29:22,648 - INFO - Average Precision:  71.28

2024-06-27 09:29:22,658 - INFO - Accuracy: 0.7127986318494339
2024-06-27 09:29:22,696 - INFO - Trial pruned
[I 2024-06-27 09:29:22,881] Trial 6 finished with value: 0.7127986318494339 and parameters: {'learning_rate': 0.0007539713352954273, 'batch_size': 4, 'num_epochs': 15, 'warmup_steps': 970, 'weight_decay': 0.00028863039286571767, 'dropout_rate': 0.32816788427921717}. Best is trial 5 with value: 0.8947331337996255.
2024-06-27 09:29:22,912 - INFO - Trial 7
2024-06-27 09:29:22,913 - INFO - Model created
2024-06-27 09:29:23,932 - INFO - Learning rate: 0.022986878218583806, batch size: 4, num epochs: 16, warmup steps: 551, weight decay: 4.739201991060393e

Epoch:   0%|          | 0/16 [00:00<?, ?it/s]

Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:31:15,022 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:31:18,812 - INFO - Accuracy:           71.07	(Threshold: 0.8937)
2024-06-27 09:31:18,814 - INFO - F1:                 82.93	(Threshold: 0.8937)
2024-06-27 09:31:18,816 - INFO - Precision:          70.83
2024-06-27 09:31:18,817 - INFO - Recall:             100.00
2024-06-27 09:31:18,819 - INFO - Average Precision:  70.25

2024-06-27 09:31:18,835 - INFO - Loss: 0.7024793388429752, epoch: 0, steps: -1
2024-06-27 09:31:18,836 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_7


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:33:11,678 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 1:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:33:15,498 - INFO - Accuracy:           71.07	(Threshold: 0.9456)
2024-06-27 09:33:15,499 - INFO - F1:                 82.93	(Threshold: 0.9456)
2024-06-27 09:33:15,501 - INFO - Precision:          70.83
2024-06-27 09:33:15,503 - INFO - Recall:             100.00
2024-06-27 09:33:15,505 - INFO - Average Precision:  70.25

2024-06-27 09:33:15,515 - INFO - Loss: 0.7024793388429752, epoch: 1, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:35:05,754 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 2:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:35:09,615 - INFO - Accuracy:           71.07	(Threshold: 0.9990)
2024-06-27 09:35:09,617 - INFO - F1:                 82.93	(Threshold: 0.9990)
2024-06-27 09:35:09,619 - INFO - Precision:          70.83
2024-06-27 09:35:09,621 - INFO - Recall:             100.00
2024-06-27 09:35:09,623 - INFO - Average Precision:  70.25

2024-06-27 09:35:09,633 - INFO - Loss: 0.7024793388429752, epoch: 2, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:37:00,091 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 3:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:37:03,898 - INFO - Accuracy:           71.07	(Threshold: 0.0157)
2024-06-27 09:37:03,900 - INFO - F1:                 82.93	(Threshold: 0.0157)
2024-06-27 09:37:03,902 - INFO - Precision:          70.83
2024-06-27 09:37:03,903 - INFO - Recall:             100.00
2024-06-27 09:37:03,905 - INFO - Average Precision:  70.25

2024-06-27 09:37:03,922 - INFO - Loss: 0.7024793388429752, epoch: 3, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:38:54,200 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 4:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:38:58,017 - INFO - Accuracy:           71.07	(Threshold: 0.4612)
2024-06-27 09:38:58,019 - INFO - F1:                 82.93	(Threshold: 0.4612)
2024-06-27 09:38:58,021 - INFO - Precision:          70.83
2024-06-27 09:38:58,022 - INFO - Recall:             100.00
2024-06-27 09:38:58,025 - INFO - Average Precision:  70.25

2024-06-27 09:38:58,034 - INFO - Loss: 0.7024793388429752, epoch: 4, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:40:47,751 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 5:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:40:51,547 - INFO - Accuracy:           71.07	(Threshold: 0.9951)
2024-06-27 09:40:51,548 - INFO - F1:                 82.93	(Threshold: 0.9951)
2024-06-27 09:40:51,550 - INFO - Precision:          70.83
2024-06-27 09:40:51,553 - INFO - Recall:             100.00
2024-06-27 09:40:51,555 - INFO - Average Precision:  70.25

2024-06-27 09:40:51,563 - INFO - Loss: 0.7024793388429752, epoch: 5, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:42:41,532 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 6:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:42:45,365 - INFO - Accuracy:           71.07	(Threshold: 0.9896)
2024-06-27 09:42:45,368 - INFO - F1:                 82.93	(Threshold: 0.9896)
2024-06-27 09:42:45,370 - INFO - Precision:          70.83
2024-06-27 09:42:45,372 - INFO - Recall:             100.00
2024-06-27 09:42:45,375 - INFO - Average Precision:  70.25

2024-06-27 09:42:45,387 - INFO - Loss: 0.7024793388429752, epoch: 6, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:44:36,069 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 7:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:44:39,890 - INFO - Accuracy:           71.07	(Threshold: 0.4686)
2024-06-27 09:44:39,891 - INFO - F1:                 82.93	(Threshold: 0.4686)
2024-06-27 09:44:39,893 - INFO - Precision:          70.83
2024-06-27 09:44:39,895 - INFO - Recall:             100.00
2024-06-27 09:44:39,897 - INFO - Average Precision:  70.25

2024-06-27 09:44:39,907 - INFO - Loss: 0.7024793388429752, epoch: 7, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:46:29,877 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 8:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:46:33,702 - INFO - Accuracy:           71.07	(Threshold: 0.8293)
2024-06-27 09:46:33,704 - INFO - F1:                 82.93	(Threshold: 0.8293)
2024-06-27 09:46:33,707 - INFO - Precision:          70.83
2024-06-27 09:46:33,708 - INFO - Recall:             100.00
2024-06-27 09:46:33,710 - INFO - Average Precision:  70.25

2024-06-27 09:46:33,717 - INFO - Loss: 0.7024793388429752, epoch: 8, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:48:24,078 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 9:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:48:27,918 - INFO - Accuracy:           71.07	(Threshold: 0.6269)
2024-06-27 09:48:27,919 - INFO - F1:                 82.93	(Threshold: 0.6269)
2024-06-27 09:48:27,921 - INFO - Precision:          70.83
2024-06-27 09:48:27,923 - INFO - Recall:             100.00
2024-06-27 09:48:27,924 - INFO - Average Precision:  70.25

2024-06-27 09:48:27,935 - INFO - Loss: 0.7024793388429752, epoch: 9, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:50:18,331 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 10:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:50:22,141 - INFO - Accuracy:           71.07	(Threshold: 0.9455)
2024-06-27 09:50:22,142 - INFO - F1:                 82.93	(Threshold: 0.9455)
2024-06-27 09:50:22,144 - INFO - Precision:          70.83
2024-06-27 09:50:22,146 - INFO - Recall:             100.00
2024-06-27 09:50:22,148 - INFO - Average Precision:  70.25

2024-06-27 09:50:22,159 - INFO - Loss: 0.7024793388429752, epoch: 10, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:52:12,622 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 11:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:52:16,456 - INFO - Accuracy:           71.07	(Threshold: 0.6203)
2024-06-27 09:52:16,458 - INFO - F1:                 82.93	(Threshold: 0.6203)
2024-06-27 09:52:16,460 - INFO - Precision:          70.83
2024-06-27 09:52:16,462 - INFO - Recall:             100.00
2024-06-27 09:52:16,463 - INFO - Average Precision:  70.25

2024-06-27 09:52:16,473 - INFO - Loss: 0.7024793388429752, epoch: 11, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:54:06,741 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 12:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:54:10,562 - INFO - Accuracy:           71.07	(Threshold: 0.8405)
2024-06-27 09:54:10,564 - INFO - F1:                 82.93	(Threshold: 0.8405)
2024-06-27 09:54:10,566 - INFO - Precision:          70.83
2024-06-27 09:54:10,569 - INFO - Recall:             100.00
2024-06-27 09:54:10,572 - INFO - Average Precision:  70.25

2024-06-27 09:54:10,579 - INFO - Loss: 0.7024793388429752, epoch: 12, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:56:00,270 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 13:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:56:04,138 - INFO - Accuracy:           71.07	(Threshold: 0.4843)
2024-06-27 09:56:04,140 - INFO - F1:                 82.93	(Threshold: 0.4843)
2024-06-27 09:56:04,142 - INFO - Precision:          70.83
2024-06-27 09:56:04,143 - INFO - Recall:             100.00
2024-06-27 09:56:04,145 - INFO - Average Precision:  70.25

2024-06-27 09:56:04,156 - INFO - Loss: 0.7024793388429752, epoch: 13, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:57:54,555 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 14:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:57:58,350 - INFO - Accuracy:           71.07	(Threshold: 0.7839)
2024-06-27 09:57:58,351 - INFO - F1:                 82.93	(Threshold: 0.7839)
2024-06-27 09:57:58,354 - INFO - Precision:          70.83
2024-06-27 09:57:58,355 - INFO - Recall:             100.00
2024-06-27 09:57:58,357 - INFO - Average Precision:  70.25

2024-06-27 09:57:58,369 - INFO - Loss: 0.7024793388429752, epoch: 14, steps: -1


Iteration:   0%|          | 0/273 [00:00<?, ?it/s]

2024-06-27 09:59:48,377 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 15:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:59:52,193 - INFO - Accuracy:           71.07	(Threshold: 0.5842)
2024-06-27 09:59:52,194 - INFO - F1:                 82.93	(Threshold: 0.5842)
2024-06-27 09:59:52,196 - INFO - Precision:          70.83
2024-06-27 09:59:52,199 - INFO - Recall:             100.00
2024-06-27 09:59:52,200 - INFO - Average Precision:  70.25

2024-06-27 09:59:52,209 - INFO - Loss: 0.7024793388429752, epoch: 15, steps: -1
2024-06-27 09:59:52,225 - INFO - Model fitted
2024-06-27 09:59:52,227 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 09:59:56,073 - INFO - Accuracy:           71.07	(Threshold: 0.5842)
2024-06-27 09:59:56,074 - INFO - F1:                 82.93	(Threshold: 0.5842)
2024-06-27 09:59:56,076 - INFO - Precision:          70.83
2024-06-27 09:59:56,078 - INFO - Recall:             100.00
2024-06-27 09:59:56,081 - INFO - Average Precision:  70.25

2024-06-27 09:59:56,089 - INFO - Accuracy: 0.7024793388429752
2024-06-27 09:59:56,110 - INFO - Trial pruned
[I 2024-06-27 09:59:56,279] Trial 7 finished with value: 0.7024793388429752 and parameters: {'learning_rate': 0.022986878218583806, 'batch_size': 4, 'num_epochs': 16, 'warmup_steps': 551, 'weight_decay': 4.739201991060393e-05, 'dropout_rate': 0.4808581365517105}. Best is trial 5 with value: 0.8947331337996255.
2024-06-27 09:59:56,313 - INFO - Trial 8
2024-06-27 09:59:56,314 - INFO - Model created
2024-06-27 09:59:57,341 - INFO - Learning rate: 0.02422627225973041, batch size: 5, num epochs: 19, warmup steps: 377, weight decay: 0.009431744671664341, 

Epoch:   0%|          | 0/19 [00:00<?, ?it/s]

Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:01:43,522 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:01:47,565 - INFO - Accuracy:           71.07	(Threshold: 0.8378)
2024-06-27 10:01:47,566 - INFO - F1:                 82.93	(Threshold: 0.8378)
2024-06-27 10:01:47,569 - INFO - Precision:          70.83
2024-06-27 10:01:47,570 - INFO - Recall:             100.00
2024-06-27 10:01:47,572 - INFO - Average Precision:  70.25

2024-06-27 10:01:47,582 - INFO - Loss: 0.7024793388429752, epoch: 0, steps: -1
2024-06-27 10:01:47,584 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_8


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:03:36,365 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 1:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:03:40,415 - INFO - Accuracy:           71.07	(Threshold: 0.2674)
2024-06-27 10:03:40,416 - INFO - F1:                 82.93	(Threshold: 0.2674)
2024-06-27 10:03:40,419 - INFO - Precision:          70.83
2024-06-27 10:03:40,421 - INFO - Recall:             100.00
2024-06-27 10:03:40,423 - INFO - Average Precision:  70.25

2024-06-27 10:03:40,431 - INFO - Loss: 0.7024793388429752, epoch: 1, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:05:26,164 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 2:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:05:30,200 - INFO - Accuracy:           71.07	(Threshold: 0.0153)
2024-06-27 10:05:30,201 - INFO - F1:                 82.93	(Threshold: 0.0153)
2024-06-27 10:05:30,204 - INFO - Precision:          70.83
2024-06-27 10:05:30,209 - INFO - Recall:             100.00
2024-06-27 10:05:30,211 - INFO - Average Precision:  70.25

2024-06-27 10:05:30,219 - INFO - Loss: 0.7024793388429752, epoch: 2, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:07:16,043 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 3:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:07:20,074 - INFO - Accuracy:           71.07	(Threshold: 0.5172)
2024-06-27 10:07:20,075 - INFO - F1:                 82.93	(Threshold: 0.5172)
2024-06-27 10:07:20,077 - INFO - Precision:          70.83
2024-06-27 10:07:20,079 - INFO - Recall:             100.00
2024-06-27 10:07:20,080 - INFO - Average Precision:  70.25

2024-06-27 10:07:20,089 - INFO - Loss: 0.7024793388429752, epoch: 3, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:09:06,051 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 4:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:09:10,092 - INFO - Accuracy:           71.07	(Threshold: 0.9991)
2024-06-27 10:09:10,093 - INFO - F1:                 82.93	(Threshold: 0.9991)
2024-06-27 10:09:10,096 - INFO - Precision:          70.83
2024-06-27 10:09:10,098 - INFO - Recall:             100.00
2024-06-27 10:09:10,099 - INFO - Average Precision:  70.25

2024-06-27 10:09:10,109 - INFO - Loss: 0.7024793388429752, epoch: 4, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:10:56,183 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 5:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:11:00,230 - INFO - Accuracy:           71.07	(Threshold: 0.9683)
2024-06-27 10:11:00,232 - INFO - F1:                 82.93	(Threshold: 0.9683)
2024-06-27 10:11:00,237 - INFO - Precision:          70.83
2024-06-27 10:11:00,239 - INFO - Recall:             100.00
2024-06-27 10:11:00,240 - INFO - Average Precision:  70.25

2024-06-27 10:11:00,249 - INFO - Loss: 0.7024793388429752, epoch: 5, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:12:46,485 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 6:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:12:50,523 - INFO - Accuracy:           71.07	(Threshold: 0.5613)
2024-06-27 10:12:50,529 - INFO - F1:                 82.93	(Threshold: 0.5613)
2024-06-27 10:12:50,530 - INFO - Precision:          70.83
2024-06-27 10:12:50,531 - INFO - Recall:             100.00
2024-06-27 10:12:50,535 - INFO - Average Precision:  70.25

2024-06-27 10:12:50,541 - INFO - Loss: 0.7024793388429752, epoch: 6, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:14:36,351 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 7:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:14:40,391 - INFO - Accuracy:           71.07	(Threshold: 0.4687)
2024-06-27 10:14:40,393 - INFO - F1:                 82.93	(Threshold: 0.4687)
2024-06-27 10:14:40,395 - INFO - Precision:          70.83
2024-06-27 10:14:40,397 - INFO - Recall:             100.00
2024-06-27 10:14:40,398 - INFO - Average Precision:  70.25

2024-06-27 10:14:40,409 - INFO - Loss: 0.7024793388429752, epoch: 7, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:16:26,540 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 8:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:16:30,584 - INFO - Accuracy:           71.07	(Threshold: 0.1185)
2024-06-27 10:16:30,586 - INFO - F1:                 82.93	(Threshold: 0.1185)
2024-06-27 10:16:30,587 - INFO - Precision:          70.83
2024-06-27 10:16:30,590 - INFO - Recall:             100.00
2024-06-27 10:16:30,591 - INFO - Average Precision:  70.25

2024-06-27 10:16:30,599 - INFO - Loss: 0.7024793388429752, epoch: 8, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:18:16,333 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 9:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:18:20,379 - INFO - Accuracy:           71.07	(Threshold: 0.6984)
2024-06-27 10:18:20,385 - INFO - F1:                 82.93	(Threshold: 0.6984)
2024-06-27 10:18:20,387 - INFO - Precision:          70.83
2024-06-27 10:18:20,388 - INFO - Recall:             100.00
2024-06-27 10:18:20,392 - INFO - Average Precision:  70.25

2024-06-27 10:18:20,402 - INFO - Loss: 0.7024793388429752, epoch: 9, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:20:06,340 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 10:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:20:10,241 - INFO - Accuracy:           71.07	(Threshold: 0.0190)
2024-06-27 10:20:10,243 - INFO - F1:                 82.93	(Threshold: 0.0190)
2024-06-27 10:20:10,245 - INFO - Precision:          70.83
2024-06-27 10:20:10,246 - INFO - Recall:             100.00
2024-06-27 10:20:10,248 - INFO - Average Precision:  70.25

2024-06-27 10:20:10,257 - INFO - Loss: 0.7024793388429752, epoch: 10, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:21:55,995 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 11:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:22:00,033 - INFO - Accuracy:           71.07	(Threshold: 0.4563)
2024-06-27 10:22:00,037 - INFO - F1:                 82.93	(Threshold: 0.4563)
2024-06-27 10:22:00,040 - INFO - Precision:          70.83
2024-06-27 10:22:00,043 - INFO - Recall:             100.00
2024-06-27 10:22:00,045 - INFO - Average Precision:  70.25

2024-06-27 10:22:00,056 - INFO - Loss: 0.7024793388429752, epoch: 11, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:23:45,929 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 12:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:23:49,993 - INFO - Accuracy:           71.07	(Threshold: 0.6237)
2024-06-27 10:23:49,994 - INFO - F1:                 82.93	(Threshold: 0.6237)
2024-06-27 10:23:49,996 - INFO - Precision:          70.83
2024-06-27 10:23:49,998 - INFO - Recall:             100.00
2024-06-27 10:23:50,000 - INFO - Average Precision:  70.25

2024-06-27 10:23:50,012 - INFO - Loss: 0.7024793388429752, epoch: 12, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:25:35,842 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 13:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:25:39,885 - INFO - Accuracy:           71.07	(Threshold: 0.7839)
2024-06-27 10:25:39,887 - INFO - F1:                 82.93	(Threshold: 0.7839)
2024-06-27 10:25:39,891 - INFO - Precision:          70.83
2024-06-27 10:25:39,894 - INFO - Recall:             100.00
2024-06-27 10:25:39,896 - INFO - Average Precision:  70.25

2024-06-27 10:25:39,905 - INFO - Loss: 0.7024793388429752, epoch: 13, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:27:25,751 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 14:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:27:29,783 - INFO - Accuracy:           71.07	(Threshold: 0.7307)
2024-06-27 10:27:29,784 - INFO - F1:                 82.93	(Threshold: 0.7307)
2024-06-27 10:27:29,786 - INFO - Precision:          70.83
2024-06-27 10:27:29,788 - INFO - Recall:             100.00
2024-06-27 10:27:29,789 - INFO - Average Precision:  70.25

2024-06-27 10:27:29,805 - INFO - Loss: 0.7024793388429752, epoch: 14, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:29:16,290 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 15:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:29:20,314 - INFO - Accuracy:           71.07	(Threshold: 0.8368)
2024-06-27 10:29:20,316 - INFO - F1:                 82.93	(Threshold: 0.8368)
2024-06-27 10:29:20,318 - INFO - Precision:          70.83
2024-06-27 10:29:20,320 - INFO - Recall:             100.00
2024-06-27 10:29:20,322 - INFO - Average Precision:  70.25

2024-06-27 10:29:20,333 - INFO - Loss: 0.7024793388429752, epoch: 15, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:31:05,947 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 16:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:31:09,976 - INFO - Accuracy:           71.07	(Threshold: 0.5729)
2024-06-27 10:31:09,977 - INFO - F1:                 82.93	(Threshold: 0.5729)
2024-06-27 10:31:09,979 - INFO - Precision:          70.83
2024-06-27 10:31:09,980 - INFO - Recall:             100.00
2024-06-27 10:31:09,982 - INFO - Average Precision:  70.25

2024-06-27 10:31:09,990 - INFO - Loss: 0.7024793388429752, epoch: 16, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:32:55,621 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 17:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:32:59,640 - INFO - Accuracy:           71.07	(Threshold: 0.5755)
2024-06-27 10:32:59,641 - INFO - F1:                 82.93	(Threshold: 0.5755)
2024-06-27 10:32:59,644 - INFO - Precision:          70.83
2024-06-27 10:32:59,645 - INFO - Recall:             100.00
2024-06-27 10:32:59,647 - INFO - Average Precision:  70.25

2024-06-27 10:32:59,656 - INFO - Loss: 0.7024793388429752, epoch: 17, steps: -1


Iteration:   0%|          | 0/218 [00:00<?, ?it/s]

2024-06-27 10:34:46,079 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 18:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:34:50,116 - INFO - Accuracy:           71.07	(Threshold: 0.5955)
2024-06-27 10:34:50,117 - INFO - F1:                 82.93	(Threshold: 0.5955)
2024-06-27 10:34:50,119 - INFO - Precision:          70.83
2024-06-27 10:34:50,121 - INFO - Recall:             100.00
2024-06-27 10:34:50,123 - INFO - Average Precision:  70.25

2024-06-27 10:34:50,133 - INFO - Loss: 0.7024793388429752, epoch: 18, steps: -1
2024-06-27 10:34:50,146 - INFO - Model fitted
2024-06-27 10:34:50,148 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:34:53,939 - INFO - Accuracy:           71.07	(Threshold: 0.5955)
2024-06-27 10:34:53,940 - INFO - F1:                 82.93	(Threshold: 0.5955)
2024-06-27 10:34:53,942 - INFO - Precision:          70.83
2024-06-27 10:34:53,944 - INFO - Recall:             100.00
2024-06-27 10:34:53,946 - INFO - Average Precision:  70.25

2024-06-27 10:34:53,954 - INFO - Accuracy: 0.7024793388429752
2024-06-27 10:34:53,974 - INFO - Trial pruned
[I 2024-06-27 10:34:54,150] Trial 8 finished with value: 0.7024793388429752 and parameters: {'learning_rate': 0.02422627225973041, 'batch_size': 5, 'num_epochs': 19, 'warmup_steps': 377, 'weight_decay': 0.009431744671664341, 'dropout_rate': 0.3117635806706708}. Best is trial 5 with value: 0.8947331337996255.
2024-06-27 10:34:54,197 - INFO - Trial 9
2024-06-27 10:34:54,199 - INFO - Model created
2024-06-27 10:34:55,217 - INFO - Learning rate: 0.000926077844966131, batch size: 11, num epochs: 17, warmup steps: 733, weight decay: 4.525471473182416e-05,

Epoch:   0%|          | 0/17 [00:00<?, ?it/s]

Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:36:39,245 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 0:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:36:43,954 - INFO - Accuracy:           84.30	(Threshold: 0.5779)
2024-06-27 10:36:43,955 - INFO - F1:                 88.89	(Threshold: 0.5779)
2024-06-27 10:36:43,957 - INFO - Precision:          88.37
2024-06-27 10:36:43,959 - INFO - Recall:             89.41
2024-06-27 10:36:43,961 - INFO - Average Precision:  86.75

2024-06-27 10:36:43,973 - INFO - Loss: 0.867548410735016, epoch: 0, steps: -1
2024-06-27 10:36:43,974 - INFO - Save model to /content/drive/MyDrive/civol/iceccme/models_qwen/model_trial_2_9


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:38:30,115 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 1:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:38:34,723 - INFO - Accuracy:           69.42	(Threshold: 0.8378)
2024-06-27 10:38:34,725 - INFO - F1:                 81.95	(Threshold: 0.8378)
2024-06-27 10:38:34,728 - INFO - Precision:          70.00
2024-06-27 10:38:34,729 - INFO - Recall:             98.82
2024-06-27 10:38:34,731 - INFO - Average Precision:  74.60

2024-06-27 10:38:34,741 - INFO - Loss: 0.7460379047186736, epoch: 1, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:40:18,035 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 2:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:40:22,615 - INFO - Accuracy:           71.90	(Threshold: 0.5218)
2024-06-27 10:40:22,617 - INFO - F1:                 83.33	(Threshold: 0.5218)
2024-06-27 10:40:22,619 - INFO - Precision:          71.43
2024-06-27 10:40:22,621 - INFO - Recall:             100.00
2024-06-27 10:40:22,623 - INFO - Average Precision:  68.26

2024-06-27 10:40:22,631 - INFO - Loss: 0.6825710813007857, epoch: 2, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:42:05,616 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 3:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:42:10,231 - INFO - Accuracy:           69.42	(Threshold: 0.7544)
2024-06-27 10:42:10,233 - INFO - F1:                 81.95	(Threshold: 0.7544)
2024-06-27 10:42:10,235 - INFO - Precision:          70.00
2024-06-27 10:42:10,237 - INFO - Recall:             98.82
2024-06-27 10:42:10,238 - INFO - Average Precision:  73.67

2024-06-27 10:42:10,250 - INFO - Loss: 0.736698194307702, epoch: 3, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:43:53,428 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 4:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:43:58,031 - INFO - Accuracy:           70.25	(Threshold: 0.6327)
2024-06-27 10:43:58,033 - INFO - F1:                 82.35	(Threshold: 0.6327)
2024-06-27 10:43:58,036 - INFO - Precision:          70.59
2024-06-27 10:43:58,038 - INFO - Recall:             98.82
2024-06-27 10:43:58,041 - INFO - Average Precision:  71.58

2024-06-27 10:43:58,050 - INFO - Loss: 0.7157938911949606, epoch: 4, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:45:41,167 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 5:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:45:45,769 - INFO - Accuracy:           71.07	(Threshold: 0.7051)
2024-06-27 10:45:45,772 - INFO - F1:                 82.93	(Threshold: 0.7051)
2024-06-27 10:45:45,774 - INFO - Precision:          70.83
2024-06-27 10:45:45,777 - INFO - Recall:             100.00
2024-06-27 10:45:45,779 - INFO - Average Precision:  70.87

2024-06-27 10:45:45,790 - INFO - Loss: 0.7086537625054372, epoch: 5, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:47:29,016 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 6:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:47:33,606 - INFO - Accuracy:           71.07	(Threshold: 0.6748)
2024-06-27 10:47:33,608 - INFO - F1:                 82.93	(Threshold: 0.6748)
2024-06-27 10:47:33,610 - INFO - Precision:          70.83
2024-06-27 10:47:33,611 - INFO - Recall:             100.00
2024-06-27 10:47:33,613 - INFO - Average Precision:  72.62

2024-06-27 10:47:33,622 - INFO - Loss: 0.7261827392984161, epoch: 6, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:49:16,805 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 7:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:49:21,407 - INFO - Accuracy:           70.25	(Threshold: 0.4695)
2024-06-27 10:49:21,408 - INFO - F1:                 82.35	(Threshold: 0.4695)
2024-06-27 10:49:21,411 - INFO - Precision:          70.59
2024-06-27 10:49:21,413 - INFO - Recall:             98.82
2024-06-27 10:49:21,415 - INFO - Average Precision:  68.09

2024-06-27 10:49:21,423 - INFO - Loss: 0.6809403430793805, epoch: 7, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:51:04,684 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 8:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:51:09,291 - INFO - Accuracy:           71.07	(Threshold: 0.5737)
2024-06-27 10:51:09,292 - INFO - F1:                 82.93	(Threshold: 0.5737)
2024-06-27 10:51:09,298 - INFO - Precision:          70.83
2024-06-27 10:51:09,299 - INFO - Recall:             100.00
2024-06-27 10:51:09,302 - INFO - Average Precision:  72.72

2024-06-27 10:51:09,312 - INFO - Loss: 0.7271810542398778, epoch: 8, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:52:52,510 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 9:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:52:57,119 - INFO - Accuracy:           71.07	(Threshold: 0.7187)
2024-06-27 10:52:57,120 - INFO - F1:                 82.93	(Threshold: 0.7187)
2024-06-27 10:52:57,124 - INFO - Precision:          70.83
2024-06-27 10:52:57,125 - INFO - Recall:             100.00
2024-06-27 10:52:57,127 - INFO - Average Precision:  69.55

2024-06-27 10:52:57,137 - INFO - Loss: 0.6954666162053406, epoch: 9, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:54:40,279 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 10:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:54:44,888 - INFO - Accuracy:           71.07	(Threshold: 0.6258)
2024-06-27 10:54:44,889 - INFO - F1:                 82.93	(Threshold: 0.6258)
2024-06-27 10:54:44,892 - INFO - Precision:          70.83
2024-06-27 10:54:44,893 - INFO - Recall:             100.00
2024-06-27 10:54:44,895 - INFO - Average Precision:  71.82

2024-06-27 10:54:44,911 - INFO - Loss: 0.7181575109382595, epoch: 10, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:56:27,771 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 11:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:56:32,414 - INFO - Accuracy:           71.07	(Threshold: 0.5946)
2024-06-27 10:56:32,415 - INFO - F1:                 82.93	(Threshold: 0.5946)
2024-06-27 10:56:32,418 - INFO - Precision:          70.83
2024-06-27 10:56:32,419 - INFO - Recall:             100.00
2024-06-27 10:56:32,421 - INFO - Average Precision:  69.83

2024-06-27 10:56:32,429 - INFO - Loss: 0.6982660832928211, epoch: 11, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 10:58:15,493 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 12:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 10:58:20,093 - INFO - Accuracy:           71.07	(Threshold: 0.6363)
2024-06-27 10:58:20,095 - INFO - F1:                 82.93	(Threshold: 0.6363)
2024-06-27 10:58:20,097 - INFO - Precision:          70.83
2024-06-27 10:58:20,098 - INFO - Recall:             100.00
2024-06-27 10:58:20,100 - INFO - Average Precision:  70.85

2024-06-27 10:58:20,113 - INFO - Loss: 0.7085464268351969, epoch: 12, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 11:00:03,203 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 13:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 11:00:07,814 - INFO - Accuracy:           71.07	(Threshold: 0.6136)
2024-06-27 11:00:07,816 - INFO - F1:                 82.93	(Threshold: 0.6136)
2024-06-27 11:00:07,818 - INFO - Precision:          70.83
2024-06-27 11:00:07,820 - INFO - Recall:             100.00
2024-06-27 11:00:07,822 - INFO - Average Precision:  70.37

2024-06-27 11:00:07,829 - INFO - Loss: 0.703665892435946, epoch: 13, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 11:01:51,123 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 14:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 11:01:55,730 - INFO - Accuracy:           71.07	(Threshold: 0.6309)
2024-06-27 11:01:55,731 - INFO - F1:                 82.93	(Threshold: 0.6309)
2024-06-27 11:01:55,734 - INFO - Precision:          70.83
2024-06-27 11:01:55,736 - INFO - Recall:             100.00
2024-06-27 11:01:55,738 - INFO - Average Precision:  70.25

2024-06-27 11:01:55,744 - INFO - Loss: 0.7024793388429752, epoch: 14, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 11:03:38,869 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 15:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 11:03:43,456 - INFO - Accuracy:           71.90	(Threshold: 0.6182)
2024-06-27 11:03:43,458 - INFO - F1:                 83.33	(Threshold: 0.6182)
2024-06-27 11:03:43,460 - INFO - Precision:          71.43
2024-06-27 11:03:43,461 - INFO - Recall:             100.00
2024-06-27 11:03:43,462 - INFO - Average Precision:  70.05

2024-06-27 11:03:43,474 - INFO - Loss: 0.7005408361691784, epoch: 15, steps: -1


Iteration:   0%|          | 0/99 [00:00<?, ?it/s]

2024-06-27 11:05:26,557 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset after epoch 16:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 11:05:31,172 - INFO - Accuracy:           70.25	(Threshold: 0.6360)
2024-06-27 11:05:31,174 - INFO - F1:                 82.35	(Threshold: 0.6360)
2024-06-27 11:05:31,177 - INFO - Precision:          70.59
2024-06-27 11:05:31,178 - INFO - Recall:             98.82
2024-06-27 11:05:31,180 - INFO - Average Precision:  69.34

2024-06-27 11:05:31,188 - INFO - Loss: 0.693353705118411, epoch: 16, steps: -1
2024-06-27 11:05:31,199 - INFO - Model fitted
2024-06-27 11:05:31,201 - INFO - CEBinaryClassificationEvaluator: Evaluating the model on goal_eval dataset:


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

2024-06-27 11:05:35,047 - INFO - Accuracy:           70.25	(Threshold: 0.6360)
2024-06-27 11:05:35,048 - INFO - F1:                 82.35	(Threshold: 0.6360)
2024-06-27 11:05:35,050 - INFO - Precision:          70.59
2024-06-27 11:05:35,053 - INFO - Recall:             98.82
2024-06-27 11:05:35,055 - INFO - Average Precision:  69.34

2024-06-27 11:05:35,061 - INFO - Accuracy: 0.693353705118411
2024-06-27 11:05:35,082 - INFO - Trial pruned
[I 2024-06-27 11:05:35,275] Trial 9 finished with value: 0.693353705118411 and parameters: {'learning_rate': 0.000926077844966131, 'batch_size': 11, 'num_epochs': 17, 'warmup_steps': 733, 'weight_decay': 4.525471473182416e-05, 'dropout_rate': 0.11641311745480959}. Best is trial 5 with value: 0.8947331337996255.
2024-06-27 11:05:35,309 - INFO - Trial 10
2024-06-27 11:05:35,310 - INFO - Model created
2024-06-27 11:05:36,391 - INFO - Learning rate: 5.268528030008424e-05, batch size: 16, num epochs: 6, warmup steps: 995, weight decay: 0.06422146703104012,

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Iteration:   0%|          | 0/69 [00:00<?, ?it/s]

2024-06-27 11:05:38,422 - ERROR - Error: CUDA out of memory. Tried to allocate 192.00 MiB. GPU 
[W 2024-06-27 11:05:38,655] Trial 10 failed with parameters: {'learning_rate': 5.268528030008424e-05, 'batch_size': 16, 'num_epochs': 6, 'warmup_steps': 995, 'weight_decay': 0.06422146703104012, 'dropout_rate': 0.20614980955005321} because of the following error: OutOfMemoryError('CUDA out of memory. Tried to allocate 192.00 MiB. GPU ').
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-13-f35afe2df549>", line 49, in objective
    raise e
  File "<ipython-input-13-f35afe2df549>", line 27, in objective
    model.fit(train_dataloader=train_dataloader,
  File "/usr/local/lib/python3.10/dist-packages/sentence_transformers/cross_encoder/CrossEncoder.py", line 278, in fit
    model_predictions = self.model(**features, return_dict=True)
  File "/usr/local/lib

OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB. GPU 

In [None]:
# Summarise the hyperparameter search: the best objective value recorded on
# `study` and the parameter set that produced it.
best_objective_value = study.best_value
best_hyperparameters = study.best_params
print(f"Best value: {best_objective_value} (params: {best_hyperparameters})")