In [1]:
from transformers import BasicTokenizer, Trainer
from datasets import concatenate_datasets, load_from_disk
import kagglehub
import optuna
import torch
import math
import base

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [2]:
# Project helper from the local `base` module; presumably re-seeds the random
# number generators so the run is reproducible — confirm in base.reset_seed.
base.reset_seed()

In [3]:
# Pick the compute device once: CUDA when torch can see a GPU, otherwise the CPU,
# and report the choice.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
if use_gpu:
    print("GPU is available and will be used:", torch.cuda.get_device_name(0))
else:
    print("GPU is not available, using CPU.")

GPU is available and will be used: NVIDIA A100 80GB PCIe MIG 2g.20gb


In [4]:
# Fetch the Kaggle dataset "thanakomsn/glove6b300dtxt" through kagglehub and keep
# the returned value; it is printed here and used as the directory prefix of
# GLOVE_FILE in the next cell.
my_glove = kagglehub.dataset_download("thanakomsn/glove6b300dtxt")
print(my_glove)

/home/jovyan/.cache/kagglehub/datasets/thanakomsn/glove6b300dtxt/versions/1


In [5]:
# Path of the GloVe text file inside the downloaded dataset directory.
GLOVE_FILE = f"{my_glove}/glove.6B.300d.txt"
# Dataset name; used as a path component of the data, results and logs directories.
DATASET = "trec"

In [6]:
# Root directory holding the pre-computed splits of the selected dataset.
data_root = f"~/data/{DATASET}"

# Original (non-augmented) train split plus the evaluation and test splits.
train_data = load_from_disk(f"{data_root}/train-logits_fine")
eval_data = load_from_disk(f"{data_root}/eval-logits_fine")
test_data = load_from_disk(f"{data_root}/test-logits_fine")

# Augmented train split.
all_train_data = load_from_disk(f"{data_root}/train-logits-augmented_fine")

# Union of eval, test and augmented train (in that order); it is the corpus the
# vocabulary is built from. The already-loaded datasets are reused here instead
# of reading the same three directories from disk a second time.
# NOTE(review): the plain train split is not part of this union, yet its tokens
# are later looked up in the vocabulary — presumably the augmented split is a
# superset of it; confirm.
all_data = concatenate_datasets([eval_data, test_data, all_train_data])

# Lower-casing word-level tokenizer shared by every split.
tokenizer = BasicTokenizer(do_lower_case=True)

In [7]:
def _tokenize_split(split):
    """Tokenize the "sentence" field of every example in ``split``.

    Returns one token list per example, produced by the shared ``tokenizer``.
    """
    return [tokenizer.tokenize(example["sentence"]) for example in split]


train_data_tokens = _tokenize_split(train_data)
eval_data_tokens = _tokenize_split(eval_data)
test_data_tokens = _tokenize_split(test_data)

all_train_data_tokens = _tokenize_split(all_train_data)

all_data_tokens = _tokenize_split(all_data)

In [8]:
# Build the vocabulary from the tokens of the combined corpus (all_data_tokens)
# with the project helper base.get_vocab.
vocab = base.get_vocab(all_data_tokens)

In [9]:
# Map every vocabulary entry to its 0-based position in `vocab`.
# NOTE(review): num_tokens is set to len(vocab) + 2 further down, which suggests
# two reserved rows (padding / out-of-vocabulary); confirm that these indices and
# the value base.padd pads with do not collide.
word_index = {token: position for position, token in enumerate(vocab)}

In [10]:
# Load the GloVe vectors from GLOVE_FILE through the project helper; presumably
# a word -> vector lookup — confirm in base.get_embeddings_indeces.
embeddings_index = base.get_embeddings_indeces(GLOVE_FILE)

Found 400000 word vectors.


In [11]:
vocab_size = len(vocab)
print(vocab_size)
# The embedding table is sized two rows larger than the vocabulary.
num_tokens = vocab_size + 2
# Width of one embedding vector; matches the "300d" GloVe file named in GLOVE_FILE.
embedding_dim = 300

8766


In [12]:
# Build the embedding matrix with the project helper from the table size, the
# vector width, the word -> index map and the loaded GloVe vectors. The result is
# later handed to base.BiLSTMClassifier as `embedding_matrix`.
embedding_matrix = base.get_embedding_matrix(num_tokens, embedding_dim, word_index, embeddings_index)

Converted 8551 words (215) misses


In [13]:
def _tokens_to_ids(token_lists):
    """Replace every token by its ``word_index`` entry, keeping the nesting.

    A token missing from ``word_index`` raises ``KeyError``.
    """
    return [[word_index[token] for token in tokens] for tokens in token_lists]


train_data_index = _tokens_to_ids(train_data_tokens)
eval_data_index = _tokens_to_ids(eval_data_tokens)
test_data_index = _tokens_to_ids(test_data_tokens)

all_train_data_index = _tokens_to_ids(all_train_data_tokens)

In [14]:
# Length argument handed to base.padd for every sequence of every split.
MAX_SEQ_LEN = 60


def _pad_all(index_lists):
    """Run base.padd with MAX_SEQ_LEN over each index sequence in ``index_lists``."""
    return [base.padd(ids, MAX_SEQ_LEN) for ids in index_lists]


train_padded_data = _pad_all(train_data_index)
eval_padded_data = _pad_all(eval_data_index)
test_padded_data = _pad_all(test_data_index)

all_train_padded_data = _pad_all(all_train_data_index)

In [15]:
def _with_input_ids(dataset, input_ids):
    """Return ``dataset`` with an "input_ids" column holding ``input_ids``.

    An already present "input_ids" column is dropped first, so this cell can be
    re-run on the same kernel without ``add_column`` failing on a duplicated
    column name.
    """
    if "input_ids" in dataset.column_names:
        dataset = dataset.remove_columns("input_ids")
    return dataset.add_column("input_ids", input_ids)


train_data = _with_input_ids(train_data, train_padded_data)
eval_data = _with_input_ids(eval_data, eval_padded_data)
test_data = _with_input_ids(test_data, test_padded_data)

all_train_data = _with_input_ids(all_train_data, all_train_padded_data)

In [None]:
# Training length and batch size; both feed the epoch -> step conversion and the
# training arguments below.
# NOTE(review): this cell has no execution count and the recorded trial logs show
# at most 10 epochs per trial, so the saved outputs were probably produced with a
# different num_epochs — re-run the notebook to confirm.
num_epochs = 15
batch_size = 128

In [17]:
# Convert epoch counts into optimizer steps.
data_length = len(train_data)
steps_per_epoch = math.ceil(data_length / batch_size)
# Five epochs' worth of steps; used as the pruner's min_resource.
min_r = steps_per_epoch * 5
# Steps of a full run of num_epochs; used as the pruner's max_resource.
max_r = steps_per_epoch * num_epochs
# One tenth of an epoch (rounded up); upper bound of the warmup_steps search range.
warm_up = math.ceil(data_length / batch_size / 10)

In [18]:
def hp_space(trial):
    """Sample one hyperparameter configuration from ``trial``.

    Draws, in this order:
      * ``learning_rate`` — float in [5e-5, 5e-3], sampled on a log scale,
      * ``weight_decay``  — float in [0, 1e-2] on a 1e-3 grid,
      * ``warmup_steps``  — int in [0, warm_up] (module-level ``warm_up``).

    The sampled values are printed together with the trial number and
    returned as a dict keyed by the parameter names.
    """
    learning_rate = trial.suggest_float("learning_rate", 5e-5, 5e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0, 1e-2, step=1e-3)
    warmup_steps = trial.suggest_int("warmup_steps", 0, warm_up)

    params = {
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "warmup_steps": warmup_steps,
    }
    print(f"Trial {trial.number} with params: {params}")
    return params

In [19]:
# Seeded multivariate TPE sampler proposes the hyperparameters ...
sampler = optuna.samplers.TPESampler(
    seed=42,
    multivariate=True,
)
# ... and a Hyperband pruner stops weak trials, with its resource budget
# expressed through the step counts min_r / max_r computed above.
pruner = optuna.pruners.HyperbandPruner(
    min_resource=min_r,
    max_resource=max_r,
    reduction_factor=2,
    bootstrap_count=2,
)



In [None]:
def get_BiLSTM():
    """Build a new ``base.BiLSTMClassifier`` on top of the GloVe embedding matrix.

    Takes no arguments so it can be used as a model factory; the layer sizes
    are fixed here (hidden_dim=300, fc_dim=400, output_dim=50 — presumably the
    number of fine-grained labels, confirm against the dataset).
    """
    model_kwargs = dict(
        embedding_matrix=embedding_matrix,
        embedding_dim=embedding_dim,
        fc_dim=400,
        hidden_dim=300,
        output_dim=50,
    )
    return base.BiLSTMClassifier(**model_kwargs)

In [None]:
# Call the project seeding helper again right before the training setup;
# presumably so the search starts from a known RNG state — confirm in base.reset_seed.
base.reset_seed()

In [22]:
# Where the hyperparameter-search run writes its results and its logs.
hp_search_output_dir = f"~/results/{DATASET}/bilstm-base_fine_hp-search"
hp_search_logging_dir = f"~/logs/{DATASET}/bilstm-base_fine_hp-search"

# Training arguments come from the project helper, parameterized by the
# directories above and the epoch count / batch size chosen earlier.
training_args = base.get_training_args(
    output_dir=hp_search_output_dir,
    logging_dir=hp_search_logging_dir,
    epochs=num_epochs,
    batch_size=batch_size,
)

In [23]:
# Trainer used for the hyperparameter search. No model instance is passed:
# `model_init` hands over the zero-argument factory so a new model can be
# built for each run.
trainer = Trainer(
    model_init=get_BiLSTM,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
)
  

In [24]:
def _eval_f1(metrics):
    """Search objective: the "eval_f1" entry of the evaluation metrics dict."""
    return metrics["eval_f1"]


# Run 150 Optuna trials that maximize the evaluation F1. The study name, sampler
# and pruner are passed through as extra keyword arguments for the Optuna backend.
best_trial = trainer.hyperparameter_search(
    hp_space=hp_space,
    compute_objective=_eval_f1,
    direction="maximize",
    backend="optuna",
    n_trials=150,
    study_name="Base",
    sampler=sampler,
    pruner=pruner,
)

[I 2025-03-23 01:13:16,011] A new study created in memory with name: Base


Trial 0 with params: {'learning_rate': 0.0002805758207667253, 'weight_decay': 0.01, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4584,3.069126,0.177819,0.00579,0.020238,0.006485
2,2.8344,2.626046,0.363886,0.03989,0.074396,0.04746
3,2.4687,2.306453,0.439047,0.068129,0.100127,0.071554
4,2.1965,2.091045,0.471127,0.10861,0.118121,0.09377
5,2.0097,1.932238,0.527039,0.132902,0.151495,0.13201
6,1.8305,1.845821,0.532539,0.150639,0.157151,0.139438
7,1.7119,1.765708,0.549038,0.179995,0.169621,0.155691
8,1.6299,1.704964,0.571036,0.207771,0.19221,0.177835
9,1.5366,1.643923,0.575619,0.199444,0.187425,0.173288
10,1.4563,1.602813,0.593034,0.233051,0.208454,0.197623


[I 2025-03-23 01:14:15,358] Trial 0 pruned. 


Trial 1 with params: {'learning_rate': 0.0007875660249889869, 'weight_decay': 0.001, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.1321,2.596069,0.352887,0.050332,0.073663,0.04719
2,2.2888,2.032072,0.503208,0.105052,0.135283,0.108371
3,1.8392,1.709319,0.563703,0.21463,0.175021,0.161167
4,1.4978,1.461385,0.644363,0.303752,0.25856,0.259748
5,1.2426,1.348165,0.659945,0.317788,0.295044,0.286731


[I 2025-03-23 01:14:36,887] Trial 1 pruned. 


Trial 2 with params: {'learning_rate': 6.533369619026643e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.811,3.618339,0.176902,0.003538,0.02,0.006012
2,3.3573,3.181626,0.176902,0.003538,0.02,0.006012
3,3.1471,3.066321,0.176902,0.003538,0.02,0.006012
4,2.9966,2.934118,0.287809,0.027363,0.049589,0.033004
5,2.908,2.833761,0.348304,0.04031,0.067395,0.046142
6,2.7957,2.750341,0.349221,0.036498,0.069042,0.043796
7,2.7176,2.684718,0.35472,0.037889,0.069948,0.045424
8,2.6705,2.630808,0.369386,0.038043,0.075451,0.048442
9,2.6167,2.586061,0.370302,0.037896,0.075369,0.048505
10,2.5736,2.56153,0.387718,0.039489,0.081489,0.052089


[I 2025-03-23 01:15:22,006] Trial 2 pruned. 


Trial 3 with params: {'learning_rate': 0.0013035123791853842, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0898,2.413036,0.393217,0.072935,0.087761,0.06587
2,2.0691,1.845839,0.535289,0.170309,0.168369,0.150431
3,1.5857,1.451737,0.64253,0.301251,0.26216,0.258377
4,1.1986,1.259654,0.694775,0.375828,0.342741,0.343451
5,0.9466,1.183822,0.710357,0.411755,0.364871,0.368432


[I 2025-03-23 01:15:42,877] Trial 3 pruned. 


Trial 4 with params: {'learning_rate': 0.002311294500510415, 'weight_decay': 0.002, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7627,2.076098,0.494959,0.12683,0.137194,0.113722
2,1.7765,1.632031,0.5967,0.253419,0.246135,0.226332
3,1.3034,1.265994,0.688359,0.357061,0.343196,0.336913
4,0.9241,1.121722,0.710357,0.449929,0.408924,0.415273
5,0.6364,1.040889,0.744271,0.530806,0.478752,0.490374
6,0.4347,1.063461,0.731439,0.518502,0.473687,0.478772
7,0.2871,1.196735,0.746104,0.608135,0.516584,0.536805
8,0.1846,1.167184,0.759853,0.60251,0.566083,0.569041
9,0.1263,1.193905,0.772686,0.637881,0.60607,0.611316
10,0.0606,1.280458,0.761687,0.628287,0.607313,0.603093


[I 2025-03-23 01:16:55,429] Trial 4 finished with value: 0.6291900777925411 and parameters: {'learning_rate': 0.002311294500510415, 'weight_decay': 0.002, 'warmup_steps': 0}. Best is trial 4 with value: 0.6291900777925411.


Trial 5 with params: {'learning_rate': 0.00011635338541918901, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.6783,3.25952,0.176902,0.003538,0.02,0.006012
2,3.1466,3.023148,0.179652,0.006604,0.020822,0.007398
3,2.9341,2.810968,0.351054,0.041412,0.067221,0.03969
4,2.7334,2.659315,0.359303,0.038147,0.070818,0.046853
5,2.6156,2.519705,0.394134,0.040526,0.082666,0.053796


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-23 01:18:35,254] Trial 5 pruned. 


Trial 6 with params: {'learning_rate': 0.0003654769917956456, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4121,2.999998,0.190651,0.017247,0.025455,0.013909
2,2.7258,2.484273,0.410632,0.045855,0.08815,0.058501
3,2.3039,2.130928,0.476627,0.096553,0.116746,0.090341
4,2.0278,1.933419,0.514207,0.140566,0.14556,0.124102
5,1.8228,1.777779,0.554537,0.177616,0.173807,0.153835
6,1.6329,1.686394,0.571036,0.206947,0.186897,0.173492
7,1.5055,1.616487,0.585701,0.218232,0.202957,0.194651
8,1.412,1.551501,0.597617,0.240844,0.215167,0.208428
9,1.3108,1.490073,0.624198,0.32802,0.244036,0.245582
10,1.2117,1.458836,0.630614,0.30871,0.25972,0.259421


[I 2025-03-23 01:20:09,607] Trial 6 finished with value: 0.2806875549439002 and parameters: {'learning_rate': 0.0003654769917956456, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 4 with value: 0.6291900777925411.


Trial 7 with params: {'learning_rate': 9.505122659935192e-05, 'weight_decay': 0.003, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.7267,3.354667,0.176902,0.003538,0.02,0.006012
2,3.2051,3.095615,0.176902,0.003538,0.02,0.006012
3,3.0193,2.903063,0.351054,0.027768,0.066474,0.038919
4,2.8321,2.762799,0.349221,0.039562,0.068277,0.044692
5,2.7341,2.642381,0.378552,0.039581,0.077125,0.050743


[I 2025-03-23 01:20:40,221] Trial 7 pruned. 


Trial 8 with params: {'learning_rate': 0.00040842279473800845, 'weight_decay': 0.008, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.3232,2.891825,0.318057,0.058441,0.061664,0.047207
2,2.6371,2.382618,0.422548,0.056906,0.092843,0.06303
3,2.2045,2.052993,0.487626,0.130065,0.122017,0.098234
4,1.9354,1.835632,0.533456,0.149113,0.161113,0.141194
5,1.7215,1.682369,0.570119,0.206494,0.187722,0.169007


[I 2025-03-23 01:21:08,854] Trial 8 pruned. 


Trial 9 with params: {'learning_rate': 0.0005338741354740678, 'weight_decay': 0.006, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.2438,2.80672,0.367553,0.035847,0.07704,0.047719
2,2.4964,2.235304,0.44363,0.077333,0.101909,0.07558
3,2.0546,1.912395,0.520623,0.130348,0.14316,0.118249
4,1.7597,1.691826,0.571036,0.207393,0.185145,0.173607
5,1.5249,1.520833,0.611366,0.246783,0.22829,0.217238
6,1.3109,1.450078,0.624198,0.31584,0.256776,0.253738
7,1.1718,1.380707,0.64253,0.346876,0.269056,0.277273
8,1.0659,1.289476,0.676444,0.368792,0.313266,0.314114
9,0.942,1.261803,0.67461,0.363322,0.321188,0.323147
10,0.8426,1.225895,0.675527,0.398629,0.333485,0.337799


[I 2025-03-23 01:22:31,880] Trial 9 finished with value: 0.3657307671319295 and parameters: {'learning_rate': 0.0005338741354740678, 'weight_decay': 0.006, 'warmup_steps': 0}. Best is trial 4 with value: 0.6291900777925411.


Trial 10 with params: {'learning_rate': 0.004518165681587256, 'weight_decay': 0.002, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.628,1.937702,0.52154,0.16415,0.15968,0.139615
2,1.5797,1.485446,0.63978,0.293123,0.280818,0.266292
3,1.1103,1.175117,0.71494,0.408284,0.381851,0.376764
4,0.709,1.063089,0.732356,0.563085,0.502508,0.513595
5,0.4053,1.051732,0.757104,0.571929,0.53847,0.537024
6,0.2167,1.182582,0.759853,0.635771,0.545976,0.570879
7,0.1208,1.271838,0.765353,0.653231,0.575895,0.596053
8,0.0524,1.320067,0.765353,0.635156,0.595629,0.598236
9,0.0222,1.332461,0.770852,0.640759,0.619949,0.61684
10,0.0105,1.357728,0.774519,0.647728,0.616957,0.620172


[I 2025-03-23 01:23:21,377] Trial 10 pruned. 


Trial 11 with params: {'learning_rate': 0.0020056372842325635, 'weight_decay': 0.006, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7784,2.121227,0.48396,0.112368,0.134134,0.109883
2,1.8194,1.693797,0.576535,0.247478,0.220364,0.207036
3,1.3653,1.277166,0.676444,0.315846,0.314772,0.298195
4,0.9738,1.147899,0.695692,0.423045,0.37526,0.383754
5,0.6909,1.069588,0.742438,0.495612,0.455968,0.463455
6,0.5024,1.060098,0.732356,0.509427,0.462701,0.468073
7,0.3564,1.137995,0.739688,0.625807,0.543176,0.563637
8,0.2429,1.139362,0.756187,0.633301,0.592347,0.59963
9,0.1453,1.180696,0.758937,0.621172,0.568476,0.579335
10,0.0899,1.257576,0.76352,0.654048,0.581833,0.599829


[I 2025-03-23 01:24:42,128] Trial 11 finished with value: 0.5850829439701133 and parameters: {'learning_rate': 0.0020056372842325635, 'weight_decay': 0.006, 'warmup_steps': 0}. Best is trial 4 with value: 0.6291900777925411.


Trial 12 with params: {'learning_rate': 0.0033049565193748773, 'weight_decay': 0.007, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6645,1.978765,0.515124,0.138038,0.144164,0.123201
2,1.6644,1.47186,0.635197,0.291164,0.278486,0.256956
3,1.1689,1.182092,0.697525,0.383465,0.360926,0.352399
4,0.7668,1.082879,0.725023,0.522248,0.46496,0.48105
5,0.4826,1.037819,0.75802,0.584099,0.493866,0.517294
6,0.2887,1.23629,0.72594,0.609806,0.53956,0.546011
7,0.1811,1.315268,0.758937,0.667189,0.549251,0.57958
8,0.0961,1.22991,0.771769,0.680517,0.612056,0.629103
9,0.0374,1.295632,0.781852,0.693387,0.621068,0.641626
10,0.0214,1.368196,0.771769,0.640432,0.617821,0.614698


[I 2025-03-23 01:25:34,903] Trial 12 pruned. 


Trial 13 with params: {'learning_rate': 0.0018997871267974278, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.941,2.240448,0.44088,0.143663,0.111084,0.095151
2,1.8849,1.679041,0.585701,0.242899,0.20737,0.191973
3,1.3729,1.265077,0.678277,0.322734,0.318936,0.31236
4,0.9846,1.149336,0.708524,0.425601,0.387741,0.38738
5,0.727,1.102164,0.726856,0.45324,0.416212,0.419138
6,0.5244,1.057743,0.738772,0.518278,0.492483,0.491535
7,0.346,1.147568,0.748854,0.58167,0.490672,0.512581
8,0.2418,1.165115,0.75802,0.62198,0.548578,0.567308
9,0.1664,1.179453,0.765353,0.631051,0.599074,0.602703
10,0.0882,1.25788,0.762603,0.643975,0.585906,0.59947


[I 2025-03-23 01:27:10,719] Trial 13 finished with value: 0.6134015829864538 and parameters: {'learning_rate': 0.0018997871267974278, 'weight_decay': 0.005, 'warmup_steps': 2}. Best is trial 4 with value: 0.6291900777925411.


Trial 14 with params: {'learning_rate': 0.002120746655142563, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8836,2.169833,0.462878,0.117907,0.120308,0.103482
2,1.8322,1.692014,0.563703,0.211211,0.196278,0.184826
3,1.346,1.263258,0.68286,0.325167,0.316415,0.310501
4,0.9635,1.155743,0.709441,0.472045,0.410158,0.421488
5,0.6861,1.046193,0.735105,0.510261,0.463184,0.470541
6,0.4777,1.075778,0.733272,0.582586,0.50962,0.519763
7,0.3205,1.211308,0.730522,0.6306,0.518662,0.549672
8,0.2138,1.156549,0.758937,0.656248,0.606233,0.615399
9,0.1199,1.248233,0.756187,0.60976,0.578186,0.580304
10,0.067,1.333533,0.747938,0.612343,0.569226,0.577191


[I 2025-03-23 01:27:58,374] Trial 14 pruned. 


Trial 15 with params: {'learning_rate': 0.003827341260767903, 'weight_decay': 0.008, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7891,1.986694,0.497709,0.125269,0.140992,0.114439
2,1.6404,1.495384,0.63978,0.302819,0.272695,0.263505
3,1.1269,1.160166,0.709441,0.394493,0.38167,0.369011
4,0.7109,1.084721,0.71494,0.542801,0.47993,0.495203
5,0.4391,1.100238,0.754354,0.566174,0.532821,0.538748
6,0.2617,1.187088,0.754354,0.610026,0.535347,0.553065
7,0.1582,1.295575,0.76352,0.645618,0.598426,0.609735
8,0.0641,1.266158,0.786434,0.689696,0.640951,0.652643
9,0.0282,1.399201,0.772686,0.699328,0.633654,0.6526
10,0.0122,1.429423,0.780935,0.651711,0.61045,0.617063


[I 2025-03-23 01:29:19,424] Trial 15 finished with value: 0.6478988203836527 and parameters: {'learning_rate': 0.003827341260767903, 'weight_decay': 0.008, 'warmup_steps': 3}. Best is trial 15 with value: 0.6478988203836527.


Trial 16 with params: {'learning_rate': 0.0010018348952328356, 'weight_decay': 0.007, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.1376,2.528125,0.363886,0.055468,0.078186,0.052489
2,2.1977,1.927368,0.529789,0.120823,0.157913,0.130302
3,1.7237,1.580106,0.585701,0.219816,0.196518,0.187458
4,1.333,1.352407,0.666361,0.332255,0.296427,0.29752
5,1.0817,1.248701,0.681027,0.344574,0.319404,0.314693


[I 2025-03-23 01:29:56,505] Trial 16 pruned. 


Trial 17 with params: {'learning_rate': 0.003147329048348789, 'weight_decay': 0.01, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.735,1.999876,0.495875,0.123579,0.142107,0.116685
2,1.656,1.533447,0.625115,0.302063,0.272651,0.266761
3,1.1403,1.160465,0.710357,0.384731,0.368299,0.359682
4,0.7524,1.133075,0.725023,0.4909,0.452006,0.45784
5,0.4939,1.109587,0.746104,0.579343,0.529575,0.526511
6,0.3069,1.160133,0.754354,0.64514,0.564451,0.580077
7,0.1846,1.30765,0.746104,0.666587,0.578433,0.597083
8,0.0963,1.305044,0.756187,0.624521,0.575566,0.580069
9,0.0497,1.380402,0.767186,0.702506,0.607888,0.630744
10,0.0236,1.365768,0.776352,0.681588,0.61403,0.631074


[I 2025-03-23 01:31:51,367] Trial 17 finished with value: 0.6453735796997044 and parameters: {'learning_rate': 0.003147329048348789, 'weight_decay': 0.01, 'warmup_steps': 3}. Best is trial 15 with value: 0.6478988203836527.


Trial 18 with params: {'learning_rate': 0.004371089537104322, 'weight_decay': 0.01, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9253,2.043674,0.477544,0.128161,0.13235,0.11621
2,1.6694,1.592927,0.605866,0.282031,0.248466,0.236798
3,1.1373,1.194338,0.703941,0.422021,0.381998,0.377593
4,0.7271,1.11288,0.71494,0.459949,0.436096,0.435301
5,0.4447,1.080905,0.753437,0.553323,0.530753,0.53113
6,0.2614,1.208824,0.75802,0.6444,0.604236,0.603273
7,0.1363,1.258984,0.775435,0.663202,0.607498,0.620175
8,0.0611,1.365226,0.764436,0.596552,0.602287,0.59338
9,0.029,1.425444,0.772686,0.661436,0.613183,0.626234
10,0.0125,1.417349,0.774519,0.637953,0.617833,0.621054


[I 2025-03-23 01:33:20,459] Trial 18 finished with value: 0.6145352675565019 and parameters: {'learning_rate': 0.004371089537104322, 'weight_decay': 0.01, 'warmup_steps': 3}. Best is trial 15 with value: 0.6478988203836527.


Trial 19 with params: {'learning_rate': 0.0012105518888218083, 'weight_decay': 0.009000000000000001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0773,2.447135,0.396884,0.064707,0.087893,0.063616
2,2.0788,1.853438,0.530706,0.144893,0.159009,0.139413
3,1.6002,1.454136,0.638863,0.307336,0.253503,0.24966
4,1.2208,1.282231,0.68286,0.389034,0.326955,0.333835
5,0.9742,1.189082,0.694775,0.404687,0.352419,0.359708
6,0.7654,1.093331,0.715857,0.451461,0.411707,0.416776
7,0.5863,1.142385,0.729606,0.517961,0.417629,0.439309
8,0.4666,1.10424,0.734189,0.55694,0.486514,0.496579
9,0.3585,1.076761,0.744271,0.565408,0.520092,0.532202
10,0.2646,1.097246,0.747021,0.593917,0.56024,0.56552


[I 2025-03-23 01:34:31,062] Trial 19 pruned. 


Trial 20 with params: {'learning_rate': 0.004585143951463039, 'weight_decay': 0.006, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.701,1.947515,0.512374,0.176637,0.154976,0.141685
2,1.5425,1.527094,0.63428,0.314962,0.296224,0.290549
3,1.0178,1.115575,0.707608,0.424301,0.392084,0.385945
4,0.6274,1.067225,0.733272,0.487325,0.454461,0.457886
5,0.3669,1.152976,0.75527,0.582662,0.530074,0.543786
6,0.2077,1.134649,0.774519,0.667213,0.594121,0.617563
7,0.0902,1.370942,0.778185,0.703136,0.618636,0.642317
8,0.043,1.310582,0.7956,0.656544,0.638104,0.637502
9,0.019,1.355813,0.789184,0.666922,0.634225,0.641045
10,0.0076,1.391494,0.797434,0.667853,0.641179,0.647899


[I 2025-03-23 01:36:01,825] Trial 20 finished with value: 0.6540735509647655 and parameters: {'learning_rate': 0.004585143951463039, 'weight_decay': 0.006, 'warmup_steps': 4}. Best is trial 20 with value: 0.6540735509647655.


Trial 21 with params: {'learning_rate': 0.0043028002710254944, 'weight_decay': 0.006, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7025,1.991535,0.52154,0.155634,0.161717,0.140889
2,1.5832,1.401443,0.654445,0.313596,0.300216,0.290065
3,1.0456,1.10668,0.709441,0.449811,0.38658,0.390981
4,0.645,1.091736,0.727773,0.547783,0.480316,0.491462
5,0.3715,1.103942,0.750687,0.600338,0.545941,0.558385
6,0.226,1.20225,0.759853,0.589236,0.540458,0.545837
7,0.1178,1.253744,0.767186,0.645263,0.607881,0.612668
8,0.0398,1.316531,0.783685,0.646697,0.616006,0.622458
9,0.0144,1.45444,0.789184,0.683537,0.618049,0.633838
10,0.0093,1.47417,0.785518,0.638615,0.59834,0.608381


[I 2025-03-23 01:36:56,959] Trial 21 pruned. 


Trial 22 with params: {'learning_rate': 0.004528021245601566, 'weight_decay': 0.006, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9636,2.06843,0.483043,0.129795,0.13989,0.120239
2,1.711,1.598966,0.595784,0.269979,0.233859,0.220049
3,1.1562,1.16677,0.694775,0.380796,0.374065,0.364578
4,0.7315,1.06345,0.738772,0.481242,0.459765,0.454931
5,0.4454,1.07924,0.75527,0.562756,0.514049,0.524153
6,0.2379,1.144201,0.764436,0.605552,0.584027,0.582543
7,0.1193,1.282228,0.777269,0.654345,0.580477,0.5959
8,0.0491,1.314092,0.782768,0.661322,0.611773,0.626036
9,0.0231,1.359484,0.781852,0.666579,0.638103,0.640658
10,0.0123,1.477256,0.784601,0.66384,0.63337,0.636481


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-23 01:38:29,532] Trial 22 finished with value: 0.6505307080285718 and parameters: {'learning_rate': 0.004528021245601566, 'weight_decay': 0.006, 'warmup_steps': 3}. Best is trial 20 with value: 0.6540735509647655.


Trial 23 with params: {'learning_rate': 0.0010642591661547583, 'weight_decay': 0.004, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.1454,2.509841,0.371219,0.05524,0.080887,0.055018
2,2.1756,1.908394,0.532539,0.138082,0.163335,0.137614
3,1.6879,1.550147,0.589368,0.240944,0.199679,0.191357
4,1.2968,1.331839,0.663611,0.334593,0.298446,0.300915
5,1.0428,1.240895,0.68561,0.382466,0.335252,0.340197


[I 2025-03-23 01:39:30,123] Trial 23 pruned. 


Trial 24 with params: {'learning_rate': 0.0048251536813382805, 'weight_decay': 0.006, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0168,2.078284,0.47846,0.133072,0.130727,0.112583
2,1.7114,1.621498,0.598533,0.275697,0.247473,0.23742
3,1.1309,1.197766,0.701192,0.420945,0.392342,0.382533
4,0.7442,1.066761,0.738772,0.49399,0.461417,0.460145
5,0.4238,1.097489,0.75802,0.573435,0.51605,0.528172
6,0.2483,1.096004,0.777269,0.64506,0.586694,0.597823
7,0.1282,1.243166,0.782768,0.664302,0.618123,0.624981
8,0.0613,1.278128,0.778185,0.667925,0.629698,0.633843
9,0.0249,1.360536,0.790101,0.676846,0.624522,0.634601
10,0.0094,1.362826,0.788268,0.680566,0.636536,0.645774


[I 2025-03-23 01:40:46,256] Trial 24 finished with value: 0.6334277388872923 and parameters: {'learning_rate': 0.0048251536813382805, 'weight_decay': 0.006, 'warmup_steps': 3}. Best is trial 20 with value: 0.6540735509647655.


Trial 25 with params: {'learning_rate': 0.0032741765036247137, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0114,2.128055,0.473877,0.103315,0.12542,0.102187
2,1.7767,1.556224,0.610449,0.234969,0.253433,0.233123
3,1.209,1.183037,0.705775,0.388237,0.361084,0.357364
4,0.81,1.096441,0.72044,0.435748,0.423619,0.419246
5,0.5175,1.084919,0.749771,0.579028,0.529366,0.527222
6,0.3186,1.059881,0.769936,0.670771,0.554869,0.585904
7,0.1756,1.262017,0.751604,0.613609,0.567432,0.577368
8,0.1012,1.242476,0.773602,0.658846,0.598797,0.613342
9,0.0472,1.391134,0.769936,0.691437,0.624354,0.638558
10,0.022,1.354756,0.777269,0.669972,0.623518,0.636105


[I 2025-03-23 01:42:17,172] Trial 25 finished with value: 0.638053465681931 and parameters: {'learning_rate': 0.0032741765036247137, 'weight_decay': 0.003, 'warmup_steps': 4}. Best is trial 20 with value: 0.6540735509647655.


Trial 26 with params: {'learning_rate': 0.001012638988964328, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.1651,2.543808,0.363886,0.050283,0.078306,0.05207
2,2.2073,1.934129,0.528873,0.12049,0.155308,0.128037
3,1.7283,1.590766,0.581118,0.218295,0.192206,0.180597
4,1.3446,1.364724,0.662695,0.327576,0.292521,0.290346
5,1.0977,1.270903,0.681027,0.345402,0.327295,0.321028
6,0.8792,1.156175,0.697525,0.402123,0.373037,0.373577
7,0.7119,1.220062,0.705775,0.47413,0.365383,0.384589
8,0.6089,1.097622,0.714024,0.461558,0.424304,0.430826
9,0.4785,1.075564,0.728689,0.506531,0.440627,0.456801
10,0.3717,1.065189,0.736939,0.584996,0.513537,0.531587


[I 2025-03-23 01:43:14,526] Trial 26 pruned. 


Trial 27 with params: {'learning_rate': 0.0024667826624207444, 'weight_decay': 0.008, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8619,2.111487,0.467461,0.11653,0.122091,0.106198
2,1.7793,1.614885,0.583868,0.281708,0.232553,0.216962
3,1.2783,1.203838,0.712191,0.387942,0.347895,0.34326
4,0.8902,1.13644,0.712191,0.490012,0.404185,0.421122
5,0.6299,1.05364,0.736939,0.494622,0.433436,0.442827
6,0.4145,1.068266,0.75527,0.542488,0.501489,0.501992
7,0.2647,1.133075,0.758937,0.640936,0.56224,0.58358
8,0.1497,1.255325,0.752521,0.606409,0.562757,0.562978
9,0.0883,1.303131,0.76077,0.62359,0.585548,0.590231
10,0.0487,1.35467,0.76352,0.648755,0.624398,0.619014


[I 2025-03-23 01:44:34,803] Trial 27 finished with value: 0.6278901175037028 and parameters: {'learning_rate': 0.0024667826624207444, 'weight_decay': 0.008, 'warmup_steps': 4}. Best is trial 20 with value: 0.6540735509647655.


Trial 28 with params: {'learning_rate': 0.004683316894202572, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6588,1.980096,0.498625,0.188662,0.156051,0.139586
2,1.5447,1.446517,0.649863,0.329154,0.287505,0.28446
3,1.0383,1.145233,0.71769,0.407649,0.386719,0.38045
4,0.6491,1.01942,0.747938,0.52059,0.50002,0.499869
5,0.3544,1.028855,0.777269,0.666693,0.607302,0.618386
6,0.1981,1.058813,0.782768,0.647872,0.613627,0.611658
7,0.0933,1.157625,0.782768,0.67915,0.605596,0.625981
8,0.0348,1.21092,0.786434,0.702848,0.650652,0.664326
9,0.0156,1.313161,0.785518,0.698761,0.642042,0.658561
10,0.0073,1.284219,0.787351,0.702124,0.626062,0.647752


[I 2025-03-23 01:45:55,707] Trial 28 finished with value: 0.6665423472962837 and parameters: {'learning_rate': 0.004683316894202572, 'weight_decay': 0.005, 'warmup_steps': 2}. Best is trial 28 with value: 0.6665423472962837.


Trial 29 with params: {'learning_rate': 0.004355274973374928, 'weight_decay': 0.006, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6719,1.998808,0.489459,0.154386,0.14211,0.117545
2,1.585,1.450988,0.64253,0.313879,0.291182,0.282833
3,1.0593,1.115784,0.715857,0.414341,0.389809,0.379782
4,0.6455,1.104486,0.742438,0.576962,0.499096,0.516899
5,0.3622,0.998063,0.781852,0.615244,0.57227,0.578817
6,0.174,1.124966,0.780935,0.667808,0.614068,0.628677
7,0.0872,1.273518,0.773602,0.6688,0.617727,0.626736
8,0.0398,1.290582,0.784601,0.650624,0.635132,0.632159
9,0.02,1.315369,0.7956,0.674421,0.635099,0.645948
10,0.0082,1.34491,0.787351,0.677451,0.635381,0.644883


[I 2025-03-23 01:47:22,129] Trial 29 finished with value: 0.6464936018342238 and parameters: {'learning_rate': 0.004355274973374928, 'weight_decay': 0.006, 'warmup_steps': 2}. Best is trial 28 with value: 0.6665423472962837.


Trial 30 with params: {'learning_rate': 0.0040888419576858934, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9073,2.033406,0.487626,0.129652,0.13096,0.109153
2,1.6852,1.548096,0.615949,0.288127,0.26254,0.248449
3,1.1513,1.191972,0.706691,0.399328,0.372023,0.365831
4,0.7253,1.062433,0.730522,0.512939,0.449353,0.457607
5,0.4428,1.067894,0.759853,0.635921,0.559498,0.5804
6,0.254,1.163598,0.766269,0.638761,0.5716,0.588324
7,0.1408,1.225022,0.779102,0.688915,0.611925,0.636108
8,0.0501,1.303982,0.782768,0.702171,0.63014,0.64852
9,0.0224,1.345774,0.783685,0.681417,0.634703,0.648365
10,0.0082,1.428432,0.784601,0.689429,0.645228,0.653714


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-23 01:48:55,877] Trial 30 finished with value: 0.6723279188169891 and parameters: {'learning_rate': 0.0040888419576858934, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 31 with params: {'learning_rate': 0.00468721685751369, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9972,2.063825,0.480293,0.162645,0.136392,0.119378
2,1.722,1.595352,0.5967,0.254874,0.228838,0.217413
3,1.1584,1.24416,0.683776,0.370599,0.360631,0.346798
4,0.7474,1.055913,0.731439,0.501376,0.45624,0.465668
5,0.446,1.098045,0.75527,0.584382,0.511664,0.525159
6,0.2359,1.11275,0.777269,0.674236,0.608383,0.623085
7,0.1145,1.314471,0.768103,0.643245,0.5946,0.603259
8,0.0557,1.320682,0.786434,0.671304,0.63171,0.639326
9,0.0255,1.354211,0.785518,0.67462,0.645546,0.6485
10,0.0106,1.400159,0.79835,0.683605,0.658286,0.659742


[I 2025-03-23 01:50:28,832] Trial 31 finished with value: 0.6688024928060093 and parameters: {'learning_rate': 0.00468721685751369, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 32 with params: {'learning_rate': 0.004076116510209678, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9066,2.041764,0.482126,0.129773,0.129423,0.109022
2,1.6915,1.575439,0.603116,0.267056,0.254675,0.240448
3,1.1696,1.18993,0.705775,0.397996,0.368056,0.367247
4,0.7444,1.054375,0.736022,0.476161,0.436572,0.444444
5,0.4453,1.044683,0.759853,0.597246,0.555022,0.562662
6,0.2539,1.183359,0.765353,0.648915,0.564145,0.588663
7,0.1468,1.231238,0.770852,0.694629,0.602086,0.630737
8,0.07,1.324734,0.777269,0.674788,0.620521,0.629839
9,0.0309,1.357111,0.790101,0.692795,0.647729,0.658953
10,0.0109,1.434238,0.791017,0.684494,0.642035,0.651819


[I 2025-03-23 01:52:51,379] Trial 32 finished with value: 0.6527226134904351 and parameters: {'learning_rate': 0.004076116510209678, 'weight_decay': 0.0, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 33 with params: {'learning_rate': 0.004492836970557258, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9593,2.062161,0.477544,0.135449,0.133367,0.115966
2,1.7076,1.612216,0.593951,0.261363,0.228491,0.210693
3,1.1775,1.209788,0.696609,0.413269,0.356549,0.361649
4,0.7319,1.094292,0.726856,0.453302,0.429373,0.418794
5,0.4591,1.037516,0.750687,0.576204,0.510975,0.52374
6,0.2511,1.150627,0.756187,0.613052,0.55526,0.566298
7,0.1214,1.263016,0.778185,0.678432,0.607515,0.624718
8,0.0607,1.248489,0.785518,0.649599,0.618136,0.624503
9,0.024,1.288251,0.779102,0.628911,0.631455,0.62245
10,0.0078,1.399231,0.786434,0.66138,0.617949,0.625133


[I 2025-03-23 01:54:19,960] Trial 33 finished with value: 0.6249051508507685 and parameters: {'learning_rate': 0.004492836970557258, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 34 with params: {'learning_rate': 0.0011428994112913256, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0968,2.486721,0.389551,0.067821,0.085155,0.061612
2,2.1141,1.876397,0.52429,0.164249,0.152108,0.134105
3,1.6358,1.487853,0.624198,0.287222,0.233445,0.228037
4,1.2562,1.288703,0.676444,0.390459,0.324327,0.331512
5,1.0005,1.188953,0.688359,0.380777,0.334977,0.337568
6,0.7938,1.106563,0.71494,0.444297,0.396877,0.403144
7,0.6152,1.175291,0.716774,0.524877,0.395131,0.421768
8,0.5064,1.095673,0.728689,0.529658,0.482431,0.493187
9,0.3796,1.078669,0.739688,0.554856,0.503406,0.514939
10,0.2935,1.093896,0.736939,0.562927,0.532566,0.536757


[I 2025-03-23 01:55:22,852] Trial 34 pruned. 


Trial 35 with params: {'learning_rate': 0.00461681335830425, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6672,1.96727,0.491292,0.159766,0.149533,0.131029
2,1.5444,1.436006,0.651696,0.330899,0.294049,0.292684
3,1.0273,1.082295,0.72044,0.425704,0.409432,0.406441
4,0.6423,1.014655,0.743355,0.588591,0.504512,0.529939
5,0.3585,0.967799,0.772686,0.611016,0.539283,0.556668
6,0.1776,1.103973,0.769019,0.634787,0.558866,0.577013
7,0.0892,1.174425,0.787351,0.682842,0.627967,0.638845
8,0.0291,1.290632,0.790101,0.68255,0.612325,0.628348
9,0.0175,1.375011,0.781852,0.674528,0.62125,0.631511
10,0.0095,1.338118,0.786434,0.666175,0.605176,0.618982


[I 2025-03-23 01:56:15,952] Trial 35 pruned. 


Trial 36 with params: {'learning_rate': 5.370203809578854e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.8282,3.689899,0.176902,0.003538,0.02,0.006012
2,3.4495,3.239011,0.176902,0.003538,0.02,0.006012
3,3.1909,3.12141,0.176902,0.003538,0.02,0.006012
4,3.0828,3.022279,0.184235,0.010246,0.022133,0.009444
5,3.004,2.925405,0.313474,0.028787,0.056028,0.036775
6,2.8895,2.849166,0.338222,0.036597,0.064487,0.041363
7,2.8192,2.791162,0.341888,0.037469,0.065669,0.041584
8,2.7783,2.742011,0.349221,0.038248,0.068989,0.04354
9,2.7287,2.7011,0.351054,0.038141,0.069226,0.04445
10,2.6905,2.677357,0.355637,0.037375,0.072002,0.046187


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-23 01:57:53,699] Trial 36 pruned. 


Trial 37 with params: {'learning_rate': 0.002465671347664241, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8981,2.126776,0.472961,0.108331,0.122013,0.102504
2,1.7851,1.623264,0.582951,0.228103,0.224546,0.212215
3,1.2616,1.214933,0.700275,0.358748,0.352009,0.345993
4,0.8568,1.137636,0.72319,0.456765,0.425706,0.430848
5,0.6042,1.08617,0.732356,0.539799,0.47447,0.482408
6,0.4015,1.061618,0.757104,0.598895,0.508408,0.531214
7,0.2591,1.166198,0.76352,0.669326,0.56441,0.593558
8,0.1554,1.195816,0.764436,0.655993,0.590689,0.601393
9,0.0819,1.283059,0.770852,0.659941,0.591306,0.60713
10,0.0456,1.33962,0.758937,0.661303,0.615931,0.623401


[I 2025-03-23 01:59:11,005] Trial 37 finished with value: 0.6223109039233156 and parameters: {'learning_rate': 0.002465671347664241, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 38 with params: {'learning_rate': 0.004505139891995941, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9582,2.057709,0.483043,0.1255,0.137382,0.118036
2,1.697,1.60794,0.597617,0.237566,0.241192,0.223183
3,1.1617,1.197272,0.699358,0.394957,0.365463,0.357748
4,0.7326,1.083079,0.722273,0.439613,0.427362,0.422068
5,0.4493,1.1042,0.757104,0.5561,0.498808,0.511658
6,0.2525,1.140681,0.765353,0.606186,0.553913,0.566201
7,0.1291,1.281852,0.775435,0.67706,0.620887,0.631292
8,0.0573,1.281304,0.780018,0.709014,0.63659,0.654938
9,0.0263,1.31412,0.784601,0.637132,0.629489,0.628684
10,0.0105,1.44988,0.780935,0.637957,0.625163,0.620363


[I 2025-03-23 02:01:06,677] Trial 38 finished with value: 0.6297534428072563 and parameters: {'learning_rate': 0.004505139891995941, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 39 with params: {'learning_rate': 5.7801019639330395e-05, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.8291,3.678902,0.176902,0.003538,0.02,0.006012
2,3.425,3.220679,0.176902,0.003538,0.02,0.006012
3,3.1769,3.104718,0.176902,0.003538,0.02,0.006012
4,3.0548,3.004155,0.2044,0.027749,0.027602,0.016634
5,2.9697,2.895936,0.31439,0.041087,0.057119,0.040277
6,2.8564,2.815824,0.334555,0.036524,0.063427,0.04082
7,2.7806,2.756053,0.340055,0.037735,0.065208,0.042083
8,2.7344,2.695717,0.362053,0.038693,0.073136,0.047121
9,2.6808,2.651165,0.361137,0.038235,0.072698,0.04687
10,2.6396,2.625458,0.373052,0.038526,0.077128,0.049591


[I 2025-03-23 02:02:47,450] Trial 39 pruned. 


Trial 40 with params: {'learning_rate': 0.004520814819994214, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.695,1.958593,0.516957,0.184177,0.16033,0.144539
2,1.548,1.4641,0.63978,0.312916,0.304761,0.29398
3,1.0287,1.115453,0.711274,0.458848,0.403343,0.40589
4,0.6314,1.078567,0.726856,0.512279,0.475283,0.479324
5,0.3706,1.124143,0.752521,0.626814,0.56051,0.571444
6,0.2139,1.185401,0.769936,0.661859,0.601485,0.612829
7,0.0935,1.330286,0.772686,0.669933,0.603421,0.618499
8,0.0406,1.326399,0.782768,0.672859,0.615014,0.626395
9,0.0167,1.389295,0.788268,0.660124,0.62264,0.631169
10,0.0064,1.421942,0.7956,0.653976,0.629644,0.632957


[I 2025-03-23 02:04:48,203] Trial 40 finished with value: 0.6297273820333325 and parameters: {'learning_rate': 0.004520814819994214, 'weight_decay': 0.003, 'warmup_steps': 4}. Best is trial 30 with value: 0.6723279188169891.


Trial 41 with params: {'learning_rate': 0.002127257001320833, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9017,2.210017,0.44088,0.10771,0.117879,0.100444
2,1.8399,1.631362,0.581118,0.249266,0.216009,0.204653
3,1.3261,1.23678,0.697525,0.349848,0.329745,0.324674
4,0.9335,1.118327,0.724106,0.492651,0.419997,0.431466
5,0.6671,1.071015,0.732356,0.489818,0.418803,0.435645
6,0.4612,1.040747,0.746104,0.519806,0.46956,0.4742
7,0.3143,1.142946,0.748854,0.674781,0.512564,0.55081
8,0.1996,1.140164,0.761687,0.639234,0.594549,0.604297
9,0.122,1.21671,0.773602,0.650761,0.594575,0.611156
10,0.069,1.266609,0.769936,0.667441,0.618525,0.630834


[I 2025-03-23 02:06:21,514] Trial 41 finished with value: 0.617345728114091 and parameters: {'learning_rate': 0.002127257001320833, 'weight_decay': 0.0, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 42 with params: {'learning_rate': 0.003725199424453043, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7691,1.988835,0.496792,0.128802,0.138736,0.114112
2,1.634,1.503563,0.638863,0.343593,0.283495,0.279557
3,1.1097,1.163078,0.702108,0.392122,0.375169,0.3695
4,0.7187,1.106863,0.727773,0.555695,0.501482,0.51108
5,0.4464,1.124344,0.743355,0.59028,0.51461,0.53433
6,0.2559,1.190002,0.742438,0.659928,0.578065,0.593219
7,0.1544,1.320974,0.75527,0.723001,0.602832,0.639262
8,0.0737,1.312514,0.768103,0.647821,0.612301,0.613054
9,0.0402,1.414047,0.776352,0.671382,0.630925,0.637418
10,0.0157,1.486425,0.7956,0.681261,0.635256,0.644133


[I 2025-03-23 02:08:20,303] Trial 42 finished with value: 0.6461247914027405 and parameters: {'learning_rate': 0.003725199424453043, 'weight_decay': 0.0, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 43 with params: {'learning_rate': 0.004735378247823, 'weight_decay': 0.001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6596,1.940939,0.507791,0.140477,0.153642,0.129474
2,1.5478,1.368744,0.659945,0.328232,0.299787,0.293772
3,1.0184,1.09245,0.719523,0.41532,0.388839,0.384698
4,0.6399,1.027377,0.741522,0.543373,0.492968,0.504051
5,0.3633,1.049951,0.775435,0.630758,0.570254,0.583974
6,0.1927,1.13661,0.771769,0.625021,0.583161,0.59005
7,0.0895,1.171491,0.793767,0.723667,0.637546,0.660808
8,0.0382,1.250145,0.793767,0.686297,0.659279,0.662798
9,0.0149,1.34751,0.791017,0.702735,0.636241,0.652404
10,0.0062,1.370896,0.793767,0.720521,0.649581,0.670141


[I 2025-03-23 02:10:48,841] Trial 43 finished with value: 0.6669981963963173 and parameters: {'learning_rate': 0.004735378247823, 'weight_decay': 0.001, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 44 with params: {'learning_rate': 0.0041287864698773, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6982,1.995193,0.494042,0.123959,0.145036,0.116787
2,1.612,1.429893,0.649863,0.299979,0.292774,0.28246
3,1.0743,1.172697,0.702108,0.395452,0.379715,0.37049
4,0.6709,1.089502,0.718607,0.524974,0.466544,0.476066
5,0.3929,1.061937,0.762603,0.597601,0.550314,0.556658
6,0.2171,1.132477,0.774519,0.653717,0.573367,0.591056
7,0.0927,1.19565,0.780935,0.643318,0.594957,0.60595
8,0.038,1.287994,0.780935,0.675874,0.617824,0.630136
9,0.021,1.31613,0.777269,0.666881,0.638578,0.63541
10,0.0068,1.392471,0.785518,0.693692,0.620932,0.637786


[I 2025-03-23 02:12:14,938] Trial 44 finished with value: 0.6549546732927299 and parameters: {'learning_rate': 0.0041287864698773, 'weight_decay': 0.0, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 45 with params: {'learning_rate': 0.00446660348527456, 'weight_decay': 0.001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6603,1.96639,0.487626,0.159923,0.150328,0.130456
2,1.5617,1.406814,0.64528,0.306884,0.280497,0.277196
3,1.0278,1.086405,0.725023,0.385598,0.389875,0.376004
4,0.6317,1.050733,0.737855,0.530163,0.464481,0.475627
5,0.3488,1.018993,0.786434,0.667953,0.597997,0.61227
6,0.1906,1.100616,0.775435,0.693939,0.62617,0.642193
7,0.0957,1.111468,0.794684,0.673305,0.642504,0.645262
8,0.0347,1.194475,0.806599,0.695291,0.643494,0.656985
9,0.0125,1.223996,0.804766,0.696519,0.631405,0.652232
10,0.0045,1.273885,0.805683,0.695229,0.64657,0.662284


[I 2025-03-23 02:13:40,669] Trial 45 finished with value: 0.6530375045687403 and parameters: {'learning_rate': 0.00446660348527456, 'weight_decay': 0.001, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 46 with params: {'learning_rate': 0.0017943043315272575, 'weight_decay': 0.0, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9125,2.245341,0.434464,0.113277,0.111541,0.091741
2,1.9048,1.728238,0.558203,0.221164,0.187501,0.172216
3,1.4218,1.30848,0.67736,0.308112,0.311218,0.302694
4,1.0354,1.189606,0.702108,0.434896,0.383051,0.392271
5,0.7653,1.124864,0.716774,0.461042,0.416261,0.416342


[I 2025-03-23 02:14:36,922] Trial 46 pruned. 


Trial 47 with params: {'learning_rate': 0.003870398253918685, 'weight_decay': 0.0, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6559,1.955326,0.505041,0.152886,0.150745,0.127487
2,1.5944,1.402434,0.649863,0.321235,0.297371,0.295566
3,1.1105,1.167954,0.693859,0.393548,0.357083,0.357126
4,0.7173,1.076124,0.730522,0.534554,0.471412,0.489507
5,0.449,1.029903,0.75802,0.592195,0.541099,0.550364
6,0.2368,1.161707,0.76352,0.644422,0.567768,0.587129
7,0.1285,1.274384,0.76352,0.645116,0.586491,0.600255
8,0.0612,1.26892,0.782768,0.673157,0.617094,0.630906
9,0.028,1.319996,0.778185,0.675373,0.618557,0.636415
10,0.0102,1.400146,0.777269,0.671335,0.623893,0.6367


[I 2025-03-23 02:16:05,384] Trial 47 finished with value: 0.6426129086286977 and parameters: {'learning_rate': 0.003870398253918685, 'weight_decay': 0.0, 'warmup_steps': 1}. Best is trial 30 with value: 0.6723279188169891.


Trial 48 with params: {'learning_rate': 0.0036338398006852545, 'weight_decay': 0.001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7781,2.016442,0.493126,0.137924,0.142499,0.121141
2,1.6866,1.507972,0.629698,0.301536,0.267347,0.258189
3,1.1368,1.148604,0.714024,0.409239,0.377056,0.377532
4,0.7235,1.032201,0.734189,0.475451,0.43927,0.444994
5,0.4488,0.997917,0.764436,0.629908,0.552536,0.572091
6,0.2523,1.174713,0.75802,0.636206,0.572457,0.585248
7,0.1477,1.255699,0.778185,0.688339,0.612724,0.634927
8,0.0645,1.289321,0.774519,0.673107,0.631507,0.638407
9,0.0303,1.389055,0.769019,0.638692,0.592534,0.603821
10,0.0128,1.389272,0.792851,0.685847,0.622207,0.639463


[I 2025-03-23 02:17:39,740] Trial 48 finished with value: 0.6272988957822913 and parameters: {'learning_rate': 0.0036338398006852545, 'weight_decay': 0.001, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 49 with params: {'learning_rate': 0.0031552809391126838, 'weight_decay': 0.003, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7269,2.037171,0.504125,0.156624,0.148742,0.125751
2,1.6953,1.502462,0.63428,0.295915,0.28035,0.268674
3,1.1582,1.157804,0.707608,0.375069,0.369435,0.363365
4,0.7608,1.074475,0.729606,0.495083,0.440206,0.453256
5,0.4767,0.998433,0.764436,0.596545,0.542154,0.551096
6,0.2884,1.091753,0.757104,0.61607,0.524466,0.545833
7,0.1581,1.193661,0.76352,0.65745,0.567543,0.59734
8,0.079,1.225307,0.780935,0.6816,0.621152,0.631634
9,0.0401,1.341528,0.767186,0.687373,0.614294,0.635659
10,0.0199,1.311671,0.787351,0.675489,0.623227,0.637323


[I 2025-03-23 02:18:57,086] Trial 49 finished with value: 0.6486390684136694 and parameters: {'learning_rate': 0.0031552809391126838, 'weight_decay': 0.003, 'warmup_steps': 1}. Best is trial 30 with value: 0.6723279188169891.


Trial 50 with params: {'learning_rate': 0.0046905965855212695, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6598,1.996859,0.497709,0.173414,0.154264,0.13752
2,1.562,1.428065,0.648029,0.341811,0.288615,0.28667
3,1.0328,1.110324,0.71769,0.420195,0.382365,0.379099
4,0.64,1.084945,0.733272,0.526581,0.473175,0.478401
5,0.3608,1.048761,0.770852,0.643818,0.583895,0.601117
6,0.1929,1.07248,0.784601,0.692827,0.636701,0.646941
7,0.0942,1.248755,0.781852,0.681827,0.621279,0.63531
8,0.0338,1.273731,0.785518,0.677978,0.623698,0.637831
9,0.0144,1.370289,0.785518,0.698308,0.630375,0.648615
10,0.0078,1.378285,0.789184,0.702422,0.634995,0.650271


[I 2025-03-23 02:20:22,376] Trial 50 finished with value: 0.6337305568954257 and parameters: {'learning_rate': 0.0046905965855212695, 'weight_decay': 0.0, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 51 with params: {'learning_rate': 0.0002870061009341604, 'weight_decay': 0.007, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4439,3.057573,0.177819,0.010214,0.020238,0.006486
2,2.8226,2.609783,0.378552,0.040672,0.07806,0.049924
3,2.4538,2.294989,0.43813,0.068958,0.100209,0.07192
4,2.1833,2.087273,0.473877,0.09841,0.119006,0.094836
5,1.9969,1.924101,0.52154,0.128327,0.147122,0.126834
6,1.8133,1.834832,0.531622,0.147931,0.158293,0.139677
7,1.6963,1.758374,0.554537,0.220551,0.175326,0.163612
8,1.6111,1.695603,0.571036,0.212579,0.191362,0.178103
9,1.5197,1.632311,0.585701,0.215861,0.197085,0.185924
10,1.4336,1.594955,0.5967,0.241392,0.212831,0.202408


[I 2025-03-23 02:21:44,181] Trial 51 pruned. 


Trial 52 with params: {'learning_rate': 0.0013721470326533815, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0425,2.378846,0.404216,0.081451,0.092328,0.069638
2,2.0314,1.808285,0.549954,0.138563,0.175378,0.150207
3,1.5547,1.425946,0.644363,0.280409,0.268618,0.261013
4,1.1711,1.243744,0.681027,0.371308,0.333354,0.334682
5,0.91,1.147897,0.706691,0.442786,0.383559,0.394198


[I 2025-03-23 02:22:13,402] Trial 52 pruned. 


Trial 53 with params: {'learning_rate': 0.00021967416393079315, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4989,3.116122,0.177819,0.004484,0.020238,0.006496
2,2.9354,2.750208,0.347388,0.03352,0.066882,0.038462
3,2.6304,2.471385,0.40055,0.042365,0.084298,0.055086
4,2.3746,2.25508,0.445463,0.083312,0.102298,0.076737
5,2.1918,2.089261,0.491292,0.108616,0.125066,0.102389
6,2.0149,1.992653,0.507791,0.131151,0.138643,0.116302
7,1.905,1.908949,0.509624,0.140866,0.141206,0.120913
8,1.8274,1.844121,0.536205,0.153398,0.163011,0.145264
9,1.7412,1.791522,0.542621,0.1594,0.162917,0.145513
10,1.669,1.762146,0.550871,0.172304,0.176927,0.158843


[I 2025-03-23 02:22:59,947] Trial 53 pruned. 


Trial 54 with params: {'learning_rate': 0.004987233174423026, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6386,1.951096,0.498625,0.165981,0.161862,0.143816
2,1.5546,1.455616,0.637947,0.317503,0.295076,0.287166
3,1.0338,1.084478,0.727773,0.435569,0.40772,0.407862
4,0.6171,1.033472,0.730522,0.5429,0.490504,0.502739
5,0.3442,1.043206,0.769936,0.647,0.575679,0.591434
6,0.18,1.168154,0.771769,0.67111,0.629888,0.639334
7,0.0791,1.230803,0.784601,0.682818,0.608785,0.627832
8,0.0279,1.299273,0.789184,0.689701,0.628612,0.639509
9,0.0148,1.312115,0.800183,0.700976,0.645061,0.660337
10,0.0051,1.340322,0.7956,0.686502,0.635254,0.645112


[I 2025-03-23 02:24:21,526] Trial 54 finished with value: 0.6605266693637499 and parameters: {'learning_rate': 0.004987233174423026, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 55 with params: {'learning_rate': 0.0002606336830980987, 'weight_decay': 0.0, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4261,3.067121,0.177819,0.010214,0.020238,0.006486
2,2.8482,2.656523,0.366636,0.039821,0.075413,0.048017
3,2.5037,2.341613,0.43538,0.068774,0.098046,0.069163
4,2.2352,2.135421,0.468378,0.101736,0.116552,0.091848
5,2.0568,1.96978,0.512374,0.128545,0.140113,0.120266
6,1.8804,1.885546,0.519707,0.14477,0.149138,0.131753
7,1.767,1.799338,0.538955,0.175814,0.165226,0.150504
8,1.6849,1.74035,0.566453,0.198091,0.185201,0.167921
9,1.5959,1.689866,0.560037,0.184807,0.178927,0.165544
10,1.5195,1.646569,0.585701,0.202942,0.200594,0.185349


[I 2025-03-23 02:25:22,431] Trial 55 pruned. 


Trial 56 with params: {'learning_rate': 0.004187887892053515, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6864,1.979205,0.491292,0.137261,0.145933,0.123484
2,1.5888,1.383083,0.653529,0.324078,0.29646,0.291253
3,1.0518,1.117047,0.714024,0.394338,0.383107,0.370613
4,0.6576,1.108509,0.718607,0.52231,0.458728,0.476573
5,0.3855,0.991548,0.772686,0.621846,0.564987,0.578741
6,0.2035,1.101528,0.789184,0.709428,0.635642,0.655554
7,0.1002,1.152587,0.791934,0.686637,0.639408,0.649475
8,0.0528,1.181083,0.796517,0.682238,0.643626,0.650726
9,0.0265,1.22394,0.806599,0.684415,0.655952,0.658326
10,0.01,1.293208,0.802933,0.679898,0.65793,0.656618


[I 2025-03-23 02:27:09,345] Trial 56 finished with value: 0.6636089429621215 and parameters: {'learning_rate': 0.004187887892053515, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 57 with params: {'learning_rate': 0.0034963509323455103, 'weight_decay': 0.005, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6688,1.957542,0.509624,0.152372,0.152147,0.129455
2,1.6154,1.445551,0.640697,0.284138,0.277197,0.271216
3,1.1166,1.156605,0.709441,0.406329,0.377754,0.378244
4,0.705,1.109069,0.72044,0.481454,0.457444,0.460716
5,0.4486,1.029072,0.767186,0.633176,0.571352,0.581352
6,0.2598,1.072742,0.775435,0.687506,0.613702,0.631004
7,0.1324,1.251402,0.766269,0.650161,0.61833,0.617272
8,0.0592,1.267582,0.772686,0.654704,0.620701,0.626249
9,0.0269,1.353259,0.776352,0.661859,0.635469,0.635287
10,0.0152,1.406418,0.774519,0.659027,0.638317,0.634606


[I 2025-03-23 02:29:01,332] Trial 57 finished with value: 0.6499409999755676 and parameters: {'learning_rate': 0.0034963509323455103, 'weight_decay': 0.005, 'warmup_steps': 1}. Best is trial 30 with value: 0.6723279188169891.


Trial 58 with params: {'learning_rate': 0.004776402872404119, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6622,1.986141,0.498625,0.168493,0.149783,0.131922
2,1.5541,1.408256,0.658112,0.347804,0.299666,0.295486
3,1.043,1.114201,0.713107,0.407242,0.373195,0.374984
4,0.6337,1.071135,0.734189,0.520401,0.4875,0.493479
5,0.3649,1.004752,0.773602,0.639102,0.581421,0.586662
6,0.1783,1.141543,0.783685,0.656816,0.605808,0.617041
7,0.0822,1.219268,0.781852,0.668345,0.643957,0.644306
8,0.0351,1.270857,0.782768,0.6499,0.633902,0.630707
9,0.013,1.30632,0.79835,0.661221,0.628161,0.633414
10,0.0051,1.345793,0.79835,0.670619,0.632395,0.637568


[I 2025-03-23 02:31:08,107] Trial 58 finished with value: 0.6512098894882439 and parameters: {'learning_rate': 0.004776402872404119, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 59 with params: {'learning_rate': 0.003865245121774692, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7076,2.016073,0.499542,0.179518,0.160213,0.143975
2,1.641,1.488263,0.63428,0.308744,0.276517,0.27045
3,1.1041,1.122172,0.716774,0.403953,0.384933,0.383301
4,0.7026,1.064999,0.71769,0.514908,0.468387,0.478968
5,0.4172,1.029704,0.762603,0.656184,0.56984,0.594757
6,0.2294,1.146178,0.748854,0.626241,0.548593,0.566806
7,0.136,1.198055,0.775435,0.683664,0.628873,0.636966
8,0.0569,1.229115,0.782768,0.658891,0.625004,0.630709
9,0.023,1.338395,0.785518,0.695943,0.626757,0.642775
10,0.0137,1.327377,0.788268,0.651236,0.630371,0.62721


[I 2025-03-23 02:31:59,326] Trial 59 pruned. 


Trial 60 with params: {'learning_rate': 0.004752730714823491, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6567,1.978806,0.500458,0.16708,0.155485,0.139059
2,1.5528,1.401227,0.650779,0.326879,0.291584,0.293346
3,1.0168,1.089797,0.713107,0.403403,0.374761,0.368921
4,0.6169,1.064664,0.736939,0.518216,0.470445,0.47435
5,0.3671,1.044379,0.770852,0.632152,0.600931,0.594296
6,0.1937,1.105686,0.777269,0.656864,0.571806,0.594768
7,0.086,1.161185,0.784601,0.667288,0.627587,0.637031
8,0.0373,1.236789,0.788268,0.633429,0.640582,0.61958
9,0.0206,1.223457,0.790101,0.67855,0.640876,0.647354
10,0.008,1.286133,0.797434,0.664645,0.64221,0.641533


[I 2025-03-23 02:34:09,304] Trial 60 finished with value: 0.6438810479218762 and parameters: {'learning_rate': 0.004752730714823491, 'weight_decay': 0.005, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 61 with params: {'learning_rate': 0.002817950612180228, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7819,2.046065,0.482126,0.122187,0.13366,0.110829
2,1.7272,1.535204,0.617782,0.296009,0.26216,0.249647
3,1.2052,1.19237,0.695692,0.379746,0.345002,0.338712
4,0.8148,1.112436,0.713107,0.484775,0.440136,0.448804
5,0.5617,1.005949,0.747021,0.501472,0.465441,0.468584


[I 2025-03-23 02:34:37,037] Trial 61 pruned. 


Trial 62 with params: {'learning_rate': 0.004869405842182705, 'weight_decay': 0.004, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.035,2.091225,0.464711,0.117535,0.119608,0.099767
2,1.7213,1.607498,0.578368,0.234299,0.227023,0.211847
3,1.1794,1.252675,0.686526,0.378573,0.348066,0.342011
4,0.7444,1.040866,0.722273,0.498861,0.433647,0.446189
5,0.4227,1.128925,0.749771,0.569835,0.519937,0.527743
6,0.2404,1.153764,0.762603,0.611598,0.59446,0.595611
7,0.1149,1.200796,0.782768,0.657337,0.600489,0.612402
8,0.0526,1.347915,0.777269,0.649638,0.609548,0.612533
9,0.0215,1.336959,0.792851,0.66568,0.640119,0.641553
10,0.009,1.393135,0.79835,0.670419,0.64492,0.644065


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-23 02:36:04,056] Trial 62 finished with value: 0.6352241388124682 and parameters: {'learning_rate': 0.004869405842182705, 'weight_decay': 0.004, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 63 with params: {'learning_rate': 0.0027515528857548614, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7805,2.067455,0.479377,0.112178,0.132931,0.108951
2,1.7312,1.514711,0.617782,0.284236,0.252117,0.241312
3,1.2088,1.194577,0.695692,0.385304,0.348286,0.347972
4,0.8188,1.103879,0.731439,0.525606,0.464181,0.477287
5,0.558,1.004074,0.741522,0.517397,0.455892,0.467619


[I 2025-03-23 02:36:50,453] Trial 63 pruned. 


Trial 64 with params: {'learning_rate': 0.0014740970021661379, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9895,2.290215,0.428048,0.1164,0.103404,0.084809
2,1.9721,1.751066,0.567369,0.222582,0.190856,0.172268
3,1.4938,1.358027,0.667278,0.292922,0.291121,0.28072
4,1.1064,1.219088,0.689276,0.355881,0.348212,0.345761
5,0.8541,1.123299,0.708524,0.397147,0.377311,0.373967
6,0.6353,1.070172,0.730522,0.497808,0.450244,0.456629
7,0.474,1.115169,0.737855,0.52742,0.432898,0.45793
8,0.3636,1.086499,0.741522,0.59331,0.500331,0.525739
9,0.2504,1.122947,0.738772,0.597566,0.503391,0.530926
10,0.1765,1.136469,0.746104,0.602103,0.542988,0.559726


[I 2025-03-23 02:37:49,208] Trial 64 pruned. 


Trial 65 with params: {'learning_rate': 0.00010546468583372021, 'weight_decay': 0.008, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.7202,3.317762,0.176902,0.003538,0.02,0.006012
2,3.185,3.073245,0.176902,0.003538,0.02,0.006012
3,2.9847,2.868214,0.351971,0.040305,0.067104,0.039467
4,2.7888,2.713127,0.355637,0.037936,0.070117,0.045917
5,2.6796,2.583916,0.387718,0.040281,0.08062,0.052802


[I 2025-03-23 02:38:14,409] Trial 65 pruned. 


Trial 66 with params: {'learning_rate': 0.004884619421439062, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6269,1.972976,0.502291,0.164852,0.161238,0.144456
2,1.5326,1.407533,0.647113,0.323714,0.292775,0.286425
3,1.0212,1.098572,0.71494,0.439101,0.389876,0.390942
4,0.6254,1.096394,0.722273,0.502682,0.468899,0.472405
5,0.3465,1.036532,0.76077,0.65184,0.603787,0.61232
6,0.1757,1.170573,0.778185,0.670093,0.594437,0.61306
7,0.0824,1.200339,0.787351,0.668537,0.63518,0.641846
8,0.0288,1.293859,0.793767,0.67032,0.638304,0.643451
9,0.0136,1.339936,0.791017,0.66357,0.613805,0.630108
10,0.0065,1.333754,0.791934,0.697066,0.655791,0.665843


[I 2025-03-23 02:39:36,925] Trial 66 finished with value: 0.6702259797328415 and parameters: {'learning_rate': 0.004884619421439062, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 67 with params: {'learning_rate': 0.004450164455804819, 'weight_decay': 0.003, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.79,2.058413,0.487626,0.130816,0.145603,0.115439
2,1.6489,1.488849,0.636114,0.284188,0.28241,0.26875
3,1.124,1.167506,0.702108,0.37361,0.368188,0.361449
4,0.6982,1.072802,0.726856,0.570599,0.492317,0.504946
5,0.3855,0.993948,0.772686,0.577583,0.558349,0.555094
6,0.2081,1.112692,0.771769,0.682292,0.612558,0.629643
7,0.101,1.232297,0.775435,0.699423,0.625182,0.637472
8,0.0489,1.308935,0.777269,0.66475,0.62621,0.634133
9,0.0162,1.354013,0.783685,0.673198,0.65367,0.655929
10,0.0089,1.336663,0.791934,0.660192,0.644887,0.64628


[I 2025-03-23 02:40:53,047] Trial 67 finished with value: 0.6540394632795389 and parameters: {'learning_rate': 0.004450164455804819, 'weight_decay': 0.003, 'warmup_steps': 1}. Best is trial 30 with value: 0.6723279188169891.


Trial 68 with params: {'learning_rate': 0.004362441479794168, 'weight_decay': 0.008, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7776,2.049977,0.482126,0.130678,0.145523,0.114309
2,1.6512,1.481792,0.629698,0.277744,0.26945,0.254981
3,1.1282,1.139807,0.714024,0.428203,0.384823,0.386148
4,0.6982,1.076343,0.726856,0.545593,0.489359,0.493526
5,0.4039,1.030893,0.768103,0.594524,0.557606,0.560849
6,0.2323,1.1136,0.769936,0.637411,0.595203,0.603423
7,0.1054,1.211614,0.769936,0.659131,0.618179,0.623231
8,0.0505,1.252689,0.789184,0.691495,0.63643,0.647191
9,0.021,1.366402,0.781852,0.686105,0.646223,0.650933
10,0.0103,1.347159,0.783685,0.688201,0.648931,0.655757


[I 2025-03-23 02:42:19,839] Trial 68 finished with value: 0.6590020059104725 and parameters: {'learning_rate': 0.004362441479794168, 'weight_decay': 0.008, 'warmup_steps': 1}. Best is trial 30 with value: 0.6723279188169891.


Trial 69 with params: {'learning_rate': 0.004272133999065009, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9008,2.027493,0.476627,0.131107,0.135034,0.1128
2,1.6498,1.544336,0.609533,0.271271,0.250151,0.239495
3,1.114,1.190381,0.707608,0.420066,0.385337,0.380778
4,0.7313,1.08233,0.722273,0.508239,0.498787,0.492052
5,0.416,1.098851,0.749771,0.610151,0.543095,0.551668
6,0.2395,1.187747,0.759853,0.605069,0.576884,0.58034
7,0.1386,1.272457,0.768103,0.643946,0.604534,0.610516
8,0.0532,1.285283,0.783685,0.669368,0.627493,0.629335
9,0.0203,1.345894,0.789184,0.657224,0.639706,0.638069
10,0.0082,1.487379,0.779102,0.671674,0.630441,0.635381


[I 2025-03-23 02:43:38,283] Trial 69 finished with value: 0.6371775888252208 and parameters: {'learning_rate': 0.004272133999065009, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 70 with params: {'learning_rate': 0.004879704460605826, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6291,1.940285,0.499542,0.191241,0.156252,0.138307
2,1.5432,1.46068,0.626948,0.312619,0.280593,0.274447
3,1.0068,1.069805,0.71769,0.44166,0.40157,0.404359
4,0.6177,1.040304,0.741522,0.583484,0.505702,0.528542
5,0.3296,1.061734,0.778185,0.693949,0.612746,0.634956
6,0.1801,1.162632,0.784601,0.679821,0.616421,0.62605
7,0.087,1.173136,0.786434,0.69237,0.633634,0.64507
8,0.0395,1.193274,0.781852,0.679607,0.652737,0.654088
9,0.0151,1.239158,0.791934,0.696138,0.648934,0.662164
10,0.0053,1.335278,0.799267,0.712375,0.638796,0.65997


[I 2025-03-23 02:44:57,941] Trial 70 finished with value: 0.6653198663168294 and parameters: {'learning_rate': 0.004879704460605826, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 71 with params: {'learning_rate': 0.0041898933117276915, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6794,1.992691,0.501375,0.133714,0.1467,0.122245
2,1.5886,1.423843,0.654445,0.31725,0.301962,0.297126
3,1.0474,1.11382,0.714024,0.403715,0.387257,0.382308
4,0.6485,1.109836,0.727773,0.531713,0.471351,0.48643
5,0.4045,0.975371,0.768103,0.624411,0.568663,0.579238
6,0.2039,1.060843,0.775435,0.68206,0.606518,0.622586
7,0.1021,1.193641,0.783685,0.688278,0.628262,0.641424
8,0.0437,1.233831,0.784601,0.660867,0.619694,0.621722
9,0.0247,1.244351,0.799267,0.693498,0.641827,0.650906
10,0.0142,1.254628,0.7956,0.701645,0.659004,0.665367


[I 2025-03-23 02:46:37,235] Trial 71 finished with value: 0.6701347246594435 and parameters: {'learning_rate': 0.0041898933117276915, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 72 with params: {'learning_rate': 0.00010295616529943657, 'weight_decay': 0.005, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.6903,3.290987,0.176902,0.003538,0.02,0.006012
2,3.1738,3.066376,0.176902,0.003538,0.02,0.006012
3,2.9803,2.865809,0.347388,0.046403,0.065881,0.038137
4,2.7904,2.717256,0.351971,0.037805,0.069018,0.04487
5,2.6876,2.594076,0.382218,0.039509,0.078898,0.051703
6,2.5493,2.497619,0.395967,0.040197,0.083588,0.053477
7,2.4597,2.421141,0.401467,0.046732,0.085414,0.057432
8,2.4019,2.363096,0.418882,0.046037,0.091029,0.060633
9,2.3411,2.30881,0.430797,0.074765,0.095644,0.067959
10,2.2868,2.281895,0.44363,0.083733,0.101984,0.074434


[I 2025-03-23 02:47:40,562] Trial 72 pruned. 


Trial 73 with params: {'learning_rate': 0.0004201995563692489, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.3986,2.965266,0.206233,0.041184,0.031214,0.019621
2,2.6709,2.404317,0.418882,0.060697,0.090596,0.061312
3,2.2219,2.060008,0.487626,0.129603,0.122689,0.097958
4,1.9377,1.849208,0.530706,0.150506,0.157145,0.138351
5,1.7201,1.691928,0.572869,0.18902,0.18731,0.16748


[I 2025-03-23 02:48:12,596] Trial 73 pruned. 


Trial 74 with params: {'learning_rate': 0.0017324456980040768, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9717,2.223288,0.445463,0.119216,0.112082,0.095209
2,1.9003,1.708871,0.568286,0.240495,0.194047,0.178145
3,1.4259,1.305007,0.67461,0.308249,0.306502,0.296211
4,1.0296,1.164455,0.706691,0.423299,0.376918,0.386144
5,0.7739,1.080822,0.734189,0.483016,0.43332,0.438596
6,0.5588,1.090506,0.736939,0.560874,0.494011,0.505674
7,0.3913,1.188113,0.734189,0.523038,0.450583,0.468609
8,0.2875,1.054752,0.757104,0.609054,0.544584,0.563847
9,0.1806,1.132674,0.758937,0.61798,0.566117,0.574548
10,0.1158,1.181016,0.762603,0.650322,0.593801,0.610641


[I 2025-03-23 02:49:07,821] Trial 74 pruned. 


Trial 75 with params: {'learning_rate': 0.004289301508523269, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6739,2.005672,0.486709,0.132438,0.140965,0.115589
2,1.589,1.422132,0.644363,0.305847,0.284455,0.283013
3,1.0309,1.145684,0.703025,0.406967,0.375584,0.367344
4,0.6397,1.09435,0.741522,0.561606,0.470653,0.49167
5,0.3714,0.974172,0.777269,0.660618,0.573574,0.598416
6,0.1816,1.093147,0.780935,0.716859,0.636736,0.66125
7,0.0883,1.213324,0.777269,0.664109,0.62294,0.630049
8,0.0406,1.237131,0.787351,0.653172,0.62439,0.627442
9,0.0193,1.34112,0.783685,0.655867,0.621507,0.625915
10,0.0099,1.358003,0.781852,0.664485,0.630662,0.637761


[I 2025-03-23 02:51:08,507] Trial 75 finished with value: 0.6470779719919213 and parameters: {'learning_rate': 0.004289301508523269, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 76 with params: {'learning_rate': 0.0046271654246595436, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6708,1.983385,0.480293,0.15173,0.144466,0.1245
2,1.5641,1.410505,0.647113,0.319215,0.288304,0.284808
3,1.0466,1.095777,0.719523,0.376718,0.385126,0.374056
4,0.6448,1.024484,0.741522,0.567673,0.48638,0.506716
5,0.3696,1.003228,0.786434,0.670122,0.606226,0.621081
6,0.1855,1.117868,0.772686,0.658602,0.579354,0.60212
7,0.0881,1.208538,0.772686,0.658509,0.601889,0.617931
8,0.0411,1.195904,0.790101,0.668539,0.619108,0.633264
9,0.0207,1.288023,0.788268,0.673567,0.616857,0.630107
10,0.0071,1.298846,0.8011,0.718919,0.644755,0.664051


[I 2025-03-23 02:52:41,409] Trial 76 finished with value: 0.6433238124058654 and parameters: {'learning_rate': 0.0046271654246595436, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 77 with params: {'learning_rate': 0.003452841359230546, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.735,2.009073,0.498625,0.113406,0.135685,0.110116
2,1.6347,1.479266,0.630614,0.32222,0.283237,0.277566
3,1.1176,1.196996,0.691109,0.394022,0.36903,0.367238
4,0.7156,1.083606,0.722273,0.531921,0.459766,0.475282
5,0.4451,1.042316,0.753437,0.566341,0.536239,0.540649
6,0.2961,1.111043,0.764436,0.631469,0.568974,0.583185
7,0.1566,1.246459,0.752521,0.631742,0.555143,0.577945
8,0.0694,1.243699,0.784601,0.69798,0.645407,0.655305
9,0.0345,1.346188,0.780018,0.665194,0.612185,0.625171
10,0.0196,1.378076,0.783685,0.68972,0.610666,0.630485


[I 2025-03-23 02:53:40,849] Trial 77 pruned. 


Trial 78 with params: {'learning_rate': 0.0002764210110674979, 'weight_decay': 0.005, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4729,3.078544,0.177819,0.004421,0.020238,0.006499
2,2.8522,2.646222,0.359303,0.038916,0.072652,0.045757
3,2.4949,2.330588,0.43813,0.070026,0.099285,0.070479
4,2.2224,2.1218,0.466544,0.092958,0.115329,0.091474
5,2.0348,1.953102,0.515124,0.132019,0.141953,0.121022


[I 2025-03-23 02:54:23,108] Trial 78 pruned. 


Trial 79 with params: {'learning_rate': 0.004296089380810639, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6706,1.990388,0.487626,0.135354,0.141884,0.117916
2,1.5884,1.446038,0.638863,0.316002,0.28226,0.281728
3,1.0446,1.164548,0.698442,0.405847,0.362615,0.355443
4,0.6423,1.07852,0.731439,0.520539,0.440203,0.460533
5,0.3767,0.991074,0.774519,0.639807,0.560395,0.578926
6,0.1891,1.10285,0.785518,0.729056,0.630777,0.655599
7,0.096,1.222767,0.780935,0.662204,0.61747,0.626398
8,0.0422,1.283081,0.783685,0.658841,0.610347,0.62531
9,0.0147,1.337248,0.790101,0.687197,0.629627,0.644457
10,0.0077,1.392189,0.792851,0.649698,0.620847,0.621274


[I 2025-03-23 02:55:25,219] Trial 79 pruned. 


Trial 80 with params: {'learning_rate': 0.0048914303091457416, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0349,2.096109,0.473877,0.122654,0.129004,0.109038
2,1.7199,1.644118,0.573786,0.253998,0.229674,0.216646
3,1.1527,1.221786,0.699358,0.379453,0.372112,0.364424
4,0.7397,1.086751,0.719523,0.481383,0.445845,0.448995
5,0.4349,1.12361,0.747021,0.571615,0.520418,0.530785
6,0.2482,1.100503,0.768103,0.651785,0.596456,0.606756
7,0.1168,1.236797,0.785518,0.66731,0.615404,0.630046
8,0.0506,1.292797,0.776352,0.671835,0.622296,0.633073
9,0.0184,1.352227,0.781852,0.678895,0.623186,0.636332
10,0.0122,1.329839,0.791017,0.680263,0.64431,0.646168


[I 2025-03-23 02:56:58,262] Trial 80 finished with value: 0.6466377777972254 and parameters: {'learning_rate': 0.0048914303091457416, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 81 with params: {'learning_rate': 0.004630590162355753, 'weight_decay': 0.006, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.668,1.968215,0.487626,0.155723,0.144327,0.123647
2,1.5471,1.449408,0.655362,0.333137,0.294005,0.290933
3,1.0274,1.085484,0.724106,0.398631,0.402099,0.394264
4,0.6402,0.987434,0.747021,0.563463,0.488292,0.504396
5,0.3536,1.020406,0.773602,0.633151,0.546012,0.567329
6,0.1845,1.117556,0.778185,0.66128,0.586979,0.610457
7,0.0763,1.168402,0.794684,0.697365,0.620412,0.644625
8,0.0373,1.228241,0.788268,0.674222,0.649376,0.647622
9,0.0173,1.255582,0.791934,0.695638,0.637974,0.655865
10,0.0061,1.309937,0.796517,0.667106,0.650408,0.645863


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-23 02:59:30,910] Trial 81 finished with value: 0.6456509668372793 and parameters: {'learning_rate': 0.004630590162355753, 'weight_decay': 0.006, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 82 with params: {'learning_rate': 0.0020126202465930447, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9276,2.23489,0.43813,0.102149,0.111229,0.092999
2,1.8705,1.693887,0.574702,0.252878,0.204036,0.192088
3,1.3578,1.254278,0.689276,0.310938,0.324821,0.311806
4,0.9704,1.15094,0.707608,0.4574,0.416625,0.418004
5,0.713,1.073502,0.735105,0.478721,0.448313,0.449278


[I 2025-03-23 03:00:00,236] Trial 82 pruned. 


Trial 83 with params: {'learning_rate': 0.004475730478437034, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6662,1.979605,0.495875,0.155443,0.153078,0.132992
2,1.5771,1.394206,0.657195,0.306814,0.296138,0.293363
3,1.0438,1.145051,0.707608,0.392209,0.374656,0.363435
4,0.657,1.062521,0.741522,0.57005,0.466685,0.493579
5,0.359,1.032245,0.769019,0.626717,0.555144,0.571659
6,0.1868,1.138433,0.766269,0.656107,0.593869,0.605966
7,0.0937,1.253119,0.781852,0.670655,0.594174,0.615452
8,0.0344,1.247915,0.794684,0.66201,0.626635,0.635757
9,0.0187,1.287917,0.7956,0.658275,0.621181,0.627028
10,0.0068,1.326159,0.791934,0.675383,0.628224,0.641081


[I 2025-03-23 03:01:33,055] Trial 83 finished with value: 0.6369747699444835 and parameters: {'learning_rate': 0.004475730478437034, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 84 with params: {'learning_rate': 0.004724945460563563, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6597,1.994895,0.488543,0.161689,0.149378,0.133496
2,1.5582,1.455638,0.647113,0.334817,0.289348,0.289394
3,1.0406,1.099913,0.72044,0.431199,0.384349,0.382232
4,0.6321,1.031605,0.745188,0.53376,0.475389,0.485159
5,0.3599,0.978795,0.776352,0.664958,0.565018,0.584361
6,0.1816,1.117993,0.779102,0.69173,0.62129,0.640222
7,0.0742,1.201068,0.789184,0.666299,0.625333,0.633971
8,0.0333,1.247295,0.786434,0.686637,0.652988,0.656256
9,0.0132,1.342225,0.791017,0.682373,0.611574,0.629293
10,0.005,1.334311,0.793767,0.661681,0.620807,0.626306


[I 2025-03-23 03:02:29,529] Trial 84 pruned. 


Trial 85 with params: {'learning_rate': 0.004606344568321607, 'weight_decay': 0.005, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8173,2.053146,0.476627,0.102952,0.131573,0.100878
2,1.6472,1.521491,0.636114,0.294724,0.28205,0.270596
3,1.1061,1.119565,0.713107,0.404721,0.377489,0.376046
4,0.6609,1.056723,0.733272,0.540483,0.484253,0.500516
5,0.378,1.044172,0.768103,0.624299,0.59422,0.595672
6,0.2258,1.166308,0.764436,0.656994,0.595952,0.606199
7,0.101,1.270217,0.769019,0.677484,0.600471,0.620503
8,0.0409,1.239639,0.788268,0.680065,0.647915,0.653973
9,0.0194,1.350002,0.782768,0.674103,0.621704,0.636567
10,0.0089,1.399512,0.796517,0.714928,0.645617,0.665354


[I 2025-03-23 03:03:47,550] Trial 85 finished with value: 0.6359641734764317 and parameters: {'learning_rate': 0.004606344568321607, 'weight_decay': 0.005, 'warmup_steps': 1}. Best is trial 30 with value: 0.6723279188169891.


Trial 86 with params: {'learning_rate': 0.0002597113179487162, 'weight_decay': 0.01, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4516,3.075764,0.177819,0.006064,0.020238,0.006485
2,2.8573,2.662776,0.356554,0.037588,0.072193,0.04511
3,2.5137,2.351033,0.434464,0.068791,0.098388,0.069199
4,2.2435,2.143136,0.464711,0.091631,0.114726,0.090704
5,2.0643,1.977663,0.517874,0.13329,0.142328,0.121669


[I 2025-03-23 03:04:13,051] Trial 86 pruned. 


Trial 87 with params: {'learning_rate': 0.0010815277193077942, 'weight_decay': 0.004, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.092,2.517226,0.374885,0.061841,0.079774,0.054857
2,2.1463,1.907572,0.522456,0.144318,0.150326,0.128328
3,1.6827,1.537027,0.613199,0.281073,0.224272,0.218062
4,1.3028,1.313454,0.669111,0.341467,0.302665,0.305034
5,1.0414,1.220447,0.68561,0.3791,0.334123,0.334407
6,0.8269,1.121444,0.71494,0.446064,0.399084,0.405415
7,0.6512,1.16973,0.71769,0.494105,0.389588,0.410899
8,0.5422,1.106986,0.729606,0.519657,0.474596,0.475508
9,0.4248,1.100467,0.725023,0.502211,0.443213,0.460426
10,0.3274,1.092257,0.745188,0.612671,0.544205,0.563461


[I 2025-03-23 03:05:01,008] Trial 87 pruned. 


Trial 88 with params: {'learning_rate': 0.0042540090665307935, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6765,1.99452,0.492209,0.138882,0.143051,0.119172
2,1.5898,1.469247,0.640697,0.320448,0.287148,0.283897
3,1.0447,1.149147,0.706691,0.379428,0.371697,0.362717
4,0.6354,1.115386,0.729606,0.534037,0.450049,0.467077
5,0.3652,1.023278,0.772686,0.614154,0.569933,0.573761
6,0.1997,1.097895,0.769936,0.670102,0.610892,0.621782
7,0.0888,1.172027,0.784601,0.669081,0.617243,0.627228
8,0.0365,1.216305,0.799267,0.691993,0.629053,0.645744
9,0.021,1.348025,0.781852,0.694369,0.61519,0.636518
10,0.0131,1.32735,0.794684,0.701393,0.639166,0.658271


[I 2025-03-23 03:06:27,217] Trial 88 finished with value: 0.6383694979672208 and parameters: {'learning_rate': 0.0042540090665307935, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 89 with params: {'learning_rate': 0.004575210805401306, 'weight_decay': 0.002, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8109,2.048514,0.48396,0.125101,0.135836,0.104208
2,1.645,1.531965,0.628781,0.28058,0.269254,0.259203
3,1.1185,1.125891,0.719523,0.407928,0.376095,0.372402
4,0.6851,1.063956,0.72319,0.547262,0.472327,0.488701
5,0.375,1.123922,0.748854,0.57209,0.536533,0.542377
6,0.227,1.177336,0.776352,0.703909,0.602241,0.625719
7,0.1124,1.244593,0.765353,0.645348,0.61522,0.612614
8,0.0441,1.270097,0.778185,0.720931,0.64235,0.662691
9,0.0167,1.382194,0.780935,0.699797,0.642712,0.657121
10,0.0073,1.402958,0.780018,0.673836,0.638587,0.643682


[I 2025-03-23 03:08:23,750] Trial 89 finished with value: 0.6587602011850044 and parameters: {'learning_rate': 0.004575210805401306, 'weight_decay': 0.002, 'warmup_steps': 1}. Best is trial 30 with value: 0.6723279188169891.


Trial 90 with params: {'learning_rate': 0.004708962310783337, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6606,1.969577,0.502291,0.154003,0.151713,0.128279
2,1.5546,1.42457,0.644363,0.309599,0.287971,0.280678
3,1.0343,1.110637,0.713107,0.430828,0.382708,0.382098
4,0.6419,1.059591,0.725023,0.509182,0.459209,0.472253
5,0.3604,1.042175,0.774519,0.599751,0.555915,0.560827
6,0.1813,1.151298,0.764436,0.65115,0.591979,0.603943
7,0.0893,1.182131,0.780935,0.670424,0.614657,0.627363
8,0.0407,1.213715,0.793767,0.697545,0.659806,0.667604
9,0.0179,1.317644,0.786434,0.684833,0.631493,0.646318
10,0.0078,1.328225,0.79835,0.707465,0.65544,0.665618


[I 2025-03-23 03:10:07,818] Trial 90 finished with value: 0.6591318042792377 and parameters: {'learning_rate': 0.004708962310783337, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 91 with params: {'learning_rate': 0.0033441170260056843, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8302,2.037647,0.484876,0.110726,0.129524,0.107395
2,1.7068,1.574121,0.609533,0.274554,0.251274,0.242183
3,1.1926,1.217626,0.688359,0.386621,0.364976,0.35171
4,0.7573,1.173707,0.709441,0.479474,0.414443,0.424147
5,0.5028,1.064957,0.749771,0.52352,0.501079,0.501212
6,0.3053,1.130926,0.756187,0.630119,0.558722,0.566432
7,0.1639,1.249356,0.764436,0.65585,0.570845,0.589645
8,0.0921,1.275594,0.786434,0.670253,0.619836,0.632785
9,0.0447,1.32759,0.785518,0.691133,0.614916,0.632998
10,0.0217,1.365926,0.786434,0.647207,0.637661,0.634641


[I 2025-03-23 03:11:37,716] Trial 91 finished with value: 0.6422791412368763 and parameters: {'learning_rate': 0.0033441170260056843, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 92 with params: {'learning_rate': 0.0022190601125336882, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8509,2.138674,0.465628,0.107529,0.120736,0.100569
2,1.8138,1.657054,0.56462,0.203791,0.194762,0.183254
3,1.3169,1.262822,0.687443,0.338947,0.324967,0.316549
4,0.9367,1.139012,0.708524,0.432164,0.398443,0.401771
5,0.6601,1.062425,0.730522,0.460507,0.432221,0.432395


[I 2025-03-23 03:12:05,178] Trial 92 pruned. 


Trial 93 with params: {'learning_rate': 0.004073029843013879, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6825,1.946876,0.509624,0.159071,0.158187,0.140254
2,1.5744,1.444296,0.648029,0.328886,0.294512,0.28839
3,1.0357,1.118382,0.703941,0.429953,0.392816,0.392761
4,0.6538,1.031175,0.740605,0.546906,0.4887,0.502378
5,0.3782,1.044499,0.76077,0.592162,0.549926,0.558533
6,0.2166,1.155283,0.766269,0.641144,0.566601,0.582992
7,0.1156,1.25596,0.762603,0.668853,0.581476,0.604723
8,0.0509,1.300426,0.774519,0.656859,0.598124,0.603714
9,0.0269,1.328091,0.779102,0.699115,0.625608,0.647521
10,0.0097,1.384688,0.779102,0.667541,0.624464,0.632718


[I 2025-03-23 03:13:06,645] Trial 93 pruned. 


Trial 94 with params: {'learning_rate': 0.0046198901795009685, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6679,1.973485,0.492209,0.157398,0.150595,0.131722
2,1.5563,1.41986,0.649863,0.327236,0.301294,0.29776
3,1.041,1.108801,0.71494,0.390125,0.381163,0.3724
4,0.6582,0.971493,0.756187,0.577225,0.523289,0.532995
5,0.3519,0.994538,0.775435,0.644972,0.573826,0.586947
6,0.1804,1.085283,0.777269,0.660991,0.584449,0.60826
7,0.0861,1.189692,0.787351,0.694024,0.617542,0.641227
8,0.0326,1.230079,0.794684,0.686933,0.656093,0.659837
9,0.0137,1.298393,0.788268,0.658118,0.615775,0.623472
10,0.0065,1.314457,0.799267,0.683945,0.631807,0.646226


[I 2025-03-23 03:15:19,175] Trial 94 finished with value: 0.6450264178260816 and parameters: {'learning_rate': 0.0046198901795009685, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 95 with params: {'learning_rate': 0.004681499253111555, 'weight_decay': 0.004, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9965,2.067224,0.47846,0.143347,0.137116,0.119056
2,1.7171,1.609458,0.594867,0.287067,0.234445,0.22711
3,1.1508,1.231292,0.696609,0.38669,0.374485,0.361761
4,0.7429,1.086475,0.730522,0.480566,0.448458,0.450542
5,0.4454,1.13654,0.752521,0.536034,0.493644,0.496879
6,0.2527,1.163442,0.758937,0.605069,0.561401,0.563597
7,0.1214,1.301226,0.774519,0.706963,0.609062,0.631076
8,0.0563,1.282649,0.788268,0.678515,0.643357,0.641104
9,0.0266,1.376833,0.783685,0.696401,0.633818,0.647735
10,0.0083,1.484961,0.781852,0.68721,0.622865,0.632443


[I 2025-03-23 03:16:13,958] Trial 95 pruned. 


Trial 96 with params: {'learning_rate': 0.004871001854951032, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6315,1.984371,0.483043,0.160946,0.152192,0.136316
2,1.5523,1.44235,0.643446,0.32036,0.294873,0.287119
3,1.0405,1.114978,0.708524,0.417302,0.379179,0.380221
4,0.6409,1.026151,0.752521,0.610434,0.516731,0.540814
5,0.3469,1.063095,0.773602,0.670874,0.593454,0.614087
6,0.1831,1.128313,0.783685,0.685019,0.616676,0.62815
7,0.085,1.21237,0.777269,0.672628,0.602952,0.613286
8,0.0323,1.248846,0.796517,0.732084,0.666171,0.681213
9,0.0171,1.307372,0.7956,0.752663,0.660362,0.690036
10,0.0077,1.333688,0.791017,0.741314,0.652102,0.677216


[I 2025-03-23 03:17:57,875] Trial 96 finished with value: 0.6684937176304008 and parameters: {'learning_rate': 0.004871001854951032, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 97 with params: {'learning_rate': 0.004828883118747524, 'weight_decay': 0.001, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0186,2.075706,0.474794,0.138423,0.128811,0.111357
2,1.7181,1.670563,0.577452,0.248923,0.229799,0.216054
3,1.1544,1.1797,0.703025,0.410275,0.373049,0.370554
4,0.7309,1.018876,0.745188,0.518553,0.483259,0.482241
5,0.4158,1.07214,0.766269,0.633309,0.539891,0.552562
6,0.2491,1.124627,0.775435,0.659153,0.596493,0.606936
7,0.12,1.307375,0.766269,0.68087,0.587496,0.612915
8,0.0639,1.304361,0.792851,0.687387,0.639912,0.644631
9,0.0312,1.357526,0.785518,0.671961,0.642186,0.641793
10,0.0137,1.420224,0.793767,0.690367,0.643796,0.65496


[I 2025-03-23 03:19:18,525] Trial 97 finished with value: 0.6589246115723892 and parameters: {'learning_rate': 0.004828883118747524, 'weight_decay': 0.001, 'warmup_steps': 3}. Best is trial 30 with value: 0.6723279188169891.


Trial 98 with params: {'learning_rate': 0.002321613102597942, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7976,2.103308,0.472044,0.121206,0.127492,0.109084
2,1.7599,1.603878,0.600367,0.265425,0.237544,0.223569
3,1.2622,1.252366,0.684693,0.344289,0.339861,0.332309
4,0.8803,1.145967,0.719523,0.46033,0.417091,0.425206
5,0.6149,1.04554,0.732356,0.479243,0.448331,0.448196
6,0.4114,1.129903,0.742438,0.625286,0.503392,0.534735
7,0.2752,1.187035,0.750687,0.666545,0.532951,0.564932
8,0.1696,1.188669,0.759853,0.652191,0.586817,0.597544
9,0.0976,1.277239,0.751604,0.605731,0.58712,0.579215
10,0.0569,1.326821,0.750687,0.615209,0.599913,0.589876


[I 2025-03-23 03:20:11,157] Trial 98 pruned. 


Trial 99 with params: {'learning_rate': 0.002202341383102902, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8935,2.162152,0.455545,0.111075,0.117833,0.099225
2,1.8205,1.642958,0.576535,0.221756,0.21386,0.198528
3,1.3206,1.23156,0.699358,0.362779,0.334166,0.327422
4,0.9243,1.128149,0.714024,0.44689,0.410801,0.41361
5,0.6691,1.090869,0.734189,0.481742,0.425519,0.435096


[I 2025-03-23 03:20:36,580] Trial 99 pruned. 


Trial 100 with params: {'learning_rate': 0.0047845258320262, 'weight_decay': 0.006, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6612,1.993937,0.497709,0.171594,0.152791,0.137659
2,1.5597,1.403961,0.654445,0.305984,0.287604,0.281693
3,1.0277,1.089032,0.722273,0.462017,0.392882,0.401252
4,0.6146,1.070858,0.731439,0.494986,0.463759,0.464854
5,0.3449,1.050959,0.767186,0.678101,0.598889,0.612254
6,0.1867,1.147418,0.776352,0.659713,0.608211,0.616931
7,0.0872,1.28765,0.776352,0.701249,0.619443,0.632622
8,0.0324,1.292389,0.788268,0.652995,0.621902,0.621618
9,0.0131,1.375557,0.790101,0.647715,0.627804,0.622013
10,0.0045,1.405002,0.789184,0.657479,0.636314,0.632016


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-23 03:22:07,157] Trial 100 pruned. 


Trial 101 with params: {'learning_rate': 0.004589661597450042, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6696,1.957969,0.493126,0.140814,0.14534,0.123741
2,1.5648,1.419976,0.649863,0.330851,0.290533,0.28609
3,1.0452,1.070612,0.728689,0.409346,0.40916,0.396842
4,0.6492,1.001615,0.741522,0.578694,0.500884,0.519231
5,0.3619,0.99543,0.780018,0.624347,0.553624,0.568871
6,0.1821,1.173821,0.756187,0.631248,0.55263,0.576992
7,0.1006,1.173099,0.788268,0.68039,0.623127,0.640285
8,0.0353,1.226328,0.789184,0.677863,0.622484,0.637537
9,0.0154,1.31566,0.786434,0.71052,0.635075,0.659424
10,0.0098,1.307229,0.791934,0.707218,0.648151,0.666103


[I 2025-03-23 03:23:40,857] Trial 101 finished with value: 0.6629400656438049 and parameters: {'learning_rate': 0.004589661597450042, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 102 with params: {'learning_rate': 0.0020057768657499404, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9349,2.234029,0.439963,0.102843,0.110868,0.093887
2,1.8686,1.693125,0.569203,0.231104,0.200185,0.185477
3,1.354,1.252986,0.687443,0.311242,0.323701,0.311355
4,0.97,1.138112,0.716774,0.44213,0.409177,0.408596
5,0.7048,1.088605,0.724106,0.488742,0.430186,0.436922
6,0.4992,1.025097,0.748854,0.548813,0.503063,0.51416
7,0.3391,1.158778,0.747938,0.599909,0.486886,0.513633
8,0.2155,1.156492,0.769936,0.668702,0.577128,0.600734
9,0.1405,1.20961,0.76077,0.618034,0.577803,0.582945
10,0.085,1.278352,0.759853,0.657136,0.591532,0.601967


[I 2025-03-23 03:24:35,387] Trial 102 pruned. 


Trial 103 with params: {'learning_rate': 0.004415585116255329, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6588,1.979839,0.48121,0.1599,0.149781,0.130386
2,1.5707,1.456957,0.643446,0.306034,0.286218,0.280757
3,1.0379,1.104773,0.715857,0.38927,0.387348,0.374569
4,0.6477,1.0233,0.736939,0.55896,0.467998,0.489933
5,0.3686,1.005449,0.785518,0.67647,0.609542,0.622256
6,0.1875,1.11578,0.768103,0.642912,0.580308,0.594835
7,0.0887,1.303716,0.769019,0.708229,0.619413,0.642912
8,0.0382,1.206212,0.796517,0.693684,0.645848,0.657358
9,0.0172,1.287907,0.79835,0.719788,0.648875,0.668466
10,0.0081,1.304643,0.802933,0.711917,0.641415,0.66048


[I 2025-03-23 03:26:01,794] Trial 103 finished with value: 0.6670964927589896 and parameters: {'learning_rate': 0.004415585116255329, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 104 with params: {'learning_rate': 6.119956273045214e-05, 'weight_decay': 0.006, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.8195,3.649573,0.176902,0.003538,0.02,0.006012
2,3.3898,3.199963,0.176902,0.003538,0.02,0.006012
3,3.1618,3.086523,0.176902,0.003538,0.02,0.006012
4,3.0267,2.964419,0.257562,0.029278,0.041563,0.028278
5,2.9354,2.861752,0.336389,0.039209,0.063492,0.043551
6,2.8233,2.779683,0.341888,0.036806,0.06638,0.04192
7,2.7485,2.716612,0.347388,0.037514,0.067856,0.043558
8,2.7029,2.6638,0.368469,0.038949,0.075087,0.048497
9,2.6494,2.619133,0.36572,0.037767,0.074368,0.047917
10,2.6068,2.59438,0.380385,0.038737,0.079263,0.050688


[I 2025-03-23 03:27:00,808] Trial 104 pruned. 


Trial 105 with params: {'learning_rate': 0.00361662766744382, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7745,1.969829,0.506874,0.126657,0.144339,0.119749
2,1.6618,1.532996,0.612282,0.285237,0.253878,0.242823
3,1.1344,1.181395,0.703025,0.390548,0.362092,0.358661
4,0.7337,1.086604,0.713107,0.480874,0.426624,0.437548
5,0.4641,1.028291,0.76077,0.618245,0.520333,0.547277
6,0.2632,1.158605,0.753437,0.637261,0.55347,0.573384
7,0.1503,1.178997,0.778185,0.662234,0.594372,0.616343
8,0.0716,1.258432,0.772686,0.665193,0.618018,0.627726
9,0.0348,1.356258,0.776352,0.65656,0.604675,0.617518
10,0.0155,1.354993,0.782768,0.678994,0.62478,0.639904


[I 2025-03-23 03:28:30,076] Trial 105 finished with value: 0.6242937719960998 and parameters: {'learning_rate': 0.00361662766744382, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 106 with params: {'learning_rate': 0.004125843089537382, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6975,1.98614,0.497709,0.127464,0.147141,0.120988
2,1.6127,1.430178,0.63703,0.288831,0.281599,0.273639
3,1.0634,1.150968,0.708524,0.390412,0.395363,0.378861
4,0.6745,1.104706,0.719523,0.551753,0.464941,0.483478
5,0.3907,1.009244,0.769936,0.635686,0.593075,0.587676
6,0.2145,1.079452,0.773602,0.648893,0.618009,0.616097
7,0.0938,1.242317,0.780935,0.673804,0.634481,0.634819
8,0.0388,1.321159,0.780935,0.67054,0.631268,0.635054
9,0.0157,1.371756,0.793767,0.687507,0.639464,0.649509
10,0.0098,1.394321,0.789184,0.677852,0.637974,0.645603


[I 2025-03-23 03:30:26,219] Trial 106 finished with value: 0.6589288348940222 and parameters: {'learning_rate': 0.004125843089537382, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 30 with value: 0.6723279188169891.


Trial 107 with params: {'learning_rate': 0.004992427138088802, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6353,1.957124,0.498625,0.162272,0.162806,0.142045
2,1.5511,1.41749,0.649863,0.343531,0.305223,0.300277
3,1.0322,1.090683,0.72044,0.425087,0.392309,0.390832
4,0.6114,1.074153,0.732356,0.521373,0.47191,0.475991
5,0.3428,1.027195,0.778185,0.659025,0.61774,0.623272
6,0.1727,1.041001,0.7956,0.680184,0.634208,0.642767
7,0.0886,1.133098,0.788268,0.688676,0.641684,0.652811
8,0.0395,1.157158,0.793767,0.700668,0.649741,0.661755
9,0.0118,1.233063,0.796517,0.705028,0.653151,0.665079
10,0.0061,1.280753,0.79835,0.711077,0.664166,0.673104


[I 2025-03-23 03:31:54,916] Trial 107 finished with value: 0.6793781854063073 and parameters: {'learning_rate': 0.004992427138088802, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 108 with params: {'learning_rate': 0.004877338863567352, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6269,1.979434,0.494042,0.180002,0.158354,0.14286
2,1.5432,1.463764,0.627864,0.312793,0.287028,0.281479
3,1.0137,1.11518,0.71494,0.411964,0.391172,0.387592
4,0.6213,1.04445,0.737855,0.601755,0.496462,0.526269
5,0.338,1.011086,0.783685,0.666759,0.614108,0.623678
6,0.1777,1.109206,0.775435,0.652513,0.621291,0.623086
7,0.083,1.167086,0.777269,0.667474,0.600678,0.618293
8,0.0281,1.168094,0.797434,0.686559,0.643764,0.65206
9,0.0139,1.291996,0.796517,0.689067,0.636162,0.651095
10,0.0063,1.314505,0.794684,0.68899,0.62382,0.639835


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-23 03:33:14,102] Trial 108 pruned. 


Trial 109 with params: {'learning_rate': 0.004811634538847792, 'weight_decay': 0.002, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8498,2.03318,0.484876,0.13963,0.139597,0.112744
2,1.6432,1.56194,0.621448,0.297354,0.267018,0.249366
3,1.129,1.154108,0.705775,0.391822,0.377976,0.368454
4,0.6746,1.069875,0.731439,0.550902,0.455899,0.474421
5,0.391,0.99811,0.777269,0.633025,0.609966,0.607788
6,0.1926,1.180065,0.770852,0.679428,0.589938,0.612275
7,0.0974,1.225914,0.772686,0.700997,0.611585,0.633677
8,0.0419,1.263015,0.786434,0.731439,0.640671,0.666964
9,0.0169,1.31697,0.788268,0.725028,0.637315,0.667485
10,0.0073,1.331851,0.7956,0.717236,0.653405,0.673938


[I 2025-03-23 03:34:36,487] Trial 109 finished with value: 0.6574879550930912 and parameters: {'learning_rate': 0.004811634538847792, 'weight_decay': 0.002, 'warmup_steps': 1}. Best is trial 107 with value: 0.6793781854063073.


Trial 110 with params: {'learning_rate': 0.001796838554610854, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9348,2.243644,0.43538,0.100844,0.109737,0.089178
2,1.8984,1.694014,0.578368,0.220412,0.202254,0.187319
3,1.3932,1.272174,0.686526,0.337522,0.326017,0.318411
4,0.997,1.156869,0.709441,0.43848,0.391026,0.397411
5,0.7463,1.110161,0.732356,0.462377,0.423801,0.426987
6,0.5383,1.055953,0.744271,0.515719,0.462848,0.469571
7,0.3794,1.176895,0.741522,0.620128,0.484335,0.524628
8,0.2627,1.167468,0.751604,0.606993,0.551665,0.560848
9,0.1816,1.178797,0.768103,0.631991,0.585685,0.592585
10,0.1097,1.230485,0.75802,0.619389,0.589632,0.586915


[I 2025-03-23 03:35:29,131] Trial 110 pruned. 


Trial 111 with params: {'learning_rate': 0.003923462828799726, 'weight_decay': 0.004, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7928,1.974817,0.505041,0.130287,0.146891,0.121704
2,1.6287,1.562753,0.625115,0.285305,0.270995,0.256479
3,1.1313,1.147871,0.704858,0.396048,0.388888,0.37982
4,0.7175,1.050208,0.738772,0.550798,0.481445,0.491223
5,0.4274,1.140155,0.749771,0.578011,0.543353,0.54799
6,0.2411,1.202152,0.759853,0.64812,0.569343,0.589674
7,0.1301,1.300097,0.770852,0.701276,0.629339,0.650249
8,0.065,1.355111,0.76077,0.612613,0.612793,0.603582
9,0.033,1.406653,0.769019,0.644111,0.616232,0.619103
10,0.0163,1.449917,0.770852,0.65667,0.615816,0.621336


[I 2025-03-23 03:36:22,858] Trial 111 pruned. 


Trial 112 with params: {'learning_rate': 0.000915201931634133, 'weight_decay': 0.009000000000000001, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0959,2.56086,0.362053,0.061358,0.076397,0.04927
2,2.2103,1.975141,0.505041,0.107534,0.136173,0.111727
3,1.7609,1.618994,0.598533,0.213567,0.204189,0.191372
4,1.392,1.380577,0.663611,0.34177,0.287011,0.291654
5,1.129,1.284979,0.672777,0.333801,0.311757,0.306626
6,0.9202,1.181573,0.690192,0.394824,0.354114,0.356629
7,0.7546,1.226068,0.689276,0.420774,0.346078,0.358011
8,0.6466,1.115015,0.719523,0.476029,0.418186,0.427434
9,0.5123,1.112983,0.71494,0.482701,0.421408,0.439346
10,0.4137,1.110896,0.72319,0.510922,0.464307,0.47341


[I 2025-03-23 03:37:09,972] Trial 112 pruned. 


Trial 113 with params: {'learning_rate': 0.004997473557524831, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.642,1.953847,0.497709,0.166198,0.168239,0.14951
2,1.5585,1.381941,0.651696,0.330067,0.303066,0.294227
3,1.0211,1.109353,0.709441,0.413517,0.389963,0.386203
4,0.6195,1.048846,0.741522,0.555919,0.501513,0.506988
5,0.334,1.117657,0.76077,0.631581,0.569086,0.580448
6,0.1769,1.16937,0.764436,0.626744,0.585605,0.592933
7,0.0862,1.258109,0.777269,0.679537,0.629102,0.638232
8,0.0342,1.304621,0.780935,0.671307,0.618089,0.630329
9,0.0137,1.329612,0.790101,0.67885,0.629483,0.642206
10,0.0057,1.387213,0.786434,0.675596,0.638305,0.64431


[I 2025-03-23 03:38:25,729] Trial 113 finished with value: 0.644141580132737 and parameters: {'learning_rate': 0.004997473557524831, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 114 with params: {'learning_rate': 0.00463598685389814, 'weight_decay': 0.004, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8228,2.041676,0.48121,0.116426,0.132875,0.103549
2,1.6432,1.524018,0.635197,0.294574,0.281023,0.26958
3,1.1191,1.140435,0.709441,0.398313,0.3809,0.37091
4,0.683,1.069124,0.72044,0.492216,0.441047,0.449128
5,0.38,1.041394,0.76352,0.615311,0.577333,0.580172
6,0.2193,1.130529,0.778185,0.683467,0.617972,0.629486
7,0.1062,1.259611,0.780935,0.663179,0.628122,0.633076
8,0.0526,1.320321,0.780935,0.694394,0.622832,0.639312
9,0.0239,1.309892,0.778185,0.695378,0.635776,0.647968
10,0.0125,1.402937,0.786434,0.676313,0.639457,0.643501


[I 2025-03-23 03:39:39,539] Trial 114 finished with value: 0.6441193409118603 and parameters: {'learning_rate': 0.00463598685389814, 'weight_decay': 0.004, 'warmup_steps': 1}. Best is trial 107 with value: 0.6793781854063073.


Trial 115 with params: {'learning_rate': 0.002546358917730456, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7838,2.092345,0.47571,0.115721,0.128943,0.103874
2,1.7566,1.600016,0.612282,0.278067,0.250891,0.235183
3,1.259,1.227005,0.693859,0.369771,0.346931,0.339729
4,0.8651,1.130439,0.72319,0.481355,0.42948,0.441428
5,0.5857,1.030409,0.741522,0.522167,0.493499,0.493111


[I 2025-03-23 03:40:03,382] Trial 115 pruned. 


Trial 116 with params: {'learning_rate': 0.004849071175931925, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6309,1.958965,0.496792,0.17834,0.158594,0.142221
2,1.5294,1.393119,0.659945,0.331729,0.301951,0.300471
3,1.0169,1.111793,0.724106,0.393163,0.391909,0.382617
4,0.6341,1.022821,0.744271,0.578076,0.509005,0.528705
5,0.3526,1.030196,0.767186,0.640339,0.621208,0.618285
6,0.187,1.095833,0.779102,0.673512,0.624693,0.638443
7,0.0882,1.183154,0.787351,0.682247,0.632291,0.640854
8,0.0557,1.206492,0.783685,0.679149,0.644364,0.648894
9,0.0216,1.251438,0.783685,0.659101,0.630289,0.634096
10,0.0076,1.290836,0.791017,0.698467,0.652196,0.662249


[I 2025-03-23 03:42:10,846] Trial 116 finished with value: 0.6684474118915744 and parameters: {'learning_rate': 0.004849071175931925, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 117 with params: {'learning_rate': 0.0023816688470060924, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7866,2.104619,0.48121,0.122466,0.134188,0.114139
2,1.7678,1.600741,0.609533,0.273876,0.244121,0.229418
3,1.2713,1.247841,0.681027,0.347533,0.330935,0.325932
4,0.8779,1.146696,0.71494,0.498392,0.4139,0.42636
5,0.6118,1.028565,0.738772,0.496363,0.456591,0.456788


[I 2025-03-23 03:43:21,165] Trial 117 pruned. 


Trial 118 with params: {'learning_rate': 0.0019170520184840808, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9047,2.225156,0.439047,0.1074,0.117002,0.099761
2,1.8546,1.64294,0.581118,0.277958,0.218554,0.206556
3,1.3561,1.26792,0.68561,0.331966,0.322231,0.312447
4,0.9681,1.158897,0.711274,0.457698,0.399144,0.408736
5,0.7145,1.062191,0.738772,0.486737,0.451845,0.44976


[I 2025-03-23 03:43:46,875] Trial 118 pruned. 


Trial 119 with params: {'learning_rate': 0.004190023225173477, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8889,2.025983,0.48121,0.130395,0.137942,0.114305
2,1.6697,1.572796,0.608616,0.289519,0.251994,0.236382
3,1.1475,1.225161,0.698442,0.405012,0.368929,0.3639
4,0.7185,1.077431,0.72044,0.512591,0.474176,0.474192
5,0.4506,1.057339,0.765353,0.584597,0.553765,0.553334
6,0.2572,1.114129,0.747021,0.585605,0.552761,0.55486
7,0.1354,1.210904,0.776352,0.645217,0.612639,0.617405
8,0.0626,1.298341,0.787351,0.651357,0.620176,0.62094
9,0.0264,1.384382,0.780018,0.655357,0.623659,0.630831
10,0.0106,1.379912,0.788268,0.672373,0.633314,0.643047


[I 2025-03-23 03:45:37,814] Trial 119 finished with value: 0.6420669692560155 and parameters: {'learning_rate': 0.004190023225173477, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 107 with value: 0.6793781854063073.


Trial 120 with params: {'learning_rate': 0.0036239245139075113, 'weight_decay': 0.001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7739,1.983619,0.501375,0.139042,0.143306,0.119133
2,1.6669,1.55134,0.609533,0.283187,0.252307,0.242586
3,1.1364,1.17736,0.701192,0.393463,0.361412,0.357616
4,0.7348,1.068599,0.72319,0.518827,0.437815,0.452234
5,0.4589,1.024197,0.761687,0.60959,0.527443,0.548524
6,0.2533,1.159901,0.751604,0.618207,0.567416,0.577971
7,0.1453,1.245238,0.761687,0.618814,0.567592,0.582857
8,0.0721,1.241942,0.776352,0.662341,0.612511,0.625134
9,0.03,1.3887,0.784601,0.671357,0.616482,0.627111
10,0.0153,1.355458,0.784601,0.66588,0.619344,0.628896


[I 2025-03-23 03:46:26,414] Trial 120 pruned. 


Trial 121 with params: {'learning_rate': 0.004923665805632059, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6381,1.978232,0.493126,0.165248,0.160003,0.142205
2,1.5482,1.388674,0.657195,0.330585,0.291561,0.285238
3,1.0239,1.121341,0.713107,0.420823,0.384765,0.382643
4,0.6076,1.049968,0.745188,0.557539,0.496707,0.506002
5,0.3407,1.034004,0.769019,0.578721,0.550501,0.551697
6,0.168,1.162188,0.779102,0.653163,0.589781,0.607678
7,0.0848,1.247849,0.773602,0.660497,0.58397,0.603681
8,0.0315,1.302283,0.777269,0.631072,0.642219,0.627055
9,0.0167,1.426293,0.776352,0.650912,0.608497,0.617432
10,0.0161,1.429149,0.776352,0.651805,0.632489,0.629652


[I 2025-03-23 03:47:14,896] Trial 121 pruned. 


Trial 122 with params: {'learning_rate': 0.004602354833745206, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6694,1.963651,0.492209,0.155801,0.148875,0.128453
2,1.5651,1.450558,0.638863,0.315258,0.284909,0.281513
3,1.0422,1.100071,0.719523,0.393617,0.385549,0.377512
4,0.6467,1.014823,0.744271,0.569537,0.467632,0.490772
5,0.3813,0.99568,0.781852,0.632768,0.589877,0.593923
6,0.2079,1.16547,0.765353,0.629608,0.559369,0.576367
7,0.096,1.253869,0.778185,0.665308,0.614843,0.625595
8,0.0306,1.234245,0.781852,0.657023,0.622797,0.626685
9,0.0156,1.318513,0.779102,0.671787,0.622215,0.63496
10,0.0073,1.336411,0.791934,0.685103,0.639025,0.649428


[I 2025-03-23 03:48:37,311] Trial 122 finished with value: 0.6530008324882257 and parameters: {'learning_rate': 0.004602354833745206, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 123 with params: {'learning_rate': 0.004160911537991551, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6857,1.97964,0.495875,0.167407,0.153394,0.13541
2,1.5934,1.415544,0.649863,0.301626,0.293086,0.283578
3,1.0813,1.167515,0.707608,0.412148,0.39591,0.38987
4,0.6883,1.112648,0.706691,0.500316,0.441214,0.448997
5,0.399,1.00329,0.764436,0.570558,0.535029,0.541239
6,0.2229,1.130793,0.770852,0.655444,0.580531,0.598512
7,0.1035,1.163234,0.782768,0.64977,0.613925,0.621815
8,0.0422,1.251618,0.782768,0.662016,0.616959,0.626173
9,0.0166,1.247874,0.785518,0.664547,0.622289,0.630325
10,0.0062,1.354492,0.788268,0.707796,0.616647,0.642872


[I 2025-03-23 03:49:55,303] Trial 123 finished with value: 0.6472049417914906 and parameters: {'learning_rate': 0.004160911537991551, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 124 with params: {'learning_rate': 0.004739763664228839, 'weight_decay': 0.001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6617,1.938126,0.507791,0.139697,0.15283,0.128906
2,1.5551,1.337887,0.666361,0.326294,0.298552,0.29341
3,1.0269,1.107902,0.718607,0.417719,0.395417,0.388601
4,0.6288,1.035543,0.735105,0.593983,0.511056,0.527593
5,0.3501,1.031065,0.786434,0.661839,0.603401,0.611908
6,0.1809,1.13326,0.777269,0.656888,0.614508,0.623056
7,0.0723,1.263561,0.787351,0.677504,0.625329,0.635963
8,0.0366,1.259858,0.791017,0.679644,0.649455,0.654525
9,0.017,1.367452,0.787351,0.699643,0.65139,0.662524
10,0.0054,1.337985,0.797434,0.684613,0.6607,0.661473


[I 2025-03-23 03:51:30,382] Trial 124 finished with value: 0.6542754830892199 and parameters: {'learning_rate': 0.004739763664228839, 'weight_decay': 0.001, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 125 with params: {'learning_rate': 0.004984086975865407, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6409,1.963804,0.491292,0.162756,0.159977,0.141001
2,1.5536,1.439868,0.640697,0.305712,0.288466,0.275965
3,1.0365,1.091169,0.722273,0.42925,0.398861,0.398932
4,0.6127,1.035383,0.734189,0.539926,0.49289,0.500311
5,0.3398,1.097161,0.76077,0.657782,0.596511,0.607682
6,0.1785,1.255195,0.76352,0.649867,0.583653,0.601548
7,0.0897,1.186881,0.780018,0.680348,0.640307,0.647383
8,0.0326,1.258919,0.782768,0.677627,0.644391,0.645388
9,0.0201,1.304922,0.779102,0.647029,0.633304,0.628275
10,0.0112,1.280777,0.788268,0.684246,0.643041,0.647399


[I 2025-03-23 03:53:28,578] Trial 125 finished with value: 0.6529125660648234 and parameters: {'learning_rate': 0.004984086975865407, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 126 with params: {'learning_rate': 0.0020885856857537867, 'weight_decay': 0.002, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8332,2.143598,0.464711,0.112501,0.121895,0.102173
2,1.8332,1.675893,0.575619,0.22092,0.203837,0.192257
3,1.371,1.275697,0.695692,0.331382,0.326494,0.315834
4,0.9826,1.172643,0.702108,0.449834,0.394002,0.405154
5,0.7012,1.093773,0.721357,0.443419,0.426383,0.414414


[I 2025-03-23 03:54:52,753] Trial 126 pruned. 


Trial 127 with params: {'learning_rate': 0.004716742771374477, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0014,2.072619,0.467461,0.139735,0.128184,0.110279
2,1.7121,1.602276,0.59945,0.240969,0.231853,0.21589
3,1.1634,1.164836,0.705775,0.403529,0.380763,0.374891
4,0.7369,1.072675,0.72319,0.478934,0.445498,0.450984
5,0.4359,1.12325,0.747021,0.564221,0.512551,0.5188
6,0.2525,1.130815,0.773602,0.604924,0.574285,0.571657
7,0.1101,1.234542,0.779102,0.686812,0.60606,0.627011
8,0.0542,1.361261,0.787351,0.68229,0.636057,0.648857
9,0.0184,1.430209,0.773602,0.680548,0.625848,0.636899
10,0.0089,1.446144,0.787351,0.67167,0.638358,0.6459


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-23 03:56:45,799] Trial 127 finished with value: 0.6531932198644044 and parameters: {'learning_rate': 0.004716742771374477, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 107 with value: 0.6793781854063073.


Trial 128 with params: {'learning_rate': 0.00467147223941434, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6595,1.957847,0.494959,0.152352,0.147779,0.125799
2,1.5571,1.405367,0.653529,0.317267,0.291649,0.284563
3,1.0418,1.092053,0.72044,0.418343,0.392193,0.391362
4,0.6383,1.014234,0.749771,0.609385,0.524992,0.5472
5,0.3435,1.08869,0.771769,0.63842,0.597396,0.6027
6,0.1869,1.080741,0.780018,0.661421,0.612364,0.624017
7,0.0768,1.254493,0.781852,0.652038,0.608883,0.61438
8,0.0348,1.271957,0.802933,0.665556,0.655202,0.649485
9,0.0191,1.362721,0.794684,0.701009,0.634804,0.652619
10,0.009,1.363317,0.796517,0.682346,0.643248,0.648556


[I 2025-03-23 03:58:07,746] Trial 128 finished with value: 0.650988171608558 and parameters: {'learning_rate': 0.00467147223941434, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 129 with params: {'learning_rate': 0.004939413634886599, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.038,2.077818,0.470211,0.116241,0.126189,0.104332
2,1.7208,1.601657,0.582035,0.260306,0.245031,0.23414
3,1.169,1.230134,0.692026,0.415208,0.382094,0.378503
4,0.7322,1.021711,0.746104,0.560237,0.493588,0.504046
5,0.4278,1.089846,0.749771,0.595355,0.552059,0.556016
6,0.2359,1.164066,0.753437,0.659824,0.576635,0.596689
7,0.1019,1.323763,0.786434,0.704937,0.632266,0.650275
8,0.0495,1.32952,0.791934,0.663857,0.630874,0.634663
9,0.0212,1.375077,0.794684,0.672979,0.624136,0.636506
10,0.0105,1.392231,0.790101,0.67582,0.633496,0.6421


[I 2025-03-23 03:59:32,167] Trial 129 finished with value: 0.6532370099340866 and parameters: {'learning_rate': 0.004939413634886599, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 107 with value: 0.6793781854063073.


Trial 130 with params: {'learning_rate': 0.0021911907666705803, 'weight_decay': 0.005, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8967,2.173848,0.461045,0.118639,0.124265,0.104849
2,1.8151,1.637859,0.583868,0.244004,0.227878,0.210962
3,1.3217,1.237572,0.688359,0.334917,0.321993,0.313521
4,0.9398,1.123331,0.715857,0.476531,0.416707,0.425734
5,0.6761,1.062046,0.733272,0.476957,0.429349,0.436798


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-23 04:00:30,070] Trial 130 pruned. 


Trial 131 with params: {'learning_rate': 0.0049948568363508165, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6417,1.960383,0.490376,0.164208,0.163718,0.145873
2,1.5555,1.398399,0.654445,0.332883,0.302589,0.299586
3,1.0194,1.083648,0.733272,0.431789,0.405523,0.407983
4,0.6112,1.065192,0.733272,0.532583,0.488921,0.49608
5,0.331,1.139977,0.761687,0.617489,0.579453,0.580014
6,0.183,1.174352,0.769019,0.647726,0.609139,0.613318
7,0.0888,1.25392,0.780935,0.678075,0.601668,0.620767
8,0.0306,1.252187,0.779102,0.664617,0.617988,0.627006
9,0.0117,1.340197,0.788268,0.680389,0.623415,0.639257
10,0.0055,1.360264,0.789184,0.680654,0.636092,0.646087


[I 2025-03-23 04:02:06,706] Trial 131 finished with value: 0.6427425500044284 and parameters: {'learning_rate': 0.0049948568363508165, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 132 with params: {'learning_rate': 0.0032236848121495747, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8511,2.064874,0.482126,0.116947,0.12829,0.10846
2,1.7289,1.606507,0.608616,0.280175,0.250155,0.237505
3,1.2216,1.187964,0.696609,0.35769,0.35515,0.341222
4,0.785,1.133495,0.71769,0.478328,0.436858,0.445145
5,0.5269,1.078251,0.745188,0.579904,0.506569,0.516963


[I 2025-03-23 04:02:31,460] Trial 132 pruned. 


Trial 133 with params: {'learning_rate': 0.004106947181064993, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7067,2.001276,0.482126,0.149529,0.137807,0.114461
2,1.6208,1.44246,0.64528,0.296329,0.290456,0.281876
3,1.0832,1.155577,0.709441,0.404244,0.384783,0.377088
4,0.6789,1.071416,0.72594,0.574042,0.494801,0.512338
5,0.3977,0.996894,0.772686,0.594163,0.568701,0.567384
6,0.2194,1.11623,0.777269,0.664323,0.595388,0.61223
7,0.0871,1.259194,0.773602,0.623514,0.597504,0.595823
8,0.0477,1.215774,0.793767,0.672637,0.638466,0.64327
9,0.019,1.321354,0.779102,0.673713,0.630595,0.640969
10,0.007,1.370219,0.785518,0.688127,0.627398,0.643476


[I 2025-03-23 04:04:02,607] Trial 133 finished with value: 0.6484660255216622 and parameters: {'learning_rate': 0.004106947181064993, 'weight_decay': 0.005, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 134 with params: {'learning_rate': 0.0030452149051435964, 'weight_decay': 0.003, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7143,1.996897,0.507791,0.135103,0.147032,0.122653
2,1.688,1.515095,0.615949,0.293106,0.255986,0.25192
3,1.1818,1.177302,0.702108,0.380616,0.353927,0.347927
4,0.791,1.071925,0.730522,0.474588,0.439494,0.444105
5,0.511,1.035502,0.756187,0.553401,0.488616,0.502561


[I 2025-03-23 04:04:59,010] Trial 134 pruned. 


Trial 135 with params: {'learning_rate': 0.0022204157733840533, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8493,2.13728,0.465628,0.111391,0.121505,0.10194
2,1.8084,1.652345,0.567369,0.208155,0.202084,0.18964
3,1.3162,1.256301,0.688359,0.347577,0.325917,0.316652
4,0.9341,1.127152,0.711274,0.450227,0.407127,0.412365
5,0.651,1.0516,0.731439,0.501008,0.442255,0.447465
6,0.442,1.079672,0.736022,0.55669,0.496883,0.510402
7,0.2906,1.156181,0.751604,0.655342,0.553383,0.580067
8,0.1898,1.195137,0.735105,0.626049,0.561792,0.578426
9,0.1055,1.244766,0.767186,0.628086,0.59617,0.602594
10,0.0581,1.297662,0.752521,0.625262,0.588026,0.593822


[I 2025-03-23 04:05:54,651] Trial 135 pruned. 


Trial 136 with params: {'learning_rate': 0.004949360062136621, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6392,1.979038,0.488543,0.160065,0.157007,0.138451
2,1.5462,1.426451,0.653529,0.318894,0.292769,0.285555
3,1.0381,1.095451,0.731439,0.416227,0.395572,0.395547
4,0.6401,1.048116,0.729606,0.574535,0.491624,0.506917
5,0.3508,1.09308,0.767186,0.607036,0.5763,0.572696
6,0.1846,1.13726,0.775435,0.645375,0.591188,0.603712
7,0.0969,1.179637,0.786434,0.694419,0.612202,0.639657
8,0.0298,1.230145,0.783685,0.677461,0.64109,0.648443
9,0.0116,1.302495,0.799267,0.712066,0.647695,0.663187
10,0.0066,1.341763,0.791017,0.717457,0.647244,0.663247


[I 2025-03-23 04:07:20,878] Trial 136 finished with value: 0.6490519772223803 and parameters: {'learning_rate': 0.004949360062136621, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 137 with params: {'learning_rate': 0.004887231649088056, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6296,1.977715,0.494042,0.160503,0.156085,0.136521
2,1.5473,1.473991,0.636114,0.330643,0.282493,0.278861
3,1.0284,1.096227,0.731439,0.447647,0.422813,0.426319
4,0.6205,1.031816,0.752521,0.575126,0.496961,0.513315
5,0.349,0.97074,0.773602,0.668163,0.59556,0.610042
6,0.1791,1.060465,0.781852,0.664888,0.619775,0.62313
7,0.0805,1.241279,0.792851,0.694737,0.640921,0.654827
8,0.0402,1.18857,0.794684,0.708353,0.667611,0.672254
9,0.0159,1.281578,0.786434,0.70922,0.6387,0.657411
10,0.0081,1.299786,0.791934,0.711937,0.652539,0.662708


[I 2025-03-23 04:08:34,140] Trial 137 finished with value: 0.6695332109814145 and parameters: {'learning_rate': 0.004887231649088056, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 107 with value: 0.6793781854063073.


Trial 138 with params: {'learning_rate': 0.004879497055330371, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6317,1.980761,0.482126,0.156225,0.150339,0.13415
2,1.5475,1.468231,0.635197,0.328451,0.289204,0.287266
3,1.0282,1.09845,0.715857,0.416979,0.389833,0.384624
4,0.6192,1.043888,0.747938,0.588414,0.504273,0.526891
5,0.3443,1.042724,0.779102,0.606734,0.578935,0.581185
6,0.1761,1.096843,0.780935,0.687686,0.596637,0.619751
7,0.0837,1.17602,0.775435,0.668777,0.626851,0.631812
8,0.0366,1.190911,0.787351,0.68589,0.632858,0.643882
9,0.0144,1.289256,0.797434,0.683057,0.621662,0.642854
10,0.0056,1.289233,0.810266,0.742308,0.662657,0.687048


[I 2025-03-23 04:09:52,188] Trial 138 finished with value: 0.6872468518180237 and parameters: {'learning_rate': 0.004879497055330371, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 138 with value: 0.6872468518180237.


Trial 139 with params: {'learning_rate': 0.004912707810905297, 'weight_decay': 0.001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6327,1.985008,0.504125,0.168502,0.158626,0.141131
2,1.5428,1.424282,0.653529,0.330321,0.290601,0.284028
3,1.0091,1.090329,0.721357,0.413599,0.392356,0.389926
4,0.5947,1.031807,0.736022,0.560542,0.503095,0.515742
5,0.3389,1.034685,0.772686,0.623927,0.564526,0.576752
6,0.1671,1.118806,0.772686,0.639986,0.612714,0.610169
7,0.0825,1.160578,0.790101,0.675591,0.625688,0.639156
8,0.0288,1.241903,0.782768,0.707977,0.652614,0.668463
9,0.0117,1.318166,0.791017,0.691996,0.632468,0.652438
10,0.0047,1.370026,0.796517,0.72351,0.671109,0.681854


[I 2025-03-23 04:11:23,827] Trial 139 finished with value: 0.6803835577703867 and parameters: {'learning_rate': 0.004912707810905297, 'weight_decay': 0.001, 'warmup_steps': 2}. Best is trial 138 with value: 0.6872468518180237.


Trial 140 with params: {'learning_rate': 0.00432414831072382, 'weight_decay': 0.001, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7715,2.041466,0.483043,0.115139,0.138873,0.106454
2,1.6552,1.450791,0.640697,0.311433,0.292781,0.278004
3,1.1238,1.134085,0.710357,0.392763,0.373242,0.361974
4,0.7084,1.043893,0.725023,0.549494,0.481848,0.489668
5,0.4236,1.015013,0.761687,0.565963,0.531045,0.534242
6,0.2476,1.116369,0.764436,0.676486,0.614886,0.632225
7,0.117,1.184257,0.770852,0.676072,0.616905,0.626139
8,0.0496,1.236273,0.779102,0.6649,0.636054,0.640195
9,0.0255,1.266633,0.7956,0.663511,0.647604,0.646111
10,0.0094,1.314039,0.792851,0.639981,0.625335,0.625032


[I 2025-03-23 04:12:11,257] Trial 140 pruned. 


Trial 141 with params: {'learning_rate': 0.004491402887406403, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6688,1.977976,0.494959,0.15663,0.151527,0.131399
2,1.5733,1.403692,0.648946,0.311316,0.285762,0.286813
3,1.0429,1.120068,0.710357,0.40698,0.380296,0.371175
4,0.6488,1.046293,0.742438,0.583069,0.493497,0.516359
5,0.3732,1.028944,0.769936,0.6037,0.546435,0.558282
6,0.1806,1.113277,0.785518,0.670717,0.600069,0.6195
7,0.0989,1.215484,0.769936,0.67066,0.597431,0.619692
8,0.0373,1.21058,0.786434,0.642744,0.618165,0.618861
9,0.0181,1.33854,0.788268,0.685027,0.619152,0.640541
10,0.0073,1.327005,0.796517,0.680685,0.628459,0.644513


[I 2025-03-23 04:13:40,660] Trial 141 finished with value: 0.6541123603982701 and parameters: {'learning_rate': 0.004491402887406403, 'weight_decay': 0.0, 'warmup_steps': 2}. Best is trial 138 with value: 0.6872468518180237.


Trial 142 with params: {'learning_rate': 0.004747774176859156, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0016,2.073616,0.466544,0.157856,0.129628,0.113668
2,1.6934,1.633773,0.587534,0.282451,0.231575,0.221973
3,1.146,1.200371,0.698442,0.399434,0.377288,0.366833
4,0.7432,1.062357,0.724106,0.478035,0.457519,0.454793
5,0.43,1.129938,0.756187,0.550913,0.52642,0.52663
6,0.2462,1.138229,0.756187,0.602291,0.558981,0.561037
7,0.1283,1.309267,0.773602,0.691691,0.571364,0.604227
8,0.0635,1.241839,0.775435,0.647374,0.601812,0.613172
9,0.0232,1.304046,0.791017,0.706403,0.667601,0.671714
10,0.0094,1.379819,0.782768,0.715097,0.654092,0.667658


[I 2025-03-23 04:15:10,845] Trial 142 finished with value: 0.6571278180605237 and parameters: {'learning_rate': 0.004747774176859156, 'weight_decay': 0.0, 'warmup_steps': 3}. Best is trial 138 with value: 0.6872468518180237.


Trial 143 with params: {'learning_rate': 0.004785873886261683, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6641,1.981443,0.505958,0.182068,0.159815,0.141122
2,1.5482,1.47341,0.651696,0.329316,0.286119,0.283618
3,1.0405,1.095854,0.714024,0.404789,0.37295,0.368066
4,0.6299,1.072325,0.730522,0.537733,0.499852,0.507171
5,0.3579,1.043918,0.766269,0.585649,0.563206,0.557925
6,0.1792,1.12562,0.774519,0.703829,0.620929,0.641468
7,0.0861,1.233875,0.783685,0.667412,0.59964,0.61649
8,0.0396,1.23923,0.796517,0.705633,0.649838,0.663599
9,0.0186,1.250149,0.789184,0.672412,0.631579,0.641894
10,0.0092,1.270699,0.797434,0.676413,0.654013,0.654903


[I 2025-03-23 04:16:30,908] Trial 143 finished with value: 0.6623326094681586 and parameters: {'learning_rate': 0.004785873886261683, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 138 with value: 0.6872468518180237.


Trial 144 with params: {'learning_rate': 0.004403905330532995, 'weight_decay': 0.001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6638,1.971712,0.493126,0.156437,0.152959,0.133018
2,1.5782,1.407491,0.649863,0.331485,0.291886,0.292919
3,1.034,1.109526,0.710357,0.402698,0.376802,0.367104
4,0.6352,1.036507,0.744271,0.567064,0.483675,0.504684
5,0.3618,1.038727,0.775435,0.639507,0.577644,0.594893
6,0.1912,1.143087,0.764436,0.595432,0.531719,0.549262
7,0.0975,1.164903,0.776352,0.656036,0.606671,0.617063
8,0.033,1.167753,0.788268,0.687011,0.645712,0.651841
9,0.0162,1.2651,0.788268,0.680488,0.632232,0.644934
10,0.0073,1.300647,0.786434,0.720734,0.639956,0.662043


[I 2025-03-23 04:17:56,766] Trial 144 finished with value: 0.6323593964065226 and parameters: {'learning_rate': 0.004403905330532995, 'weight_decay': 0.001, 'warmup_steps': 2}. Best is trial 138 with value: 0.6872468518180237.


Trial 145 with params: {'learning_rate': 0.0017806186064595034, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9367,2.248756,0.437214,0.096361,0.110703,0.090835
2,1.9065,1.701693,0.574702,0.240237,0.201714,0.188289
3,1.4003,1.278525,0.686526,0.330483,0.32449,0.315672
4,1.0078,1.17632,0.709441,0.415936,0.391223,0.391325
5,0.7612,1.094523,0.730522,0.448116,0.410273,0.412175


[I 2025-03-23 04:18:26,995] Trial 145 pruned. 


Trial 146 with params: {'learning_rate': 0.004990505002298743, 'weight_decay': 0.0, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.869,1.991151,0.497709,0.137789,0.144787,0.119749
2,1.5988,1.513613,0.624198,0.282764,0.271978,0.256594
3,1.0942,1.131715,0.719523,0.424512,0.40474,0.396533
4,0.6817,0.999468,0.747938,0.503979,0.467214,0.474673
5,0.3884,1.050969,0.769936,0.593633,0.556752,0.553614
6,0.2147,1.136353,0.769019,0.680615,0.599,0.619483
7,0.0993,1.19242,0.789184,0.684092,0.625528,0.639268
8,0.038,1.299158,0.780935,0.707037,0.635403,0.654022
9,0.0163,1.326161,0.791934,0.709763,0.658969,0.672043
10,0.0093,1.349512,0.79835,0.700587,0.643884,0.656668


[I 2025-03-23 04:20:04,905] Trial 146 finished with value: 0.6633456367063095 and parameters: {'learning_rate': 0.004990505002298743, 'weight_decay': 0.0, 'warmup_steps': 1}. Best is trial 138 with value: 0.6872468518180237.


Trial 147 with params: {'learning_rate': 0.0025351809116318682, 'weight_decay': 0.001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.78,2.08795,0.471127,0.123149,0.127779,0.106857
2,1.7458,1.559291,0.612282,0.276721,0.246219,0.233606
3,1.2456,1.212807,0.697525,0.380308,0.345552,0.341407
4,0.8516,1.105494,0.728689,0.457168,0.436652,0.438907
5,0.5848,1.007938,0.749771,0.552107,0.470125,0.480041


[I 2025-03-23 04:20:39,075] Trial 147 pruned. 


Trial 148 with params: {'learning_rate': 0.004336757300411707, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.919,2.03414,0.484876,0.128003,0.136525,0.117001
2,1.6788,1.619525,0.60495,0.294426,0.2452,0.233528
3,1.1749,1.178951,0.704858,0.419537,0.361885,0.365682
4,0.7338,1.113917,0.715857,0.474927,0.426712,0.434513
5,0.4333,1.087022,0.751604,0.542359,0.508756,0.508896


[I 2025-03-23 04:21:06,245] Trial 148 pruned. 


Trial 149 with params: {'learning_rate': 0.002688402728586328, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7784,2.07688,0.47846,0.118378,0.131007,0.107522
2,1.744,1.558154,0.612282,0.248966,0.247346,0.22771
3,1.2259,1.207063,0.696609,0.384152,0.351361,0.346501
4,0.8332,1.084556,0.732356,0.453426,0.435252,0.438903
5,0.5521,1.046944,0.741522,0.502998,0.460192,0.464932
6,0.3672,1.115579,0.736022,0.627105,0.517241,0.541263
7,0.2211,1.190699,0.76077,0.642092,0.570644,0.589537
8,0.1363,1.224378,0.75527,0.659573,0.59849,0.60903
9,0.0728,1.289222,0.770852,0.628477,0.594403,0.601562
10,0.0384,1.299283,0.766269,0.610677,0.606186,0.593995


[I 2025-03-23 04:22:03,252] Trial 149 pruned. 


In [25]:
# Show the BestRun picked by the search above (run id, objective value and
# the hyperparameters that produced it).
print(best_trial)

BestRun(run_id='138', objective=0.6872468518180237, hyperparameters={'learning_rate': 0.004879497055330371, 'weight_decay': 0.002, 'warmup_steps': 2}, run_summary=None)


In [26]:
# Call the project's seeding helper again before setting up the second
# (distillation) search.
# NOTE(review): presumably re-seeds the RNGs so this search does not inherit
# state left over from the previous one — confirm in base.reset_seed.
base.reset_seed()

In [27]:
# Training arguments for the distillation hyperparameter search. Results and
# logs go to dataset-specific "bilstm-distill_fine_hp-search" folders.
# NOTE(review): remove_unused_columns=False presumably keeps extra dataset
# columns (e.g. teacher logits) available to the trainer — confirm in base.
training_args = base.get_training_args(
    output_dir=f"~/results/{DATASET}/bilstm-distill_fine_hp-search",
    logging_dir=f"~/logs/{DATASET}/bilstm-distill_fine_hp-search",
    remove_unused_columns=False,
    epochs=num_epochs,
    batch_size=batch_size,
)

In [28]:
def hp_space(trial):
    """Draw one hyperparameter configuration from an Optuna-style ``trial``.

    The values are requested in a fixed order:

    * ``learning_rate`` - float in [5e-5, 5e-3], sampled with ``log=True``
    * ``weight_decay``  - float in [0, 1e-2] on a 1e-3 grid
    * ``warmup_steps``  - int in [0, warm_up] (``warm_up`` is a notebook global)
    * ``lambda_param``  - float in [0, 1] on a 0.1 grid
    * ``temperature``   - float in [2, 7] on a 0.5 grid

    NOTE(review): ``lambda_param`` and ``temperature`` are presumably the
    distillation-loss settings consumed by ``base.DistilTrainer`` - confirm.

    Prints the trial number together with the sampled values and returns
    them as a dict keyed by hyperparameter name.
    """
    sampled = {}
    sampled["learning_rate"] = trial.suggest_float("learning_rate", 5e-5, 5e-3, log=True)
    sampled["weight_decay"] = trial.suggest_float("weight_decay", 0, 1e-2, step=1e-3)
    sampled["warmup_steps"] = trial.suggest_int("warmup_steps", 0, warm_up)
    sampled["lambda_param"] = trial.suggest_float("lambda_param", 0, 1, step=.1)
    sampled["temperature"] = trial.suggest_float("temperature", 2, 7, step=.5)

    # Log the draw so each trial's metrics table can be matched to its params.
    print(f"Trial {trial.number} with params: {sampled}")
    return sampled

In [29]:
# Hyperband pruner for the search; the resource bounds come from the notebook
# globals min_r / max_r.
pruner = optuna.pruners.HyperbandPruner(
    min_resource=min_r,
    max_resource=max_r,
    reduction_factor=2,
    bootstrap_count=2,
)

# Seeded multivariate TPE sampler so the sequence of suggested trials is repeatable.
sampler = optuna.samplers.TPESampler(
    seed=42,
    multivariate=True,
)



In [30]:
# Trainer used for the distillation hyperparameter search.
# model_init is a zero-argument callable so a fresh model can be built on each
# call; the lambda keeps that zero-argument shape regardless of get_BiLSTM's
# own signature.
trainer = base.DistilTrainer(
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
    model_init=lambda: get_BiLSTM(),
)
  

In [31]:
# Run the Optuna-backed search over hp_space and keep the best run.
best_trial2 = trainer.hyperparameter_search(
    # what to optimise: maximise the "eval_f1" entry of the evaluation metrics
    direction="maximize",
    compute_objective=lambda metrics: metrics["eval_f1"],
    # how to search
    backend="optuna",
    hp_space=hp_space,
    n_trials=150,
    # study configuration (name, sampler, pruner)
    study_name="Test-destilace",
    sampler=sampler,
    pruner=pruner,
)

[I 2025-03-23 04:22:03,475] A new study created in memory with name: Test-destilace


Trial 0 with params: {'learning_rate': 0.0002805758207667253, 'weight_decay': 0.01, 'warmup_steps': 3, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.2348,2.019891,0.176902,0.003538,0.02,0.006012
2,1.8622,1.71517,0.368469,0.039467,0.073651,0.045954
3,1.6359,1.523278,0.432631,0.069515,0.096849,0.067902
4,1.4724,1.394872,0.476627,0.112013,0.119179,0.093869
5,1.362,1.310333,0.512374,0.116109,0.136523,0.113681
6,1.2588,1.254398,0.523373,0.148252,0.149132,0.13049
7,1.199,1.209889,0.541705,0.165743,0.162353,0.144619
8,1.1494,1.179047,0.574702,0.198799,0.188546,0.167292
9,1.1009,1.146723,0.571036,0.185979,0.179132,0.160836
10,1.0574,1.119623,0.590284,0.178956,0.200186,0.179331


[I 2025-03-23 04:22:55,045] Trial 0 pruned. 


Trial 1 with params: {'learning_rate': 0.00010255552094216992, 'weight_decay': 0.0, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.367,2.136253,0.176902,0.003538,0.02,0.006012
2,2.0911,2.028804,0.176902,0.003538,0.02,0.006012
3,1.9702,1.882706,0.352887,0.046888,0.067171,0.038984
4,1.8333,1.776653,0.348304,0.038751,0.067885,0.04432
5,1.755,1.689185,0.380385,0.038991,0.077451,0.050284


[I 2025-03-23 04:23:21,247] Trial 1 pruned. 


Trial 2 with params: {'learning_rate': 5.497167787383099e-05, 'weight_decay': 0.01, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4323,2.339943,0.176902,0.003538,0.02,0.006012
2,2.2106,2.105233,0.176902,0.003538,0.02,0.006012
3,2.0999,2.056298,0.176902,0.003538,0.02,0.006012
4,2.0327,1.98137,0.191567,0.023104,0.024086,0.012465
5,1.9632,1.907727,0.310724,0.027212,0.055347,0.035516
6,1.8803,1.847184,0.334555,0.038837,0.062833,0.039095
7,1.8293,1.804899,0.336389,0.036124,0.063791,0.040314
8,1.7911,1.763332,0.356554,0.039144,0.071103,0.045377
9,1.7605,1.732891,0.359303,0.036699,0.071191,0.045084
10,1.7318,1.715842,0.366636,0.038836,0.074864,0.04829


[I 2025-03-23 04:24:11,232] Trial 2 pruned. 


Trial 3 with params: {'learning_rate': 0.00011635338541918901, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.4, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3401,2.112457,0.176902,0.003538,0.02,0.006012
2,2.0677,1.987603,0.179652,0.006465,0.020822,0.007389
3,1.9283,1.833505,0.343721,0.038056,0.065137,0.038928
4,1.7854,1.726898,0.361137,0.03766,0.071404,0.046418
5,1.7035,1.635547,0.395967,0.04073,0.083475,0.054199


[I 2025-03-23 04:24:39,700] Trial 3 pruned. 


Trial 4 with params: {'learning_rate': 0.0008369042894376068, 'weight_decay': 0.001, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0442,1.68784,0.35747,0.044326,0.072447,0.046968
2,1.5085,1.362346,0.494042,0.12391,0.127967,0.104831
3,1.2647,1.171389,0.55637,0.14274,0.165075,0.142049
4,1.075,1.057908,0.609533,0.229184,0.201865,0.190136
5,0.9401,0.97742,0.648946,0.290086,0.252975,0.248896
6,0.7999,0.87738,0.686526,0.337236,0.301486,0.299425
7,0.7,0.826635,0.699358,0.354128,0.311691,0.313784
8,0.6271,0.796871,0.713107,0.383691,0.336121,0.335244
9,0.5597,0.777415,0.715857,0.404349,0.349014,0.359445
10,0.4971,0.750847,0.72594,0.384014,0.368781,0.368597


[I 2025-03-23 04:25:59,984] Trial 4 finished with value: 0.42982984344246306 and parameters: {'learning_rate': 0.0008369042894376068, 'weight_decay': 0.001, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 4.5}. Best is trial 4 with value: 0.42982984344246306.


Trial 5 with params: {'learning_rate': 0.0018591820902866042, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9006,1.473243,0.450962,0.070561,0.105687,0.078966
2,1.3072,1.212988,0.548121,0.150154,0.167234,0.146166
3,1.0443,0.935619,0.658112,0.27885,0.261281,0.251672
4,0.8124,0.846953,0.684693,0.358567,0.304528,0.305907
5,0.6507,0.756365,0.716774,0.360937,0.349267,0.346013
6,0.5247,0.702824,0.746104,0.450985,0.391726,0.404687
7,0.4226,0.692093,0.752521,0.465103,0.444875,0.444611
8,0.3549,0.6538,0.774519,0.530627,0.470323,0.484339
9,0.2894,0.651423,0.769936,0.513179,0.483209,0.488645
10,0.2425,0.627389,0.778185,0.560452,0.500393,0.515549


[I 2025-03-23 04:26:56,548] Trial 5 pruned. 


Trial 6 with params: {'learning_rate': 0.0008204643365323959, 'weight_decay': 0.001, 'warmup_steps': 0, 'lambda_param': 1.0, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0241,1.677025,0.359303,0.039864,0.073025,0.046442
2,1.4997,1.380261,0.463795,0.108137,0.110767,0.087279
3,1.2741,1.184425,0.542621,0.143156,0.156203,0.135385
4,1.0868,1.062062,0.616865,0.228467,0.210819,0.194672
5,0.9411,0.98242,0.646196,0.288531,0.252399,0.248551
6,0.8071,0.885649,0.689276,0.330968,0.305948,0.300499
7,0.7135,0.851081,0.689276,0.352322,0.302551,0.306683
8,0.6422,0.804422,0.708524,0.352954,0.327525,0.327297
9,0.5737,0.775822,0.714024,0.380835,0.344996,0.351948
10,0.5087,0.752854,0.726856,0.380315,0.370362,0.371713


[I 2025-03-23 04:28:18,891] Trial 6 finished with value: 0.4044223922519349 and parameters: {'learning_rate': 0.0008204643365323959, 'weight_decay': 0.001, 'warmup_steps': 0, 'lambda_param': 1.0, 'temperature': 7.0}. Best is trial 4 with value: 0.42982984344246306.


Trial 7 with params: {'learning_rate': 0.0020690200562805084, 'weight_decay': 0.003, 'warmup_steps': 0, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8193,1.447242,0.476627,0.126932,0.123112,0.095221
2,1.2832,1.213401,0.548121,0.161788,0.171932,0.151852
3,1.0285,0.926263,0.663611,0.272499,0.269848,0.257284
4,0.7924,0.852466,0.681943,0.362817,0.305768,0.306317
5,0.636,0.739128,0.728689,0.393096,0.368214,0.366502
6,0.5083,0.71443,0.742438,0.42799,0.389309,0.395038
7,0.4075,0.682918,0.756187,0.464885,0.422134,0.432626
8,0.3382,0.648685,0.773602,0.512047,0.458848,0.467663
9,0.2728,0.655326,0.770852,0.542336,0.481545,0.495917
10,0.2281,0.640411,0.775435,0.576707,0.508896,0.525832


[I 2025-03-23 04:29:45,342] Trial 7 pruned. 


Trial 8 with params: {'learning_rate': 8.770946743725407e-05, 'weight_decay': 0.005, 'warmup_steps': 0, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3701,2.161932,0.176902,0.003538,0.02,0.006012
2,2.1056,2.050703,0.176902,0.003538,0.02,0.006012
3,2.008,1.923594,0.349221,0.029496,0.065883,0.040269
4,1.8773,1.830193,0.323556,0.038973,0.060471,0.039216
5,1.8064,1.742981,0.363886,0.037556,0.072185,0.046591


[I 2025-03-23 04:30:09,457] Trial 8 pruned. 


Trial 9 with params: {'learning_rate': 0.0010568529720322872, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0257,1.657498,0.374885,0.058739,0.077629,0.050582
2,1.4579,1.334286,0.485793,0.104996,0.123828,0.100352
3,1.2123,1.120872,0.571036,0.196967,0.17743,0.158644
4,1.0036,0.979451,0.637947,0.278668,0.233876,0.228448
5,0.8527,0.906348,0.672777,0.311742,0.288482,0.283636
6,0.7157,0.825787,0.698442,0.340145,0.314825,0.313141
7,0.6118,0.79171,0.71769,0.396157,0.339385,0.343888
8,0.5439,0.743406,0.726856,0.387671,0.365074,0.367098
9,0.4705,0.72598,0.747021,0.43875,0.401928,0.410099
10,0.4066,0.717764,0.741522,0.452001,0.418484,0.423981


[I 2025-03-23 04:31:43,369] Trial 9 finished with value: 0.47323410137981214 and parameters: {'learning_rate': 0.0010568529720322872, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}. Best is trial 9 with value: 0.47323410137981214.


Trial 10 with params: {'learning_rate': 0.0019688396221773483, 'weight_decay': 0.004, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.913,1.473528,0.450962,0.101988,0.106675,0.084012
2,1.2936,1.177662,0.55637,0.173183,0.173215,0.153147
3,1.0133,0.913453,0.670027,0.286707,0.283362,0.272756
4,0.7817,0.818817,0.702108,0.381583,0.340863,0.339412
5,0.6237,0.741219,0.733272,0.379381,0.370486,0.366451
6,0.4989,0.707638,0.747938,0.433989,0.401751,0.406275
7,0.392,0.691987,0.75527,0.426834,0.420035,0.407882
8,0.3302,0.653289,0.765353,0.517686,0.459617,0.468093
9,0.2661,0.647591,0.770852,0.530949,0.475912,0.491456
10,0.2182,0.631691,0.773602,0.570823,0.493947,0.511426


[I 2025-03-23 04:32:55,242] Trial 10 pruned. 


Trial 11 with params: {'learning_rate': 0.0013946810228975734, 'weight_decay': 0.002, 'warmup_steps': 0, 'lambda_param': 0.1, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.903,1.516794,0.43538,0.073219,0.100484,0.074145
2,1.3554,1.279965,0.52154,0.141044,0.153242,0.134166
3,1.109,0.998198,0.637947,0.232915,0.233698,0.217588
4,0.8832,0.915545,0.673694,0.340456,0.279731,0.278302
5,0.7362,0.815857,0.703025,0.344058,0.325382,0.31894
6,0.6066,0.769559,0.724106,0.413375,0.361878,0.367579
7,0.5105,0.72939,0.734189,0.442032,0.379657,0.3891
8,0.4412,0.696995,0.749771,0.472938,0.427088,0.438894
9,0.3703,0.686674,0.761687,0.53062,0.446805,0.4677
10,0.312,0.670804,0.772686,0.498444,0.477475,0.481393


[I 2025-03-23 04:34:51,681] Trial 11 finished with value: 0.521642028204758 and parameters: {'learning_rate': 0.0013946810228975734, 'weight_decay': 0.002, 'warmup_steps': 0, 'lambda_param': 0.1, 'temperature': 5.0}. Best is trial 11 with value: 0.521642028204758.


Trial 12 with params: {'learning_rate': 0.003908411800319493, 'weight_decay': 0.001, 'warmup_steps': 0, 'lambda_param': 0.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7431,1.344743,0.485793,0.132424,0.134397,0.109084
2,1.1735,1.047845,0.622365,0.256088,0.248901,0.235683
3,0.8655,0.828401,0.718607,0.352735,0.3544,0.341043
4,0.6333,0.71532,0.733272,0.422414,0.38524,0.38922
5,0.4676,0.644657,0.767186,0.490415,0.442349,0.450996
6,0.3456,0.619406,0.772686,0.534326,0.459813,0.482485
7,0.2491,0.619248,0.780935,0.621498,0.533253,0.553239
8,0.1984,0.59413,0.796517,0.660034,0.588876,0.606848
9,0.1546,0.576206,0.804766,0.685448,0.595636,0.62157
10,0.1263,0.576044,0.811182,0.69605,0.636641,0.649884


[I 2025-03-23 04:36:24,493] Trial 12 finished with value: 0.6782818162736831 and parameters: {'learning_rate': 0.003908411800319493, 'weight_decay': 0.001, 'warmup_steps': 0, 'lambda_param': 0.0, 'temperature': 5.5}. Best is trial 12 with value: 0.6782818162736831.


Trial 13 with params: {'learning_rate': 0.0026513997752155216, 'weight_decay': 0.002, 'warmup_steps': 0, 'lambda_param': 0.0, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.815,1.422111,0.47846,0.124652,0.132813,0.108651
2,1.2537,1.165803,0.56462,0.157657,0.183841,0.162224
3,0.9768,0.905153,0.678277,0.30289,0.296494,0.286262
4,0.7485,0.809105,0.698442,0.37187,0.331339,0.336737
5,0.5832,0.712065,0.747021,0.441587,0.397942,0.403023
6,0.454,0.672316,0.757104,0.463393,0.430445,0.438405
7,0.3494,0.662077,0.76077,0.487324,0.445706,0.448596
8,0.2858,0.656058,0.777269,0.531548,0.501985,0.50603
9,0.2252,0.625837,0.775435,0.601071,0.525795,0.54312
10,0.1884,0.62153,0.779102,0.622634,0.545711,0.561375


[I 2025-03-23 04:37:45,452] Trial 13 finished with value: 0.5998685532171455 and parameters: {'learning_rate': 0.0026513997752155216, 'weight_decay': 0.002, 'warmup_steps': 0, 'lambda_param': 0.0, 'temperature': 6.0}. Best is trial 12 with value: 0.6782818162736831.


Trial 14 with params: {'learning_rate': 0.0011374598377402296, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0013,1.623026,0.384968,0.060698,0.081366,0.052757
2,1.4268,1.33418,0.485793,0.113846,0.125076,0.102385
3,1.1982,1.091707,0.589368,0.244159,0.190025,0.172366
4,0.9826,0.965953,0.643446,0.292409,0.243884,0.237583
5,0.8293,0.875821,0.68011,0.295974,0.294701,0.285451


[I 2025-03-23 04:38:10,012] Trial 14 pruned. 


Trial 15 with params: {'learning_rate': 0.004568142596704069, 'weight_decay': 0.002, 'warmup_steps': 0, 'lambda_param': 0.2, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7313,1.325981,0.500458,0.11288,0.139436,0.107622
2,1.1295,0.991623,0.648029,0.285117,0.272597,0.257645
3,0.8219,0.825803,0.71494,0.359607,0.354217,0.343878
4,0.5982,0.714223,0.740605,0.418217,0.396953,0.400648
5,0.4361,0.672303,0.75527,0.494505,0.451949,0.462562
6,0.3234,0.629879,0.769936,0.571283,0.505677,0.52419
7,0.2367,0.610366,0.780935,0.623974,0.531932,0.553922
8,0.1818,0.603447,0.793767,0.625296,0.579432,0.589734
9,0.1393,0.584636,0.796517,0.649706,0.585727,0.602661
10,0.1189,0.576879,0.80385,0.657057,0.641122,0.639853


[I 2025-03-23 04:39:49,693] Trial 15 finished with value: 0.6767052824832153 and parameters: {'learning_rate': 0.004568142596704069, 'weight_decay': 0.002, 'warmup_steps': 0, 'lambda_param': 0.2, 'temperature': 7.0}. Best is trial 12 with value: 0.6782818162736831.


Trial 16 with params: {'learning_rate': 0.003470702772133528, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7836,1.371304,0.477544,0.114037,0.12813,0.104404
2,1.2073,1.050193,0.607699,0.224465,0.229447,0.208866
3,0.8903,0.832973,0.707608,0.378726,0.338138,0.335237
4,0.6627,0.747282,0.730522,0.416702,0.380411,0.387682
5,0.4904,0.676255,0.75802,0.460671,0.421612,0.426886
6,0.3762,0.637287,0.765353,0.504596,0.454645,0.467902
7,0.2823,0.628817,0.782768,0.552654,0.51178,0.514885
8,0.2272,0.605922,0.789184,0.610116,0.54606,0.561171
9,0.1726,0.586733,0.7956,0.635672,0.576903,0.592791
10,0.1417,0.589264,0.797434,0.625247,0.597446,0.599971


[I 2025-03-23 04:41:08,996] Trial 16 finished with value: 0.6435225918731579 and parameters: {'learning_rate': 0.003470702772133528, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 6.5}. Best is trial 12 with value: 0.6782818162736831.


Trial 17 with params: {'learning_rate': 0.0027066534224075463, 'weight_decay': 0.0, 'warmup_steps': 3, 'lambda_param': 0.30000000000000004, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8034,1.389757,0.470211,0.121367,0.123063,0.094652
2,1.2145,1.106683,0.588451,0.21899,0.206917,0.188201
3,0.9125,0.873652,0.681943,0.33734,0.318219,0.311747
4,0.6926,0.773602,0.724106,0.383533,0.358957,0.362918
5,0.5266,0.709279,0.742438,0.418529,0.399252,0.39792
6,0.4126,0.671226,0.76077,0.489762,0.440721,0.448333
7,0.3064,0.654318,0.759853,0.487028,0.475674,0.470685
8,0.2504,0.634839,0.778185,0.553768,0.512067,0.521557
9,0.192,0.62322,0.782768,0.609206,0.544474,0.560989
10,0.1606,0.617064,0.785518,0.610259,0.571147,0.577895


[I 2025-03-23 04:43:36,893] Trial 17 finished with value: 0.642653625174915 and parameters: {'learning_rate': 0.0027066534224075463, 'weight_decay': 0.0, 'warmup_steps': 3, 'lambda_param': 0.30000000000000004, 'temperature': 7.0}. Best is trial 12 with value: 0.6782818162736831.


Trial 18 with params: {'learning_rate': 0.004585531913721111, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7505,1.351653,0.473877,0.116606,0.12913,0.101867
2,1.1136,1.015532,0.644363,0.278748,0.270753,0.258016
3,0.7969,0.790709,0.72319,0.380285,0.357788,0.349769
4,0.5712,0.694669,0.749771,0.430568,0.403354,0.405488
5,0.4044,0.643312,0.768103,0.55217,0.487011,0.506034
6,0.2968,0.613202,0.777269,0.556959,0.509848,0.525086
7,0.2122,0.605829,0.776352,0.583745,0.51667,0.535007
8,0.1604,0.588411,0.787351,0.655168,0.573099,0.59526
9,0.1273,0.581031,0.797434,0.638952,0.595395,0.606918
10,0.1097,0.588449,0.8011,0.70804,0.64346,0.661894


[I 2025-03-23 04:45:06,679] Trial 18 finished with value: 0.6821126071957518 and parameters: {'learning_rate': 0.004585531913721111, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 18 with value: 0.6821126071957518.


Trial 19 with params: {'learning_rate': 0.0028049499477387704, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8019,1.380758,0.479377,0.106507,0.12344,0.10022
2,1.2285,1.081259,0.593951,0.202368,0.208853,0.189221
3,0.931,0.859011,0.692942,0.334435,0.316918,0.310947
4,0.7011,0.763096,0.71769,0.410436,0.361289,0.370958
5,0.5394,0.691769,0.747021,0.429763,0.413698,0.411695
6,0.4141,0.655959,0.765353,0.473388,0.449632,0.451858
7,0.312,0.654569,0.761687,0.509105,0.449864,0.461716
8,0.2522,0.625151,0.771769,0.523025,0.486906,0.495656
9,0.1963,0.61536,0.789184,0.620194,0.551989,0.568342
10,0.1626,0.610167,0.784601,0.634326,0.560748,0.571803


[I 2025-03-23 04:47:35,347] Trial 19 finished with value: 0.6288373880391691 and parameters: {'learning_rate': 0.0028049499477387704, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 2.5}. Best is trial 18 with value: 0.6821126071957518.


Trial 20 with params: {'learning_rate': 0.0036400786694624943, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7804,1.356143,0.47846,0.128706,0.128367,0.1032
2,1.183,1.03748,0.616865,0.282788,0.248712,0.238887
3,0.8626,0.82718,0.703025,0.359919,0.33161,0.324288
4,0.639,0.720688,0.730522,0.406843,0.382874,0.383741
5,0.4637,0.667777,0.766269,0.481982,0.436563,0.443019
6,0.3482,0.634861,0.769936,0.515137,0.468009,0.478747
7,0.2541,0.629676,0.780018,0.597557,0.526217,0.545669
8,0.2014,0.60414,0.785518,0.681631,0.57149,0.605236
9,0.1539,0.581568,0.797434,0.716497,0.620434,0.646045
10,0.1254,0.58074,0.794684,0.723504,0.607514,0.640507


[I 2025-03-23 04:49:18,772] Trial 20 finished with value: 0.6987835836175371 and parameters: {'learning_rate': 0.0036400786694624943, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 21 with params: {'learning_rate': 0.003745703739082518, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7603,1.371354,0.474794,0.117315,0.12803,0.101392
2,1.1492,1.000777,0.636114,0.287735,0.259758,0.253214
3,0.8363,0.795435,0.71769,0.382468,0.347016,0.34169
4,0.6106,0.722871,0.736939,0.429571,0.401673,0.401926
5,0.4538,0.654386,0.770852,0.497813,0.454848,0.455331
6,0.3375,0.624614,0.779102,0.601046,0.494154,0.521414
7,0.2467,0.609824,0.794684,0.5785,0.536053,0.544732
8,0.1974,0.593471,0.797434,0.603581,0.565995,0.574898
9,0.1499,0.580557,0.79835,0.63334,0.578913,0.595395
10,0.1242,0.577817,0.800183,0.631979,0.594762,0.603017


[I 2025-03-23 04:50:42,334] Trial 21 finished with value: 0.6640934875922766 and parameters: {'learning_rate': 0.003745703739082518, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 22 with params: {'learning_rate': 0.0026462024795839193, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8324,1.427331,0.466544,0.083117,0.122539,0.093187
2,1.2479,1.183699,0.560953,0.236818,0.188677,0.173208
3,0.9581,0.880439,0.690192,0.319141,0.308437,0.296046
4,0.7281,0.776037,0.721357,0.394009,0.354446,0.35258
5,0.558,0.691719,0.751604,0.433519,0.40618,0.404213
6,0.4328,0.654042,0.75527,0.43889,0.422493,0.424789
7,0.3321,0.655362,0.769936,0.533357,0.471019,0.477095
8,0.2724,0.637422,0.775435,0.543367,0.501493,0.511629
9,0.2138,0.615588,0.788268,0.593532,0.528709,0.547526
10,0.1733,0.601558,0.788268,0.595654,0.561395,0.564947


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-23 04:52:02,835] Trial 22 finished with value: 0.5757743822507889 and parameters: {'learning_rate': 0.0026462024795839193, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 3.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 23 with params: {'learning_rate': 0.002277512484992173, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8757,1.439628,0.476627,0.115293,0.119379,0.092232
2,1.2698,1.170685,0.565536,0.194166,0.187342,0.170531
3,0.9703,0.89886,0.678277,0.294119,0.296154,0.284133
4,0.7433,0.782707,0.71494,0.371403,0.339534,0.341355
5,0.5735,0.699779,0.751604,0.438617,0.397021,0.397469
6,0.4505,0.677777,0.762603,0.470149,0.417213,0.428899
7,0.3503,0.65889,0.765353,0.520545,0.461821,0.47596
8,0.292,0.644722,0.780935,0.504362,0.474748,0.481976
9,0.2303,0.626016,0.773602,0.605174,0.510826,0.533929
10,0.1874,0.608437,0.790101,0.627142,0.556987,0.577634


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-23 04:53:54,223] Trial 23 finished with value: 0.6218172025568559 and parameters: {'learning_rate': 0.002277512484992173, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 24 with params: {'learning_rate': 0.00429595606675416, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7435,1.341973,0.482126,0.090029,0.129118,0.098477
2,1.1161,0.985281,0.64253,0.281265,0.262757,0.256704
3,0.8122,0.798231,0.712191,0.366783,0.34487,0.342896
4,0.5848,0.717096,0.742438,0.45498,0.399645,0.407723
5,0.4238,0.656902,0.769019,0.502839,0.467625,0.473915
6,0.3123,0.633878,0.780935,0.543134,0.481179,0.496272
7,0.2277,0.601133,0.791017,0.576536,0.514424,0.531383
8,0.1714,0.593986,0.789184,0.61015,0.550073,0.56687
9,0.1358,0.571722,0.802016,0.671396,0.601993,0.626817
10,0.1154,0.587562,0.802933,0.704351,0.624316,0.647127


[I 2025-03-23 04:55:25,239] Trial 24 finished with value: 0.6890285404822954 and parameters: {'learning_rate': 0.00429595606675416, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 25 with params: {'learning_rate': 0.004231866635442915, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7418,1.338112,0.48396,0.114978,0.131655,0.102878
2,1.1192,0.981738,0.648029,0.28077,0.272961,0.263459
3,0.8135,0.798518,0.71494,0.36346,0.349606,0.342137
4,0.5938,0.713094,0.743355,0.428298,0.40354,0.405056
5,0.4265,0.663204,0.761687,0.504787,0.456701,0.464826
6,0.3137,0.645076,0.764436,0.541003,0.480326,0.494791
7,0.2333,0.617969,0.780935,0.642955,0.532709,0.562879
8,0.1762,0.594914,0.787351,0.633108,0.570662,0.586722
9,0.1406,0.59168,0.793767,0.656224,0.590024,0.612235
10,0.1191,0.588609,0.794684,0.685722,0.639408,0.649217


[I 2025-03-23 04:57:28,969] Trial 25 finished with value: 0.693867036362202 and parameters: {'learning_rate': 0.004231866635442915, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 26 with params: {'learning_rate': 0.0042370896723960315, 'weight_decay': 0.006, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8123,1.365436,0.483043,0.111619,0.129814,0.104978
2,1.1942,1.04239,0.615949,0.260267,0.23421,0.216237
3,0.8781,0.838848,0.693859,0.334506,0.325067,0.315911
4,0.6439,0.743378,0.724106,0.400345,0.370225,0.372818
5,0.4707,0.66841,0.756187,0.449428,0.426161,0.422867
6,0.3483,0.644938,0.770852,0.540633,0.470637,0.48887
7,0.2567,0.624739,0.780935,0.551525,0.517928,0.525873
8,0.2001,0.607821,0.786434,0.58484,0.54387,0.555788
9,0.1549,0.601857,0.791017,0.652068,0.584228,0.60426
10,0.1275,0.591281,0.793767,0.646483,0.593256,0.607257


[I 2025-03-23 04:58:52,884] Trial 26 finished with value: 0.6637931805172747 and parameters: {'learning_rate': 0.0042370896723960315, 'weight_decay': 0.006, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 5.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 27 with params: {'learning_rate': 0.002315821606051138, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.2, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8962,1.446494,0.457379,0.098811,0.108655,0.084079
2,1.2779,1.186052,0.562786,0.190403,0.184289,0.166543
3,0.9911,0.908932,0.666361,0.278594,0.284182,0.271978
4,0.7555,0.813873,0.701192,0.371107,0.334482,0.336866
5,0.5916,0.722609,0.745188,0.426904,0.394766,0.397322
6,0.4671,0.672394,0.752521,0.450517,0.406318,0.415858
7,0.3606,0.666389,0.764436,0.509595,0.459118,0.463888
8,0.2943,0.637633,0.779102,0.511591,0.485957,0.491622
9,0.2324,0.633655,0.777269,0.608179,0.511924,0.535417
10,0.1962,0.616925,0.785518,0.640182,0.550338,0.574073


[I 2025-03-23 04:59:54,790] Trial 27 pruned. 


Trial 28 with params: {'learning_rate': 0.003991181408810343, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8444,1.400093,0.464711,0.143125,0.124644,0.103513
2,1.2014,1.084249,0.594867,0.224663,0.214111,0.202625
3,0.8798,0.831049,0.706691,0.34463,0.332573,0.321874
4,0.6379,0.71932,0.730522,0.387298,0.372899,0.370442
5,0.4681,0.66005,0.76077,0.46296,0.432689,0.433075
6,0.3487,0.626903,0.773602,0.528303,0.465318,0.476625
7,0.2532,0.614425,0.775435,0.52223,0.496877,0.497875
8,0.1997,0.598491,0.790101,0.574492,0.541862,0.548184
9,0.158,0.596243,0.790101,0.611076,0.558855,0.57475
10,0.1285,0.587703,0.799267,0.671038,0.617701,0.629139


[I 2025-03-23 05:01:16,333] Trial 28 finished with value: 0.677545931470658 and parameters: {'learning_rate': 0.003991181408810343, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 6.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 29 with params: {'learning_rate': 0.002791243062252435, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8398,1.428785,0.460128,0.082985,0.11845,0.092989
2,1.2552,1.18007,0.55912,0.230262,0.18665,0.168969
3,0.96,0.891581,0.68561,0.32926,0.303498,0.294822
4,0.7251,0.767797,0.721357,0.40585,0.360154,0.362178
5,0.5497,0.691366,0.747021,0.432064,0.403912,0.400251
6,0.424,0.644652,0.762603,0.453975,0.425475,0.431412
7,0.3211,0.650517,0.761687,0.472192,0.462716,0.454145
8,0.2633,0.6181,0.778185,0.525065,0.501949,0.50738
9,0.2018,0.603636,0.784601,0.556252,0.532912,0.539317
10,0.1657,0.599278,0.792851,0.63969,0.58837,0.599181


[I 2025-03-23 05:02:54,091] Trial 29 finished with value: 0.6409105536010135 and parameters: {'learning_rate': 0.002791243062252435, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 4.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 30 with params: {'learning_rate': 0.000311584806759745, 'weight_decay': 0.008, 'warmup_steps': 0, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.1969,1.974114,0.181485,0.01157,0.021252,0.00832
2,1.8115,1.664514,0.394134,0.043725,0.081827,0.053457
3,1.5838,1.473242,0.450962,0.086324,0.105252,0.077313
4,1.422,1.34624,0.486709,0.09304,0.124952,0.09727
5,1.3148,1.270877,0.523373,0.139039,0.143916,0.124597
6,1.2124,1.220044,0.545371,0.155954,0.16291,0.144047
7,1.1509,1.174569,0.560953,0.164182,0.174633,0.15566
8,1.1011,1.142231,0.584785,0.17601,0.193601,0.170947
9,1.0503,1.110772,0.590284,0.216156,0.193811,0.178808
10,1.0007,1.080794,0.605866,0.189057,0.208977,0.188309


[I 2025-03-23 05:04:01,179] Trial 30 pruned. 


Trial 31 with params: {'learning_rate': 0.0044077302177424815, 'weight_decay': 0.001, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8575,1.344753,0.493126,0.130729,0.134297,0.10639
2,1.1616,1.010223,0.636114,0.276743,0.259233,0.252261
3,0.83,0.852444,0.701192,0.359904,0.347231,0.33093
4,0.6037,0.72937,0.732356,0.424422,0.376773,0.384734
5,0.4419,0.651556,0.753437,0.45969,0.420176,0.422224
6,0.3197,0.634765,0.773602,0.552413,0.474947,0.489889
7,0.2332,0.612758,0.784601,0.564096,0.534882,0.538047
8,0.1852,0.60059,0.789184,0.610346,0.576812,0.580086
9,0.1434,0.576664,0.799267,0.644157,0.583891,0.600544
10,0.1207,0.576638,0.806599,0.643379,0.617254,0.619074


[I 2025-03-23 05:05:20,295] Trial 31 finished with value: 0.6647364599316802 and parameters: {'learning_rate': 0.0044077302177424815, 'weight_decay': 0.001, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 32 with params: {'learning_rate': 0.00476977354019093, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.2, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.911,1.38533,0.472044,0.105885,0.121662,0.098258
2,1.2052,1.048992,0.611366,0.230919,0.236107,0.217963
3,0.8801,0.86187,0.692942,0.34171,0.321893,0.31443
4,0.6479,0.740792,0.72044,0.399007,0.359768,0.366199
5,0.4712,0.673859,0.758937,0.44787,0.435796,0.434826
6,0.3445,0.638885,0.776352,0.519452,0.485796,0.498083
7,0.2546,0.629919,0.777269,0.555902,0.517528,0.523741
8,0.1988,0.605467,0.786434,0.563957,0.531245,0.538492
9,0.1524,0.592945,0.7956,0.627645,0.583174,0.592557
10,0.1291,0.594153,0.804766,0.709753,0.64815,0.662435


[I 2025-03-23 05:06:43,322] Trial 32 finished with value: 0.6885374597703909 and parameters: {'learning_rate': 0.00476977354019093, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.2, 'temperature': 4.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 33 with params: {'learning_rate': 0.004655522697349133, 'weight_decay': 0.003, 'warmup_steps': 0, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7301,1.319181,0.504125,0.111955,0.139181,0.107381
2,1.1253,1.012402,0.64253,0.285043,0.267163,0.252723
3,0.8108,0.811444,0.712191,0.36305,0.352259,0.34543
4,0.5802,0.711431,0.733272,0.425547,0.385278,0.390846
5,0.4167,0.656715,0.758937,0.497909,0.453472,0.464971
6,0.306,0.637261,0.775435,0.583481,0.518102,0.532136
7,0.2234,0.609963,0.781852,0.594554,0.536131,0.548382
8,0.1763,0.601573,0.783685,0.62873,0.569476,0.583346
9,0.1376,0.591912,0.794684,0.643208,0.596184,0.60308
10,0.1181,0.588979,0.792851,0.656103,0.610098,0.618673


[I 2025-03-23 05:08:18,508] Trial 33 finished with value: 0.665872740357504 and parameters: {'learning_rate': 0.004655522697349133, 'weight_decay': 0.003, 'warmup_steps': 0, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 34 with params: {'learning_rate': 0.003341840864830417, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.30000000000000004, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7828,1.367972,0.485793,0.126438,0.131687,0.108122
2,1.2096,1.040019,0.606783,0.216397,0.226904,0.204109
3,0.8979,0.837515,0.703025,0.359484,0.330387,0.324427
4,0.6723,0.75291,0.725023,0.422221,0.373178,0.381677
5,0.5006,0.682904,0.757104,0.482217,0.436424,0.442616
6,0.3813,0.637762,0.766269,0.472004,0.43971,0.445903
7,0.2917,0.622538,0.771769,0.572993,0.483224,0.501754
8,0.2328,0.607434,0.780935,0.619879,0.541844,0.559717
9,0.175,0.595649,0.789184,0.620175,0.563995,0.580775
10,0.145,0.595611,0.800183,0.63248,0.595212,0.601098


[I 2025-03-23 05:09:35,481] Trial 34 finished with value: 0.6489399044549092 and parameters: {'learning_rate': 0.003341840864830417, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.30000000000000004, 'temperature': 4.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 35 with params: {'learning_rate': 0.0007521694170670531, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0728,1.721828,0.346471,0.040681,0.069304,0.043129
2,1.5444,1.404124,0.468378,0.122129,0.112559,0.088583
3,1.301,1.220721,0.525206,0.139242,0.147256,0.124609
4,1.1116,1.078702,0.603116,0.217078,0.197816,0.182588
5,0.9741,1.000099,0.631531,0.287918,0.240864,0.237134


[I 2025-03-23 05:10:07,073] Trial 35 pruned. 


Trial 36 with params: {'learning_rate': 0.004827870834825323, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8589,1.350643,0.485793,0.100304,0.133693,0.10768
2,1.1386,1.003604,0.648029,0.278547,0.271167,0.257879
3,0.8117,0.843711,0.697525,0.350196,0.345127,0.329354
4,0.5964,0.730192,0.730522,0.426276,0.387826,0.394417
5,0.4281,0.662969,0.759853,0.485624,0.447457,0.452299
6,0.3127,0.623874,0.777269,0.534931,0.478867,0.491474
7,0.2257,0.629218,0.782768,0.560113,0.526843,0.532217
8,0.1754,0.612232,0.793767,0.617968,0.577813,0.587001
9,0.1394,0.598639,0.791017,0.615185,0.579659,0.588774
10,0.1181,0.604155,0.793767,0.670682,0.6233,0.636578


[I 2025-03-23 05:11:22,537] Trial 36 finished with value: 0.661019186246651 and parameters: {'learning_rate': 0.004827870834825323, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 37 with params: {'learning_rate': 0.004408419995059942, 'weight_decay': 0.007, 'warmup_steps': 2, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7459,1.33286,0.480293,0.132583,0.130907,0.104439
2,1.1108,0.977913,0.658112,0.305287,0.281191,0.274442
3,0.8048,0.784917,0.716774,0.374134,0.352806,0.347312
4,0.5832,0.708354,0.744271,0.442679,0.407602,0.414306
5,0.4239,0.652266,0.769019,0.538157,0.470483,0.483423
6,0.3136,0.621249,0.771769,0.547958,0.491541,0.506885
7,0.2263,0.608428,0.791017,0.639821,0.541582,0.570653
8,0.1708,0.59813,0.791017,0.617037,0.566517,0.578862
9,0.1347,0.586388,0.793767,0.609791,0.567341,0.579953
10,0.1157,0.596336,0.791017,0.629022,0.57319,0.588174


[I 2025-03-23 05:12:17,538] Trial 37 pruned. 


Trial 38 with params: {'learning_rate': 0.00014198795619548116, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.30000000000000004, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3166,2.097828,0.176902,0.003538,0.02,0.006012
2,2.0329,1.929454,0.297892,0.031815,0.052387,0.031945
3,1.8665,1.768793,0.350137,0.037634,0.068024,0.042746
4,1.7184,1.653205,0.392301,0.041487,0.08136,0.053935
5,1.628,1.555125,0.416132,0.068082,0.089902,0.061236


[I 2025-03-23 05:12:43,333] Trial 38 pruned. 


Trial 39 with params: {'learning_rate': 0.00023647740624003471, 'weight_decay': 0.01, 'warmup_steps': 2, 'lambda_param': 0.8, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.2461,2.043145,0.176902,0.003538,0.02,0.006012
2,1.9033,1.763151,0.348304,0.036344,0.067146,0.039236
3,1.6928,1.588737,0.411549,0.044944,0.087956,0.057946
4,1.5362,1.45566,0.451879,0.079149,0.10489,0.078152
5,1.4292,1.370605,0.486709,0.095316,0.119933,0.093979
6,1.3312,1.303439,0.505041,0.136665,0.134342,0.111664
7,1.2709,1.265601,0.514207,0.136793,0.142744,0.120821
8,1.2228,1.23923,0.537122,0.171669,0.163574,0.141875
9,1.1768,1.202611,0.548121,0.177576,0.1633,0.144649
10,1.1359,1.180041,0.56187,0.154725,0.18012,0.156182


[I 2025-03-23 05:13:35,705] Trial 39 pruned. 


Trial 40 with params: {'learning_rate': 0.00012124257132049206, 'weight_decay': 0.009000000000000001, 'warmup_steps': 0, 'lambda_param': 0.30000000000000004, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3198,2.105684,0.176902,0.003538,0.02,0.006012
2,2.0568,1.969242,0.206233,0.033547,0.028401,0.015843
3,1.9139,1.819747,0.342805,0.039198,0.065442,0.040836
4,1.7669,1.704653,0.366636,0.037355,0.073199,0.047366
5,1.6826,1.613108,0.396884,0.040127,0.083751,0.053958


[I 2025-03-23 05:14:02,096] Trial 40 pruned. 


Trial 41 with params: {'learning_rate': 0.004526251228015789, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7517,1.360529,0.474794,0.097744,0.129269,0.101074
2,1.12,1.022584,0.638863,0.256297,0.264106,0.250372
3,0.7995,0.794984,0.71769,0.372991,0.359212,0.353158
4,0.5733,0.71288,0.743355,0.449277,0.401812,0.409421
5,0.4109,0.651459,0.769936,0.533905,0.46782,0.486145
6,0.301,0.608457,0.792851,0.587021,0.5154,0.536372
7,0.2237,0.605783,0.79835,0.612092,0.54288,0.563807
8,0.1687,0.581304,0.8011,0.647724,0.570987,0.590753
9,0.131,0.573095,0.810266,0.672479,0.624961,0.639459
10,0.1113,0.581089,0.806599,0.70952,0.639084,0.659455


[I 2025-03-23 05:15:31,155] Trial 41 finished with value: 0.686985565470159 and parameters: {'learning_rate': 0.004526251228015789, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 2.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 42 with params: {'learning_rate': 0.0026574588792579227, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8338,1.430445,0.464711,0.103634,0.122275,0.094686
2,1.251,1.189042,0.55912,0.221672,0.187797,0.169967
3,0.9626,0.881456,0.68286,0.304636,0.304814,0.289312
4,0.7294,0.769999,0.72044,0.394326,0.357409,0.356465
5,0.5564,0.69405,0.740605,0.400906,0.388808,0.381525


[I 2025-03-23 05:16:04,347] Trial 42 pruned. 


Trial 43 with params: {'learning_rate': 0.003024355821361906, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8348,1.41585,0.467461,0.108241,0.122336,0.09742
2,1.2339,1.122977,0.578368,0.179895,0.202329,0.178735
3,0.9259,0.867719,0.689276,0.321047,0.304748,0.295316
4,0.6964,0.765012,0.710357,0.370661,0.346829,0.347402
5,0.527,0.693094,0.744271,0.445771,0.40589,0.406117
6,0.403,0.656971,0.762603,0.467614,0.435555,0.441487
7,0.3041,0.653894,0.76077,0.508964,0.459875,0.465599
8,0.2473,0.618508,0.781852,0.546223,0.501611,0.5156
9,0.1926,0.607144,0.784601,0.595091,0.523836,0.546338
10,0.1594,0.602875,0.7956,0.630294,0.583078,0.596837


[I 2025-03-23 05:17:46,035] Trial 43 finished with value: 0.6526065005562868 and parameters: {'learning_rate': 0.003024355821361906, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 2.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 44 with params: {'learning_rate': 0.0014691315499909523, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.9, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9675,1.532252,0.420715,0.074431,0.095621,0.073946
2,1.3598,1.270942,0.529789,0.15349,0.154446,0.138129
3,1.1115,0.993269,0.640697,0.269175,0.239732,0.230069
4,0.8703,0.881207,0.678277,0.343609,0.289076,0.287042
5,0.7153,0.803429,0.705775,0.348843,0.328454,0.323458


[I 2025-03-23 05:18:10,815] Trial 44 pruned. 


Trial 45 with params: {'learning_rate': 0.004296780942748746, 'weight_decay': 0.006, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8176,1.364549,0.490376,0.111188,0.132167,0.106512
2,1.1926,1.04373,0.611366,0.236026,0.234969,0.216848
3,0.8785,0.852015,0.690192,0.3178,0.318568,0.307942
4,0.6455,0.745151,0.726856,0.415772,0.373658,0.38108
5,0.4671,0.661753,0.762603,0.467969,0.432601,0.433815
6,0.3435,0.64535,0.772686,0.553053,0.484751,0.506967
7,0.2532,0.618791,0.782768,0.574797,0.524037,0.538903
8,0.1989,0.604544,0.794684,0.64974,0.592341,0.609736
9,0.1496,0.586749,0.793767,0.667437,0.588977,0.615102
10,0.1272,0.596466,0.791934,0.672417,0.623034,0.63601


[I 2025-03-23 05:19:41,527] Trial 45 finished with value: 0.6862913430338118 and parameters: {'learning_rate': 0.004296780942748746, 'weight_decay': 0.006, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 46 with params: {'learning_rate': 0.003625231472041747, 'weight_decay': 0.003, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7996,1.354225,0.477544,0.098976,0.127671,0.09501
2,1.1725,1.014131,0.626948,0.24629,0.241216,0.22976
3,0.8463,0.831141,0.706691,0.359586,0.341972,0.335179
4,0.6277,0.732516,0.735105,0.429663,0.39567,0.398458
5,0.4653,0.666895,0.757104,0.496136,0.438269,0.446767
6,0.353,0.647885,0.777269,0.516138,0.460893,0.474401
7,0.2618,0.626048,0.788268,0.583244,0.535668,0.548367
8,0.2135,0.6122,0.790101,0.617868,0.552457,0.569792
9,0.1594,0.599887,0.79835,0.656035,0.595332,0.610957
10,0.1326,0.591187,0.8011,0.652294,0.617591,0.62363


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-23 05:21:40,453] Trial 46 finished with value: 0.6790628793674071 and parameters: {'learning_rate': 0.003625231472041747, 'weight_decay': 0.003, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 2.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 47 with params: {'learning_rate': 0.0025789104733638904, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8758,1.429933,0.472961,0.104007,0.121384,0.095163
2,1.2595,1.185645,0.55637,0.191521,0.179418,0.162872
3,0.9617,0.880282,0.679193,0.314359,0.303205,0.294796
4,0.7205,0.768694,0.719523,0.370493,0.347876,0.347079
5,0.5544,0.695565,0.746104,0.431251,0.395751,0.395216
6,0.4271,0.65699,0.768103,0.477021,0.438809,0.446876
7,0.3256,0.643085,0.783685,0.522829,0.498695,0.501099
8,0.2655,0.625859,0.786434,0.533913,0.50306,0.509087
9,0.2054,0.608672,0.794684,0.607346,0.540788,0.556731
10,0.1701,0.597415,0.7956,0.635262,0.579426,0.590132


[I 2025-03-23 05:22:32,845] Trial 47 pruned. 


Trial 48 with params: {'learning_rate': 0.004301120382566686, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.743,1.341842,0.484876,0.112244,0.130958,0.100915
2,1.1159,0.986909,0.644363,0.281878,0.264735,0.257131
3,0.8088,0.797302,0.707608,0.358118,0.337626,0.335073
4,0.5827,0.711925,0.746104,0.465251,0.403183,0.413386
5,0.4264,0.66928,0.76352,0.511767,0.463626,0.470088
6,0.3104,0.632075,0.775435,0.531414,0.476906,0.492928
7,0.2291,0.61712,0.790101,0.636161,0.553043,0.577845
8,0.1737,0.593778,0.789184,0.648884,0.587141,0.602903
9,0.1348,0.584269,0.8011,0.680957,0.619011,0.640334
10,0.1147,0.58361,0.797434,0.687519,0.636341,0.649469


[I 2025-03-23 05:24:31,580] Trial 48 finished with value: 0.6930311193622893 and parameters: {'learning_rate': 0.004301120382566686, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 5.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 49 with params: {'learning_rate': 0.0036189865769530086, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7865,1.365261,0.479377,0.113748,0.12845,0.104205
2,1.2008,1.04684,0.611366,0.231913,0.235187,0.215269
3,0.887,0.834552,0.703025,0.361761,0.331932,0.32835
4,0.6563,0.746046,0.726856,0.391615,0.368913,0.371071
5,0.4882,0.669109,0.757104,0.437352,0.426307,0.42484
6,0.366,0.633196,0.774519,0.500349,0.467211,0.475571
7,0.2731,0.617031,0.778185,0.523751,0.493026,0.495747
8,0.2187,0.602265,0.789184,0.58151,0.54256,0.551534
9,0.1657,0.587688,0.793767,0.634005,0.576881,0.592706
10,0.1381,0.587796,0.797434,0.656543,0.606438,0.618881


[I 2025-03-23 05:25:46,837] Trial 49 finished with value: 0.6524015394526623 and parameters: {'learning_rate': 0.0036189865769530086, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 5.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 50 with params: {'learning_rate': 0.00011155354646039437, 'weight_decay': 0.004, 'warmup_steps': 0, 'lambda_param': 0.30000000000000004, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3326,2.111481,0.176902,0.003538,0.02,0.006012
2,2.0708,1.996898,0.177819,0.006418,0.020274,0.006545
3,1.9387,1.844379,0.346471,0.041084,0.066474,0.040838
4,1.7976,1.741691,0.355637,0.038588,0.069996,0.046033
5,1.7183,1.650573,0.390467,0.039722,0.08168,0.05288


[I 2025-03-23 05:26:13,476] Trial 50 pruned. 


Trial 51 with params: {'learning_rate': 0.004755427605435055, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9082,1.37692,0.483043,0.087671,0.12706,0.101337
2,1.2047,1.048006,0.611366,0.24627,0.233956,0.215376
3,0.8751,0.840807,0.703025,0.37067,0.339563,0.333038
4,0.627,0.723517,0.731439,0.418631,0.378159,0.382542
5,0.4558,0.665344,0.762603,0.448435,0.442947,0.439029
6,0.3398,0.634864,0.777269,0.531393,0.485899,0.500786
7,0.2467,0.626192,0.780935,0.600174,0.526531,0.538679
8,0.1931,0.608213,0.7956,0.609395,0.569911,0.578897
9,0.1499,0.588228,0.7956,0.644557,0.572773,0.59028
10,0.1245,0.590503,0.79835,0.68739,0.61911,0.638161


[I 2025-03-23 05:28:23,633] Trial 51 finished with value: 0.6704838486639142 and parameters: {'learning_rate': 0.004755427605435055, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 2.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 52 with params: {'learning_rate': 0.0035822414584640726, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7897,1.355481,0.470211,0.13005,0.127249,0.100457
2,1.1786,1.047847,0.60495,0.260392,0.22915,0.222013
3,0.8731,0.833762,0.708524,0.353205,0.337754,0.330073
4,0.6437,0.728112,0.72594,0.40949,0.376683,0.381663
5,0.4685,0.670005,0.75802,0.464997,0.440083,0.439904
6,0.3518,0.626398,0.778185,0.525263,0.475855,0.485377
7,0.2562,0.621873,0.779102,0.558211,0.504729,0.511783
8,0.2019,0.587835,0.791017,0.626031,0.560748,0.578151
9,0.1553,0.584348,0.796517,0.673641,0.589424,0.614494
10,0.1285,0.580847,0.805683,0.694273,0.631713,0.647182


[I 2025-03-23 05:29:51,758] Trial 52 finished with value: 0.6552932392686417 and parameters: {'learning_rate': 0.0035822414584640726, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 5.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 53 with params: {'learning_rate': 0.004861413363522314, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7564,1.352269,0.473877,0.095678,0.128181,0.100432
2,1.1207,1.019122,0.641613,0.274626,0.273596,0.260781
3,0.8048,0.791118,0.72044,0.385946,0.359668,0.355828
4,0.5705,0.6906,0.75527,0.477562,0.41848,0.426527
5,0.4039,0.645411,0.768103,0.541542,0.482778,0.495885
6,0.294,0.611812,0.785518,0.568367,0.53978,0.543908
7,0.2103,0.597121,0.788268,0.624623,0.559996,0.574962
8,0.1552,0.566426,0.802933,0.654521,0.596734,0.606175
9,0.1207,0.560111,0.805683,0.684518,0.642569,0.651509
10,0.1047,0.564605,0.802016,0.701783,0.65186,0.662106


[I 2025-03-23 05:31:49,901] Trial 53 finished with value: 0.6498913738236896 and parameters: {'learning_rate': 0.004861413363522314, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 5.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 54 with params: {'learning_rate': 0.0001324011031485879, 'weight_decay': 0.007, 'warmup_steps': 4, 'lambda_param': 1.0, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3313,2.107085,0.176902,0.003538,0.02,0.006012
2,2.0496,1.953313,0.258478,0.032001,0.041942,0.024257
3,1.896,1.801939,0.340055,0.038832,0.064226,0.038476
4,1.7471,1.681109,0.379468,0.039307,0.077087,0.050225
5,1.6581,1.586748,0.404216,0.044954,0.085976,0.056314


[I 2025-03-23 05:32:15,989] Trial 54 pruned. 


Trial 55 with params: {'learning_rate': 0.004251166826739927, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 1.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7417,1.340035,0.487626,0.114655,0.132384,0.102481
2,1.1185,0.981663,0.643446,0.27639,0.268952,0.259492
3,0.8146,0.794998,0.710357,0.362128,0.34089,0.336354
4,0.5941,0.707807,0.740605,0.443911,0.39353,0.400987
5,0.426,0.66304,0.768103,0.535442,0.456012,0.472076
6,0.3124,0.643894,0.771769,0.554716,0.496047,0.512051
7,0.2345,0.609626,0.776352,0.592567,0.528756,0.546255
8,0.1759,0.600002,0.785518,0.636837,0.564007,0.585066
9,0.1368,0.588156,0.794684,0.689191,0.609036,0.636871
10,0.1162,0.59194,0.79835,0.707222,0.648922,0.663877


[I 2025-03-23 05:33:35,818] Trial 55 finished with value: 0.670094633166245 and parameters: {'learning_rate': 0.004251166826739927, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 1.0, 'temperature': 5.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 56 with params: {'learning_rate': 0.0001413812546509425, 'weight_decay': 0.003, 'warmup_steps': 4, 'lambda_param': 0.8, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3223,2.100794,0.176902,0.003538,0.02,0.006012
2,2.0363,1.934205,0.298808,0.03218,0.052579,0.032253
3,1.8711,1.774532,0.351054,0.038197,0.067831,0.04218
4,1.7235,1.658809,0.387718,0.040655,0.079393,0.052367
5,1.6335,1.560866,0.413382,0.068714,0.089109,0.060845


[I 2025-03-23 05:34:02,468] Trial 56 pruned. 


Trial 57 with params: {'learning_rate': 0.00012862788348576466, 'weight_decay': 0.003, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.335,2.108447,0.176902,0.003538,0.02,0.006012
2,2.0543,1.960446,0.230981,0.031546,0.034866,0.021244
3,1.9042,1.808797,0.341888,0.040767,0.064618,0.038366
4,1.7537,1.687769,0.376719,0.03871,0.076294,0.049569
5,1.665,1.59358,0.402383,0.048165,0.085694,0.056561
6,1.5658,1.519655,0.428048,0.068508,0.093456,0.063031
7,1.5076,1.472206,0.445463,0.108181,0.101496,0.075584
8,1.463,1.44206,0.455545,0.112931,0.108516,0.084565
9,1.4271,1.403997,0.471127,0.118547,0.112336,0.086074
10,1.3947,1.390429,0.482126,0.116987,0.120801,0.096624


[I 2025-03-23 05:35:03,619] Trial 57 pruned. 


Trial 58 with params: {'learning_rate': 0.0017382957701584515, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9082,1.484873,0.446379,0.069844,0.104324,0.079233
2,1.3168,1.213534,0.550871,0.148863,0.168563,0.147716
3,1.058,0.939104,0.659028,0.258268,0.255359,0.242476
4,0.8216,0.848048,0.686526,0.349141,0.300946,0.300495
5,0.665,0.764181,0.709441,0.361841,0.337096,0.332247
6,0.5385,0.715172,0.744271,0.424635,0.384131,0.391773
7,0.4364,0.683312,0.757104,0.454392,0.428283,0.426256
8,0.3718,0.660087,0.771769,0.508461,0.454319,0.467935
9,0.3016,0.655846,0.767186,0.538418,0.483954,0.495907
10,0.2527,0.634508,0.771769,0.542852,0.477094,0.493038


[I 2025-03-23 05:36:02,636] Trial 58 pruned. 


Trial 59 with params: {'learning_rate': 0.001238220543860261, 'weight_decay': 0.007, 'warmup_steps': 0, 'lambda_param': 0.5, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9441,1.566903,0.407883,0.064073,0.090796,0.063984
2,1.3963,1.308347,0.506874,0.144174,0.142354,0.125293
3,1.1627,1.046493,0.613199,0.201953,0.210376,0.191002
4,0.9393,0.950802,0.647113,0.318059,0.249129,0.245891
5,0.7941,0.859064,0.68561,0.328672,0.29969,0.293661


[I 2025-03-23 05:36:28,426] Trial 59 pruned. 


Trial 60 with params: {'learning_rate': 0.00017559280388301614, 'weight_decay': 0.0, 'warmup_steps': 0, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.2633,2.068878,0.176902,0.003538,0.02,0.006012
2,1.9714,1.846528,0.335472,0.027179,0.062267,0.037326
3,1.7787,1.673221,0.379468,0.040433,0.077451,0.050245
4,1.6281,1.555709,0.409716,0.044003,0.086923,0.057709
5,1.5304,1.459066,0.450046,0.073153,0.102343,0.075836
6,1.4301,1.393911,0.474794,0.104815,0.115244,0.089176
7,1.376,1.354144,0.489459,0.116786,0.125483,0.10068
8,1.3339,1.332619,0.495875,0.128402,0.134575,0.115555
9,1.2944,1.290859,0.513291,0.139159,0.138335,0.115381
10,1.2579,1.27584,0.517874,0.121932,0.147672,0.124461


[I 2025-03-23 05:37:27,325] Trial 60 pruned. 


Trial 61 with params: {'learning_rate': 0.0016206772472476601, 'weight_decay': 0.006, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9205,1.50279,0.428964,0.085756,0.098492,0.075286
2,1.333,1.234933,0.538955,0.143088,0.163037,0.14269
3,1.0792,0.974742,0.650779,0.252207,0.253488,0.236139
4,0.8526,0.872888,0.683776,0.339969,0.29417,0.291997
5,0.6985,0.787685,0.710357,0.360911,0.338114,0.334196


[I 2025-03-23 05:37:59,292] Trial 61 pruned. 


Trial 62 with params: {'learning_rate': 0.003935121200970843, 'weight_decay': 0.006, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8012,1.371501,0.47846,0.113095,0.127278,0.103743
2,1.2055,1.056576,0.610449,0.236839,0.229989,0.212644
3,0.8878,0.84556,0.695692,0.320188,0.325311,0.31296
4,0.6602,0.748411,0.731439,0.426853,0.376121,0.381805
5,0.4802,0.678247,0.753437,0.432556,0.41893,0.416737
6,0.3598,0.645945,0.770852,0.520776,0.464499,0.479863
7,0.2642,0.629147,0.787351,0.586008,0.534027,0.550315
8,0.2097,0.610396,0.789184,0.615681,0.549397,0.572071
9,0.1614,0.593651,0.789184,0.662121,0.578102,0.605204
10,0.1329,0.601459,0.793767,0.659682,0.600199,0.616193


[I 2025-03-23 05:39:28,726] Trial 62 finished with value: 0.6513753854913292 and parameters: {'learning_rate': 0.003935121200970843, 'weight_decay': 0.006, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 4.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 63 with params: {'learning_rate': 0.003390206489038506, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7847,1.367293,0.486709,0.131818,0.131701,0.108027
2,1.2114,1.051767,0.607699,0.223283,0.226122,0.204509
3,0.8989,0.845905,0.698442,0.350404,0.320685,0.313942
4,0.6737,0.762052,0.718607,0.399612,0.362096,0.367467
5,0.5054,0.679925,0.749771,0.449249,0.423118,0.424469
6,0.3796,0.649294,0.767186,0.506744,0.452639,0.467107
7,0.2873,0.627144,0.774519,0.512016,0.47962,0.480972
8,0.2308,0.611011,0.793767,0.61571,0.56027,0.572582
9,0.1739,0.596946,0.794684,0.637956,0.578438,0.593765
10,0.1431,0.594942,0.792851,0.616496,0.572272,0.581184


[I 2025-03-23 05:40:52,700] Trial 63 pruned. 


Trial 64 with params: {'learning_rate': 0.00041098534227771127, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.201,1.923349,0.303391,0.052285,0.054411,0.043526
2,1.7481,1.583593,0.421632,0.045339,0.090136,0.05896
3,1.4911,1.385351,0.477544,0.076189,0.115673,0.088162
4,1.3282,1.280707,0.508708,0.134456,0.140645,0.120441
5,1.2129,1.179933,0.55912,0.1537,0.168423,0.147525
6,1.1006,1.131516,0.587534,0.170221,0.199094,0.173024
7,1.0295,1.068987,0.597617,0.198827,0.202139,0.185328
8,0.9647,1.027269,0.626031,0.293687,0.225129,0.216132
9,0.9069,1.006361,0.635197,0.301341,0.237024,0.235016
10,0.855,0.971497,0.652612,0.327398,0.261896,0.258714


[I 2025-03-23 05:42:15,941] Trial 64 pruned. 


Trial 65 with params: {'learning_rate': 0.0035429043293858124, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7933,1.360636,0.477544,0.110611,0.130511,0.103378
2,1.1871,1.044446,0.607699,0.239171,0.224003,0.212953
3,0.8788,0.841134,0.697525,0.346489,0.323319,0.319466
4,0.6494,0.721858,0.736022,0.440394,0.388389,0.392871
5,0.4697,0.673101,0.764436,0.501687,0.452443,0.460299
6,0.3539,0.635087,0.770852,0.547196,0.475717,0.491302
7,0.256,0.623275,0.781852,0.600755,0.516452,0.532674
8,0.2046,0.593334,0.789184,0.625759,0.55541,0.576039
9,0.1552,0.578396,0.791934,0.635713,0.564521,0.587122
10,0.1294,0.573137,0.800183,0.660399,0.57943,0.602827


[I 2025-03-23 05:43:08,541] Trial 65 pruned. 


Trial 66 with params: {'learning_rate': 0.00484269308037669, 'weight_decay': 0.007, 'warmup_steps': 0, 'lambda_param': 0.2, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7218,1.33206,0.489459,0.091014,0.132506,0.101312
2,1.1177,1.007072,0.640697,0.280495,0.270758,0.254867
3,0.8079,0.817857,0.71494,0.371999,0.349729,0.344431
4,0.5803,0.718278,0.743355,0.431153,0.403336,0.40566
5,0.4231,0.657301,0.76352,0.532019,0.465535,0.477057
6,0.3074,0.621798,0.778185,0.560725,0.503274,0.515766
7,0.2275,0.605857,0.784601,0.61437,0.524239,0.543174
8,0.1682,0.588707,0.791934,0.621172,0.569063,0.582555
9,0.132,0.586975,0.802933,0.658929,0.590917,0.608284
10,0.1135,0.581638,0.805683,0.690099,0.627813,0.640594


[I 2025-03-23 05:44:24,560] Trial 66 finished with value: 0.6421785105093838 and parameters: {'learning_rate': 0.00484269308037669, 'weight_decay': 0.007, 'warmup_steps': 0, 'lambda_param': 0.2, 'temperature': 4.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 67 with params: {'learning_rate': 0.003996537204752903, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7384,1.338892,0.488543,0.134365,0.133113,0.104399
2,1.1121,0.98641,0.648946,0.287345,0.268459,0.26135
3,0.8122,0.792154,0.721357,0.385302,0.354196,0.349743
4,0.595,0.711112,0.754354,0.473728,0.425624,0.43445
5,0.4328,0.662349,0.75527,0.513672,0.454638,0.468407
6,0.3231,0.628306,0.771769,0.531102,0.489209,0.501873
7,0.2368,0.605667,0.786434,0.606164,0.548761,0.560186
8,0.1809,0.591932,0.788268,0.622837,0.574584,0.58588
9,0.137,0.578734,0.80385,0.684568,0.631429,0.647755
10,0.1161,0.579446,0.802016,0.72221,0.658588,0.676356


[I 2025-03-23 05:45:45,816] Trial 67 finished with value: 0.6872042896069332 and parameters: {'learning_rate': 0.003996537204752903, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 4.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 68 with params: {'learning_rate': 0.004658469383209699, 'weight_decay': 0.001, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9446,1.390768,0.463795,0.092353,0.117325,0.095589
2,1.2023,1.080428,0.607699,0.244427,0.239519,0.223119
3,0.8784,0.830525,0.703025,0.324295,0.329681,0.314222
4,0.6344,0.731269,0.736022,0.391978,0.38261,0.380966
5,0.4647,0.651624,0.75802,0.4508,0.420228,0.422439
6,0.3355,0.619568,0.776352,0.523748,0.481674,0.492877
7,0.243,0.605344,0.788268,0.555769,0.521361,0.527346
8,0.1882,0.589944,0.791934,0.617516,0.604291,0.601394
9,0.1487,0.570962,0.802016,0.654627,0.628055,0.63062
10,0.1221,0.574267,0.806599,0.704562,0.660972,0.669323


[I 2025-03-23 05:47:04,433] Trial 68 finished with value: 0.6755679671227521 and parameters: {'learning_rate': 0.004658469383209699, 'weight_decay': 0.001, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 69 with params: {'learning_rate': 0.00454760453957999, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7529,1.340871,0.477544,0.110075,0.131682,0.106194
2,1.1168,1.013029,0.641613,0.275254,0.270487,0.260316
3,0.802,0.797471,0.71769,0.365312,0.358407,0.350011
4,0.5723,0.701772,0.748854,0.444034,0.409877,0.415424
5,0.4081,0.654592,0.774519,0.539926,0.491,0.504926
6,0.3025,0.607696,0.789184,0.572537,0.515347,0.532289
7,0.2154,0.599649,0.792851,0.615642,0.547355,0.567873
8,0.164,0.58837,0.79835,0.6337,0.581014,0.595672
9,0.1276,0.577763,0.8011,0.649202,0.588217,0.608211
10,0.1096,0.583076,0.793767,0.682931,0.61751,0.637738


[I 2025-03-23 05:48:25,938] Trial 69 finished with value: 0.6626751472221963 and parameters: {'learning_rate': 0.00454760453957999, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 70 with params: {'learning_rate': 0.004800557845766139, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7541,1.348049,0.47846,0.116296,0.130625,0.102867
2,1.1237,1.023023,0.64253,0.269745,0.272179,0.255149
3,0.8109,0.795131,0.72594,0.389048,0.358145,0.354013
4,0.5796,0.697622,0.748854,0.437791,0.402993,0.408262
5,0.417,0.650932,0.765353,0.512586,0.443689,0.457697
6,0.3038,0.609697,0.782768,0.575428,0.510307,0.529829
7,0.2176,0.587676,0.794684,0.620685,0.549021,0.571039
8,0.1658,0.574171,0.800183,0.678445,0.598294,0.621901
9,0.1308,0.558522,0.804766,0.707483,0.62836,0.652981
10,0.1115,0.562156,0.810266,0.701619,0.640332,0.657767


[I 2025-03-23 05:49:49,973] Trial 70 finished with value: 0.6845816237648052 and parameters: {'learning_rate': 0.004800557845766139, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 4.5}. Best is trial 20 with value: 0.6987835836175371.


Trial 71 with params: {'learning_rate': 0.004609010808442423, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8818,1.374404,0.494042,0.092409,0.131569,0.101366
2,1.1888,1.020969,0.631531,0.258408,0.259659,0.247466
3,0.8629,0.841874,0.695692,0.331664,0.338485,0.328934
4,0.6309,0.745821,0.730522,0.415042,0.375461,0.3828
5,0.4637,0.670024,0.758937,0.448308,0.424412,0.424154
6,0.3404,0.646359,0.76352,0.528469,0.47247,0.485517
7,0.2464,0.637056,0.772686,0.568115,0.534767,0.539744
8,0.1838,0.610831,0.793767,0.657341,0.616104,0.623403
9,0.1424,0.594164,0.797434,0.677579,0.634356,0.639992
10,0.122,0.596718,0.797434,0.685794,0.644113,0.650483


[I 2025-03-23 05:51:10,023] Trial 71 finished with value: 0.6764099371659804 and parameters: {'learning_rate': 0.004609010808442423, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 4.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 72 with params: {'learning_rate': 0.004693961072789654, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7473,1.363531,0.473877,0.101324,0.128412,0.099955
2,1.1095,1.0116,0.643446,0.287697,0.275176,0.266507
3,0.7959,0.788582,0.721357,0.3794,0.362417,0.359923
4,0.5591,0.702887,0.748854,0.433556,0.409685,0.409865
5,0.4019,0.648555,0.776352,0.52978,0.470229,0.487469
6,0.2978,0.607184,0.776352,0.576088,0.505897,0.526503
7,0.2133,0.601311,0.787351,0.584836,0.535182,0.545891
8,0.1611,0.577567,0.800183,0.656581,0.591531,0.611064
9,0.128,0.571831,0.805683,0.715993,0.647472,0.666207
10,0.1089,0.575459,0.806599,0.701019,0.652946,0.664012


[I 2025-03-23 05:52:26,197] Trial 72 finished with value: 0.6698469087149177 and parameters: {'learning_rate': 0.004693961072789654, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 73 with params: {'learning_rate': 0.000629887775196634, 'weight_decay': 0.007, 'warmup_steps': 3, 'lambda_param': 0.5, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.1114,1.780405,0.352887,0.035479,0.070853,0.044052
2,1.6054,1.467873,0.441797,0.116553,0.10094,0.076884
3,1.3559,1.270276,0.509624,0.13311,0.136727,0.109924
4,1.1802,1.144931,0.580202,0.164491,0.185484,0.164865
5,1.0505,1.059979,0.595784,0.210837,0.199934,0.182098


[I 2025-03-23 05:53:04,668] Trial 73 pruned. 


Trial 74 with params: {'learning_rate': 0.0002952710041203322, 'weight_decay': 0.01, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.235,2.009778,0.176902,0.003538,0.02,0.006012
2,1.8482,1.694869,0.377635,0.040105,0.07647,0.048738
3,1.6184,1.504419,0.437214,0.070354,0.098475,0.069748
4,1.4532,1.383011,0.471127,0.096121,0.116766,0.092692
5,1.3429,1.291109,0.51604,0.135949,0.138088,0.115924
6,1.2374,1.239062,0.52154,0.16824,0.15019,0.13326
7,1.1748,1.195518,0.546288,0.15846,0.166054,0.147784
8,1.1233,1.160812,0.571952,0.184635,0.185284,0.163937
9,1.0749,1.133292,0.577452,0.207981,0.184908,0.167989
10,1.031,1.10199,0.593034,0.182876,0.20054,0.17865


[I 2025-03-23 05:53:55,397] Trial 74 pruned. 


Trial 75 with params: {'learning_rate': 0.004218606172627303, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8475,1.360757,0.488543,0.111879,0.130723,0.105745
2,1.1751,1.013637,0.632447,0.269171,0.254438,0.244202
3,0.8473,0.849509,0.693859,0.32826,0.331473,0.316876
4,0.6221,0.720318,0.733272,0.391365,0.370499,0.370405
5,0.4471,0.659237,0.756187,0.486894,0.438361,0.442306
6,0.3322,0.623445,0.776352,0.532526,0.482201,0.497438
7,0.2486,0.611825,0.771769,0.522084,0.497983,0.503447
8,0.1938,0.596805,0.793767,0.592109,0.559703,0.567015
9,0.1502,0.580487,0.80385,0.643438,0.611378,0.615351
10,0.1256,0.581467,0.79835,0.657573,0.62444,0.627443


[I 2025-03-23 05:55:52,605] Trial 75 finished with value: 0.67380128712396 and parameters: {'learning_rate': 0.004218606172627303, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 5.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 76 with params: {'learning_rate': 0.004142818873198246, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.738,1.343292,0.486709,0.116141,0.132359,0.10287
2,1.1153,0.992896,0.644363,0.28203,0.268367,0.25656
3,0.821,0.803455,0.71494,0.391758,0.356205,0.353551
4,0.5912,0.707382,0.743355,0.427903,0.400327,0.403637
5,0.4327,0.663255,0.761687,0.540116,0.467918,0.486332
6,0.3209,0.632279,0.774519,0.565332,0.494506,0.518943
7,0.2394,0.615161,0.779102,0.609567,0.525439,0.549346
8,0.1814,0.592336,0.787351,0.625475,0.554839,0.576667
9,0.1409,0.585412,0.797434,0.672337,0.603311,0.626925
10,0.1195,0.585311,0.793767,0.710777,0.627687,0.654739


[I 2025-03-23 05:57:24,347] Trial 76 finished with value: 0.6893787481025402 and parameters: {'learning_rate': 0.004142818873198246, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 77 with params: {'learning_rate': 0.001686912226206457, 'weight_decay': 0.008, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9295,1.487913,0.43538,0.103862,0.100891,0.078859
2,1.3187,1.21841,0.550871,0.169876,0.170793,0.150847
3,1.0513,0.943298,0.659945,0.254281,0.261627,0.248325
4,0.8233,0.852777,0.681943,0.357873,0.303341,0.305006
5,0.6634,0.75821,0.725023,0.36884,0.353199,0.350787


[I 2025-03-23 05:57:51,392] Trial 77 pruned. 


Trial 78 with params: {'learning_rate': 0.0028087287006714028, 'weight_decay': 0.01, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8366,1.426622,0.471127,0.099377,0.124102,0.097128
2,1.2507,1.190619,0.562786,0.192477,0.187287,0.170394
3,0.9564,0.89231,0.684693,0.292433,0.302876,0.289817
4,0.7268,0.777082,0.714024,0.398637,0.345309,0.345663
5,0.5585,0.70889,0.745188,0.429245,0.39978,0.394125


[I 2025-03-23 05:58:25,843] Trial 78 pruned. 


Trial 79 with params: {'learning_rate': 0.0008894692562293435, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0437,1.676838,0.356554,0.064803,0.072346,0.047175
2,1.49,1.338008,0.492209,0.118749,0.127258,0.101806
3,1.2419,1.155897,0.56187,0.16168,0.171346,0.150181
4,1.0458,1.016324,0.63428,0.278145,0.22959,0.221968
5,0.9004,0.93995,0.661778,0.287243,0.271034,0.265461
6,0.7641,0.867161,0.68561,0.327527,0.303115,0.300628
7,0.672,0.822159,0.705775,0.386778,0.323318,0.329808
8,0.6046,0.786427,0.71494,0.388172,0.343457,0.340257
9,0.5343,0.762053,0.716774,0.421012,0.356695,0.370332
10,0.4713,0.739568,0.738772,0.418781,0.39138,0.394283


[I 2025-03-23 05:59:16,673] Trial 79 pruned. 


Trial 80 with params: {'learning_rate': 0.004593511720022291, 'weight_decay': 0.008, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7532,1.34644,0.476627,0.115659,0.130042,0.102968
2,1.1218,1.004032,0.647113,0.28498,0.273829,0.263176
3,0.8024,0.795672,0.721357,0.373543,0.360064,0.352647
4,0.5717,0.698246,0.751604,0.449236,0.412526,0.419011
5,0.4095,0.658852,0.762603,0.554002,0.462901,0.485159
6,0.302,0.619305,0.786434,0.594627,0.527218,0.547871
7,0.2159,0.602137,0.791017,0.610685,0.549343,0.567473
8,0.1665,0.594711,0.799267,0.659315,0.600425,0.618166
9,0.132,0.581111,0.802016,0.694584,0.626768,0.646656
10,0.1107,0.587744,0.79835,0.702992,0.636879,0.656497


[I 2025-03-23 06:00:37,782] Trial 80 finished with value: 0.6757951186030294 and parameters: {'learning_rate': 0.004593511720022291, 'weight_decay': 0.008, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 20 with value: 0.6987835836175371.


Trial 81 with params: {'learning_rate': 0.0006950213656015452, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0852,1.74364,0.345555,0.036149,0.069393,0.043051
2,1.5644,1.422383,0.460128,0.102946,0.108578,0.085049
3,1.3177,1.230989,0.523373,0.134112,0.1461,0.121597
4,1.1384,1.121553,0.588451,0.192678,0.187867,0.170857
5,1.0133,1.028992,0.612282,0.266649,0.214466,0.204333
6,0.8814,0.943945,0.669111,0.321517,0.283029,0.275867
7,0.785,0.894035,0.68286,0.313151,0.294304,0.286323
8,0.7183,0.856012,0.68561,0.330096,0.29822,0.298739
9,0.6536,0.831802,0.695692,0.395678,0.31624,0.324749
10,0.5928,0.797424,0.713107,0.360471,0.335296,0.331804


[I 2025-03-23 06:01:30,649] Trial 81 pruned. 


Trial 82 with params: {'learning_rate': 0.0002726307018738496, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.8, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.226,2.01649,0.176902,0.003538,0.02,0.006012
2,1.8632,1.71645,0.370302,0.040642,0.074378,0.046812
3,1.6378,1.52572,0.431714,0.069548,0.097245,0.068657
4,1.4744,1.399785,0.472044,0.104892,0.116844,0.092443
5,1.3693,1.320123,0.502291,0.103341,0.129343,0.105175


[I 2025-03-23 06:01:56,676] Trial 82 pruned. 


Trial 83 with params: {'learning_rate': 0.004632397765805303, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7495,1.351763,0.473877,0.138221,0.129454,0.102457
2,1.1124,0.998722,0.64253,0.279659,0.271247,0.255357
3,0.7993,0.785898,0.725023,0.389504,0.364799,0.364466
4,0.5675,0.699036,0.751604,0.429348,0.399889,0.404934
5,0.4081,0.637462,0.769019,0.541595,0.463698,0.479591
6,0.3065,0.6119,0.788268,0.576983,0.507786,0.527726
7,0.2127,0.600784,0.788268,0.615274,0.53335,0.55596
8,0.1647,0.566782,0.807516,0.662825,0.595029,0.610984
9,0.1309,0.557495,0.805683,0.639041,0.601787,0.609653
10,0.1114,0.564136,0.802933,0.674699,0.611472,0.627495


[I 2025-03-23 06:03:15,049] Trial 83 finished with value: 0.7016439124452947 and parameters: {'learning_rate': 0.004632397765805303, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 4.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 84 with params: {'learning_rate': 0.00317902694915578, 'weight_decay': 0.008, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.829,1.382947,0.466544,0.121278,0.122626,0.09739
2,1.2176,1.095443,0.597617,0.22368,0.21584,0.194929
3,0.9131,0.867628,0.694775,0.301343,0.314256,0.301409
4,0.6815,0.764811,0.71769,0.382345,0.349667,0.350064
5,0.5202,0.688596,0.748854,0.436317,0.407474,0.40948
6,0.3951,0.656764,0.758937,0.464187,0.421808,0.432104
7,0.2951,0.638592,0.771769,0.49251,0.470688,0.471279
8,0.2385,0.610478,0.783685,0.576478,0.513998,0.52692
9,0.1853,0.600935,0.796517,0.67722,0.554463,0.589509
10,0.154,0.598075,0.789184,0.638822,0.570123,0.589481


[I 2025-03-23 06:04:04,512] Trial 84 pruned. 


Trial 85 with params: {'learning_rate': 0.0032319244266281457, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.4, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8242,1.372535,0.477544,0.125447,0.1255,0.099562
2,1.2126,1.090954,0.5967,0.233855,0.214724,0.195359
3,0.9061,0.88266,0.681943,0.302645,0.306136,0.290149
4,0.6849,0.756485,0.714024,0.36762,0.356795,0.353985
5,0.5147,0.685368,0.749771,0.40704,0.402448,0.398454
6,0.3922,0.646421,0.76077,0.477341,0.427053,0.437699
7,0.2896,0.649488,0.769019,0.496888,0.487037,0.483951
8,0.2311,0.607107,0.786434,0.563934,0.525901,0.534044
9,0.1799,0.596902,0.786434,0.626555,0.543256,0.56973
10,0.1478,0.597267,0.788268,0.61942,0.572171,0.584061


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-23 06:05:08,843] Trial 85 pruned. 


Trial 86 with params: {'learning_rate': 0.00472120351769061, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9475,1.387428,0.471127,0.099892,0.119991,0.097737
2,1.2095,1.094456,0.59945,0.228785,0.232902,0.21727
3,0.8892,0.829121,0.705775,0.327943,0.327101,0.315058
4,0.6442,0.726422,0.746104,0.394282,0.388943,0.384632
5,0.4682,0.665021,0.757104,0.459554,0.422022,0.424135
6,0.3425,0.626693,0.768103,0.531144,0.461115,0.476359
7,0.2569,0.621987,0.775435,0.571691,0.516819,0.53281
8,0.1985,0.594988,0.7956,0.597281,0.575721,0.578939
9,0.1539,0.579019,0.802016,0.617211,0.59566,0.599592
10,0.1268,0.574921,0.806599,0.666208,0.629123,0.636662


[I 2025-03-23 06:06:33,141] Trial 86 finished with value: 0.6648714386885887 and parameters: {'learning_rate': 0.00472120351769061, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 2.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 87 with params: {'learning_rate': 0.0034079466806623904, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8018,1.356179,0.48396,0.106955,0.131912,0.101212
2,1.1969,1.054628,0.611366,0.234113,0.222025,0.203193
3,0.8923,0.849423,0.695692,0.341165,0.324228,0.315767
4,0.6606,0.734456,0.729606,0.426188,0.376574,0.377943
5,0.4907,0.681633,0.749771,0.423286,0.420344,0.413175
6,0.3733,0.638956,0.767186,0.462304,0.442785,0.445443
7,0.2735,0.643452,0.775435,0.498039,0.488204,0.481285
8,0.2209,0.605007,0.791017,0.568221,0.547691,0.550025
9,0.1684,0.591878,0.796517,0.623792,0.574751,0.588705
10,0.1408,0.590743,0.792851,0.608242,0.589762,0.591107


[I 2025-03-23 06:07:38,456] Trial 87 pruned. 


Trial 88 with params: {'learning_rate': 0.0030844047870118344, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8501,1.423037,0.464711,0.106958,0.123342,0.097937
2,1.2449,1.141308,0.578368,0.191437,0.205857,0.181738
3,0.9418,0.881647,0.683776,0.300243,0.300772,0.289188
4,0.7083,0.761853,0.728689,0.380201,0.367427,0.36523
5,0.5379,0.698275,0.740605,0.444007,0.400689,0.397904
6,0.414,0.64143,0.769019,0.478602,0.436637,0.442926
7,0.3075,0.658876,0.764436,0.456917,0.446302,0.441033
8,0.2536,0.626222,0.783685,0.565628,0.516309,0.524478
9,0.194,0.606959,0.791934,0.615541,0.533089,0.551272
10,0.1596,0.608903,0.79835,0.625024,0.582904,0.589743


[I 2025-03-23 06:08:34,775] Trial 88 pruned. 


Trial 89 with params: {'learning_rate': 0.004528729471902319, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.5, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.881,1.378021,0.489459,0.101142,0.131636,0.103382
2,1.1974,1.042238,0.614115,0.230672,0.247187,0.227682
3,0.8765,0.846689,0.701192,0.357135,0.33619,0.329936
4,0.6454,0.743666,0.728689,0.395081,0.369662,0.372444
5,0.472,0.668713,0.761687,0.431726,0.427684,0.423038
6,0.3489,0.64416,0.764436,0.537369,0.468708,0.487792
7,0.2545,0.637979,0.778185,0.604391,0.52926,0.549352
8,0.1957,0.625335,0.782768,0.634347,0.580773,0.594732
9,0.1499,0.613266,0.783685,0.652009,0.59469,0.608909
10,0.1312,0.606364,0.790101,0.671733,0.610335,0.62847


[I 2025-03-23 06:09:46,978] Trial 89 finished with value: 0.6598583678713785 and parameters: {'learning_rate': 0.004528729471902319, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.5, 'temperature': 3.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 90 with params: {'learning_rate': 0.0011115662517499805, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.6000000000000001, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0254,1.649293,0.371219,0.060944,0.076917,0.050119
2,1.4459,1.338621,0.48396,0.110548,0.123551,0.102973
3,1.2041,1.105136,0.572869,0.190282,0.175782,0.158262
4,0.9847,0.96128,0.646196,0.273847,0.245273,0.237636
5,0.8304,0.887147,0.675527,0.310622,0.290935,0.284742


[I 2025-03-23 06:10:22,852] Trial 90 pruned. 


Trial 91 with params: {'learning_rate': 0.00021177702946688744, 'weight_decay': 0.01, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.252,2.054896,0.176902,0.003538,0.02,0.006012
2,1.9302,1.797407,0.345555,0.026708,0.065927,0.037677
3,1.7324,1.622823,0.392301,0.042312,0.080803,0.052707
4,1.5737,1.495709,0.439047,0.070143,0.09925,0.073299
5,1.4691,1.406565,0.473877,0.13083,0.112645,0.08627
6,1.37,1.337745,0.497709,0.105729,0.129484,0.104412
7,1.3133,1.299435,0.502291,0.129213,0.134531,0.111214
8,1.2687,1.280778,0.527956,0.143986,0.156185,0.135165
9,1.2244,1.235596,0.535289,0.149535,0.154524,0.133912
10,1.1853,1.217395,0.544455,0.146541,0.168296,0.144385


[I 2025-03-23 06:11:14,409] Trial 91 pruned. 


Trial 92 with params: {'learning_rate': 0.0040084576995248825, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8004,1.36728,0.47846,0.113136,0.127361,0.104708
2,1.2003,1.042764,0.618698,0.229278,0.239438,0.221459
3,0.8848,0.843666,0.695692,0.328923,0.323422,0.31562
4,0.6518,0.7485,0.72594,0.427129,0.370878,0.376842
5,0.479,0.662858,0.759853,0.486557,0.438522,0.447822
6,0.3594,0.64075,0.775435,0.502175,0.460591,0.472363
7,0.2608,0.620114,0.783685,0.545348,0.517039,0.522227
8,0.2074,0.60408,0.797434,0.626752,0.578857,0.590816
9,0.1625,0.598634,0.791934,0.64241,0.580365,0.599754
10,0.1328,0.595321,0.802016,0.682127,0.620677,0.633584


[I 2025-03-23 06:12:40,370] Trial 92 finished with value: 0.6962421020204362 and parameters: {'learning_rate': 0.0040084576995248825, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 5.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 93 with params: {'learning_rate': 0.0019180453077055082, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8699,1.465408,0.459212,0.094911,0.109061,0.084846
2,1.301,1.205492,0.551787,0.161351,0.175357,0.154603
3,1.0357,0.943073,0.659945,0.276232,0.267295,0.258092
4,0.8178,0.858886,0.691109,0.403018,0.311156,0.318674
5,0.6541,0.754113,0.724106,0.383047,0.351356,0.351246
6,0.524,0.728546,0.741522,0.432975,0.390149,0.394136
7,0.4195,0.693393,0.75527,0.463934,0.42844,0.43011
8,0.3536,0.665903,0.766269,0.490278,0.436144,0.448364
9,0.2875,0.654078,0.770852,0.524894,0.480787,0.493583
10,0.2378,0.638948,0.781852,0.577874,0.509665,0.525872


[I 2025-03-23 06:14:08,434] Trial 93 pruned. 


Trial 94 with params: {'learning_rate': 0.0032878146110946367, 'weight_decay': 0.004, 'warmup_steps': 0, 'lambda_param': 0.1, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7409,1.34689,0.494959,0.123348,0.132977,0.111699
2,1.1937,1.079923,0.598533,0.252896,0.228155,0.21333
3,0.8909,0.868985,0.693859,0.365685,0.33244,0.327548
4,0.6715,0.740892,0.735105,0.432154,0.380995,0.387517
5,0.5004,0.680213,0.757104,0.461319,0.424024,0.427694
6,0.3869,0.64516,0.761687,0.503694,0.445169,0.459363
7,0.2884,0.622518,0.788268,0.541027,0.499379,0.51077
8,0.2325,0.620221,0.794684,0.630309,0.575833,0.586793
9,0.18,0.606947,0.8011,0.657317,0.584923,0.607528
10,0.1515,0.597841,0.8011,0.698077,0.614236,0.633196


[I 2025-03-23 06:15:23,074] Trial 94 finished with value: 0.6403632379699374 and parameters: {'learning_rate': 0.0032878146110946367, 'weight_decay': 0.004, 'warmup_steps': 0, 'lambda_param': 0.1, 'temperature': 5.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 95 with params: {'learning_rate': 0.00033622652480271855, 'weight_decay': 0.0, 'warmup_steps': 0, 'lambda_param': 0.4, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.187,1.949239,0.224565,0.036267,0.032945,0.023515
2,1.7892,1.641662,0.399633,0.042649,0.082946,0.053759
3,1.5564,1.444618,0.459212,0.084841,0.108129,0.080985
4,1.3988,1.331068,0.491292,0.109324,0.128927,0.102723
5,1.2882,1.242072,0.535289,0.143984,0.15199,0.133554
6,1.1816,1.195909,0.568286,0.169013,0.183554,0.163233
7,1.1165,1.147618,0.567369,0.167274,0.179929,0.159283
8,1.0614,1.110236,0.594867,0.196887,0.199566,0.178944
9,1.0089,1.078543,0.601283,0.217031,0.201983,0.187404
10,0.9572,1.050148,0.619615,0.237966,0.221839,0.207265


[I 2025-03-23 06:16:18,679] Trial 95 pruned. 


Trial 96 with params: {'learning_rate': 0.00323576055441082, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8224,1.368429,0.479377,0.108066,0.125184,0.097668
2,1.2071,1.080265,0.597617,0.227813,0.214533,0.193323
3,0.8994,0.881412,0.686526,0.309225,0.317598,0.30111
4,0.6798,0.755113,0.713107,0.369919,0.350357,0.348825
5,0.5077,0.683641,0.751604,0.46011,0.414195,0.417513
6,0.3883,0.641087,0.769019,0.466796,0.437453,0.444567
7,0.2905,0.649559,0.774519,0.527752,0.495247,0.49614
8,0.2299,0.611413,0.783685,0.564934,0.512207,0.525453
9,0.1794,0.606602,0.791017,0.620329,0.543833,0.568646
10,0.1473,0.594277,0.791017,0.631586,0.5859,0.594112


[I 2025-03-23 06:17:11,998] Trial 96 pruned. 


Trial 97 with params: {'learning_rate': 0.0020336891123946222, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8804,1.448765,0.463795,0.105555,0.111393,0.08936
2,1.2824,1.206267,0.546288,0.184828,0.171433,0.153255
3,1.0037,0.909339,0.670944,0.293593,0.27791,0.269804
4,0.7706,0.821165,0.703025,0.38445,0.329524,0.334088
5,0.6174,0.733751,0.735105,0.380608,0.370887,0.367515
6,0.4908,0.69207,0.75802,0.43564,0.40129,0.406872
7,0.3918,0.679875,0.75802,0.496964,0.447855,0.449143
8,0.3296,0.646026,0.775435,0.503189,0.476221,0.481433
9,0.2631,0.643983,0.771769,0.54692,0.488647,0.506051
10,0.2155,0.624371,0.786434,0.603026,0.531041,0.548654


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-23 06:18:11,751] Trial 97 pruned. 


Trial 98 with params: {'learning_rate': 0.004653858293237055, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7499,1.35199,0.477544,0.115444,0.13013,0.10202
2,1.1101,0.996622,0.641613,0.292113,0.272214,0.264456
3,0.7977,0.788214,0.72594,0.39192,0.358096,0.355539
4,0.5643,0.702961,0.747021,0.442298,0.404044,0.408932
5,0.406,0.644675,0.765353,0.553177,0.468678,0.488845
6,0.3025,0.603268,0.782768,0.538674,0.503487,0.513759
7,0.2138,0.603353,0.785518,0.595095,0.537054,0.553253
8,0.1655,0.582281,0.79835,0.64062,0.596337,0.608056
9,0.131,0.568443,0.809349,0.672249,0.616653,0.636084
10,0.1121,0.576884,0.802933,0.708029,0.655801,0.667664


[I 2025-03-23 06:19:38,110] Trial 98 finished with value: 0.6766786399692666 and parameters: {'learning_rate': 0.004653858293237055, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 3.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 99 with params: {'learning_rate': 0.0017422533204379319, 'weight_decay': 0.0, 'warmup_steps': 0, 'lambda_param': 0.6000000000000001, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8677,1.477545,0.449129,0.095845,0.105027,0.081599
2,1.3243,1.270989,0.538038,0.146508,0.163451,0.141317
3,1.0824,0.973144,0.653529,0.256718,0.251295,0.239161
4,0.8476,0.887989,0.679193,0.355867,0.296691,0.298502
5,0.6872,0.775093,0.708524,0.349213,0.336225,0.329127


[I 2025-03-23 06:20:32,165] Trial 99 pruned. 


Trial 100 with params: {'learning_rate': 0.0044105155054517346, 'weight_decay': 0.002, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.827,1.36452,0.489459,0.109451,0.131226,0.104523
2,1.1909,1.041127,0.612282,0.22069,0.234467,0.217672
3,0.8722,0.852856,0.684693,0.339616,0.317267,0.307839
4,0.6438,0.750699,0.726856,0.415943,0.376228,0.380899
5,0.4667,0.683976,0.759853,0.45654,0.442148,0.437857
6,0.3488,0.648691,0.765353,0.515586,0.468831,0.481072
7,0.2546,0.617483,0.783685,0.60262,0.533441,0.549744
8,0.1996,0.606132,0.794684,0.651309,0.595737,0.611473
9,0.1516,0.591543,0.796517,0.674355,0.604939,0.626387
10,0.1266,0.592767,0.799267,0.712496,0.656528,0.669365


[I 2025-03-23 06:22:21,423] Trial 100 finished with value: 0.6628383948270391 and parameters: {'learning_rate': 0.0044105155054517346, 'weight_decay': 0.002, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 5.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 101 with params: {'learning_rate': 0.0023121420609023675, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8294,1.434304,0.467461,0.111389,0.122112,0.096015
2,1.255,1.165711,0.55912,0.199224,0.182615,0.168774
3,0.9697,0.892878,0.676444,0.323224,0.292887,0.286383
4,0.7502,0.792599,0.710357,0.391221,0.338076,0.339083
5,0.5861,0.710613,0.743355,0.435597,0.394374,0.398489
6,0.4537,0.662967,0.762603,0.4694,0.429926,0.437029
7,0.3476,0.666419,0.761687,0.499686,0.455399,0.460053
8,0.2905,0.655554,0.775435,0.509591,0.473906,0.481307
9,0.2308,0.63302,0.777269,0.581679,0.512404,0.528922
10,0.1877,0.620118,0.784601,0.633821,0.548725,0.57214


[I 2025-03-23 06:23:51,431] Trial 101 pruned. 


Trial 102 with params: {'learning_rate': 0.004626458342183176, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7483,1.360223,0.472044,0.117491,0.128457,0.100433
2,1.1119,0.99219,0.644363,0.283183,0.273676,0.259767
3,0.7963,0.782741,0.72044,0.390983,0.357736,0.358098
4,0.5643,0.701786,0.749771,0.478287,0.41595,0.428873
5,0.406,0.64466,0.768103,0.548239,0.469513,0.487491
6,0.298,0.606763,0.786434,0.580262,0.507654,0.526554
7,0.2102,0.59579,0.789184,0.599066,0.543186,0.561375
8,0.1603,0.581561,0.796517,0.657083,0.59682,0.616595
9,0.1241,0.561851,0.804766,0.712902,0.652454,0.672032
10,0.1067,0.570408,0.80385,0.684197,0.64382,0.653693


[I 2025-03-23 06:25:12,215] Trial 102 finished with value: 0.7001942134300536 and parameters: {'learning_rate': 0.004626458342183176, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 103 with params: {'learning_rate': 0.004449763994734355, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7451,1.334274,0.474794,0.094835,0.128001,0.099201
2,1.1113,1.009233,0.64253,0.302074,0.277035,0.26934
3,0.8074,0.783873,0.719523,0.374548,0.3541,0.347999
4,0.5757,0.704746,0.744271,0.421102,0.396199,0.39853
5,0.4183,0.658628,0.767186,0.547939,0.471758,0.491244
6,0.3097,0.613939,0.790101,0.544054,0.49628,0.50893
7,0.2245,0.593939,0.786434,0.583511,0.522647,0.543725
8,0.1685,0.583128,0.797434,0.651465,0.576764,0.598035
9,0.1304,0.573307,0.80385,0.688101,0.60468,0.630233
10,0.1134,0.580727,0.80385,0.71478,0.647715,0.668926


[I 2025-03-23 06:26:37,619] Trial 103 finished with value: 0.6801904558265425 and parameters: {'learning_rate': 0.004449763994734355, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 5.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 104 with params: {'learning_rate': 0.0025196526390145577, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8329,1.433516,0.461962,0.099918,0.119761,0.094359
2,1.2555,1.176692,0.553621,0.199823,0.181438,0.16375
3,0.9669,0.897068,0.683776,0.306061,0.303954,0.292391
4,0.7407,0.786925,0.705775,0.392829,0.341568,0.343169
5,0.5711,0.70635,0.751604,0.438028,0.398291,0.399699


[I 2025-03-23 06:27:14,607] Trial 104 pruned. 


Trial 105 with params: {'learning_rate': 0.001394113520827695, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 1.0, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9872,1.55637,0.409716,0.070562,0.091726,0.066955
2,1.3816,1.268118,0.526123,0.148854,0.150546,0.132699
3,1.1291,1.014407,0.620532,0.264678,0.217036,0.208278
4,0.8915,0.896288,0.671861,0.322838,0.27817,0.276105
5,0.7388,0.813008,0.694775,0.356933,0.316232,0.31454
6,0.6069,0.771849,0.735105,0.445255,0.372054,0.382204
7,0.505,0.721467,0.739688,0.425774,0.384137,0.386498
8,0.4394,0.694128,0.756187,0.467791,0.427267,0.435369
9,0.3697,0.691665,0.762603,0.499781,0.453115,0.465959
10,0.3115,0.671764,0.778185,0.515724,0.483153,0.490528


[I 2025-03-23 06:28:19,701] Trial 105 pruned. 


Trial 106 with params: {'learning_rate': 0.004748997557745925, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.2, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9073,1.377534,0.483043,0.107889,0.128198,0.102811
2,1.2036,1.048841,0.611366,0.237948,0.235486,0.219181
3,0.8756,0.839166,0.704858,0.348287,0.339188,0.329453
4,0.6331,0.729045,0.728689,0.399564,0.363375,0.368081
5,0.4614,0.678207,0.761687,0.486167,0.452125,0.454607
6,0.3442,0.639772,0.772686,0.528925,0.486429,0.497899
7,0.2536,0.631244,0.780935,0.590141,0.522703,0.53682
8,0.2035,0.61046,0.785518,0.608962,0.554943,0.565292
9,0.156,0.586418,0.7956,0.62965,0.580209,0.588601
10,0.1324,0.588593,0.797434,0.650176,0.614112,0.615863


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-23 06:29:19,746] Trial 106 pruned. 


Trial 107 with params: {'learning_rate': 0.0015488735451512226, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9246,1.502611,0.429881,0.077991,0.09846,0.075137
2,1.3325,1.239671,0.537122,0.145889,0.162083,0.141612
3,1.08,0.963611,0.662695,0.274869,0.260573,0.247772
4,0.8512,0.859825,0.679193,0.343245,0.291654,0.290581
5,0.6902,0.77146,0.711274,0.376107,0.338658,0.334208
6,0.5595,0.725587,0.748854,0.43132,0.381559,0.390623
7,0.457,0.703896,0.750687,0.495137,0.433321,0.435893
8,0.3893,0.673882,0.761687,0.484202,0.4505,0.456905
9,0.3193,0.663015,0.765353,0.506804,0.466211,0.47523
10,0.268,0.648345,0.767186,0.539821,0.478743,0.496218


[I 2025-03-23 06:30:21,696] Trial 107 pruned. 


Trial 108 with params: {'learning_rate': 0.004394054796185525, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.746,1.339518,0.476627,0.113968,0.129292,0.101628
2,1.1166,0.986888,0.648946,0.284133,0.273464,0.262148
3,0.8119,0.804514,0.710357,0.35548,0.34377,0.335677
4,0.5903,0.71778,0.738772,0.443234,0.392823,0.401875
5,0.4266,0.657288,0.770852,0.539744,0.469294,0.483647
6,0.3165,0.630504,0.775435,0.565383,0.491245,0.508508
7,0.2278,0.600653,0.793767,0.618148,0.547861,0.5687
8,0.1692,0.594794,0.793767,0.679839,0.586953,0.611908
9,0.1317,0.585102,0.797434,0.657876,0.597264,0.612978
10,0.1108,0.596226,0.796517,0.660349,0.609335,0.618968


[I 2025-03-23 06:31:50,568] Trial 108 finished with value: 0.6516777252636615 and parameters: {'learning_rate': 0.004394054796185525, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 4.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 109 with params: {'learning_rate': 0.0049299797521280015, 'weight_decay': 0.006, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8523,1.337345,0.489459,0.09462,0.133544,0.104209
2,1.1394,1.005881,0.627864,0.250305,0.249348,0.23793
3,0.815,0.80616,0.715857,0.364675,0.356817,0.346355
4,0.5914,0.720691,0.743355,0.430524,0.395343,0.401013
5,0.422,0.654443,0.769936,0.478998,0.464217,0.462091
6,0.3051,0.626388,0.779102,0.604465,0.509741,0.531117
7,0.2256,0.621008,0.790101,0.602707,0.542892,0.552939
8,0.1723,0.597707,0.804766,0.633615,0.599041,0.604471
9,0.132,0.585974,0.810266,0.69612,0.647497,0.656406
10,0.113,0.582819,0.813016,0.693537,0.679429,0.675496


[I 2025-03-23 06:33:13,022] Trial 109 finished with value: 0.6877723032095566 and parameters: {'learning_rate': 0.0049299797521280015, 'weight_decay': 0.006, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 110 with params: {'learning_rate': 0.0028619336951857437, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8079,1.389003,0.47846,0.086534,0.124445,0.094696
2,1.2196,1.096616,0.598533,0.23598,0.216131,0.200694
3,0.9135,0.879478,0.683776,0.333855,0.318365,0.309319
4,0.6892,0.764693,0.71769,0.37905,0.353892,0.356362
5,0.519,0.691769,0.739688,0.453074,0.399804,0.402648


[I 2025-03-23 06:33:39,551] Trial 110 pruned. 


Trial 111 with params: {'learning_rate': 0.004239835075675203, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7425,1.33931,0.486709,0.114568,0.131805,0.102765
2,1.1201,0.986885,0.64528,0.28583,0.26889,0.262357
3,0.8175,0.798667,0.715857,0.357223,0.34978,0.343431
4,0.5946,0.714633,0.737855,0.423616,0.40054,0.403292
5,0.4332,0.662458,0.767186,0.482067,0.453648,0.456581
6,0.3203,0.644059,0.776352,0.546027,0.490092,0.501972
7,0.2376,0.609235,0.782768,0.605366,0.524319,0.548075
8,0.1774,0.592496,0.797434,0.660374,0.590477,0.609281
9,0.1422,0.588672,0.793767,0.652929,0.599463,0.617705
10,0.1187,0.583931,0.797434,0.723935,0.661624,0.678526


[I 2025-03-23 06:35:36,874] Trial 111 finished with value: 0.6852919387689766 and parameters: {'learning_rate': 0.004239835075675203, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 112 with params: {'learning_rate': 0.004481574923075768, 'weight_decay': 0.006, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8591,1.338122,0.505041,0.111316,0.137608,0.107463
2,1.1612,1.010003,0.633364,0.267184,0.251487,0.242589
3,0.8345,0.836491,0.690192,0.353222,0.337333,0.318936
4,0.6113,0.729273,0.732356,0.412912,0.377826,0.384873
5,0.4472,0.66761,0.757104,0.483995,0.431266,0.440234
6,0.3305,0.646338,0.774519,0.552725,0.484135,0.499048
7,0.2378,0.618272,0.780018,0.593062,0.529384,0.542516
8,0.1841,0.612929,0.786434,0.580095,0.561404,0.560403
9,0.1443,0.592985,0.799267,0.627811,0.581612,0.591413
10,0.1198,0.589815,0.796517,0.667852,0.644759,0.645918


[I 2025-03-23 06:37:08,083] Trial 112 finished with value: 0.6458898382683909 and parameters: {'learning_rate': 0.004481574923075768, 'weight_decay': 0.006, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 113 with params: {'learning_rate': 0.0046256806446763575, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8675,1.347654,0.485793,0.108277,0.130872,0.101859
2,1.1576,1.019166,0.628781,0.273725,0.262605,0.251322
3,0.8289,0.824667,0.712191,0.373047,0.353749,0.34204
4,0.6063,0.72581,0.738772,0.415843,0.38421,0.390114
5,0.4393,0.657283,0.747021,0.490854,0.436711,0.446967
6,0.3279,0.634944,0.770852,0.54168,0.472718,0.487877
7,0.2385,0.608692,0.781852,0.572169,0.524082,0.53401
8,0.1871,0.595332,0.787351,0.61609,0.566679,0.578013
9,0.1426,0.577217,0.79835,0.647052,0.579481,0.593867
10,0.1194,0.575716,0.8011,0.66942,0.625682,0.633052


[I 2025-03-23 06:38:26,787] Trial 113 finished with value: 0.6581305282079044 and parameters: {'learning_rate': 0.0046256806446763575, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 114 with params: {'learning_rate': 0.0026456473704649522, 'weight_decay': 0.008, 'warmup_steps': 3, 'lambda_param': 0.9, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.821,1.41153,0.470211,0.119426,0.123648,0.096669
2,1.2429,1.127231,0.578368,0.186521,0.193596,0.175511
3,0.9399,0.87686,0.689276,0.33671,0.311249,0.304568
4,0.7144,0.787515,0.715857,0.387486,0.347139,0.35081
5,0.55,0.7109,0.740605,0.442982,0.392528,0.39955


[I 2025-03-23 06:39:04,834] Trial 114 pruned. 


Trial 115 with params: {'learning_rate': 0.0047736931485914515, 'weight_decay': 0.007, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8008,1.360266,0.494042,0.134557,0.135902,0.109753
2,1.1412,0.972053,0.650779,0.254222,0.264461,0.251806
3,0.8101,0.802371,0.71494,0.368033,0.358517,0.349505
4,0.5774,0.727394,0.740605,0.414115,0.395949,0.39985
5,0.4156,0.636799,0.771769,0.486929,0.471848,0.46794
6,0.3009,0.610048,0.778185,0.572948,0.504236,0.520447
7,0.2203,0.596518,0.792851,0.603976,0.560693,0.569059
8,0.1729,0.581202,0.812099,0.688175,0.639223,0.653243
9,0.1311,0.560986,0.813932,0.694161,0.661387,0.66844
10,0.1096,0.560306,0.816682,0.709282,0.682134,0.684235


[I 2025-03-23 06:40:22,776] Trial 115 finished with value: 0.7000007925807834 and parameters: {'learning_rate': 0.0047736931485914515, 'weight_decay': 0.007, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 3.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 116 with params: {'learning_rate': 0.004291345437309441, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.5, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7877,1.352494,0.492209,0.138139,0.135223,0.112711
2,1.1532,1.024804,0.630614,0.296345,0.258817,0.254081
3,0.8234,0.800274,0.710357,0.366518,0.353835,0.345723
4,0.592,0.715458,0.742438,0.43088,0.400949,0.403187
5,0.4276,0.659177,0.754354,0.462269,0.429992,0.429969
6,0.3189,0.634474,0.769936,0.519967,0.454023,0.470543
7,0.2319,0.620132,0.781852,0.626728,0.548589,0.571901
8,0.1766,0.606166,0.7956,0.713483,0.641126,0.663119
9,0.137,0.593275,0.796517,0.721131,0.644447,0.669624
10,0.1141,0.597584,0.799267,0.737264,0.660143,0.684458


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-23 06:42:42,665] Trial 116 finished with value: 0.6929575753056951 and parameters: {'learning_rate': 0.004291345437309441, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.5, 'temperature': 3.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 117 with params: {'learning_rate': 0.0017486667882431446, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9598,1.50104,0.421632,0.070254,0.094662,0.071351
2,1.3279,1.207163,0.550871,0.175924,0.170618,0.150918
3,1.0519,0.955463,0.661778,0.264833,0.266884,0.254786
4,0.826,0.858382,0.68286,0.358332,0.30278,0.301551
5,0.6664,0.773988,0.72044,0.366065,0.348888,0.345598
6,0.537,0.730126,0.745188,0.420882,0.383838,0.388534
7,0.4337,0.694874,0.75802,0.45456,0.433241,0.431542
8,0.3701,0.668781,0.759853,0.494236,0.447168,0.458575
9,0.3015,0.66118,0.769936,0.531658,0.480074,0.491885
10,0.2482,0.642983,0.775435,0.526335,0.497621,0.504993


[I 2025-03-23 06:43:34,119] Trial 117 pruned. 


Trial 118 with params: {'learning_rate': 0.003999180638991325, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7679,1.34112,0.502291,0.115855,0.137361,0.111253
2,1.1406,1.034565,0.621448,0.280507,0.241814,0.237065
3,0.8253,0.80936,0.71494,0.345289,0.356315,0.344174
4,0.5964,0.724178,0.733272,0.422095,0.387845,0.38909
5,0.4377,0.656018,0.75802,0.479578,0.438842,0.442794
6,0.318,0.627357,0.779102,0.561108,0.494498,0.512779
7,0.2288,0.613185,0.787351,0.560101,0.519807,0.530468
8,0.1759,0.595112,0.802933,0.718905,0.620485,0.651441
9,0.1356,0.586411,0.807516,0.704913,0.639826,0.660652
10,0.1154,0.584618,0.80385,0.719087,0.651836,0.670189


[I 2025-03-23 06:45:32,232] Trial 118 finished with value: 0.7007454094619052 and parameters: {'learning_rate': 0.003999180638991325, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 3.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 119 with params: {'learning_rate': 0.004073286432200119, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7734,1.345458,0.502291,0.125896,0.137454,0.11238
2,1.1513,1.022656,0.624198,0.292682,0.244655,0.236678
3,0.8289,0.81725,0.711274,0.339601,0.355297,0.340964
4,0.5973,0.72572,0.738772,0.430565,0.396113,0.39855
5,0.4317,0.647809,0.76352,0.48513,0.452047,0.457792
6,0.3163,0.625493,0.775435,0.56632,0.497678,0.516392
7,0.2282,0.607886,0.790101,0.611253,0.529747,0.553048
8,0.1774,0.600402,0.807516,0.694091,0.598306,0.623911
9,0.1382,0.58858,0.805683,0.691943,0.60408,0.632163
10,0.1156,0.5898,0.802933,0.726624,0.640498,0.6605


[I 2025-03-23 06:46:59,671] Trial 119 finished with value: 0.6983533894461225 and parameters: {'learning_rate': 0.004073286432200119, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 3.0}. Best is trial 83 with value: 0.7016439124452947.


Trial 120 with params: {'learning_rate': 0.00215656113964331, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8817,1.439126,0.47571,0.107439,0.119268,0.093239
2,1.2659,1.169896,0.571952,0.202097,0.192119,0.176497
3,0.9709,0.898137,0.675527,0.314777,0.293477,0.288195
4,0.7442,0.801115,0.705775,0.384898,0.333271,0.340593
5,0.5868,0.708532,0.751604,0.449752,0.41079,0.414629
6,0.4646,0.678944,0.762603,0.458871,0.416724,0.424815
7,0.3626,0.661806,0.765353,0.482364,0.455301,0.455186
8,0.2993,0.634095,0.781852,0.510752,0.495092,0.497059
9,0.2372,0.619712,0.783685,0.564008,0.513176,0.522822
10,0.1946,0.60754,0.794684,0.634526,0.567561,0.584253


[I 2025-03-23 06:47:58,533] Trial 120 pruned. 


Trial 121 with params: {'learning_rate': 0.0034446023300091716, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8031,1.370795,0.485793,0.114131,0.12884,0.099158
2,1.1752,1.042705,0.610449,0.241719,0.224963,0.215617
3,0.8553,0.835168,0.710357,0.368283,0.351958,0.345185
4,0.6373,0.748521,0.737855,0.41843,0.387843,0.390734
5,0.4872,0.682692,0.76352,0.454521,0.425379,0.421923
6,0.3646,0.647065,0.772686,0.485716,0.451538,0.460973
7,0.2711,0.633633,0.775435,0.526463,0.496929,0.503899
8,0.2131,0.618296,0.781852,0.629597,0.557889,0.572868
9,0.161,0.60455,0.791017,0.639739,0.574814,0.591129
10,0.1362,0.602831,0.790101,0.646502,0.595833,0.606195


[I 2025-03-23 06:48:52,788] Trial 121 pruned. 


Trial 122 with params: {'learning_rate': 0.0030888780966130173, 'weight_decay': 0.01, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.885,1.410061,0.466544,0.099158,0.121191,0.094544
2,1.2385,1.078816,0.590284,0.196456,0.205083,0.186816
3,0.9209,0.880994,0.699358,0.329522,0.326507,0.31595
4,0.6888,0.759789,0.719523,0.400973,0.360663,0.365472
5,0.5154,0.675455,0.756187,0.455269,0.411931,0.417087
6,0.3957,0.652053,0.769936,0.479698,0.442584,0.450722
7,0.295,0.630965,0.776352,0.498587,0.490718,0.48886
8,0.2397,0.617772,0.787351,0.56452,0.521122,0.530244
9,0.1821,0.605296,0.791934,0.635166,0.555521,0.578931
10,0.1535,0.60482,0.797434,0.638636,0.578151,0.592518


[I 2025-03-23 06:50:14,068] Trial 122 finished with value: 0.6142851556926635 and parameters: {'learning_rate': 0.0030888780966130173, 'weight_decay': 0.01, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 2.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 123 with params: {'learning_rate': 0.004659200347393234, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.795,1.350448,0.494959,0.113463,0.135539,0.108829
2,1.1367,1.010333,0.643446,0.262777,0.269444,0.254514
3,0.8172,0.794794,0.71494,0.389332,0.352762,0.343918
4,0.5783,0.721828,0.747021,0.446325,0.41168,0.417889
5,0.4172,0.670898,0.757104,0.439935,0.435407,0.427309
6,0.3049,0.62014,0.776352,0.537235,0.476131,0.489945
7,0.2204,0.618213,0.781852,0.542305,0.512816,0.516622
8,0.1705,0.599692,0.7956,0.611255,0.572397,0.582036
9,0.132,0.58413,0.7956,0.641542,0.596163,0.603852
10,0.1122,0.583267,0.793767,0.672189,0.630568,0.637013


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-23 06:52:32,699] Trial 123 finished with value: 0.6563320911412777 and parameters: {'learning_rate': 0.004659200347393234, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 4.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 124 with params: {'learning_rate': 0.0026588173045241767, 'weight_decay': 0.007, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8803,1.429177,0.469294,0.101086,0.119668,0.094342
2,1.2599,1.139429,0.568286,0.194746,0.185737,0.168885
3,0.9459,0.897897,0.681027,0.311234,0.301186,0.294557
4,0.7146,0.784073,0.713107,0.377156,0.343407,0.343666
5,0.5508,0.698117,0.750687,0.425367,0.395975,0.394148
6,0.4232,0.654686,0.771769,0.506672,0.443741,0.457764
7,0.3214,0.637864,0.778185,0.499313,0.46714,0.470848
8,0.2587,0.628963,0.786434,0.551952,0.509417,0.517822
9,0.1997,0.613099,0.786434,0.579628,0.513387,0.530172
10,0.1663,0.601162,0.793767,0.637393,0.573663,0.591997


[I 2025-03-23 06:53:39,893] Trial 124 pruned. 


Trial 125 with params: {'learning_rate': 0.004134263244248881, 'weight_decay': 0.004, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7717,1.341051,0.501375,0.134284,0.13773,0.112423
2,1.1412,1.038729,0.625115,0.298635,0.251742,0.248435
3,0.8259,0.813283,0.714024,0.350845,0.355008,0.345732
4,0.5979,0.720715,0.741522,0.405675,0.391047,0.390749
5,0.4317,0.657827,0.758937,0.489626,0.449352,0.453597
6,0.3198,0.62773,0.773602,0.500598,0.472295,0.480236
7,0.2349,0.622233,0.791934,0.588411,0.52623,0.54309
8,0.1838,0.606062,0.796517,0.611086,0.561935,0.574448
9,0.1392,0.589411,0.796517,0.666643,0.606431,0.622368
10,0.116,0.584371,0.806599,0.703769,0.62875,0.650642


[I 2025-03-23 06:55:16,720] Trial 125 finished with value: 0.6992647472436017 and parameters: {'learning_rate': 0.004134263244248881, 'weight_decay': 0.004, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 4.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 126 with params: {'learning_rate': 0.0029978801723932413, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8867,1.410304,0.472044,0.100463,0.120954,0.095107
2,1.2418,1.092393,0.589368,0.189335,0.205576,0.185147
3,0.9262,0.889641,0.691109,0.324109,0.314889,0.305519
4,0.6917,0.773289,0.719523,0.372115,0.350786,0.352636
5,0.5241,0.681934,0.753437,0.45798,0.418169,0.419193
6,0.3982,0.653115,0.769019,0.47051,0.435984,0.444472
7,0.2954,0.63554,0.774519,0.500777,0.48517,0.487743
8,0.2335,0.627632,0.787351,0.583377,0.524273,0.540655
9,0.1806,0.609968,0.782768,0.578637,0.515131,0.535149
10,0.1513,0.601916,0.800183,0.63776,0.572905,0.591886


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-23 06:56:32,532] Trial 126 pruned. 


Trial 127 with params: {'learning_rate': 0.004136089622944499, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.7000000000000001, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8464,1.374285,0.476627,0.132642,0.126603,0.105512
2,1.1826,1.034573,0.618698,0.226351,0.234965,0.220487
3,0.8636,0.824706,0.702108,0.360678,0.332437,0.323807
4,0.632,0.718401,0.736022,0.407102,0.378823,0.382789
5,0.4657,0.652679,0.759853,0.467339,0.428561,0.427159
6,0.3421,0.623547,0.773602,0.528051,0.469205,0.481855
7,0.2591,0.614617,0.776352,0.526758,0.496915,0.499466
8,0.2092,0.600022,0.785518,0.57604,0.541683,0.550441
9,0.1616,0.586115,0.789184,0.625143,0.583064,0.592318
10,0.1327,0.58015,0.794684,0.641555,0.605568,0.612467


[I 2025-03-23 06:57:36,523] Trial 127 pruned. 


Trial 128 with params: {'learning_rate': 0.003708925666378033, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7773,1.349942,0.485793,0.118253,0.131615,0.106533
2,1.1644,1.033772,0.609533,0.282003,0.232425,0.227111
3,0.831,0.826934,0.710357,0.34819,0.351765,0.341942
4,0.6116,0.725493,0.735105,0.413361,0.388372,0.385417
5,0.4459,0.660537,0.75802,0.497857,0.446198,0.452248
6,0.3328,0.642589,0.777269,0.563668,0.480665,0.505526
7,0.2439,0.623423,0.794684,0.621275,0.555137,0.570087
8,0.1992,0.604641,0.793767,0.650336,0.575293,0.594735
9,0.1509,0.58771,0.805683,0.684112,0.625663,0.643342
10,0.1252,0.582719,0.809349,0.698055,0.622767,0.646411


[I 2025-03-23 06:58:49,971] Trial 128 finished with value: 0.6853369980882646 and parameters: {'learning_rate': 0.003708925666378033, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 3.5}. Best is trial 83 with value: 0.7016439124452947.


Trial 129 with params: {'learning_rate': 0.004953270489928273, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8053,1.346739,0.48121,0.131244,0.132454,0.103733
2,1.1376,0.984623,0.644363,0.271806,0.264295,0.253647
3,0.8142,0.809064,0.714024,0.379814,0.350044,0.345745
4,0.589,0.70951,0.746104,0.425678,0.399778,0.400592
5,0.4196,0.635333,0.769019,0.481971,0.455704,0.452876
6,0.3052,0.603925,0.787351,0.540622,0.502539,0.506161
7,0.2169,0.600498,0.794684,0.59441,0.535854,0.549746
8,0.1603,0.576113,0.802933,0.671894,0.612138,0.625722
9,0.1292,0.566096,0.809349,0.678456,0.65081,0.654503
10,0.1091,0.560746,0.807516,0.719227,0.660969,0.677267


[I 2025-03-23 07:00:24,475] Trial 129 finished with value: 0.710917878994125 and parameters: {'learning_rate': 0.004953270489928273, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}. Best is trial 129 with value: 0.710917878994125.


Trial 130 with params: {'learning_rate': 0.004584300153833527, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7898,1.350903,0.496792,0.134522,0.135686,0.108429
2,1.1423,0.993773,0.64253,0.270727,0.262928,0.253825
3,0.812,0.804344,0.710357,0.360297,0.346814,0.337855
4,0.5808,0.717867,0.749771,0.439822,0.409699,0.412717
5,0.4165,0.660411,0.75527,0.481502,0.446742,0.446468
6,0.3104,0.623772,0.770852,0.511267,0.474162,0.482825
7,0.2238,0.619766,0.784601,0.58807,0.535384,0.549467
8,0.182,0.598943,0.794684,0.641508,0.594997,0.608825
9,0.1352,0.584713,0.800183,0.697303,0.645484,0.658033
10,0.1136,0.580493,0.8011,0.7001,0.644486,0.656671


[I 2025-03-23 07:02:32,301] Trial 130 finished with value: 0.6915355744918351 and parameters: {'learning_rate': 0.004584300153833527, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}. Best is trial 129 with value: 0.710917878994125.


Trial 131 with params: {'learning_rate': 0.004676516813675666, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7964,1.350448,0.498625,0.114061,0.137008,0.110006
2,1.1352,1.001919,0.643446,0.260031,0.268331,0.254667
3,0.8076,0.813233,0.707608,0.369053,0.355183,0.34631
4,0.5802,0.723507,0.733272,0.418986,0.39383,0.396457
5,0.4112,0.649951,0.769936,0.452793,0.451641,0.442061
6,0.3036,0.621523,0.778185,0.529835,0.488428,0.496046
7,0.2202,0.630449,0.783685,0.596771,0.542043,0.557411
8,0.1799,0.600303,0.799267,0.663649,0.627801,0.630958
9,0.1355,0.584221,0.799267,0.662439,0.624839,0.63287
10,0.1131,0.585825,0.79835,0.675727,0.62971,0.640017


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-23 07:04:04,948] Trial 131 finished with value: 0.6735600205560109 and parameters: {'learning_rate': 0.004676516813675666, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 3.5}. Best is trial 129 with value: 0.710917878994125.


Trial 132 with params: {'learning_rate': 0.004141487490923996, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.5, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7707,1.342146,0.501375,0.133625,0.13727,0.11086
2,1.1395,1.044893,0.626031,0.268337,0.249018,0.240211
3,0.8171,0.802597,0.721357,0.363281,0.359772,0.345426
4,0.595,0.718681,0.739688,0.408031,0.392887,0.392595
5,0.431,0.658363,0.757104,0.498676,0.435853,0.44679
6,0.3205,0.621852,0.775435,0.582586,0.501796,0.523931
7,0.2315,0.62305,0.790101,0.631442,0.555225,0.574788
8,0.1776,0.607412,0.791934,0.653987,0.586632,0.604338
9,0.1377,0.591957,0.799267,0.682232,0.61772,0.638506
10,0.1159,0.599725,0.796517,0.671325,0.616633,0.626585


[I 2025-03-23 07:05:18,015] Trial 132 finished with value: 0.6913259991409515 and parameters: {'learning_rate': 0.004141487490923996, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.5, 'temperature': 6.5}. Best is trial 129 with value: 0.710917878994125.


Trial 133 with params: {'learning_rate': 0.002570985044361167, 'weight_decay': 0.007, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8771,1.434533,0.472961,0.102646,0.121179,0.094604
2,1.2655,1.21677,0.553621,0.233841,0.182931,0.168911
3,0.9744,0.886687,0.679193,0.321571,0.299914,0.293024
4,0.7333,0.784406,0.716774,0.379776,0.349389,0.347114
5,0.5638,0.706585,0.747938,0.42328,0.391074,0.38828


[I 2025-03-23 07:05:43,263] Trial 133 pruned. 


Trial 134 with params: {'learning_rate': 0.002617793933148862, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.5, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8789,1.42966,0.469294,0.098347,0.119572,0.093615
2,1.2635,1.176813,0.55912,0.207564,0.181257,0.164455
3,0.9582,0.885188,0.681027,0.317425,0.307227,0.299546
4,0.7204,0.773702,0.71769,0.373354,0.341848,0.338711
5,0.5494,0.694332,0.750687,0.444224,0.404276,0.407529
6,0.4258,0.658813,0.766269,0.45956,0.426106,0.432387
7,0.3202,0.634109,0.778185,0.50854,0.489911,0.488619
8,0.261,0.629047,0.791017,0.542758,0.515856,0.519939
9,0.2008,0.609348,0.786434,0.623161,0.543539,0.562848
10,0.1668,0.598715,0.79835,0.626325,0.569295,0.583669


[I 2025-03-23 07:06:42,488] Trial 134 pruned. 


Trial 135 with params: {'learning_rate': 0.0041454061882065145, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.7000000000000001, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8496,1.383528,0.471127,0.132931,0.125684,0.104769
2,1.1862,1.040924,0.620532,0.274087,0.24084,0.232695
3,0.8675,0.831637,0.704858,0.362448,0.332435,0.323084
4,0.6328,0.726181,0.736022,0.393507,0.374356,0.376215
5,0.4671,0.652118,0.76352,0.50272,0.447192,0.451331
6,0.3442,0.617485,0.771769,0.497134,0.459618,0.466581
7,0.2506,0.620555,0.769936,0.540543,0.494307,0.502176
8,0.1993,0.608012,0.787351,0.595362,0.564649,0.571248
9,0.1569,0.590608,0.794684,0.62047,0.578084,0.588332
10,0.1298,0.579743,0.796517,0.626607,0.596553,0.601161


[I 2025-03-23 07:08:24,533] Trial 135 pruned. 


Trial 136 with params: {'learning_rate': 0.0039053881254811577, 'weight_decay': 0.009000000000000001, 'warmup_steps': 3, 'lambda_param': 0.5, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.832,1.404361,0.468378,0.134104,0.127837,0.103188
2,1.201,1.057229,0.615032,0.247386,0.225728,0.215737
3,0.8722,0.853631,0.692942,0.341857,0.323513,0.312398
4,0.6365,0.746469,0.733272,0.425673,0.381593,0.387371
5,0.474,0.661683,0.753437,0.474445,0.439363,0.444309
6,0.3476,0.642726,0.767186,0.499486,0.44908,0.457752
7,0.2544,0.635538,0.766269,0.515278,0.476261,0.481697
8,0.2034,0.610802,0.789184,0.578328,0.552905,0.558003
9,0.1592,0.607257,0.791934,0.629417,0.562166,0.58
10,0.1322,0.597607,0.79835,0.668814,0.61727,0.627486


[I 2025-03-23 07:09:46,122] Trial 136 finished with value: 0.6544908749577482 and parameters: {'learning_rate': 0.0039053881254811577, 'weight_decay': 0.009000000000000001, 'warmup_steps': 3, 'lambda_param': 0.5, 'temperature': 3.0}. Best is trial 129 with value: 0.710917878994125.


Trial 137 with params: {'learning_rate': 0.0025133588020243083, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.7000000000000001, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8767,1.436822,0.470211,0.10783,0.119375,0.094428
2,1.2643,1.192529,0.560953,0.213778,0.183533,0.170804
3,0.9665,0.893443,0.675527,0.313909,0.297261,0.286986
4,0.7278,0.785172,0.719523,0.384818,0.343431,0.344009
5,0.5606,0.707291,0.75527,0.437775,0.405287,0.40334


[I 2025-03-23 07:10:13,555] Trial 137 pruned. 


Trial 138 with params: {'learning_rate': 0.0049596710448371354, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.7000000000000001, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8026,1.345192,0.487626,0.113379,0.133438,0.104849
2,1.1338,0.978603,0.637947,0.270267,0.257926,0.249395
3,0.8111,0.803005,0.716774,0.36812,0.352037,0.341309
4,0.5794,0.720177,0.742438,0.434589,0.398803,0.405579
5,0.4134,0.631734,0.775435,0.500143,0.462196,0.463084
6,0.2982,0.624708,0.780935,0.541389,0.481611,0.494612
7,0.2157,0.604997,0.791017,0.578766,0.544165,0.550072
8,0.1709,0.581391,0.8011,0.660933,0.616795,0.623762
9,0.1324,0.565881,0.802933,0.684242,0.640847,0.650059
10,0.1095,0.57096,0.805683,0.677571,0.65109,0.654287


[I 2025-03-23 07:11:36,456] Trial 138 finished with value: 0.6848023807884499 and parameters: {'learning_rate': 0.0049596710448371354, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.7000000000000001, 'temperature': 3.0}. Best is trial 129 with value: 0.710917878994125.


Trial 139 with params: {'learning_rate': 0.00478613346179082, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.5, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7984,1.349371,0.494959,0.132599,0.13753,0.111154
2,1.1347,0.995456,0.640697,0.270453,0.266954,0.255147
3,0.8141,0.805789,0.71769,0.37442,0.354885,0.345398
4,0.5802,0.727924,0.739688,0.423342,0.391564,0.396753
5,0.4154,0.653075,0.761687,0.465337,0.452586,0.44586
6,0.3027,0.610962,0.775435,0.566669,0.492782,0.507267
7,0.2177,0.601161,0.796517,0.584292,0.558268,0.558968
8,0.1662,0.59213,0.793767,0.735097,0.627535,0.659873
9,0.1287,0.579783,0.809349,0.752938,0.659955,0.688497
10,0.1099,0.573286,0.806599,0.725727,0.650652,0.669935


[I 2025-03-23 07:13:00,271] Trial 139 finished with value: 0.6962062814260203 and parameters: {'learning_rate': 0.00478613346179082, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.5, 'temperature': 3.5}. Best is trial 129 with value: 0.710917878994125.


Trial 140 with params: {'learning_rate': 0.003226366211428321, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8807,1.412069,0.471127,0.105605,0.121012,0.097207
2,1.2481,1.115047,0.573786,0.220473,0.19694,0.185151
3,0.9259,0.885872,0.696609,0.345485,0.329589,0.318071
4,0.69,0.755189,0.721357,0.390592,0.370522,0.367628
5,0.5163,0.679444,0.75527,0.416742,0.403475,0.402294


[I 2025-03-23 07:13:27,252] Trial 140 pruned. 


Trial 141 with params: {'learning_rate': 0.0037399021452138833, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7689,1.356985,0.489459,0.139205,0.133065,0.107692
2,1.14,1.011325,0.637947,0.282862,0.250173,0.244091
3,0.817,0.81632,0.715857,0.378726,0.355225,0.348188
4,0.6033,0.726654,0.734189,0.384913,0.378647,0.377365
5,0.4406,0.652792,0.766269,0.501765,0.449406,0.459185
6,0.3244,0.633744,0.775435,0.57118,0.48469,0.50403
7,0.2402,0.615639,0.779102,0.561013,0.507356,0.519878
8,0.1876,0.609218,0.792851,0.667091,0.593435,0.614922
9,0.1457,0.587017,0.800183,0.662187,0.60611,0.620672
10,0.1212,0.582467,0.808433,0.683118,0.633283,0.64728


[I 2025-03-23 07:14:59,475] Trial 141 finished with value: 0.6955742392731632 and parameters: {'learning_rate': 0.0037399021452138833, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 2.5}. Best is trial 129 with value: 0.710917878994125.


Trial 142 with params: {'learning_rate': 0.003417534448034695, 'weight_decay': 0.01, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8223,1.391987,0.464711,0.083383,0.122352,0.093048
2,1.2066,1.064637,0.6022,0.232546,0.225624,0.211465
3,0.8842,0.846322,0.699358,0.338906,0.34024,0.331283
4,0.6519,0.746144,0.732356,0.400301,0.385777,0.383493
5,0.4873,0.677424,0.75527,0.449958,0.421805,0.423598
6,0.3651,0.629294,0.777269,0.52736,0.468115,0.481291
7,0.2709,0.627686,0.772686,0.52866,0.476446,0.487442
8,0.2163,0.61044,0.793767,0.6058,0.543554,0.562141
9,0.1644,0.591452,0.800183,0.630592,0.560061,0.580831
10,0.1372,0.589163,0.806599,0.650994,0.609451,0.619078


[I 2025-03-23 07:15:54,993] Trial 142 pruned. 


Trial 143 with params: {'learning_rate': 0.0021774730747819077, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8811,1.442276,0.473877,0.099291,0.117739,0.092336
2,1.2702,1.171046,0.568286,0.195674,0.192385,0.175429
3,0.9718,0.905287,0.670027,0.307073,0.290004,0.280378
4,0.745,0.796963,0.709441,0.374518,0.338443,0.340971
5,0.5819,0.708992,0.745188,0.395021,0.383738,0.379621
6,0.4628,0.676793,0.75802,0.448432,0.411735,0.4199
7,0.3593,0.666233,0.761687,0.485496,0.445607,0.447581
8,0.2956,0.645363,0.773602,0.494686,0.494902,0.489336
9,0.2333,0.623407,0.784601,0.599772,0.523652,0.541739
10,0.1908,0.605476,0.791934,0.661499,0.563631,0.590954


[I 2025-03-23 07:16:50,455] Trial 143 pruned. 


Trial 144 with params: {'learning_rate': 0.004471614527439932, 'weight_decay': 0.009000000000000001, 'warmup_steps': 3, 'lambda_param': 0.4, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8577,1.341494,0.497709,0.111836,0.135516,0.10676
2,1.1553,0.989048,0.648029,0.29459,0.266395,0.256841
3,0.8216,0.846402,0.687443,0.347499,0.323469,0.308494
4,0.5986,0.737235,0.727773,0.402984,0.366275,0.372017
5,0.4373,0.657776,0.756187,0.478027,0.432208,0.438287
6,0.3161,0.628545,0.769019,0.512079,0.465806,0.473388
7,0.2341,0.611112,0.783685,0.568717,0.53068,0.539125
8,0.1799,0.603562,0.7956,0.619211,0.590991,0.593478
9,0.1407,0.589826,0.806599,0.715446,0.633882,0.657251
10,0.1185,0.58426,0.802933,0.699187,0.641067,0.653211


[I 2025-03-23 07:18:18,527] Trial 144 finished with value: 0.681778021857888 and parameters: {'learning_rate': 0.004471614527439932, 'weight_decay': 0.009000000000000001, 'warmup_steps': 3, 'lambda_param': 0.4, 'temperature': 2.0}. Best is trial 129 with value: 0.710917878994125.


Trial 145 with params: {'learning_rate': 0.002824987932224737, 'weight_decay': 0.007, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8909,1.423913,0.471127,0.10882,0.119619,0.094673
2,1.2594,1.146245,0.575619,0.190183,0.193435,0.174892
3,0.9506,0.893894,0.68286,0.317833,0.305815,0.298104
4,0.7122,0.772245,0.72319,0.396266,0.352471,0.354921
5,0.5421,0.696777,0.754354,0.428385,0.404687,0.403174
6,0.4163,0.66588,0.768103,0.477864,0.437247,0.445234
7,0.3126,0.648312,0.76352,0.490422,0.464574,0.468917
8,0.2541,0.628981,0.787351,0.546674,0.506853,0.517588
9,0.1946,0.615951,0.789184,0.631872,0.558178,0.578584
10,0.1629,0.600495,0.794684,0.646005,0.591671,0.607479


[I 2025-03-23 07:19:39,394] Trial 145 finished with value: 0.6107799373210406 and parameters: {'learning_rate': 0.002824987932224737, 'weight_decay': 0.007, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 3.5}. Best is trial 129 with value: 0.710917878994125.


Trial 146 with params: {'learning_rate': 0.003099862572019156, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.1, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8823,1.404569,0.468378,0.099465,0.120984,0.094408
2,1.2354,1.096931,0.589368,0.230693,0.208549,0.19216
3,0.9174,0.889855,0.699358,0.343353,0.328376,0.319158
4,0.6867,0.765612,0.730522,0.382516,0.369245,0.367701
5,0.516,0.685505,0.752521,0.447941,0.408012,0.414156


[I 2025-03-23 07:20:08,087] Trial 146 pruned. 


Trial 147 with params: {'learning_rate': 0.00340137622953104, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.5, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8024,1.364735,0.47571,0.109491,0.123569,0.098868
2,1.1877,1.044698,0.619615,0.259184,0.234061,0.223138
3,0.8693,0.87839,0.693859,0.360382,0.327477,0.318944
4,0.6482,0.755753,0.727773,0.404037,0.366222,0.370763
5,0.4873,0.67253,0.753437,0.442829,0.415189,0.414872
6,0.3658,0.645826,0.773602,0.495304,0.463414,0.471166
7,0.2686,0.628111,0.780018,0.543698,0.497962,0.504623
8,0.212,0.611899,0.791017,0.57953,0.540056,0.548629
9,0.1655,0.608616,0.788268,0.64417,0.562703,0.58752
10,0.1383,0.60188,0.790101,0.63211,0.583892,0.595298


[I 2025-03-23 07:21:34,266] Trial 147 pruned. 


Trial 148 with params: {'learning_rate': 0.004220497038970213, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7855,1.350547,0.483043,0.114514,0.13048,0.105334
2,1.1423,1.002925,0.637947,0.296287,0.251985,0.246978
3,0.8185,0.806351,0.72594,0.380529,0.357961,0.35195
4,0.596,0.720394,0.746104,0.415648,0.40632,0.401757
5,0.4341,0.663186,0.765353,0.490501,0.446398,0.448692
6,0.3235,0.630522,0.768103,0.499129,0.466473,0.474692
7,0.2323,0.628278,0.779102,0.598041,0.519713,0.538725
8,0.1912,0.621611,0.780935,0.627792,0.566525,0.580954
9,0.1437,0.602461,0.789184,0.695254,0.618226,0.641116
10,0.1198,0.601583,0.791934,0.689965,0.626538,0.642534


[I 2025-03-23 07:23:10,041] Trial 148 finished with value: 0.6859120540876517 and parameters: {'learning_rate': 0.004220497038970213, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 3.0}. Best is trial 129 with value: 0.710917878994125.


Trial 149 with params: {'learning_rate': 0.004665367038943749, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.5, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7978,1.348902,0.497709,0.134315,0.137752,0.112982
2,1.1394,0.998105,0.647113,0.260062,0.271372,0.256358
3,0.8104,0.807651,0.711274,0.361899,0.351499,0.339765
4,0.5818,0.716683,0.745188,0.417631,0.401228,0.402811
5,0.4127,0.64801,0.768103,0.469001,0.454304,0.449133
6,0.2981,0.610265,0.780935,0.535784,0.484648,0.494614
7,0.2202,0.609309,0.788268,0.586897,0.52834,0.541163
8,0.1715,0.587382,0.799267,0.638343,0.600263,0.606458
9,0.1306,0.574505,0.800183,0.661103,0.643192,0.642234
10,0.1099,0.576421,0.800183,0.700864,0.655834,0.663658


[I 2025-03-23 07:24:27,512] Trial 149 finished with value: 0.6716909799449003 and parameters: {'learning_rate': 0.004665367038943749, 'weight_decay': 0.008, 'warmup_steps': 4, 'lambda_param': 0.5, 'temperature': 2.5}. Best is trial 129 with value: 0.710917878994125.


In [32]:
# Show the best run found by the preceding hyperparameter search
# (its run id, objective value and hyperparameters).
print(best_trial2)

BestRun(run_id='129', objective=0.710917878994125, hyperparameters={'learning_rate': 0.004953270489928273, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}, run_summary=None)


In [33]:
# Convert epoch-based budgets into step (batch) counts.
data_length = len(all_train_data)
steps_per_epoch = math.ceil(data_length / batch_size)  # batches needed for one pass over the data

# Resource bounds handed to the pruner: 5 epochs' worth of steps at the low end,
# num_epochs epochs' worth at the high end.
min_r = steps_per_epoch * 5
max_r = steps_per_epoch * num_epochs

# Upper bound of the warmup_steps search range: a tenth of one epoch's batches, rounded up.
warm_up = math.ceil(data_length / batch_size / 10)

In [34]:
# Reset the seed through the project helper before setting up the next search.
# NOTE(review): which RNGs this covers is defined in base.reset_seed — confirm there.
base.reset_seed()

In [35]:
# Base training arguments for the Trainer used in the augmented-data baseline search.
# NOTE(review): the "~" in both paths is passed through exactly as written; confirm
# that base.get_training_args (or whatever consumes the paths) expands it.
training_args = base.get_training_args(
    output_dir=f"~/results/{DATASET}/bilstm-base-aug_fine_hp-search",
    logging_dir=f"~/logs/{DATASET}/bilstm-base-aug_fine_hp-search",
    epochs=num_epochs,
    batch_size=batch_size,
)

In [36]:
def hp_space(trial):
    """Sample one hyperparameter configuration for a search trial.

    Values are requested from ``trial`` in a fixed order:

    1. ``learning_rate`` - float in [5e-5, 5e-3], sampled with ``log=True``.
    2. ``weight_decay``  - float in [0, 1e-2], sampled with ``step=1e-3``.
    3. ``warmup_steps``  - int in [0, warm_up]; ``warm_up`` is read from the
       notebook's global scope at call time.

    The sampled configuration is printed together with the trial number.

    Args:
        trial: object exposing ``suggest_float``, ``suggest_int`` and ``number``.

    Returns:
        dict mapping each hyperparameter name to its sampled value.
    """
    # Keep this request order: changing it would change what a seeded sampler draws.
    learning_rate = trial.suggest_float("learning_rate", 5e-5, 5e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0, 1e-2, step=1e-3)
    warmup_steps = trial.suggest_int("warmup_steps", 0, warm_up)

    params = dict(
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        warmup_steps=warmup_steps,
    )
    print(f"Trial {trial.number} with params: {params}")
    return params

In [37]:
# Hyperband pruning with the resource range computed above (min_r .. max_r).
pruner = optuna.pruners.HyperbandPruner(
    min_resource=min_r,
    max_resource=max_r,
    reduction_factor=2,
    bootstrap_count=2,
)

# TPE sampler with a fixed seed so the sequence of suggested configurations is repeatable.
sampler = optuna.samplers.TPESampler(
    seed=42,
    multivariate=True,
)



In [38]:
# Trainer for the baseline search: trains on the augmented training set and
# evaluates on eval_data; the model is supplied through model_init rather than
# as a ready-made instance.
# NOTE(review): the zero-argument lambda is kept on purpose. Trainer presumably
# decides how to call model_init from its argument count, and get_BiLSTM's
# signature is not visible here - confirm before passing get_BiLSTM directly.
trainer = Trainer(
    model_init=lambda: get_BiLSTM(),
    args=training_args,
    train_dataset=all_train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
)
  

In [39]:
# Run the Optuna-backed search over hp_space. Each trial's objective is the
# "eval_f1" entry of its evaluation metrics, and the study maximizes it.
best_trial3 = trainer.hyperparameter_search(
    # What to search and how to score it.
    hp_space=hp_space,
    compute_objective=lambda metrics: metrics["eval_f1"],
    direction="maximize",
    # Search backend and budget.
    backend="optuna",
    n_trials=150,
    # Study configuration (name, sampler and pruner defined in the cells above).
    study_name="Test-base-aug",
    sampler=sampler,
    pruner=pruner,
)

[I 2025-03-23 07:24:27,870] A new study created in memory with name: Test-base-aug


Trial 0 with params: {'learning_rate': 0.0002805758207667253, 'weight_decay': 0.01, 'warmup_steps': 39}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7916,1.234699,0.679193,0.318515,0.308004,0.301556
2,0.7053,1.011577,0.72594,0.518921,0.431581,0.450676
3,0.3659,1.023958,0.75802,0.61659,0.525473,0.550786
4,0.1983,1.11269,0.764436,0.701392,0.604681,0.63125
5,0.1164,1.169958,0.771769,0.696173,0.620105,0.636465


[I 2025-03-23 07:25:43,076] Trial 0 pruned. 


Trial 1 with params: {'learning_rate': 0.0007875660249889869, 'weight_decay': 0.001, 'warmup_steps': 8}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1676,0.963608,0.751604,0.567428,0.490243,0.513861
2,0.2159,1.037998,0.792851,0.74937,0.660693,0.6806
3,0.0685,1.194788,0.797434,0.728535,0.687118,0.687297
4,0.0315,1.274155,0.786434,0.719479,0.649041,0.663336
5,0.0168,1.38338,0.79835,0.718375,0.673811,0.683204
6,0.01,1.425836,0.791934,0.694726,0.660247,0.663155
7,0.0083,1.429254,0.79835,0.734457,0.674158,0.686828
8,0.0037,1.557415,0.794684,0.731461,0.673855,0.684521
9,0.0035,1.654167,0.793767,0.69616,0.648345,0.654858
10,0.002,1.627974,0.793767,0.713202,0.668757,0.677286


[I 2025-03-23 07:28:38,524] Trial 1 pruned. 


Trial 2 with params: {'learning_rate': 6.533369619026643e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 32}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6171,2.100317,0.464711,0.102537,0.112327,0.086712
2,1.7759,1.718441,0.567369,0.174113,0.182006,0.162139
3,1.4407,1.527201,0.610449,0.268417,0.230184,0.222601
4,1.2035,1.375302,0.64528,0.298996,0.270748,0.268573
5,1.0274,1.28646,0.664528,0.332755,0.304348,0.302095


[I 2025-03-23 07:30:11,887] Trial 2 pruned. 


Trial 3 with params: {'learning_rate': 0.0013035123791853842, 'weight_decay': 0.0, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0336,0.969734,0.773602,0.643369,0.575808,0.59105
2,0.1133,1.160202,0.794684,0.738765,0.663991,0.676873
3,0.0354,1.124799,0.808433,0.736856,0.716675,0.715202
4,0.0156,1.317399,0.804766,0.758792,0.686853,0.700911
5,0.0106,1.401361,0.806599,0.742124,0.687003,0.701152
6,0.0078,1.390195,0.818515,0.761304,0.706729,0.721683
7,0.005,1.507194,0.805683,0.794148,0.694664,0.72439
8,0.0041,1.550142,0.813016,0.7365,0.701839,0.706297
9,0.0022,1.529274,0.818515,0.762001,0.725085,0.732808
10,0.0015,1.646739,0.812099,0.757752,0.728037,0.724814


[I 2025-03-23 07:34:25,624] Trial 3 pruned. 


Trial 4 with params: {'learning_rate': 0.002311294500510415, 'weight_decay': 0.002, 'warmup_steps': 9}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7645,1.064356,0.800183,0.731028,0.6563,0.674728
2,0.0677,1.080352,0.819432,0.744253,0.698855,0.70981
3,0.0225,1.237683,0.805683,0.735537,0.697622,0.704502
4,0.0142,1.281543,0.819432,0.753794,0.70396,0.713431
5,0.0071,1.37819,0.817599,0.739602,0.700765,0.71089
6,0.0099,1.440303,0.832264,0.815772,0.719356,0.745385
7,0.0053,1.381222,0.83593,0.808362,0.752354,0.765745
8,0.0052,1.642645,0.821265,0.789376,0.735971,0.748904
9,0.0016,1.536241,0.827681,0.774004,0.747824,0.749186
10,0.0016,1.681013,0.819432,0.754745,0.706789,0.717007


[I 2025-03-23 07:37:40,883] Trial 4 pruned. 


Trial 5 with params: {'learning_rate': 0.00011635338541918901, 'weight_decay': 0.003, 'warmup_steps': 28}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3116,1.76075,0.549038,0.139131,0.16669,0.143242
2,1.3698,1.397138,0.638863,0.284778,0.268582,0.259902
3,0.9874,1.208876,0.692026,0.390412,0.343382,0.34903
4,0.7465,1.119417,0.701192,0.418066,0.379414,0.387207
5,0.586,1.091372,0.716774,0.456176,0.419481,0.427664
6,0.4672,1.059791,0.736939,0.540341,0.475641,0.493173
7,0.3788,1.071284,0.735105,0.579889,0.502219,0.525408
8,0.3134,1.09704,0.730522,0.601105,0.512952,0.536877
9,0.2639,1.106213,0.748854,0.608971,0.54613,0.565112
10,0.2241,1.121865,0.736939,0.60385,0.552071,0.562853


[I 2025-03-23 07:40:51,595] Trial 5 pruned. 


Trial 6 with params: {'learning_rate': 0.0003654769917956456, 'weight_decay': 0.003, 'warmup_steps': 33}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6262,1.114725,0.708524,0.446638,0.370171,0.379406
2,0.5435,1.004796,0.742438,0.569263,0.49511,0.51534
3,0.2418,1.101605,0.76352,0.669547,0.585213,0.604736
4,0.1166,1.17961,0.762603,0.677809,0.619191,0.626448
5,0.066,1.298108,0.769936,0.689843,0.635665,0.648129
6,0.0395,1.307606,0.773602,0.656079,0.630118,0.628113
7,0.0252,1.482246,0.778185,0.679173,0.653339,0.648124
8,0.0166,1.504402,0.784601,0.676645,0.63922,0.64111
9,0.0104,1.493701,0.789184,0.674071,0.629344,0.638697
10,0.007,1.596541,0.780935,0.676623,0.636583,0.643319


[I 2025-03-23 07:43:39,568] Trial 6 pruned. 


Trial 7 with params: {'learning_rate': 9.505122659935192e-05, 'weight_decay': 0.003, 'warmup_steps': 19}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4181,1.891046,0.525206,0.109496,0.144127,0.116453
2,1.5261,1.510743,0.607699,0.290369,0.22937,0.219606
3,1.1504,1.307124,0.662695,0.335751,0.297019,0.294689
4,0.9062,1.180993,0.687443,0.406697,0.344146,0.355573
5,0.7374,1.127104,0.707608,0.450161,0.394817,0.403968
6,0.6098,1.08081,0.711274,0.430369,0.406883,0.410535
7,0.5132,1.0773,0.721357,0.488172,0.458385,0.462953
8,0.4406,1.084452,0.72044,0.565258,0.476369,0.49741
9,0.3846,1.085357,0.731439,0.560439,0.500455,0.518926
10,0.3393,1.092819,0.733272,0.566921,0.503457,0.519943


[I 2025-03-23 07:46:55,175] Trial 7 pruned. 


Trial 8 with params: {'learning_rate': 0.00040842279473800845, 'weight_decay': 0.008, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5264,1.108444,0.71769,0.450014,0.384504,0.397024
2,0.4882,0.99684,0.758937,0.590344,0.523562,0.536561
3,0.2035,1.127833,0.770852,0.665174,0.596827,0.61078
4,0.0988,1.205749,0.761687,0.650835,0.620663,0.618527
5,0.0546,1.270382,0.783685,0.67504,0.634682,0.637634
6,0.0317,1.337243,0.784601,0.665207,0.638097,0.637299
7,0.0196,1.500818,0.775435,0.67116,0.6431,0.639626
8,0.0131,1.536734,0.785518,0.692057,0.638647,0.647363
9,0.0074,1.547457,0.792851,0.703494,0.64984,0.662137
10,0.0056,1.601101,0.788268,0.695814,0.643427,0.650407


[I 2025-03-23 07:50:11,691] Trial 8 pruned. 


Trial 9 with params: {'learning_rate': 0.0005338741354740678, 'weight_decay': 0.006, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3508,1.033193,0.729606,0.454367,0.400374,0.413107
2,0.3551,1.043707,0.774519,0.682393,0.589904,0.617169
3,0.1308,1.202098,0.776352,0.672397,0.632371,0.641191
4,0.0618,1.314524,0.784601,0.702382,0.630112,0.643486
5,0.0312,1.417158,0.778185,0.649789,0.637752,0.63013


[I 2025-03-23 07:51:41,082] Trial 9 pruned. 


Trial 10 with params: {'learning_rate': 0.0015322576261213353, 'weight_decay': 0.003, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9684,0.98743,0.777269,0.680141,0.607409,0.623941
2,0.0946,1.204806,0.800183,0.710771,0.64965,0.665215
3,0.0267,1.287572,0.802016,0.702245,0.691096,0.686522
4,0.0192,1.282025,0.814849,0.790319,0.693003,0.721216
5,0.0085,1.432924,0.810266,0.729762,0.676294,0.688374
6,0.0077,1.489196,0.812099,0.803899,0.69838,0.734058
7,0.0061,1.576016,0.806599,0.755456,0.690294,0.709254
8,0.0028,1.573679,0.813932,0.739387,0.712488,0.716086
9,0.0021,1.703408,0.812099,0.779322,0.716261,0.735213
10,0.0028,1.658852,0.818515,0.781409,0.720288,0.734785


[I 2025-03-23 07:56:28,231] Trial 10 finished with value: 0.7423309761365634 and parameters: {'learning_rate': 0.0015322576261213353, 'weight_decay': 0.003, 'warmup_steps': 53}. Best is trial 10 with value: 0.7423309761365634.


Trial 11 with params: {'learning_rate': 0.0025419498380802787, 'weight_decay': 0.002, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8007,0.991488,0.813932,0.743764,0.67239,0.69251
2,0.0604,1.121276,0.826764,0.789094,0.713961,0.734723
3,0.0217,1.327986,0.817599,0.795068,0.717742,0.735977
4,0.0156,1.415178,0.818515,0.805606,0.727369,0.748176
5,0.0075,1.514169,0.827681,0.803949,0.721754,0.748421
6,0.0065,1.54487,0.824931,0.819284,0.713658,0.744977
7,0.0057,1.612846,0.820348,0.809594,0.705416,0.735613
8,0.0053,1.545619,0.815765,0.793957,0.714707,0.736018
9,0.003,1.577106,0.827681,0.792023,0.731548,0.744423
10,0.0009,1.741641,0.825848,0.79808,0.728122,0.746723


[I 2025-03-23 08:01:06,443] Trial 11 finished with value: 0.7500511253788568 and parameters: {'learning_rate': 0.0025419498380802787, 'weight_decay': 0.002, 'warmup_steps': 52}. Best is trial 11 with value: 0.7500511253788568.


Trial 12 with params: {'learning_rate': 0.003885078898153256, 'weight_decay': 0.005, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7178,0.991894,0.809349,0.765669,0.709123,0.721575
2,0.0523,1.136978,0.825848,0.782766,0.702028,0.724843
3,0.0223,1.148632,0.824931,0.772274,0.731254,0.737
4,0.0181,1.334024,0.826764,0.775948,0.725414,0.734647
5,0.0132,1.481614,0.824015,0.763481,0.732003,0.736542
6,0.0113,1.585464,0.811182,0.778046,0.711723,0.730582
7,0.0077,1.626849,0.830431,0.798158,0.759711,0.765757
8,0.0047,1.620497,0.824931,0.793127,0.739009,0.753559
9,0.0032,1.679648,0.830431,0.77376,0.751323,0.750427
10,0.0013,1.725162,0.834097,0.779047,0.757089,0.754032


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-23 08:06:06,043] Trial 12 finished with value: 0.7539819528795496 and parameters: {'learning_rate': 0.003885078898153256, 'weight_decay': 0.005, 'warmup_steps': 51}. Best is trial 12 with value: 0.7539819528795496.


Trial 13 with params: {'learning_rate': 0.004688101271295509, 'weight_decay': 0.006, 'warmup_steps': 48}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6603,0.988224,0.809349,0.713429,0.668392,0.680753
2,0.0538,1.120353,0.815765,0.760683,0.696729,0.713938
3,0.0247,1.39939,0.819432,0.742961,0.701511,0.705233
4,0.0202,1.4761,0.820348,0.753896,0.706804,0.714732
5,0.0204,1.566668,0.821265,0.795043,0.716698,0.736097
6,0.0124,1.706607,0.814849,0.783665,0.701373,0.727295
7,0.0102,1.671309,0.83593,0.810694,0.734293,0.757027
8,0.0046,1.722012,0.824015,0.813777,0.71379,0.738085
9,0.0015,1.771072,0.822181,0.761429,0.686523,0.705956
10,0.001,1.82463,0.826764,0.75174,0.701219,0.712769


[I 2025-03-23 08:10:34,855] Trial 13 finished with value: 0.7314438082660835 and parameters: {'learning_rate': 0.004688101271295509, 'weight_decay': 0.006, 'warmup_steps': 48}. Best is trial 12 with value: 0.7539819528795496.


Trial 14 with params: {'learning_rate': 0.003147266239250273, 'weight_decay': 0.0, 'warmup_steps': 33}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7131,0.938586,0.814849,0.780312,0.688399,0.715794
2,0.0547,1.122316,0.823098,0.75977,0.695278,0.710634
3,0.0211,1.284735,0.810266,0.742013,0.713678,0.715134
4,0.0158,1.365428,0.810266,0.749404,0.686996,0.706006
5,0.0107,1.446333,0.819432,0.821133,0.724898,0.75612
6,0.0064,1.574666,0.814849,0.789974,0.695449,0.726258
7,0.0071,1.576463,0.823098,0.740902,0.694678,0.70528
8,0.0026,1.67624,0.830431,0.778757,0.719068,0.733653
9,0.0028,1.683741,0.821265,0.752829,0.722498,0.727708
10,0.0024,1.802764,0.811182,0.74351,0.69394,0.702263


[I 2025-03-23 08:13:35,218] Trial 14 pruned. 


Trial 15 with params: {'learning_rate': 0.002917590592290426, 'weight_decay': 0.005, 'warmup_steps': 26}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7243,1.020978,0.806599,0.737637,0.658284,0.683202
2,0.0563,1.0331,0.831347,0.778991,0.713663,0.733051
3,0.0216,1.275471,0.824015,0.744728,0.68684,0.700847
4,0.0138,1.358935,0.829514,0.779442,0.704774,0.728302
5,0.0099,1.400602,0.828598,0.751812,0.712988,0.719426
6,0.007,1.388771,0.824015,0.762584,0.72861,0.73138
7,0.0079,1.499485,0.831347,0.780592,0.714389,0.730393
8,0.0043,1.398284,0.840513,0.775618,0.738815,0.745923
9,0.0028,1.494293,0.84143,0.786037,0.731721,0.743286
10,0.0012,1.565684,0.846929,0.795589,0.746201,0.758211


[I 2025-03-23 08:18:43,999] Trial 15 finished with value: 0.7472589957353845 and parameters: {'learning_rate': 0.002917590592290426, 'weight_decay': 0.005, 'warmup_steps': 26}. Best is trial 12 with value: 0.7539819528795496.


Trial 16 with params: {'learning_rate': 0.004365222004666955, 'weight_decay': 0.003, 'warmup_steps': 40}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6638,1.007863,0.821265,0.766715,0.690133,0.713398
2,0.0528,1.110049,0.816682,0.785216,0.699798,0.724783
3,0.0235,1.261052,0.826764,0.784723,0.723384,0.741517
4,0.0206,1.301881,0.830431,0.795479,0.744356,0.752519
5,0.0128,1.589761,0.814849,0.773025,0.715037,0.725304
6,0.0137,1.437876,0.831347,0.801464,0.750827,0.759244
7,0.0054,1.597216,0.827681,0.801713,0.729716,0.740445
8,0.0049,1.618837,0.837764,0.800676,0.757147,0.763639
9,0.0036,1.841895,0.829514,0.788824,0.756905,0.75727
10,0.002,1.884716,0.824015,0.768334,0.735124,0.738367


[I 2025-03-23 08:23:52,687] Trial 16 finished with value: 0.7442997468223563 and parameters: {'learning_rate': 0.004365222004666955, 'weight_decay': 0.003, 'warmup_steps': 40}. Best is trial 12 with value: 0.7539819528795496.


Trial 17 with params: {'learning_rate': 0.00044927341984294, 'weight_decay': 0.006, 'warmup_steps': 50}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5251,1.048651,0.728689,0.479148,0.398117,0.412766
2,0.4253,0.980496,0.777269,0.643677,0.585611,0.601752
3,0.1656,1.104535,0.775435,0.692999,0.627551,0.639835
4,0.0775,1.182715,0.779102,0.679451,0.622071,0.631878
5,0.0441,1.316072,0.786434,0.666232,0.626597,0.633216
6,0.0268,1.356175,0.784601,0.667778,0.623349,0.628316
7,0.0156,1.428475,0.790101,0.685379,0.639731,0.644497
8,0.0108,1.450182,0.788268,0.698124,0.63906,0.654365
9,0.0066,1.552097,0.791934,0.684287,0.652706,0.654985
10,0.0051,1.583649,0.799267,0.713647,0.677389,0.680232


[I 2025-03-23 08:26:47,952] Trial 17 pruned. 


Trial 18 with params: {'learning_rate': 0.0026868566033176914, 'weight_decay': 0.01, 'warmup_steps': 18}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7522,0.938488,0.805683,0.781327,0.696846,0.721189
2,0.0627,1.069915,0.817599,0.789212,0.684188,0.713622
3,0.0228,1.241693,0.817599,0.769014,0.721909,0.726918
4,0.0141,1.306811,0.829514,0.824073,0.710339,0.746252
5,0.0124,1.302988,0.827681,0.772086,0.707314,0.723984
6,0.0066,1.346859,0.827681,0.801805,0.732758,0.748596
7,0.0043,1.548342,0.824931,0.777991,0.694893,0.714221
8,0.0026,1.561102,0.823098,0.774741,0.705686,0.727659
9,0.0047,1.510798,0.821265,0.776783,0.72381,0.739397
10,0.0021,1.618331,0.821265,0.782179,0.70879,0.727864


[I 2025-03-23 08:31:04,165] Trial 18 finished with value: 0.7457818157708239 and parameters: {'learning_rate': 0.0026868566033176914, 'weight_decay': 0.01, 'warmup_steps': 18}. Best is trial 12 with value: 0.7539819528795496.


Trial 19 with params: {'learning_rate': 0.0001723017969018313, 'weight_decay': 0.001, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.1192,1.527129,0.606783,0.234008,0.22127,0.205637
2,1.0672,1.194128,0.690192,0.40872,0.357666,0.362046
3,0.6856,1.053748,0.719523,0.487453,0.423765,0.441426
4,0.4724,1.040187,0.729606,0.518789,0.45869,0.477128
5,0.3379,1.05575,0.745188,0.643425,0.524428,0.557198


[I 2025-03-23 08:32:19,706] Trial 19 pruned. 


Trial 20 with params: {'learning_rate': 0.003596222613638124, 'weight_decay': 0.009000000000000001, 'warmup_steps': 39}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6946,0.951003,0.813016,0.73176,0.698606,0.700563
2,0.0526,1.09241,0.816682,0.757545,0.734753,0.729357
3,0.0209,1.335558,0.819432,0.773471,0.715405,0.723983
4,0.0149,1.327761,0.817599,0.750572,0.697945,0.706976
5,0.0143,1.490477,0.824931,0.777317,0.707467,0.726095
6,0.0095,1.524039,0.818515,0.774995,0.715376,0.730622
7,0.0062,1.592193,0.822181,0.789734,0.725247,0.741043
8,0.002,1.757894,0.821265,0.801737,0.70893,0.734242
9,0.0031,1.741638,0.820348,0.75556,0.733751,0.728787
10,0.0031,1.686121,0.824931,0.778538,0.736159,0.743686


[I 2025-03-23 08:37:05,438] Trial 20 finished with value: 0.7449304735823926 and parameters: {'learning_rate': 0.003596222613638124, 'weight_decay': 0.009000000000000001, 'warmup_steps': 39}. Best is trial 12 with value: 0.7539819528795496.


Trial 21 with params: {'learning_rate': 0.0011982109964324202, 'weight_decay': 0.005, 'warmup_steps': 27}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0109,0.97784,0.766269,0.64026,0.558075,0.578673
2,0.129,1.221072,0.793767,0.776538,0.689601,0.708001
3,0.0385,1.168946,0.814849,0.75175,0.695887,0.710614
4,0.0187,1.320937,0.812099,0.770085,0.694598,0.71346
5,0.0095,1.411578,0.80385,0.797704,0.712585,0.736453
6,0.0078,1.408267,0.813932,0.796603,0.69578,0.730287
7,0.0055,1.530079,0.802016,0.752612,0.702108,0.712758
8,0.0057,1.497382,0.814849,0.775675,0.700866,0.721521
9,0.0032,1.564894,0.821265,0.79581,0.713846,0.739014
10,0.0018,1.749343,0.817599,0.789417,0.712039,0.739222


[I 2025-03-23 08:41:54,299] Trial 21 finished with value: 0.7293835821570086 and parameters: {'learning_rate': 0.0011982109964324202, 'weight_decay': 0.005, 'warmup_steps': 27}. Best is trial 12 with value: 0.7539819528795496.


Trial 22 with params: {'learning_rate': 0.004353257368392618, 'weight_decay': 0.0, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6883,0.995131,0.813932,0.801121,0.708797,0.73711
2,0.0525,1.099513,0.835014,0.806507,0.740043,0.757766
3,0.0261,1.27593,0.821265,0.751918,0.689413,0.707895
4,0.018,1.389734,0.83868,0.799453,0.708978,0.735873
5,0.0142,1.402899,0.832264,0.750323,0.729863,0.724924
6,0.0096,1.528293,0.830431,0.743418,0.733341,0.720483
7,0.0113,1.537663,0.832264,0.766231,0.722072,0.727641
8,0.0053,1.540068,0.832264,0.79686,0.751224,0.754087
9,0.0022,1.56806,0.83868,0.770472,0.716593,0.728386
10,0.0007,1.627386,0.83868,0.75015,0.715903,0.718536


[I 2025-03-23 08:47:26,152] Trial 22 finished with value: 0.7374889016600925 and parameters: {'learning_rate': 0.004353257368392618, 'weight_decay': 0.0, 'warmup_steps': 53}. Best is trial 12 with value: 0.7539819528795496.


Trial 23 with params: {'learning_rate': 0.0034888452689862503, 'weight_decay': 0.007, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6606,0.956445,0.816682,0.767667,0.712187,0.725721
2,0.0535,1.032763,0.829514,0.793015,0.736805,0.751754
3,0.0224,1.153221,0.824931,0.752216,0.715551,0.719161
4,0.0161,1.245439,0.832264,0.772128,0.700646,0.712694
5,0.0136,1.280978,0.833181,0.807136,0.740002,0.757637
6,0.0084,1.378674,0.834097,0.784819,0.745705,0.750376
7,0.006,1.356421,0.834097,0.760818,0.720966,0.725531
8,0.0044,1.411986,0.833181,0.811871,0.718236,0.743913
9,0.0026,1.499257,0.831347,0.782596,0.719926,0.735481
10,0.0016,1.630188,0.830431,0.783574,0.733013,0.739065


[I 2025-03-23 08:52:17,706] Trial 23 finished with value: 0.7505093739020182 and parameters: {'learning_rate': 0.0034888452689862503, 'weight_decay': 0.007, 'warmup_steps': 11}. Best is trial 12 with value: 0.7539819528795496.


Trial 24 with params: {'learning_rate': 0.004763049594136491, 'weight_decay': 0.006, 'warmup_steps': 8}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5825,1.017402,0.814849,0.726518,0.684115,0.694762
2,0.0558,1.158993,0.830431,0.786467,0.718732,0.735824
3,0.0277,1.254126,0.825848,0.768069,0.740824,0.742359
4,0.019,1.484557,0.822181,0.779032,0.698309,0.723245
5,0.0197,1.56503,0.812099,0.790323,0.709101,0.73237
6,0.0129,1.713905,0.809349,0.798057,0.68678,0.719825
7,0.0077,1.787736,0.822181,0.810737,0.724036,0.749172
8,0.0066,1.647048,0.827681,0.793723,0.733924,0.74561
9,0.0032,1.744068,0.824015,0.803731,0.720717,0.740887
10,0.0024,1.760014,0.825848,0.788401,0.724831,0.739236


[I 2025-03-23 08:56:57,432] Trial 24 finished with value: 0.7543616053486423 and parameters: {'learning_rate': 0.004763049594136491, 'weight_decay': 0.006, 'warmup_steps': 8}. Best is trial 24 with value: 0.7543616053486423.


Trial 25 with params: {'learning_rate': 0.0037463534041693716, 'weight_decay': 0.005, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6599,0.972769,0.805683,0.742872,0.682014,0.696966
2,0.0548,1.191357,0.80385,0.760585,0.703907,0.712322
3,0.0231,1.254261,0.811182,0.764209,0.70974,0.719614
4,0.0191,1.360776,0.815765,0.789764,0.734994,0.748568
5,0.0114,1.495818,0.807516,0.743226,0.689581,0.706566


[I 2025-03-23 08:58:29,922] Trial 25 pruned. 


Trial 26 with params: {'learning_rate': 0.002121429878518589, 'weight_decay': 0.008, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7658,0.989374,0.7956,0.756239,0.671217,0.699418
2,0.0726,1.187153,0.789184,0.735415,0.651789,0.679558
3,0.0246,1.252658,0.817599,0.737258,0.695508,0.698412
4,0.0165,1.283551,0.822181,0.76627,0.704142,0.719308
5,0.0087,1.428294,0.821265,0.786335,0.711396,0.73263
6,0.0088,1.452031,0.814849,0.72123,0.702342,0.700759
7,0.0064,1.575517,0.821265,0.761567,0.700097,0.714631
8,0.0031,1.653235,0.820348,0.759233,0.696085,0.713246
9,0.0019,1.72172,0.814849,0.753324,0.696842,0.710509
10,0.0021,1.595942,0.825848,0.774544,0.721424,0.736342


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-23 09:04:17,995] Trial 26 finished with value: 0.7340545524146361 and parameters: {'learning_rate': 0.002121429878518589, 'weight_decay': 0.008, 'warmup_steps': 3}. Best is trial 24 with value: 0.7543616053486423.


Trial 27 with params: {'learning_rate': 0.004953004119979254, 'weight_decay': 0.008, 'warmup_steps': 13}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6324,1.062281,0.812099,0.749038,0.694272,0.707192
2,0.0548,1.159445,0.817599,0.772287,0.704157,0.721346
3,0.0292,1.429065,0.799267,0.72133,0.702238,0.696861
4,0.0245,1.593732,0.817599,0.770998,0.70668,0.724092
5,0.0206,1.49888,0.811182,0.724916,0.679546,0.681471


[I 2025-03-23 09:05:44,502] Trial 27 pruned. 


Trial 28 with params: {'learning_rate': 0.004762588578713454, 'weight_decay': 0.005, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5938,1.002063,0.821265,0.787837,0.707747,0.726552
2,0.057,1.154955,0.823098,0.775556,0.692627,0.715388
3,0.0266,1.251168,0.822181,0.789975,0.731345,0.744472
4,0.0202,1.625365,0.821265,0.769988,0.700147,0.713807
5,0.0142,1.416646,0.824015,0.776979,0.702706,0.722713
6,0.011,1.577799,0.829514,0.798024,0.716666,0.734212
7,0.0069,1.714207,0.832264,0.822795,0.725572,0.752566
8,0.0069,1.723058,0.824931,0.788237,0.703995,0.729052
9,0.0059,1.797984,0.837764,0.781739,0.723656,0.739941
10,0.0027,1.827636,0.824931,0.792464,0.712416,0.734537


[I 2025-03-23 09:08:46,408] Trial 28 pruned. 


Trial 29 with params: {'learning_rate': 0.00011735172641973649, 'weight_decay': 0.003, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.2707,1.763413,0.545371,0.140881,0.166917,0.143438
2,1.3543,1.379088,0.653529,0.298247,0.287281,0.279791
3,0.9683,1.191501,0.691109,0.377524,0.344047,0.347633
4,0.7315,1.112117,0.695692,0.422956,0.372952,0.385236
5,0.5719,1.09246,0.709441,0.457211,0.414283,0.426134
6,0.4586,1.072356,0.721357,0.494115,0.458068,0.466165
7,0.3743,1.0922,0.732356,0.54394,0.496469,0.507782
8,0.3096,1.114929,0.71769,0.555915,0.481541,0.500535
9,0.2615,1.136058,0.727773,0.551069,0.497263,0.511824
10,0.2241,1.14765,0.733272,0.558605,0.512486,0.525608


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-23 09:12:02,630] Trial 29 pruned. 


Trial 30 with params: {'learning_rate': 0.0010498743644234646, 'weight_decay': 0.008, 'warmup_steps': 22}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0559,0.971849,0.773602,0.678077,0.572268,0.596492
2,0.147,1.174697,0.797434,0.68484,0.659902,0.657553
3,0.0473,1.273087,0.792851,0.689624,0.673647,0.668667
4,0.022,1.366972,0.793767,0.717497,0.677154,0.677682
5,0.0127,1.462664,0.796517,0.730996,0.680562,0.692211
6,0.007,1.570082,0.793767,0.724193,0.664702,0.684776
7,0.0047,1.570006,0.804766,0.755743,0.695375,0.71123
8,0.0059,1.550778,0.794684,0.694394,0.671761,0.66838
9,0.0031,1.567442,0.802933,0.699206,0.676172,0.67738
10,0.0019,1.694054,0.802016,0.741123,0.692468,0.705271


[I 2025-03-23 09:15:13,687] Trial 30 pruned. 


Trial 31 with params: {'learning_rate': 0.002933412854837344, 'weight_decay': 0.004, 'warmup_steps': 47}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7637,0.954227,0.812099,0.751065,0.665832,0.691713
2,0.0579,0.998824,0.828598,0.798935,0.74165,0.752522
3,0.0202,1.23946,0.816682,0.790766,0.714018,0.73424
4,0.014,1.291068,0.809349,0.789169,0.73596,0.743289
5,0.0093,1.422263,0.819432,0.797074,0.715207,0.740456
6,0.0088,1.49216,0.805683,0.749292,0.706192,0.706466
7,0.0065,1.548121,0.817599,0.730257,0.698383,0.698948
8,0.0059,1.5166,0.823098,0.769721,0.723426,0.732188
9,0.0032,1.580029,0.823098,0.816904,0.733291,0.756939
10,0.0013,1.608487,0.824015,0.807867,0.732865,0.751323


[I 2025-03-23 09:19:57,737] Trial 31 finished with value: 0.7472678577226418 and parameters: {'learning_rate': 0.002933412854837344, 'weight_decay': 0.004, 'warmup_steps': 47}. Best is trial 24 with value: 0.7543616053486423.


Trial 32 with params: {'learning_rate': 0.0037677911362547166, 'weight_decay': 0.003, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7198,0.992065,0.820348,0.77427,0.709407,0.725778
2,0.0529,1.133315,0.822181,0.810176,0.719148,0.740693
3,0.0227,1.200109,0.828598,0.769192,0.737614,0.744404
4,0.0203,1.389828,0.812099,0.762185,0.705194,0.718478
5,0.0126,1.55186,0.828598,0.786453,0.692615,0.717327
6,0.0077,1.525083,0.824931,0.774624,0.687645,0.713695
7,0.0065,1.652637,0.830431,0.756679,0.693297,0.712447
8,0.0043,1.708303,0.826764,0.752701,0.689563,0.704448
9,0.0027,1.728964,0.829514,0.795742,0.709161,0.732399
10,0.0013,1.776034,0.831347,0.794371,0.711872,0.738106


[I 2025-03-23 09:25:19,249] Trial 32 finished with value: 0.7648295204787675 and parameters: {'learning_rate': 0.0037677911362547166, 'weight_decay': 0.003, 'warmup_steps': 51}. Best is trial 32 with value: 0.7648295204787675.


Trial 33 with params: {'learning_rate': 0.004970990522184016, 'weight_decay': 0.004, 'warmup_steps': 49}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6604,1.001779,0.808433,0.755411,0.671459,0.69821
2,0.0555,1.167099,0.816682,0.79215,0.688996,0.718471
3,0.0258,1.310243,0.812099,0.769738,0.714095,0.727829
4,0.0206,1.492127,0.804766,0.724007,0.670256,0.682575
5,0.0216,1.732298,0.812099,0.788257,0.685854,0.718288
6,0.0152,1.610427,0.827681,0.789955,0.712335,0.733994
7,0.0057,1.742571,0.824931,0.755603,0.713242,0.720637
8,0.0055,1.772407,0.826764,0.742482,0.71537,0.717256
9,0.0031,1.84239,0.829514,0.754372,0.701006,0.717043
10,0.0021,1.91991,0.831347,0.715458,0.71153,0.70148


[I 2025-03-23 09:28:14,631] Trial 33 pruned. 


Trial 34 with params: {'learning_rate': 0.0030710558740723376, 'weight_decay': 0.007, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7582,0.939584,0.811182,0.730345,0.679539,0.693906
2,0.055,1.102367,0.816682,0.749725,0.686848,0.705892
3,0.0212,1.187786,0.813016,0.791305,0.738219,0.748151
4,0.0149,1.279654,0.820348,0.78917,0.738784,0.748753
5,0.0112,1.4117,0.818515,0.775438,0.713824,0.724812
6,0.0059,1.547178,0.820348,0.802574,0.720101,0.745448
7,0.0046,1.535506,0.823098,0.802114,0.720928,0.745
8,0.005,1.557763,0.814849,0.785463,0.734175,0.739266
9,0.003,1.536338,0.827681,0.772028,0.735663,0.738146
10,0.0019,1.603279,0.830431,0.803737,0.73652,0.754413


[I 2025-03-23 09:32:42,298] Trial 34 finished with value: 0.7711253273063332 and parameters: {'learning_rate': 0.0030710558740723376, 'weight_decay': 0.007, 'warmup_steps': 52}. Best is trial 34 with value: 0.7711253273063332.


Trial 35 with params: {'learning_rate': 0.002114652803156042, 'weight_decay': 0.009000000000000001, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8558,0.961099,0.794684,0.708461,0.642539,0.659513
2,0.0695,1.187689,0.806599,0.749825,0.697497,0.709588
3,0.022,1.348583,0.802016,0.788116,0.708758,0.732059
4,0.0135,1.336321,0.802933,0.723642,0.696377,0.694769
5,0.0116,1.356587,0.809349,0.78928,0.708613,0.725596
6,0.0053,1.425633,0.818515,0.764575,0.705795,0.720817
7,0.0052,1.488068,0.819432,0.818095,0.715732,0.749652
8,0.0053,1.563849,0.819432,0.791161,0.717142,0.74021
9,0.0024,1.571643,0.817599,0.789063,0.714945,0.736752
10,0.0014,1.71501,0.822181,0.792398,0.716938,0.741778


[I 2025-03-23 09:36:53,581] Trial 35 finished with value: 0.7492587346039846 and parameters: {'learning_rate': 0.002114652803156042, 'weight_decay': 0.009000000000000001, 'warmup_steps': 52}. Best is trial 34 with value: 0.7711253273063332.


Trial 36 with params: {'learning_rate': 0.004986114448457786, 'weight_decay': 0.005, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6608,1.035738,0.802016,0.760963,0.664141,0.692086
2,0.0532,1.289651,0.807516,0.749669,0.694518,0.705682
3,0.03,1.369841,0.826764,0.787064,0.720318,0.731746
4,0.0227,1.3954,0.821265,0.756014,0.682982,0.701335
5,0.014,1.623441,0.824015,0.760114,0.703422,0.715761
6,0.0135,1.666489,0.839597,0.778719,0.726763,0.739592
7,0.0111,1.642148,0.824931,0.80087,0.74184,0.758429
8,0.0068,1.804485,0.819432,0.814532,0.71881,0.748436
9,0.0039,1.831661,0.818515,0.772658,0.74726,0.747816
10,0.002,1.907996,0.823098,0.811954,0.727841,0.753222


[I 2025-03-23 09:41:13,987] Trial 36 finished with value: 0.7581143614146123 and parameters: {'learning_rate': 0.004986114448457786, 'weight_decay': 0.005, 'warmup_steps': 53}. Best is trial 34 with value: 0.7711253273063332.


Trial 37 with params: {'learning_rate': 0.0013546674390960728, 'weight_decay': 0.007, 'warmup_steps': 42}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9919,0.997813,0.779102,0.683764,0.580538,0.60543
2,0.1101,1.154172,0.810266,0.755522,0.681533,0.698784
3,0.0331,1.251753,0.802016,0.77363,0.690493,0.712789
4,0.0175,1.289903,0.810266,0.738856,0.688706,0.698375
5,0.0098,1.423563,0.809349,0.747784,0.702765,0.70579


[I 2025-03-23 09:42:48,152] Trial 37 pruned. 


Trial 38 with params: {'learning_rate': 0.0018594581278176213, 'weight_decay': 0.005, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8942,0.976296,0.794684,0.718645,0.626434,0.657274
2,0.0753,1.209969,0.809349,0.758641,0.686689,0.704013
3,0.0239,1.308387,0.804766,0.753994,0.696479,0.710399
4,0.0178,1.442046,0.809349,0.755524,0.700954,0.713237
5,0.0078,1.404221,0.816682,0.771893,0.695931,0.717058
6,0.0074,1.446064,0.808433,0.763739,0.710281,0.72167
7,0.0046,1.489943,0.824015,0.800131,0.729383,0.746718
8,0.0054,1.470422,0.819432,0.787038,0.708806,0.731216
9,0.0019,1.542103,0.816682,0.777038,0.711163,0.730269
10,0.0008,1.610755,0.821265,0.766092,0.710281,0.726453


[I 2025-03-23 09:45:44,147] Trial 38 pruned. 


Trial 39 with params: {'learning_rate': 0.003212172047209907, 'weight_decay': 0.008, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7493,0.959907,0.820348,0.784058,0.687761,0.718678
2,0.0537,1.193973,0.815765,0.80524,0.695229,0.730377
3,0.0237,1.237274,0.817599,0.762286,0.744272,0.740812
4,0.0146,1.26552,0.831347,0.80804,0.747877,0.759957
5,0.0072,1.430435,0.824015,0.779096,0.724179,0.735619
6,0.007,1.513408,0.816682,0.795814,0.725082,0.745159
7,0.0064,1.712082,0.818515,0.766674,0.706978,0.716227
8,0.0073,1.551977,0.831347,0.781919,0.752907,0.7563
9,0.0043,1.571242,0.835014,0.803454,0.745974,0.761809
10,0.0022,1.572049,0.834097,0.808841,0.749575,0.765047


[I 2025-03-23 09:50:55,904] Trial 39 finished with value: 0.7738245266618828 and parameters: {'learning_rate': 0.003212172047209907, 'weight_decay': 0.008, 'warmup_steps': 51}. Best is trial 39 with value: 0.7738245266618828.


Trial 40 with params: {'learning_rate': 0.004295038430964043, 'weight_decay': 0.009000000000000001, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6925,1.031038,0.816682,0.769907,0.687873,0.712516
2,0.0513,1.146816,0.822181,0.791078,0.736466,0.747587
3,0.0235,1.294504,0.824015,0.774092,0.721752,0.732511
4,0.0195,1.349458,0.815765,0.769027,0.737694,0.745309
5,0.0127,1.589688,0.813016,0.758034,0.700268,0.713367


[I 2025-03-23 09:52:33,408] Trial 40 pruned. 


Trial 41 with params: {'learning_rate': 0.0018394777610745148, 'weight_decay': 0.007, 'warmup_steps': 46}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8833,1.040138,0.79835,0.759145,0.66003,0.690021
2,0.0779,1.212116,0.813016,0.805426,0.704433,0.73522
3,0.0274,1.26182,0.812099,0.789842,0.696462,0.721468
4,0.0132,1.443561,0.809349,0.788621,0.710207,0.73011
5,0.0092,1.389305,0.824015,0.778473,0.712708,0.729147
6,0.0088,1.628614,0.814849,0.804593,0.711199,0.736879
7,0.0066,1.517445,0.824015,0.790845,0.725086,0.74117
8,0.0033,1.504355,0.833181,0.780581,0.726631,0.743118
9,0.0013,1.613666,0.827681,0.764798,0.722681,0.731476
10,0.001,1.63337,0.824931,0.776641,0.728079,0.739597


[I 2025-03-23 09:57:43,533] Trial 41 finished with value: 0.7447229156435791 and parameters: {'learning_rate': 0.0018394777610745148, 'weight_decay': 0.007, 'warmup_steps': 46}. Best is trial 39 with value: 0.7738245266618828.


Trial 42 with params: {'learning_rate': 5.3550149515819593e-05, 'weight_decay': 0.005, 'warmup_steps': 49}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7459,2.251319,0.439963,0.067528,0.100723,0.071762
2,1.9276,1.856227,0.527956,0.139168,0.153859,0.134122
3,1.5994,1.653965,0.575619,0.244868,0.194638,0.181366
4,1.3723,1.498416,0.622365,0.264151,0.236057,0.226855
5,1.2014,1.399613,0.63703,0.28544,0.266108,0.260577


[I 2025-03-23 09:59:10,378] Trial 42 pruned. 


Trial 43 with params: {'learning_rate': 0.004645984924208139, 'weight_decay': 0.007, 'warmup_steps': 40}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6538,0.970468,0.819432,0.771798,0.693966,0.717958
2,0.0511,1.08068,0.83593,0.756945,0.717413,0.72441
3,0.0246,1.232974,0.813016,0.757532,0.716155,0.72485
4,0.0201,1.503013,0.834097,0.803804,0.718701,0.739151
5,0.0168,1.457623,0.823098,0.766888,0.729294,0.733755
6,0.011,1.635991,0.822181,0.813194,0.714214,0.743861
7,0.0079,1.672831,0.822181,0.778508,0.704277,0.725531
8,0.0027,1.696741,0.836847,0.803606,0.745604,0.760114
9,0.0021,1.864619,0.823098,0.787763,0.710861,0.733933
10,0.0042,1.725423,0.834097,0.79457,0.741402,0.75892


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-23 10:04:02,239] Trial 43 finished with value: 0.7751023083936058 and parameters: {'learning_rate': 0.004645984924208139, 'weight_decay': 0.007, 'warmup_steps': 40}. Best is trial 43 with value: 0.7751023083936058.


Trial 44 with params: {'learning_rate': 0.0035048232495508954, 'weight_decay': 0.007, 'warmup_steps': 40}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6989,1.00784,0.812099,0.75848,0.709771,0.718977
2,0.0536,1.044505,0.815765,0.783175,0.756343,0.75382
3,0.0202,1.254992,0.824015,0.783579,0.768795,0.761941
4,0.0184,1.243105,0.826764,0.786699,0.732851,0.746602
5,0.012,1.3478,0.827681,0.791602,0.738714,0.745997
6,0.0092,1.457993,0.831347,0.805351,0.741445,0.759014
7,0.0065,1.597085,0.833181,0.799316,0.742901,0.755612
8,0.0024,1.693674,0.829514,0.792198,0.736983,0.751811
9,0.0018,1.606129,0.833181,0.783746,0.728953,0.741106
10,0.0009,1.691358,0.832264,0.794106,0.728267,0.746554


[I 2025-03-23 10:08:30,109] Trial 44 finished with value: 0.7454508209373278 and parameters: {'learning_rate': 0.0035048232495508954, 'weight_decay': 0.007, 'warmup_steps': 40}. Best is trial 43 with value: 0.7751023083936058.


Trial 45 with params: {'learning_rate': 0.003950715776116587, 'weight_decay': 0.006, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7067,1.088796,0.796517,0.775194,0.680174,0.711
2,0.0516,1.19573,0.820348,0.806141,0.69449,0.727735
3,0.0242,1.197248,0.813016,0.744528,0.713373,0.711081
4,0.0193,1.453237,0.816682,0.754538,0.710225,0.716214
5,0.0111,1.478322,0.819432,0.809347,0.739172,0.756912
6,0.0107,1.503867,0.817599,0.784751,0.702763,0.729097
7,0.0089,1.536204,0.821265,0.792654,0.728046,0.747479
8,0.0058,1.652674,0.819432,0.77329,0.713297,0.73142
9,0.002,1.631834,0.829514,0.757805,0.716775,0.729533
10,0.0026,1.646241,0.831347,0.770464,0.726696,0.739726


[I 2025-03-23 10:13:33,769] Trial 45 finished with value: 0.7404373159171977 and parameters: {'learning_rate': 0.003950715776116587, 'weight_decay': 0.006, 'warmup_steps': 52}. Best is trial 43 with value: 0.7751023083936058.


Trial 46 with params: {'learning_rate': 0.004151547221515673, 'weight_decay': 0.008, 'warmup_steps': 45}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6812,1.003056,0.806599,0.735102,0.686985,0.694894
2,0.0533,1.115887,0.83593,0.786392,0.74057,0.751051
3,0.0202,1.200245,0.820348,0.776788,0.730498,0.735338
4,0.0208,1.29764,0.824931,0.804409,0.750804,0.762546
5,0.0155,1.377226,0.836847,0.809993,0.776981,0.780224
6,0.0107,1.477243,0.824015,0.790991,0.733984,0.746352
7,0.0051,1.623819,0.826764,0.799193,0.733933,0.750721
8,0.0053,1.633304,0.822181,0.804707,0.748792,0.757347
9,0.0044,1.678292,0.824931,0.815791,0.75912,0.77081
10,0.0027,1.718114,0.820348,0.803514,0.745323,0.76045


[I 2025-03-23 10:18:01,275] Trial 46 finished with value: 0.7867104184452819 and parameters: {'learning_rate': 0.004151547221515673, 'weight_decay': 0.008, 'warmup_steps': 45}. Best is trial 46 with value: 0.7867104184452819.


Trial 47 with params: {'learning_rate': 0.003937964991183476, 'weight_decay': 0.008, 'warmup_steps': 41}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.682,0.989859,0.822181,0.800976,0.713371,0.738245
2,0.0505,1.020867,0.833181,0.804091,0.741587,0.755234
3,0.0225,1.237653,0.830431,0.749302,0.724482,0.725483
4,0.0146,1.43337,0.806599,0.783819,0.714943,0.729632
5,0.0139,1.391352,0.819432,0.803879,0.73341,0.752404
6,0.0122,1.607735,0.824015,0.765004,0.715959,0.725095
7,0.0077,1.666966,0.824015,0.778634,0.733887,0.741612
8,0.0045,1.656593,0.818515,0.75771,0.714478,0.719268
9,0.0026,1.635264,0.830431,0.762486,0.721094,0.727074
10,0.0012,1.690294,0.830431,0.780477,0.73581,0.744422


[I 2025-03-23 10:22:08,983] Trial 47 finished with value: 0.7478845062058629 and parameters: {'learning_rate': 0.003937964991183476, 'weight_decay': 0.008, 'warmup_steps': 41}. Best is trial 46 with value: 0.7867104184452819.


Trial 48 with params: {'learning_rate': 0.00015433736178353414, 'weight_decay': 0.01, 'warmup_steps': 15}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.1331,1.588278,0.593951,0.230757,0.207066,0.191201
2,1.1486,1.23716,0.673694,0.35808,0.335101,0.330816
3,0.7644,1.070571,0.71769,0.421175,0.400038,0.400057
4,0.5428,1.033251,0.734189,0.508282,0.45733,0.472099
5,0.3979,1.067899,0.734189,0.550146,0.486346,0.501092
6,0.2916,1.068803,0.736022,0.560682,0.493027,0.512297
7,0.2196,1.106059,0.742438,0.565961,0.523333,0.533647
8,0.1716,1.148436,0.742438,0.622425,0.554672,0.570961
9,0.1334,1.183458,0.749771,0.626939,0.55209,0.570933
10,0.1081,1.208179,0.746104,0.626828,0.583574,0.591271


[I 2025-03-23 10:25:07,826] Trial 48 pruned. 


Trial 49 with params: {'learning_rate': 0.001184050583033169, 'weight_decay': 0.01, 'warmup_steps': 42}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0492,0.981299,0.780935,0.719587,0.601753,0.629702
2,0.1263,1.178536,0.796517,0.755971,0.680729,0.700934
3,0.0387,1.19677,0.800183,0.75547,0.684598,0.705586
4,0.0209,1.339607,0.813016,0.718575,0.674987,0.680872
5,0.0091,1.39389,0.816682,0.746911,0.714459,0.715671


[I 2025-03-23 10:26:34,860] Trial 49 pruned. 


Trial 50 with params: {'learning_rate': 0.004684409746772179, 'weight_decay': 0.006, 'warmup_steps': 36}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6495,0.9645,0.823098,0.754019,0.697133,0.711942
2,0.0518,1.171144,0.822181,0.784295,0.723283,0.738119
3,0.0244,1.292591,0.830431,0.79475,0.705231,0.728422
4,0.0209,1.271344,0.836847,0.796055,0.736446,0.753884
5,0.0172,1.505039,0.823098,0.788136,0.72261,0.742889
6,0.0094,1.616494,0.818515,0.751857,0.709926,0.713054
7,0.0105,1.628204,0.830431,0.802208,0.724941,0.74424
8,0.0062,1.682111,0.832264,0.778425,0.70408,0.721573
9,0.0032,1.658106,0.832264,0.791582,0.733786,0.749582
10,0.0015,1.686728,0.832264,0.806058,0.746888,0.764434


[I 2025-03-23 10:31:05,063] Trial 50 finished with value: 0.7793485711522038 and parameters: {'learning_rate': 0.004684409746772179, 'weight_decay': 0.006, 'warmup_steps': 36}. Best is trial 46 with value: 0.7867104184452819.


Trial 51 with params: {'learning_rate': 0.004560958345720308, 'weight_decay': 0.008, 'warmup_steps': 28}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6485,0.988321,0.805683,0.734887,0.651293,0.677147
2,0.0529,1.071,0.820348,0.789095,0.701897,0.728487
3,0.0253,1.390028,0.815765,0.780354,0.689663,0.716457
4,0.0202,1.409987,0.827681,0.79168,0.702261,0.725334
5,0.0157,1.420089,0.827681,0.796996,0.724795,0.746112
6,0.0121,1.3904,0.828598,0.743992,0.724021,0.715967
7,0.0088,1.564169,0.827681,0.781752,0.716967,0.733904
8,0.0065,1.623013,0.827681,0.783227,0.721697,0.736068
9,0.0035,1.6934,0.826764,0.808804,0.727294,0.747958
10,0.0027,1.625854,0.824931,0.769726,0.735535,0.738519


[I 2025-03-23 10:35:50,571] Trial 51 finished with value: 0.7583320326768519 and parameters: {'learning_rate': 0.004560958345720308, 'weight_decay': 0.008, 'warmup_steps': 28}. Best is trial 46 with value: 0.7867104184452819.


Trial 52 with params: {'learning_rate': 0.004885590720769151, 'weight_decay': 0.007, 'warmup_steps': 40}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6444,0.960448,0.823098,0.72316,0.706561,0.698642
2,0.0535,1.256561,0.819432,0.772171,0.713217,0.730616
3,0.0277,1.24981,0.826764,0.783762,0.714595,0.737771
4,0.0237,1.402668,0.826764,0.784858,0.721235,0.731728
5,0.0144,1.481047,0.830431,0.784585,0.744398,0.747795
6,0.0135,1.516062,0.833181,0.826911,0.742514,0.766136
7,0.0104,1.511264,0.83593,0.78924,0.752896,0.757503
8,0.0042,1.711367,0.830431,0.80621,0.731617,0.753136
9,0.0032,1.674806,0.829514,0.812443,0.721829,0.749296
10,0.0018,1.760959,0.84143,0.792657,0.740148,0.753909


[I 2025-03-23 10:40:36,324] Trial 52 finished with value: 0.7676622516384647 and parameters: {'learning_rate': 0.004885590720769151, 'weight_decay': 0.007, 'warmup_steps': 40}. Best is trial 46 with value: 0.7867104184452819.


Trial 53 with params: {'learning_rate': 0.0049388754339439985, 'weight_decay': 0.006, 'warmup_steps': 32}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6449,0.975695,0.814849,0.772962,0.672097,0.703094
2,0.0572,1.131078,0.821265,0.781446,0.702395,0.721037
3,0.03,1.244129,0.825848,0.742042,0.724297,0.724131
4,0.0205,1.394678,0.824015,0.759817,0.678283,0.701369
5,0.0171,1.508352,0.832264,0.774955,0.712004,0.726105
6,0.0097,1.544476,0.839597,0.811624,0.734951,0.756323
7,0.0066,1.563942,0.836847,0.788098,0.737838,0.752454
8,0.0048,1.740601,0.821265,0.745271,0.708296,0.714596
9,0.0063,1.826979,0.814849,0.75639,0.716875,0.724243
10,0.0047,1.851045,0.820348,0.75642,0.697253,0.717413


[I 2025-03-23 10:44:02,810] Trial 53 pruned. 


Trial 54 with params: {'learning_rate': 0.004491670103225964, 'weight_decay': 0.008, 'warmup_steps': 41}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6659,1.08974,0.802016,0.727206,0.681422,0.690279
2,0.0528,1.107573,0.818515,0.768245,0.720236,0.727235
3,0.0237,1.159822,0.833181,0.794433,0.741426,0.756174
4,0.0213,1.303778,0.832264,0.792939,0.730248,0.747278
5,0.0124,1.364361,0.833181,0.811658,0.740781,0.76227
6,0.0093,1.489487,0.825848,0.774211,0.733338,0.737842
7,0.0098,1.588987,0.830431,0.777375,0.739383,0.747874
8,0.0037,1.735377,0.824931,0.784088,0.739115,0.746235
9,0.0041,1.771995,0.824015,0.765654,0.753373,0.74629
10,0.0023,1.797879,0.834097,0.809446,0.741948,0.762625


[I 2025-03-23 10:48:47,460] Trial 54 finished with value: 0.7322752597783031 and parameters: {'learning_rate': 0.004491670103225964, 'weight_decay': 0.008, 'warmup_steps': 41}. Best is trial 46 with value: 0.7867104184452819.


Trial 55 with params: {'learning_rate': 0.0008953750478722926, 'weight_decay': 0.0, 'warmup_steps': 21}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1178,0.981275,0.764436,0.638216,0.525584,0.554882
2,0.1773,1.109193,0.788268,0.72862,0.655507,0.671949
3,0.0569,1.234728,0.785518,0.713141,0.669018,0.670224
4,0.0262,1.343477,0.786434,0.700322,0.643602,0.654216
5,0.0142,1.469484,0.806599,0.746791,0.702271,0.710204
6,0.0093,1.428809,0.8011,0.732032,0.687528,0.698404
7,0.0054,1.535534,0.808433,0.771456,0.687292,0.707325
8,0.0048,1.569931,0.8011,0.711365,0.688459,0.684295
9,0.0027,1.468158,0.821265,0.815588,0.723829,0.749472
10,0.0014,1.583667,0.808433,0.723683,0.685179,0.691828


[I 2025-03-23 10:52:02,706] Trial 55 pruned. 


Trial 56 with params: {'learning_rate': 0.003845392107804202, 'weight_decay': 0.005, 'warmup_steps': 36}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6852,1.02072,0.804766,0.753222,0.679685,0.699012
2,0.0526,1.136815,0.810266,0.771989,0.699953,0.721399
3,0.0221,1.394073,0.804766,0.734393,0.688192,0.699191
4,0.0109,1.478457,0.815765,0.793166,0.727943,0.749776
5,0.0126,1.51322,0.822181,0.774621,0.720142,0.734721
6,0.014,1.562174,0.817599,0.75715,0.702591,0.716627
7,0.0076,1.694599,0.829514,0.780341,0.740862,0.742529
8,0.0035,1.730604,0.824015,0.765679,0.727511,0.730535
9,0.0022,1.755949,0.828598,0.778366,0.735755,0.739692
10,0.0019,1.794436,0.829514,0.798034,0.737477,0.750029


[I 2025-03-23 10:56:36,358] Trial 56 finished with value: 0.7645057282764928 and parameters: {'learning_rate': 0.003845392107804202, 'weight_decay': 0.005, 'warmup_steps': 36}. Best is trial 46 with value: 0.7867104184452819.


Trial 57 with params: {'learning_rate': 0.0047009420237326535, 'weight_decay': 0.007, 'warmup_steps': 45}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6686,1.031353,0.812099,0.745754,0.681747,0.698949
2,0.0529,1.112424,0.819432,0.767358,0.728295,0.729453
3,0.0262,1.22824,0.824931,0.746811,0.710076,0.717388
4,0.0211,1.34919,0.818515,0.784573,0.717378,0.733572
5,0.0174,1.497304,0.825848,0.761672,0.717281,0.721388
6,0.0101,1.605474,0.824931,0.757726,0.70119,0.716489
7,0.0093,1.715173,0.825848,0.773795,0.728766,0.735988
8,0.0077,1.573097,0.832264,0.744688,0.732511,0.727825
9,0.0053,1.734728,0.832264,0.746083,0.72535,0.72574
10,0.0023,1.904209,0.822181,0.743872,0.700441,0.707542


[I 2025-03-23 10:59:48,793] Trial 57 pruned. 


Trial 58 with params: {'learning_rate': 0.002063961484993637, 'weight_decay': 0.008, 'warmup_steps': 50}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8567,1.006491,0.791934,0.737144,0.662697,0.678539
2,0.0695,1.158431,0.805683,0.75642,0.692363,0.708921
3,0.0243,1.318889,0.802933,0.793625,0.695372,0.721636
4,0.0135,1.365511,0.811182,0.78213,0.712765,0.733415
5,0.0088,1.390258,0.814849,0.780836,0.720364,0.733836
6,0.0068,1.438035,0.824015,0.806273,0.728319,0.748951
7,0.0052,1.486454,0.824015,0.812826,0.73536,0.754473
8,0.0046,1.486476,0.824015,0.794612,0.726863,0.740713
9,0.0017,1.508743,0.826764,0.810523,0.736006,0.755801
10,0.0007,1.5733,0.826764,0.826077,0.738515,0.76596


[I 2025-03-23 11:04:52,850] Trial 58 finished with value: 0.7653783732654805 and parameters: {'learning_rate': 0.002063961484993637, 'weight_decay': 0.008, 'warmup_steps': 50}. Best is trial 46 with value: 0.7867104184452819.


Trial 59 with params: {'learning_rate': 0.0038067411088025666, 'weight_decay': 0.006, 'warmup_steps': 37}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.688,1.070909,0.807516,0.779903,0.691134,0.715163
2,0.0509,1.141723,0.824931,0.729306,0.70032,0.699636
3,0.0213,1.387145,0.816682,0.782221,0.723376,0.737639
4,0.0186,1.382367,0.817599,0.763328,0.698801,0.712386
5,0.0132,1.470579,0.817599,0.766419,0.705577,0.716252


[I 2025-03-23 11:06:19,305] Trial 59 pruned. 


Trial 60 with params: {'learning_rate': 0.004791875035680076, 'weight_decay': 0.008, 'warmup_steps': 42}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6481,1.067488,0.821265,0.775019,0.681602,0.713084
2,0.0536,1.081926,0.832264,0.807431,0.725303,0.746987
3,0.0243,1.310991,0.816682,0.747965,0.701207,0.709302
4,0.0188,1.277254,0.828598,0.785349,0.760839,0.752812
5,0.0157,1.524697,0.813932,0.762897,0.685077,0.70198
6,0.0102,1.479485,0.834097,0.788621,0.71793,0.731126
7,0.0099,1.649051,0.828598,0.76354,0.696861,0.710273
8,0.0053,1.712302,0.830431,0.803333,0.710364,0.734678
9,0.005,1.755685,0.832264,0.80684,0.726937,0.74783
10,0.002,1.818206,0.83593,0.807149,0.72393,0.748079


[I 2025-03-23 11:11:06,225] Trial 60 finished with value: 0.7606641931383625 and parameters: {'learning_rate': 0.004791875035680076, 'weight_decay': 0.008, 'warmup_steps': 42}. Best is trial 46 with value: 0.7867104184452819.


Trial 61 with params: {'learning_rate': 0.0008876822472990228, 'weight_decay': 0.007, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1829,1.002988,0.764436,0.651953,0.535241,0.565302
2,0.1785,1.145841,0.780935,0.697865,0.655128,0.659338
3,0.0553,1.220781,0.799267,0.726552,0.68016,0.691229
4,0.0251,1.351654,0.785518,0.699164,0.654816,0.661298
5,0.016,1.442842,0.79835,0.706877,0.672243,0.676568


[I 2025-03-23 11:12:36,556] Trial 61 pruned. 


Trial 62 with params: {'learning_rate': 0.001986032077082638, 'weight_decay': 0.008, 'warmup_steps': 49}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8713,1.034098,0.791017,0.70131,0.636747,0.650326
2,0.0748,1.207832,0.804766,0.786484,0.677892,0.709572
3,0.0246,1.244315,0.813016,0.769063,0.700694,0.716316
4,0.0175,1.202994,0.824015,0.775878,0.737607,0.74269
5,0.0081,1.345875,0.822181,0.783461,0.732969,0.740865
6,0.0066,1.523955,0.815765,0.795312,0.720011,0.743643
7,0.0068,1.519688,0.816682,0.809441,0.708305,0.738985
8,0.0034,1.548713,0.823098,0.791447,0.716164,0.739152
9,0.0022,1.533879,0.818515,0.788444,0.708648,0.73514
10,0.0009,1.649924,0.824931,0.809288,0.716333,0.746688


[I 2025-03-23 11:17:24,123] Trial 62 finished with value: 0.7390144661030424 and parameters: {'learning_rate': 0.001986032077082638, 'weight_decay': 0.008, 'warmup_steps': 49}. Best is trial 46 with value: 0.7867104184452819.


Trial 63 with params: {'learning_rate': 0.0024272400409472814, 'weight_decay': 0.007, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8164,0.975655,0.802016,0.760292,0.695773,0.715285
2,0.0635,1.101192,0.813016,0.795842,0.719259,0.734616
3,0.0222,1.24965,0.822181,0.795135,0.724393,0.741647
4,0.0141,1.322213,0.823098,0.776382,0.706471,0.721006
5,0.01,1.432081,0.820348,0.812863,0.723036,0.749071
6,0.0073,1.387626,0.832264,0.809014,0.724648,0.746421
7,0.0044,1.44758,0.813016,0.798146,0.710585,0.733664
8,0.0042,1.497698,0.827681,0.798791,0.736097,0.745974
9,0.0038,1.539761,0.825848,0.797412,0.722789,0.74158
10,0.0016,1.628284,0.834097,0.794489,0.746292,0.757945


[I 2025-03-23 11:22:09,898] Trial 63 finished with value: 0.7617427364024532 and parameters: {'learning_rate': 0.0024272400409472814, 'weight_decay': 0.007, 'warmup_steps': 52}. Best is trial 46 with value: 0.7867104184452819.


Trial 64 with params: {'learning_rate': 0.00011912397327149118, 'weight_decay': 0.006, 'warmup_steps': 22}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.2906,1.743632,0.552704,0.140418,0.170329,0.145956
2,1.3469,1.368855,0.648029,0.285069,0.276069,0.267703
3,0.9647,1.183709,0.696609,0.38663,0.351326,0.353711
4,0.7284,1.093899,0.714024,0.431603,0.388032,0.40078
5,0.5701,1.076781,0.725023,0.486836,0.425365,0.439866
6,0.4538,1.054542,0.730522,0.57958,0.4758,0.504935
7,0.3677,1.067792,0.737855,0.572408,0.504387,0.524601
8,0.3041,1.09695,0.731439,0.609186,0.517192,0.543141
9,0.2547,1.101304,0.750687,0.617812,0.548707,0.567447
10,0.2168,1.111698,0.749771,0.626039,0.553648,0.574393


[I 2025-03-23 11:25:11,651] Trial 64 pruned. 


Trial 65 with params: {'learning_rate': 0.000241251747353242, 'weight_decay': 0.009000000000000001, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.9122,1.330394,0.651696,0.293047,0.271355,0.26288
2,0.8143,1.058177,0.713107,0.435513,0.410701,0.409974
3,0.4539,1.009523,0.750687,0.585796,0.503439,0.525223
4,0.2704,1.070049,0.744271,0.598243,0.551753,0.562004
5,0.1658,1.13475,0.756187,0.634432,0.562915,0.579313


[I 2025-03-23 11:26:41,586] Trial 65 pruned. 


Trial 66 with params: {'learning_rate': 0.0035816806287740227, 'weight_decay': 0.01, 'warmup_steps': 45}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7125,0.985686,0.814849,0.748218,0.677842,0.69793
2,0.0512,1.070507,0.829514,0.797247,0.7415,0.75256
3,0.0235,1.243685,0.829514,0.809469,0.74016,0.758782
4,0.0156,1.326305,0.826764,0.798864,0.729322,0.748573
5,0.0106,1.44063,0.822181,0.779496,0.726494,0.732043
6,0.0074,1.543431,0.84418,0.774622,0.728157,0.741096
7,0.0084,1.56265,0.834097,0.821601,0.752105,0.771315
8,0.0062,1.599835,0.83593,0.805075,0.730447,0.748475
9,0.0025,1.653437,0.83868,0.801267,0.726601,0.744643
10,0.0012,1.767159,0.837764,0.790252,0.719042,0.737061


[I 2025-03-23 11:31:34,358] Trial 66 finished with value: 0.749856190844934 and parameters: {'learning_rate': 0.0035816806287740227, 'weight_decay': 0.01, 'warmup_steps': 45}. Best is trial 46 with value: 0.7867104184452819.


Trial 67 with params: {'learning_rate': 0.001889152260617971, 'weight_decay': 0.007, 'warmup_steps': 35}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8627,1.014277,0.802933,0.744398,0.671692,0.690411
2,0.0773,1.12956,0.810266,0.772584,0.70051,0.721722
3,0.0267,1.30884,0.80385,0.767235,0.702337,0.717618
4,0.0155,1.309284,0.818515,0.760408,0.739054,0.736677
5,0.0093,1.475901,0.817599,0.793229,0.732122,0.747367
6,0.0063,1.552288,0.822181,0.79873,0.735911,0.754499
7,0.0032,1.618373,0.824931,0.811171,0.733325,0.754621
8,0.005,1.62646,0.817599,0.782272,0.729362,0.739231
9,0.0025,1.572061,0.813932,0.795241,0.718983,0.738721
10,0.0015,1.634727,0.829514,0.802489,0.733459,0.750199


[I 2025-03-23 11:36:01,053] Trial 67 finished with value: 0.7538621407170524 and parameters: {'learning_rate': 0.001889152260617971, 'weight_decay': 0.007, 'warmup_steps': 35}. Best is trial 46 with value: 0.7867104184452819.


Trial 68 with params: {'learning_rate': 0.0024008782510295096, 'weight_decay': 0.006, 'warmup_steps': 42}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7968,1.02283,0.814849,0.746271,0.692413,0.703417
2,0.0643,1.156751,0.812099,0.792094,0.697868,0.724463
3,0.0215,1.262362,0.812099,0.757693,0.711698,0.716295
4,0.0143,1.297258,0.821265,0.741718,0.703166,0.709774
5,0.0068,1.5155,0.814849,0.736513,0.714683,0.711238


[I 2025-03-23 11:37:29,262] Trial 68 pruned. 


Trial 69 with params: {'learning_rate': 0.003123228876646941, 'weight_decay': 0.008, 'warmup_steps': 49}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7533,0.931977,0.804766,0.715913,0.662569,0.675943
2,0.0552,1.094549,0.817599,0.782483,0.724952,0.731794
3,0.0213,1.24472,0.806599,0.761918,0.698283,0.715931
4,0.0155,1.302434,0.826764,0.818694,0.720205,0.746885
5,0.0126,1.365929,0.826764,0.788094,0.723053,0.74157
6,0.0074,1.399771,0.833181,0.793325,0.731968,0.748268
7,0.0065,1.472327,0.829514,0.781516,0.71907,0.735122
8,0.0042,1.539266,0.831347,0.791167,0.726519,0.744641
9,0.0033,1.655557,0.817599,0.758129,0.71335,0.723878
10,0.0014,1.662698,0.820348,0.772875,0.720558,0.733643


[I 2025-03-23 11:40:44,051] Trial 69 pruned. 


Trial 70 with params: {'learning_rate': 0.004899769349678546, 'weight_decay': 0.006, 'warmup_steps': 38}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6469,1.133776,0.806599,0.777234,0.697188,0.722961
2,0.0539,1.2805,0.80385,0.778872,0.635492,0.676653
3,0.0277,1.273523,0.813016,0.75108,0.713467,0.720562
4,0.0212,1.551176,0.816682,0.785913,0.707717,0.728663
5,0.0163,1.559101,0.824931,0.76297,0.702065,0.714576
6,0.0092,1.765706,0.814849,0.786124,0.708245,0.725985
7,0.009,1.855787,0.813932,0.760509,0.709957,0.717886
8,0.0082,1.729192,0.823098,0.784265,0.72389,0.739779
9,0.0035,1.783339,0.824931,0.779332,0.72212,0.734517
10,0.002,1.809312,0.835014,0.798225,0.746622,0.759059


[I 2025-03-23 11:45:42,671] Trial 70 finished with value: 0.7259998379727992 and parameters: {'learning_rate': 0.004899769349678546, 'weight_decay': 0.006, 'warmup_steps': 38}. Best is trial 46 with value: 0.7867104184452819.


Trial 71 with params: {'learning_rate': 0.0018746833793324906, 'weight_decay': 0.008, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.89,0.981567,0.789184,0.684185,0.610301,0.631585
2,0.0769,1.175904,0.813016,0.789857,0.706079,0.728339
3,0.0241,1.257986,0.812099,0.773645,0.718213,0.734009
4,0.0121,1.414359,0.80385,0.758193,0.690313,0.705721
5,0.0102,1.42912,0.813016,0.793219,0.720303,0.740755
6,0.0056,1.395819,0.824015,0.798893,0.703217,0.729119
7,0.0093,1.41426,0.820348,0.804112,0.71361,0.741134
8,0.0026,1.628672,0.815765,0.800745,0.698653,0.730521
9,0.0017,1.674873,0.810266,0.775999,0.69645,0.720922
10,0.0006,1.691743,0.819432,0.797394,0.717959,0.740644


[I 2025-03-23 11:49:08,808] Trial 71 pruned. 


Trial 72 with params: {'learning_rate': 5.342663786555515e-05, 'weight_decay': 0.001, 'warmup_steps': 24}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7143,2.241209,0.44088,0.073414,0.101335,0.073433
2,1.9289,1.866061,0.530706,0.136333,0.155366,0.134795
3,1.6081,1.666628,0.580202,0.237163,0.197411,0.183206
4,1.3788,1.511863,0.618698,0.270728,0.235181,0.226972
5,1.2057,1.41368,0.632447,0.298182,0.266245,0.261782
6,1.0697,1.331637,0.649863,0.323971,0.293343,0.290815
7,0.9636,1.282673,0.667278,0.369902,0.324508,0.326625
8,0.8772,1.249844,0.670944,0.372488,0.332997,0.334504
9,0.8102,1.216418,0.681027,0.364983,0.342457,0.341473
10,0.7533,1.20107,0.687443,0.40543,0.366518,0.371978


[I 2025-03-23 11:52:08,958] Trial 72 pruned. 


Trial 73 with params: {'learning_rate': 5.953168512495511e-05, 'weight_decay': 0.01, 'warmup_steps': 46}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6842,2.172505,0.454629,0.076762,0.107105,0.079561
2,1.8496,1.780897,0.549038,0.143465,0.168306,0.148182
3,1.5141,1.585163,0.598533,0.251154,0.213157,0.202473
4,1.2822,1.432981,0.630614,0.277137,0.251686,0.247493
5,1.1057,1.33788,0.651696,0.316931,0.284768,0.284346


[I 2025-03-23 11:53:39,238] Trial 73 pruned. 


Trial 74 with params: {'learning_rate': 0.00371389679304936, 'weight_decay': 0.007, 'warmup_steps': 47}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7098,1.028884,0.816682,0.756063,0.688119,0.702796
2,0.0531,1.047522,0.832264,0.796389,0.743984,0.754066
3,0.0213,1.253482,0.83868,0.788773,0.746874,0.756766
4,0.0156,1.356212,0.83593,0.829631,0.756956,0.777837
5,0.0095,1.440607,0.828598,0.78964,0.719421,0.73677
6,0.0132,1.419433,0.840513,0.80932,0.732214,0.747991
7,0.0076,1.575313,0.822181,0.801817,0.72674,0.744509
8,0.0053,1.686558,0.812099,0.75507,0.704736,0.712992
9,0.0041,1.573932,0.83868,0.776734,0.741702,0.748325
10,0.0011,1.648589,0.83593,0.783483,0.745845,0.752095


[I 2025-03-23 11:58:32,681] Trial 74 finished with value: 0.7565859828496013 and parameters: {'learning_rate': 0.00371389679304936, 'weight_decay': 0.007, 'warmup_steps': 47}. Best is trial 46 with value: 0.7867104184452819.


Trial 75 with params: {'learning_rate': 0.0023274987431902666, 'weight_decay': 0.002, 'warmup_steps': 46}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8214,0.976412,0.802933,0.712508,0.664251,0.669963
2,0.063,1.144724,0.813932,0.764463,0.724351,0.727674
3,0.0222,1.111481,0.826764,0.723625,0.695507,0.696825
4,0.0124,1.468521,0.813016,0.778486,0.702797,0.720063
5,0.0106,1.383521,0.814849,0.734278,0.696905,0.701639


[I 2025-03-23 12:00:26,155] Trial 75 pruned. 


Trial 76 with params: {'learning_rate': 0.0022357052221614326, 'weight_decay': 0.009000000000000001, 'warmup_steps': 36}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8229,0.971065,0.79835,0.749629,0.670912,0.694892
2,0.067,1.111554,0.816682,0.746008,0.709204,0.713877
3,0.0235,1.297593,0.814849,0.775514,0.726139,0.730991
4,0.015,1.368808,0.815765,0.762567,0.706895,0.722107
5,0.0084,1.3245,0.830431,0.776589,0.741013,0.744794
6,0.0057,1.610805,0.812099,0.7758,0.715701,0.729365
7,0.0067,1.653603,0.808433,0.771281,0.701029,0.720909
8,0.0049,1.637776,0.823098,0.779173,0.712941,0.727555
9,0.0025,1.622683,0.828598,0.7891,0.712998,0.735579
10,0.0009,1.625837,0.829514,0.794114,0.726883,0.745604


[I 2025-03-23 12:05:15,459] Trial 76 finished with value: 0.7532430099266811 and parameters: {'learning_rate': 0.0022357052221614326, 'weight_decay': 0.009000000000000001, 'warmup_steps': 36}. Best is trial 46 with value: 0.7867104184452819.


Trial 77 with params: {'learning_rate': 0.001007761125954244, 'weight_decay': 0.01, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1305,0.973144,0.769936,0.660197,0.548494,0.576014
2,0.1535,1.121059,0.786434,0.723182,0.645773,0.659899
3,0.0482,1.249487,0.794684,0.709184,0.670181,0.670687
4,0.0227,1.389614,0.785518,0.721321,0.64367,0.665504
5,0.0135,1.39125,0.799267,0.70492,0.672176,0.677748


[I 2025-03-23 12:06:45,955] Trial 77 pruned. 


Trial 78 with params: {'learning_rate': 0.0025157799606248176, 'weight_decay': 0.003, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8022,0.958163,0.812099,0.765223,0.695727,0.713551
2,0.0615,1.033879,0.823098,0.799442,0.70848,0.732953
3,0.0215,1.18863,0.817599,0.766623,0.716121,0.723779
4,0.0134,1.382973,0.813932,0.74577,0.706479,0.70927
5,0.0104,1.336818,0.829514,0.797424,0.716767,0.741687
6,0.0073,1.354495,0.826764,0.782463,0.730488,0.740188
7,0.0041,1.499603,0.824931,0.79702,0.743866,0.757246
8,0.0043,1.450485,0.831347,0.813572,0.761827,0.773123
9,0.0018,1.616916,0.836847,0.813209,0.744917,0.763116
10,0.0017,1.628773,0.831347,0.828616,0.725124,0.754219


[I 2025-03-23 12:11:26,769] Trial 78 finished with value: 0.7709826701201915 and parameters: {'learning_rate': 0.0025157799606248176, 'weight_decay': 0.003, 'warmup_steps': 52}. Best is trial 46 with value: 0.7867104184452819.


Trial 79 with params: {'learning_rate': 0.000938335717379748, 'weight_decay': 0.004, 'warmup_steps': 40}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1404,0.948839,0.770852,0.639698,0.56326,0.580235
2,0.1627,1.132081,0.7956,0.743521,0.674715,0.690994
3,0.0488,1.274091,0.790101,0.723737,0.656857,0.674542
4,0.0237,1.358311,0.793767,0.720316,0.659781,0.67302
5,0.0134,1.349069,0.807516,0.747607,0.693823,0.705693
6,0.0086,1.513808,0.7956,0.71477,0.666507,0.674151
7,0.0081,1.523074,0.79835,0.720796,0.68112,0.686982
8,0.0048,1.609943,0.811182,0.726234,0.683857,0.696444
9,0.0022,1.51948,0.812099,0.752051,0.699484,0.713964
10,0.0023,1.680792,0.810266,0.774332,0.709433,0.728


[I 2025-03-23 12:14:30,496] Trial 79 pruned. 


Trial 80 with params: {'learning_rate': 0.0009294201414546684, 'weight_decay': 0.001, 'warmup_steps': 47}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1556,0.967804,0.761687,0.63622,0.528685,0.557643
2,0.1697,1.1429,0.784601,0.729233,0.656569,0.666034
3,0.0551,1.239537,0.792851,0.717829,0.655777,0.669701
4,0.0253,1.345222,0.796517,0.723099,0.651176,0.668565
5,0.0133,1.439078,0.8011,0.71718,0.686112,0.688923


[I 2025-03-23 12:16:17,562] Trial 80 pruned. 


Trial 81 with params: {'learning_rate': 0.0001836528191727622, 'weight_decay': 0.0, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0287,1.485444,0.607699,0.231192,0.218474,0.20282
2,1.028,1.149105,0.696609,0.410233,0.375551,0.375588
3,0.6511,1.040656,0.727773,0.503369,0.44905,0.462975
4,0.4385,1.051285,0.735105,0.511454,0.463762,0.476231
5,0.3043,1.090018,0.733272,0.567523,0.491507,0.513793
6,0.2098,1.117024,0.741522,0.593083,0.553991,0.560564
7,0.1501,1.206498,0.748854,0.600231,0.559945,0.567954
8,0.1115,1.205695,0.745188,0.614674,0.570013,0.580958
9,0.0834,1.258098,0.754354,0.62771,0.574762,0.584422
10,0.0647,1.346869,0.752521,0.651616,0.593487,0.607994


[I 2025-03-23 12:19:24,897] Trial 81 pruned. 


Trial 82 with params: {'learning_rate': 0.004772550895126975, 'weight_decay': 0.002, 'warmup_steps': 50}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6648,1.049162,0.810266,0.771086,0.681488,0.709863
2,0.0531,1.115655,0.818515,0.76863,0.725866,0.730396
3,0.0253,1.418049,0.806599,0.777381,0.689266,0.708655
4,0.0204,1.417135,0.815765,0.785619,0.719222,0.735984
5,0.016,1.583762,0.799267,0.777955,0.690726,0.714553


[I 2025-03-23 12:21:09,333] Trial 82 pruned. 


Trial 83 with params: {'learning_rate': 0.0036337972465798085, 'weight_decay': 0.003, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7194,1.024073,0.806599,0.77695,0.714052,0.72318
2,0.0529,1.181284,0.824931,0.807172,0.72753,0.745764
3,0.0245,1.17213,0.826764,0.765403,0.74163,0.742419
4,0.0143,1.328737,0.831347,0.811303,0.721594,0.751404
5,0.0101,1.384763,0.836847,0.795638,0.733432,0.749795
6,0.011,1.439689,0.831347,0.802276,0.748558,0.759803
7,0.0067,1.542705,0.828598,0.792246,0.744109,0.753208
8,0.0056,1.530064,0.835014,0.79239,0.748607,0.756391
9,0.0029,1.604272,0.828598,0.782923,0.731975,0.742039
10,0.0017,1.610965,0.831347,0.782284,0.734326,0.743784


[I 2025-03-23 12:26:12,667] Trial 83 finished with value: 0.7561416547333457 and parameters: {'learning_rate': 0.0036337972465798085, 'weight_decay': 0.003, 'warmup_steps': 52}. Best is trial 46 with value: 0.7867104184452819.


Trial 84 with params: {'learning_rate': 0.0013775764696349329, 'weight_decay': 0.003, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.012,0.945362,0.776352,0.675651,0.592519,0.614227
2,0.1085,1.141624,0.805683,0.751659,0.683527,0.700318
3,0.0314,1.221436,0.809349,0.772809,0.697182,0.720301
4,0.0177,1.270969,0.810266,0.753219,0.69928,0.714654
5,0.0093,1.411333,0.805683,0.760432,0.691358,0.710646


[I 2025-03-23 12:27:47,642] Trial 84 pruned. 


Trial 85 with params: {'learning_rate': 0.004540864745076144, 'weight_decay': 0.008, 'warmup_steps': 32}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6574,0.991318,0.810266,0.720604,0.686885,0.691568
2,0.0519,1.090886,0.819432,0.811237,0.708914,0.740573
3,0.0219,1.388683,0.811182,0.806293,0.705356,0.732377
4,0.0221,1.371168,0.814849,0.770409,0.691924,0.711948
5,0.0143,1.514955,0.815765,0.753249,0.693599,0.707162
6,0.01,1.516962,0.812099,0.70019,0.655865,0.667075
7,0.009,1.684548,0.819432,0.749209,0.715981,0.719265
8,0.0049,1.765067,0.826764,0.742492,0.697941,0.711735
9,0.0046,1.90905,0.824015,0.801831,0.706847,0.733487
10,0.0023,1.94138,0.826764,0.757894,0.699644,0.715833


[I 2025-03-23 12:30:51,775] Trial 85 pruned. 


Trial 86 with params: {'learning_rate': 0.004186380237076967, 'weight_decay': 0.008, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6957,0.985478,0.816682,0.787039,0.708664,0.728125
2,0.0507,1.057165,0.828598,0.768717,0.738577,0.738827
3,0.0251,1.253924,0.813016,0.734811,0.718567,0.714146
4,0.0163,1.409827,0.832264,0.825484,0.735033,0.75635
5,0.0152,1.467052,0.824931,0.812487,0.747732,0.755005
6,0.0084,1.542309,0.826764,0.737749,0.736273,0.715566
7,0.0089,1.725119,0.818515,0.813896,0.721118,0.742398
8,0.0064,1.738652,0.826764,0.791911,0.738351,0.746612
9,0.0035,1.761483,0.830431,0.801445,0.747515,0.756342
10,0.0023,1.719395,0.827681,0.785024,0.746513,0.752604


[I 2025-03-23 12:35:34,899] Trial 86 finished with value: 0.7600581747282683 and parameters: {'learning_rate': 0.004186380237076967, 'weight_decay': 0.008, 'warmup_steps': 53}. Best is trial 46 with value: 0.7867104184452819.


Trial 87 with params: {'learning_rate': 0.0022990426056880213, 'weight_decay': 0.005, 'warmup_steps': 43}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8206,1.002665,0.796517,0.749609,0.674816,0.696546
2,0.0642,1.175451,0.802016,0.776808,0.692895,0.714172
3,0.0234,1.342936,0.813932,0.768136,0.712915,0.723119
4,0.0153,1.330694,0.810266,0.756794,0.70235,0.715218
5,0.0081,1.464853,0.818515,0.800337,0.724435,0.748363
6,0.0053,1.436704,0.827681,0.80864,0.741071,0.758089
7,0.0064,1.590841,0.809349,0.802817,0.733565,0.75129
8,0.002,1.667604,0.817599,0.786843,0.724918,0.742363
9,0.0016,1.640234,0.823098,0.787005,0.721132,0.737962
10,0.0012,1.71152,0.822181,0.808341,0.736286,0.756408


[I 2025-03-23 12:40:23,541] Trial 87 finished with value: 0.7373231539471107 and parameters: {'learning_rate': 0.0022990426056880213, 'weight_decay': 0.005, 'warmup_steps': 43}. Best is trial 46 with value: 0.7867104184452819.


Trial 88 with params: {'learning_rate': 0.0028131716211822685, 'weight_decay': 0.003, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7834,0.914939,0.824015,0.772749,0.705401,0.723357
2,0.0578,1.155101,0.821265,0.791165,0.704216,0.729067
3,0.0222,1.2675,0.820348,0.792241,0.720213,0.740784
4,0.0123,1.330223,0.814849,0.798708,0.715478,0.735729
5,0.0142,1.419651,0.824015,0.799832,0.721869,0.743435
6,0.0069,1.426205,0.822181,0.822915,0.737362,0.759106
7,0.0077,1.581499,0.825848,0.762667,0.730713,0.734781
8,0.003,1.620518,0.826764,0.789316,0.729588,0.748029
9,0.0012,1.64398,0.831347,0.811546,0.744998,0.765213
10,0.0011,1.591209,0.829514,0.792607,0.74088,0.753106


[I 2025-03-23 12:45:14,366] Trial 88 finished with value: 0.7576991725008975 and parameters: {'learning_rate': 0.0028131716211822685, 'weight_decay': 0.003, 'warmup_steps': 53}. Best is trial 46 with value: 0.7867104184452819.


Trial 89 with params: {'learning_rate': 0.004573256835335805, 'weight_decay': 0.003, 'warmup_steps': 32}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6594,1.067104,0.814849,0.764722,0.679841,0.705853
2,0.0539,1.196901,0.805683,0.764651,0.70211,0.716626
3,0.026,1.312169,0.806599,0.760976,0.707145,0.720482
4,0.0215,1.354324,0.829514,0.79005,0.713406,0.735485
5,0.014,1.402417,0.823098,0.792481,0.712972,0.735185
6,0.0104,1.607006,0.814849,0.770607,0.69387,0.713953
7,0.0099,1.622233,0.813932,0.797463,0.706688,0.733637
8,0.0044,1.784286,0.824931,0.808332,0.702687,0.738205
9,0.0017,1.871543,0.826764,0.799183,0.707876,0.736764
10,0.0008,1.867903,0.826764,0.78824,0.720785,0.742539


[I 2025-03-23 12:48:13,965] Trial 89 pruned. 


Trial 90 with params: {'learning_rate': 0.004709026293213274, 'weight_decay': 0.008, 'warmup_steps': 41}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.649,1.101718,0.813932,0.775247,0.689314,0.716822
2,0.0494,1.268622,0.8011,0.781427,0.707674,0.720736
3,0.0245,1.39824,0.817599,0.798266,0.690325,0.721927
4,0.022,1.298554,0.815765,0.798076,0.706489,0.735395
5,0.0128,1.529009,0.816682,0.775257,0.729382,0.735206
6,0.0101,1.712044,0.823098,0.746896,0.712712,0.719729
7,0.0103,1.813819,0.823098,0.751871,0.696067,0.712455
8,0.0062,1.835449,0.817599,0.73191,0.689258,0.692221
9,0.0067,1.777712,0.824931,0.748482,0.727996,0.725902
10,0.0028,1.870109,0.824931,0.789338,0.744258,0.757154


[I 2025-03-23 12:53:08,779] Trial 90 finished with value: 0.751557319613735 and parameters: {'learning_rate': 0.004709026293213274, 'weight_decay': 0.008, 'warmup_steps': 41}. Best is trial 46 with value: 0.7867104184452819.


Trial 91 with params: {'learning_rate': 0.00214197340392235, 'weight_decay': 0.005, 'warmup_steps': 32}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8313,0.977008,0.811182,0.760387,0.680578,0.704904
2,0.0681,1.11685,0.820348,0.776542,0.732382,0.74003
3,0.0243,1.197527,0.802933,0.752789,0.729326,0.727364
4,0.0133,1.304301,0.820348,0.772702,0.722986,0.734557
5,0.0084,1.384201,0.813932,0.75175,0.689013,0.701075
6,0.0094,1.500989,0.821265,0.782503,0.692464,0.716382
7,0.0057,1.474869,0.823098,0.787708,0.719647,0.737704
8,0.004,1.459452,0.828598,0.800817,0.735413,0.754994
9,0.0027,1.551227,0.823098,0.775231,0.730663,0.736836
10,0.0031,1.602954,0.823098,0.783938,0.740011,0.74834


[I 2025-03-23 12:57:56,772] Trial 91 finished with value: 0.7500320862408957 and parameters: {'learning_rate': 0.00214197340392235, 'weight_decay': 0.005, 'warmup_steps': 32}. Best is trial 46 with value: 0.7867104184452819.


Trial 92 with params: {'learning_rate': 0.002666318640637334, 'weight_decay': 0.005, 'warmup_steps': 35}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7709,0.938302,0.815765,0.776476,0.699002,0.72182
2,0.0601,1.09989,0.816682,0.751522,0.714877,0.718324
3,0.0228,1.168016,0.828598,0.799472,0.729003,0.747517
4,0.0156,1.386179,0.813932,0.807724,0.705293,0.734547
5,0.0074,1.477766,0.816682,0.757125,0.733693,0.726799
6,0.0084,1.492309,0.828598,0.797726,0.721006,0.745765
7,0.0059,1.5137,0.825848,0.77169,0.715359,0.731622
8,0.0034,1.508084,0.821265,0.793551,0.707551,0.731509
9,0.003,1.657819,0.823098,0.754101,0.729604,0.72723
10,0.0017,1.693207,0.822181,0.796118,0.726186,0.747861


[I 2025-03-23 13:02:41,287] Trial 92 finished with value: 0.7471345801197 and parameters: {'learning_rate': 0.002666318640637334, 'weight_decay': 0.005, 'warmup_steps': 35}. Best is trial 46 with value: 0.7867104184452819.


Trial 93 with params: {'learning_rate': 0.0038120993842298005, 'weight_decay': 0.006, 'warmup_steps': 33}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6722,1.050856,0.817599,0.736595,0.678939,0.691126
2,0.0506,1.158147,0.815765,0.762365,0.692591,0.711594
3,0.0218,1.13513,0.825848,0.722587,0.723996,0.708809
4,0.0154,1.349086,0.825848,0.771367,0.729463,0.730121
5,0.0113,1.47602,0.818515,0.773981,0.706307,0.719833
6,0.0108,1.436631,0.820348,0.791435,0.70696,0.730644
7,0.0077,1.663526,0.816682,0.772141,0.713902,0.725275
8,0.0068,1.801107,0.826764,0.786306,0.71215,0.72845
9,0.0038,1.615268,0.836847,0.798542,0.732577,0.750373
10,0.001,1.710291,0.843263,0.811358,0.727553,0.752437


[I 2025-03-23 13:07:35,300] Trial 93 finished with value: 0.7540449614121911 and parameters: {'learning_rate': 0.0038120993842298005, 'weight_decay': 0.006, 'warmup_steps': 33}. Best is trial 46 with value: 0.7867104184452819.


Trial 94 with params: {'learning_rate': 0.004532392086197323, 'weight_decay': 0.007, 'warmup_steps': 44}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6663,0.971062,0.804766,0.707083,0.661136,0.670419
2,0.0503,1.123567,0.821265,0.79578,0.735457,0.750425
3,0.0271,1.257666,0.814849,0.767615,0.707711,0.72487
4,0.0191,1.334152,0.821265,0.767056,0.713724,0.726107
5,0.0141,1.571666,0.817599,0.74329,0.712746,0.716135


[I 2025-03-23 13:09:23,226] Trial 94 pruned. 


Trial 95 with params: {'learning_rate': 0.00464020403456773, 'weight_decay': 0.005, 'warmup_steps': 45}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6628,0.997949,0.812099,0.729416,0.680918,0.690908
2,0.0543,1.116837,0.824015,0.816621,0.726396,0.752696
3,0.0255,1.240504,0.832264,0.775163,0.744023,0.747094
4,0.0211,1.433882,0.827681,0.760337,0.718462,0.727135
5,0.0154,1.722427,0.818515,0.775349,0.699828,0.724777
6,0.0126,1.539369,0.821265,0.770776,0.724001,0.72999
7,0.0094,1.684001,0.820348,0.767304,0.72025,0.726538
8,0.0055,1.884648,0.817599,0.79526,0.734189,0.745851
9,0.0031,1.733838,0.826764,0.77107,0.758228,0.755213
10,0.002,1.799377,0.828598,0.792872,0.732657,0.749089


[I 2025-03-23 13:14:22,828] Trial 95 finished with value: 0.7268289576039751 and parameters: {'learning_rate': 0.00464020403456773, 'weight_decay': 0.005, 'warmup_steps': 45}. Best is trial 46 with value: 0.7867104184452819.


Trial 96 with params: {'learning_rate': 0.004825349198382344, 'weight_decay': 0.005, 'warmup_steps': 34}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6514,1.015167,0.813932,0.761394,0.683593,0.705564
2,0.0544,1.239988,0.813932,0.779194,0.701227,0.720531
3,0.0264,1.241357,0.815765,0.769506,0.707056,0.723335
4,0.0222,1.453082,0.820348,0.760975,0.693676,0.709691
5,0.0179,1.449411,0.832264,0.779534,0.730402,0.731274
6,0.0126,1.576206,0.818515,0.761972,0.714935,0.721655
7,0.0108,1.674756,0.829514,0.783573,0.727796,0.741129
8,0.0051,1.712968,0.818515,0.767047,0.725808,0.730252
9,0.0042,1.763183,0.833181,0.801368,0.744986,0.755941
10,0.0023,1.888209,0.831347,0.794273,0.727577,0.74645


[I 2025-03-23 13:17:37,881] Trial 96 pruned. 


Trial 97 with params: {'learning_rate': 0.0023957492333828937, 'weight_decay': 0.007, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8143,0.960044,0.802016,0.752859,0.683307,0.701329
2,0.0622,1.206789,0.813016,0.796561,0.696982,0.722613
3,0.0226,1.256509,0.817599,0.770223,0.737874,0.738621
4,0.0109,1.307269,0.829514,0.780377,0.726454,0.740254
5,0.0128,1.401839,0.821265,0.806557,0.724694,0.741985
6,0.0075,1.555547,0.817599,0.756168,0.703644,0.715686
7,0.0043,1.590674,0.832264,0.788419,0.721505,0.741716
8,0.0018,1.634923,0.829514,0.777438,0.730896,0.741937
9,0.0026,1.691703,0.829514,0.778006,0.718994,0.731702
10,0.0022,1.710479,0.832264,0.794093,0.746738,0.755295


[I 2025-03-23 13:22:13,415] Trial 97 finished with value: 0.7526734158842895 and parameters: {'learning_rate': 0.0023957492333828937, 'weight_decay': 0.007, 'warmup_steps': 53}. Best is trial 46 with value: 0.7867104184452819.


Trial 98 with params: {'learning_rate': 0.0009886034014177738, 'weight_decay': 0.004, 'warmup_steps': 15}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0761,0.939465,0.767186,0.672653,0.564253,0.591844
2,0.1569,1.174974,0.774519,0.673446,0.65063,0.646787
3,0.0499,1.304244,0.794684,0.690965,0.629897,0.643309
4,0.0237,1.297348,0.800183,0.689861,0.663792,0.666569
5,0.0129,1.435548,0.794684,0.700187,0.663706,0.666382
6,0.0082,1.486109,0.80385,0.708654,0.665412,0.675863
7,0.0054,1.499603,0.799267,0.706945,0.649771,0.659256
8,0.0034,1.538,0.810266,0.767233,0.69834,0.719524
9,0.0025,1.548395,0.809349,0.749045,0.687999,0.701371
10,0.0038,1.537134,0.805683,0.724521,0.685686,0.689482


[I 2025-03-23 13:25:32,830] Trial 98 pruned. 


Trial 99 with params: {'learning_rate': 0.0018118938050599703, 'weight_decay': 0.008, 'warmup_steps': 47}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8848,1.009272,0.784601,0.705241,0.616238,0.642785
2,0.0793,1.212334,0.809349,0.704518,0.64857,0.664323
3,0.0268,1.225079,0.811182,0.752386,0.709494,0.720187
4,0.0131,1.399181,0.824015,0.780229,0.70765,0.728646
5,0.0087,1.502216,0.824015,0.783502,0.724632,0.741261
6,0.0083,1.478041,0.818515,0.779875,0.723699,0.738733
7,0.0054,1.495725,0.829514,0.771957,0.711072,0.728792
8,0.0043,1.493901,0.823098,0.727396,0.693008,0.699868
9,0.0011,1.633976,0.83593,0.773782,0.733141,0.744415
10,0.0022,1.671581,0.824931,0.764064,0.730675,0.736169


[I 2025-03-23 13:28:37,449] Trial 99 pruned. 


Trial 100 with params: {'learning_rate': 0.002397390803137355, 'weight_decay': 0.004, 'warmup_steps': 43}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.806,0.933917,0.807516,0.721235,0.655232,0.670767
2,0.0624,1.081562,0.822181,0.782631,0.714616,0.737104
3,0.0214,1.228931,0.812099,0.767267,0.71728,0.727123
4,0.0155,1.220385,0.811182,0.771343,0.714923,0.731428
5,0.0091,1.492848,0.815765,0.780929,0.718018,0.730212
6,0.0065,1.550242,0.814849,0.794566,0.700926,0.730036
7,0.0083,1.478816,0.829514,0.79174,0.7209,0.7339
8,0.002,1.735595,0.820348,0.766626,0.7016,0.713454
9,0.0032,1.55775,0.834097,0.798816,0.72926,0.741921
10,0.0015,1.699555,0.827681,0.793205,0.72578,0.743264


[I 2025-03-23 13:31:32,493] Trial 100 pruned. 


Trial 101 with params: {'learning_rate': 0.0013626818177371815, 'weight_decay': 0.006, 'warmup_steps': 47}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9984,0.960819,0.772686,0.645604,0.58399,0.592746
2,0.1054,1.188436,0.79835,0.719617,0.664574,0.675171
3,0.0327,1.243229,0.808433,0.754368,0.708359,0.716945
4,0.0167,1.307121,0.809349,0.757303,0.694322,0.711152
5,0.0079,1.413639,0.810266,0.764259,0.691591,0.711861
6,0.0078,1.455509,0.816682,0.743732,0.708758,0.714237
7,0.0062,1.441425,0.815765,0.752013,0.702765,0.715021
8,0.0036,1.599705,0.816682,0.753663,0.693744,0.709268
9,0.0017,1.696472,0.805683,0.736343,0.693144,0.702176
10,0.002,1.64706,0.819432,0.758154,0.714297,0.722204


[I 2025-03-23 13:34:35,959] Trial 101 pruned. 


Trial 102 with params: {'learning_rate': 0.0017015172986118066, 'weight_decay': 0.007, 'warmup_steps': 47}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.91,0.95684,0.783685,0.753515,0.647547,0.67456
2,0.0816,1.126421,0.818515,0.771073,0.691242,0.709923
3,0.0251,1.224821,0.807516,0.766904,0.697626,0.715064
4,0.0139,1.441414,0.8011,0.7692,0.676218,0.700492
5,0.0108,1.365929,0.814849,0.762403,0.70266,0.712131


[I 2025-03-23 13:36:12,083] Trial 102 pruned. 


Trial 103 with params: {'learning_rate': 0.0023479529268515377, 'weight_decay': 0.006, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8207,0.99715,0.80385,0.737123,0.680732,0.692144
2,0.0614,1.193952,0.815765,0.773754,0.717522,0.732087
3,0.0224,1.297798,0.821265,0.768795,0.725527,0.735188
4,0.0131,1.406346,0.819432,0.762292,0.711544,0.723814
5,0.0079,1.541715,0.824015,0.742072,0.706006,0.710692
6,0.0071,1.672367,0.807516,0.774959,0.705157,0.722516
7,0.0077,1.55276,0.814849,0.78084,0.697922,0.721975
8,0.0049,1.622114,0.822181,0.761167,0.718091,0.725411
9,0.0019,1.568754,0.829514,0.778601,0.710542,0.730077
10,0.0018,1.649668,0.824931,0.774541,0.72499,0.733591


[I 2025-03-23 13:39:28,372] Trial 103 pruned. 


Trial 104 with params: {'learning_rate': 0.0022575504559235334, 'weight_decay': 0.008, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8329,0.956395,0.806599,0.743993,0.680931,0.700303
2,0.0663,1.22488,0.79835,0.774416,0.688097,0.710184
3,0.0219,1.231818,0.820348,0.76538,0.718521,0.721509
4,0.0149,1.319554,0.821265,0.768784,0.730391,0.736354
5,0.0083,1.346393,0.824015,0.784184,0.727965,0.740899
6,0.0088,1.416381,0.810266,0.761113,0.695259,0.713869
7,0.0064,1.449425,0.823098,0.788445,0.709237,0.730084
8,0.0045,1.618625,0.822181,0.797366,0.709731,0.733446
9,0.0018,1.594266,0.831347,0.815685,0.72153,0.747525
10,0.0013,1.585972,0.831347,0.776613,0.723259,0.731858


[I 2025-03-23 13:42:43,936] Trial 104 pruned. 


Trial 105 with params: {'learning_rate': 0.0006078662726350267, 'weight_decay': 0.01, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2928,0.993967,0.745188,0.543664,0.447343,0.471558
2,0.3068,1.029029,0.782768,0.697904,0.640467,0.65171
3,0.1046,1.147055,0.785518,0.689001,0.658842,0.654107
4,0.0504,1.275364,0.782768,0.680379,0.61463,0.626278
5,0.0262,1.405485,0.791934,0.668773,0.655919,0.649326
6,0.0139,1.393056,0.799267,0.695547,0.678843,0.670507
7,0.0097,1.470133,0.782768,0.694945,0.65292,0.656949
8,0.0066,1.57542,0.792851,0.706371,0.662824,0.665335
9,0.0042,1.569528,0.790101,0.653113,0.650219,0.63388
10,0.0041,1.586142,0.80385,0.713156,0.673295,0.674095


[I 2025-03-23 13:45:39,556] Trial 105 pruned. 


Trial 106 with params: {'learning_rate': 0.0025159479957240563, 'weight_decay': 0.007, 'warmup_steps': 41}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7906,1.006476,0.805683,0.778532,0.685264,0.711942
2,0.061,1.058877,0.824015,0.790415,0.704958,0.732056
3,0.021,1.208053,0.827681,0.778451,0.712054,0.731544
4,0.0138,1.389472,0.824931,0.805729,0.722774,0.741634
5,0.0101,1.348698,0.830431,0.79882,0.727885,0.740898
6,0.0061,1.472584,0.813932,0.768618,0.697242,0.718135
7,0.008,1.513161,0.818515,0.789534,0.695778,0.724373
8,0.0047,1.453181,0.837764,0.776429,0.722469,0.736332
9,0.002,1.555639,0.832264,0.782428,0.729527,0.741527
10,0.0009,1.610212,0.836847,0.798107,0.722235,0.744135


[I 2025-03-23 13:50:24,085] Trial 106 finished with value: 0.7424376936359451 and parameters: {'learning_rate': 0.0025159479957240563, 'weight_decay': 0.007, 'warmup_steps': 41}. Best is trial 46 with value: 0.7867104184452819.


Trial 107 with params: {'learning_rate': 0.004876931770314574, 'weight_decay': 0.009000000000000001, 'warmup_steps': 49}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6564,1.007631,0.823098,0.771429,0.678588,0.707633
2,0.0514,1.152189,0.824015,0.788428,0.728874,0.744391
3,0.0257,1.415074,0.823098,0.781311,0.738835,0.739732
4,0.0249,1.453932,0.819432,0.770736,0.737197,0.736167
5,0.0184,1.741185,0.815765,0.736142,0.730207,0.721041
6,0.0121,1.699812,0.830431,0.79032,0.754921,0.760775
7,0.008,1.685751,0.839597,0.826601,0.748196,0.770286
8,0.0067,1.701739,0.832264,0.783613,0.738639,0.746731
9,0.0043,1.84775,0.832264,0.817989,0.765554,0.777182
10,0.0032,1.872603,0.831347,0.801616,0.752148,0.763435


[I 2025-03-23 13:54:45,785] Trial 107 finished with value: 0.7879411361007308 and parameters: {'learning_rate': 0.004876931770314574, 'weight_decay': 0.009000000000000001, 'warmup_steps': 49}. Best is trial 107 with value: 0.7879411361007308.


Trial 108 with params: {'learning_rate': 0.0029541524866169757, 'weight_decay': 0.009000000000000001, 'warmup_steps': 49}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7625,1.029195,0.794684,0.702901,0.668603,0.6763
2,0.056,1.203587,0.809349,0.768717,0.698629,0.716168
3,0.0222,1.28591,0.810266,0.762324,0.694971,0.713343
4,0.0147,1.248825,0.816682,0.809607,0.726837,0.747485
5,0.013,1.379246,0.808433,0.775534,0.713924,0.729101
6,0.0098,1.444385,0.825848,0.7847,0.706565,0.72558
7,0.0056,1.370738,0.823098,0.799868,0.714071,0.737725
8,0.0021,1.459414,0.826764,0.780889,0.731141,0.740401
9,0.0011,1.478283,0.830431,0.786873,0.728818,0.742591
10,0.0007,1.552957,0.824931,0.766796,0.720388,0.729378


[I 2025-03-23 13:58:06,448] Trial 108 pruned. 


Trial 109 with params: {'learning_rate': 0.004942386627295451, 'weight_decay': 0.009000000000000001, 'warmup_steps': 48}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6605,1.092688,0.800183,0.723758,0.66421,0.677092
2,0.0538,1.236154,0.825848,0.759907,0.692032,0.708992
3,0.0295,1.309564,0.826764,0.788148,0.733182,0.745047
4,0.0206,1.425095,0.813016,0.726955,0.666124,0.680172
5,0.0175,1.600059,0.820348,0.785423,0.700676,0.728017
6,0.0136,1.628861,0.825848,0.739746,0.68918,0.699017
7,0.0064,1.757144,0.830431,0.770265,0.719264,0.733757
8,0.0043,1.759394,0.831347,0.758201,0.758707,0.745076
9,0.0041,1.762846,0.827681,0.766851,0.746902,0.748143
10,0.0031,1.839948,0.824015,0.759697,0.730642,0.736336


[I 2025-03-23 14:00:59,706] Trial 109 pruned. 


Trial 110 with params: {'learning_rate': 0.004738287031154641, 'weight_decay': 0.009000000000000001, 'warmup_steps': 47}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6683,0.999151,0.823098,0.76055,0.678257,0.699398
2,0.0555,1.177737,0.837764,0.791435,0.750222,0.754376
3,0.0253,1.335199,0.821265,0.786559,0.762752,0.762315
4,0.0225,1.494788,0.819432,0.759315,0.744371,0.739255
5,0.0129,1.51791,0.830431,0.800164,0.751967,0.759868
6,0.014,1.690843,0.819432,0.776639,0.706641,0.726246
7,0.0119,1.734197,0.826764,0.769805,0.752348,0.746313
8,0.0049,1.667297,0.826764,0.777596,0.726223,0.737825
9,0.0019,1.713247,0.83868,0.807024,0.747723,0.760577
10,0.002,1.780509,0.826764,0.784398,0.737967,0.747781


[I 2025-03-23 14:05:36,816] Trial 110 finished with value: 0.746710357751186 and parameters: {'learning_rate': 0.004738287031154641, 'weight_decay': 0.009000000000000001, 'warmup_steps': 47}. Best is trial 107 with value: 0.7879411361007308.


Trial 111 with params: {'learning_rate': 0.0047522430735843805, 'weight_decay': 0.008, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6664,1.037842,0.808433,0.780505,0.702803,0.72792
2,0.0545,1.14998,0.812099,0.777605,0.706281,0.72469
3,0.0255,1.246553,0.829514,0.796438,0.748289,0.758434
4,0.0209,1.401034,0.823098,0.80353,0.731936,0.753986
5,0.0172,1.370769,0.834097,0.821543,0.730301,0.759675
6,0.0133,1.493649,0.822181,0.770676,0.71244,0.727411
7,0.0079,1.56697,0.831347,0.819773,0.752725,0.772863
8,0.0034,1.600584,0.829514,0.810477,0.755281,0.769028
9,0.0024,1.742069,0.834097,0.81351,0.753966,0.768754
10,0.0026,1.76248,0.842346,0.817184,0.756498,0.775337


[I 2025-03-23 14:10:05,751] Trial 111 finished with value: 0.7638718414352348 and parameters: {'learning_rate': 0.0047522430735843805, 'weight_decay': 0.008, 'warmup_steps': 53}. Best is trial 107 with value: 0.7879411361007308.


Trial 112 with params: {'learning_rate': 0.004981360773003558, 'weight_decay': 0.007, 'warmup_steps': 41}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6474,1.098135,0.810266,0.780141,0.689683,0.713954
2,0.0515,1.227237,0.820348,0.789942,0.748744,0.748135
3,0.027,1.337933,0.813932,0.773811,0.685673,0.710773
4,0.0228,1.390849,0.825848,0.784552,0.740364,0.747074
5,0.0169,1.551947,0.815765,0.770652,0.692568,0.715202


[I 2025-03-23 14:11:42,840] Trial 112 pruned. 


Trial 113 with params: {'learning_rate': 0.004536589270199615, 'weight_decay': 0.007, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6696,1.009795,0.822181,0.772775,0.694505,0.720749
2,0.0489,1.177988,0.823098,0.827827,0.730511,0.764528
3,0.0284,1.265486,0.822181,0.808828,0.72374,0.748684
4,0.02,1.407647,0.819432,0.832683,0.733759,0.766507
5,0.0154,1.530693,0.817599,0.770511,0.738437,0.743713
6,0.0083,1.763158,0.827681,0.779793,0.726654,0.74167
7,0.0092,1.778053,0.818515,0.794968,0.717807,0.737868
8,0.0071,1.76982,0.815765,0.768168,0.707748,0.726452
9,0.0033,1.778076,0.827681,0.783553,0.720312,0.733485
10,0.0017,1.744662,0.824931,0.7791,0.713022,0.732992


[I 2025-03-23 14:14:51,821] Trial 113 pruned. 


Trial 114 with params: {'learning_rate': 0.004402451370708722, 'weight_decay': 0.01, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6767,1.072269,0.806599,0.761574,0.683283,0.707579
2,0.0508,1.16487,0.822181,0.804601,0.716294,0.737882
3,0.0262,1.295152,0.836847,0.798065,0.730197,0.744785
4,0.016,1.357004,0.820348,0.790048,0.715806,0.739253
5,0.013,1.573324,0.825848,0.773893,0.743392,0.747738
6,0.0133,1.530092,0.822181,0.753503,0.701714,0.711774
7,0.0084,1.601615,0.832264,0.792349,0.723928,0.742397
8,0.0056,1.686549,0.829514,0.789211,0.730368,0.748556
9,0.0021,1.680273,0.833181,0.790997,0.721785,0.741029
10,0.0017,1.748983,0.84418,0.782612,0.736044,0.747934


[I 2025-03-23 14:19:30,855] Trial 114 finished with value: 0.7592107497568 and parameters: {'learning_rate': 0.004402451370708722, 'weight_decay': 0.01, 'warmup_steps': 51}. Best is trial 107 with value: 0.7879411361007308.


Trial 115 with params: {'learning_rate': 0.004539279266507493, 'weight_decay': 0.007, 'warmup_steps': 45}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6665,1.005663,0.826764,0.784848,0.703754,0.724072
2,0.0519,1.122778,0.837764,0.763879,0.72954,0.731362
3,0.0254,1.298353,0.826764,0.795923,0.729958,0.749805
4,0.0194,1.385944,0.816682,0.80074,0.720191,0.741978
5,0.0184,1.498479,0.826764,0.785122,0.712176,0.72857
6,0.0112,1.548091,0.825848,0.763326,0.702405,0.710864
7,0.0077,1.560675,0.833181,0.807864,0.733903,0.7525
8,0.0037,1.663668,0.836847,0.802308,0.734757,0.754891
9,0.0048,1.70561,0.839597,0.827941,0.762,0.779961
10,0.0025,1.816875,0.836847,0.819004,0.720424,0.748889


[I 2025-03-23 14:24:38,873] Trial 115 finished with value: 0.7561220259967151 and parameters: {'learning_rate': 0.004539279266507493, 'weight_decay': 0.007, 'warmup_steps': 45}. Best is trial 107 with value: 0.7879411361007308.


Trial 116 with params: {'learning_rate': 0.004897562783017868, 'weight_decay': 0.009000000000000001, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.654,1.065127,0.813016,0.728362,0.698755,0.702184
2,0.0536,1.164975,0.820348,0.718301,0.669135,0.674563
3,0.0256,1.313691,0.831347,0.773929,0.741957,0.741287
4,0.019,1.454606,0.813016,0.780297,0.702817,0.72133
5,0.0191,1.545775,0.824931,0.764877,0.703171,0.717192


[I 2025-03-23 14:26:06,388] Trial 116 pruned. 


Trial 117 with params: {'learning_rate': 0.00012486032116326294, 'weight_decay': 0.004, 'warmup_steps': 37}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.2799,1.719574,0.56187,0.146812,0.177153,0.152595
2,1.312,1.353965,0.653529,0.310445,0.293245,0.28596
3,0.9266,1.173165,0.701192,0.419903,0.365554,0.373056
4,0.6935,1.105677,0.709441,0.426854,0.385514,0.396294
5,0.5362,1.075469,0.72319,0.513521,0.44994,0.463651
6,0.42,1.067032,0.732356,0.565035,0.482865,0.506097
7,0.3356,1.081238,0.746104,0.625779,0.531098,0.558057
8,0.2744,1.117572,0.727773,0.632021,0.531354,0.558003
9,0.2274,1.119451,0.747021,0.639811,0.564932,0.583568
10,0.1915,1.13247,0.748854,0.654777,0.601235,0.610935


[I 2025-03-23 14:29:09,529] Trial 117 pruned. 


Trial 118 with params: {'learning_rate': 0.0048885031884996315, 'weight_decay': 0.01, 'warmup_steps': 38}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6512,1.004619,0.822181,0.767046,0.698826,0.715922
2,0.0536,1.295696,0.810266,0.762229,0.676616,0.693722
3,0.0259,1.352813,0.821265,0.767899,0.720963,0.729488
4,0.0226,1.412648,0.829514,0.791593,0.741616,0.749989
5,0.0159,1.642762,0.816682,0.792514,0.710246,0.73321
6,0.0134,1.803537,0.813932,0.757519,0.70219,0.709971
7,0.0108,1.794294,0.826764,0.767678,0.718817,0.729875
8,0.007,1.940353,0.824931,0.793682,0.717971,0.737076
9,0.0035,1.914308,0.827681,0.798421,0.717496,0.739579
10,0.0025,1.994613,0.827681,0.804587,0.720421,0.746665


[I 2025-03-23 14:33:50,146] Trial 118 finished with value: 0.7249640040978335 and parameters: {'learning_rate': 0.0048885031884996315, 'weight_decay': 0.01, 'warmup_steps': 38}. Best is trial 107 with value: 0.7879411361007308.


Trial 119 with params: {'learning_rate': 0.002401753683008663, 'weight_decay': 0.009000000000000001, 'warmup_steps': 39}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.796,0.998834,0.806599,0.752748,0.666358,0.69515
2,0.0638,1.157973,0.826764,0.805886,0.71881,0.739261
3,0.022,1.27327,0.814849,0.778653,0.704848,0.725829
4,0.0154,1.349115,0.821265,0.795323,0.72386,0.742949
5,0.0063,1.475943,0.835014,0.813868,0.738876,0.759483
6,0.0062,1.592339,0.826764,0.802966,0.721278,0.747104
7,0.0078,1.535155,0.821265,0.757305,0.727598,0.729559
8,0.0035,1.535388,0.827681,0.770973,0.713538,0.726751
9,0.0033,1.510246,0.825848,0.750021,0.722046,0.721417
10,0.0012,1.526842,0.835014,0.799848,0.736639,0.751044


[I 2025-03-23 14:38:21,424] Trial 119 finished with value: 0.7530385557136401 and parameters: {'learning_rate': 0.002401753683008663, 'weight_decay': 0.009000000000000001, 'warmup_steps': 39}. Best is trial 107 with value: 0.7879411361007308.


Trial 120 with params: {'learning_rate': 0.004002488116615034, 'weight_decay': 0.003, 'warmup_steps': 41}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6814,1.023672,0.805683,0.773328,0.677107,0.700624
2,0.0515,1.075873,0.835014,0.800217,0.743399,0.758075
3,0.0243,1.236071,0.823098,0.78682,0.722595,0.741349
4,0.0186,1.315867,0.824015,0.799081,0.727212,0.751876
5,0.0102,1.598643,0.831347,0.790423,0.734277,0.748024
6,0.0119,1.55376,0.822181,0.813961,0.731406,0.755886
7,0.0073,1.705112,0.828598,0.804588,0.728159,0.749262
8,0.0055,1.681635,0.827681,0.784774,0.722666,0.739845
9,0.0026,1.647202,0.835014,0.793864,0.733688,0.748212
10,0.0013,1.604731,0.837764,0.779865,0.731784,0.743355


[I 2025-03-23 14:42:53,575] Trial 120 finished with value: 0.7220668365866802 and parameters: {'learning_rate': 0.004002488116615034, 'weight_decay': 0.003, 'warmup_steps': 41}. Best is trial 107 with value: 0.7879411361007308.


Trial 121 with params: {'learning_rate': 0.0020816775559341855, 'weight_decay': 0.008, 'warmup_steps': 50}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8567,1.03048,0.792851,0.705399,0.642922,0.663507
2,0.0705,1.287198,0.805683,0.743267,0.670258,0.686716
3,0.0247,1.26062,0.821265,0.747215,0.703126,0.709769
4,0.0149,1.37422,0.815765,0.805696,0.713437,0.742838
5,0.01,1.501963,0.813016,0.788342,0.718439,0.737102
6,0.0063,1.42971,0.830431,0.760385,0.717088,0.726439
7,0.0021,1.644957,0.828598,0.797111,0.718687,0.740725
8,0.0063,1.636063,0.816682,0.762071,0.7141,0.725163
9,0.0033,1.699285,0.828598,0.747553,0.725985,0.724789
10,0.0031,1.700114,0.828598,0.796097,0.726,0.748558


[I 2025-03-23 14:47:32,188] Trial 121 finished with value: 0.7436677094027158 and parameters: {'learning_rate': 0.0020816775559341855, 'weight_decay': 0.008, 'warmup_steps': 50}. Best is trial 107 with value: 0.7879411361007308.


Trial 122 with params: {'learning_rate': 0.003637502046526243, 'weight_decay': 0.008, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7264,0.969077,0.811182,0.785749,0.714087,0.72981
2,0.0521,1.15085,0.822181,0.817874,0.714233,0.741805
3,0.0227,1.24186,0.811182,0.803597,0.7077,0.732012
4,0.017,1.316874,0.823098,0.783535,0.716989,0.735297
5,0.012,1.425691,0.820348,0.787488,0.71681,0.728722
6,0.0081,1.41129,0.822181,0.779967,0.716267,0.733505
7,0.0071,1.448555,0.834097,0.805289,0.746954,0.762222
8,0.0066,1.441528,0.819432,0.794561,0.712868,0.736181
9,0.0034,1.36262,0.840513,0.824237,0.751098,0.768283
10,0.001,1.466428,0.83868,0.7815,0.74315,0.747546


[I 2025-03-23 14:52:22,644] Trial 122 finished with value: 0.7637889876361105 and parameters: {'learning_rate': 0.003637502046526243, 'weight_decay': 0.008, 'warmup_steps': 53}. Best is trial 107 with value: 0.7879411361007308.


Trial 123 with params: {'learning_rate': 0.0033323356408561976, 'weight_decay': 0.007, 'warmup_steps': 49}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7403,0.987259,0.806599,0.78286,0.688468,0.70843
2,0.0536,1.197431,0.808433,0.752322,0.718759,0.72074
3,0.0221,1.395961,0.816682,0.741731,0.71059,0.710855
4,0.0162,1.413319,0.817599,0.801166,0.699152,0.729299
5,0.0106,1.476835,0.824015,0.76969,0.741077,0.736263
6,0.0098,1.444827,0.818515,0.76185,0.71592,0.721442
7,0.0062,1.654131,0.817599,0.760907,0.70167,0.713035
8,0.005,1.509118,0.83593,0.761197,0.706238,0.719813
9,0.0018,1.534954,0.829514,0.727605,0.717732,0.709632
10,0.0005,1.585057,0.837764,0.772357,0.72801,0.736825


[I 2025-03-23 14:55:21,650] Trial 123 pruned. 


Trial 124 with params: {'learning_rate': 0.0042838228953377576, 'weight_decay': 0.008, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6978,1.018871,0.813932,0.795899,0.701179,0.727844
2,0.0519,1.123792,0.820348,0.790847,0.730482,0.743077
3,0.0244,1.270491,0.816682,0.758462,0.73115,0.733183
4,0.0169,1.413524,0.820348,0.764455,0.718725,0.72812
5,0.0157,1.473718,0.824931,0.790665,0.73723,0.747776
6,0.0099,1.605926,0.825848,0.78479,0.708559,0.733191
7,0.0061,1.803224,0.817599,0.787567,0.721089,0.739489
8,0.0067,1.644496,0.821265,0.730858,0.717706,0.712192
9,0.0036,1.707796,0.823098,0.763351,0.720709,0.727266
10,0.0022,1.752452,0.828598,0.757417,0.721086,0.727155


[I 2025-03-23 14:58:46,273] Trial 124 pruned. 


Trial 125 with params: {'learning_rate': 0.004927780023086574, 'weight_decay': 0.008, 'warmup_steps': 50}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6546,1.063941,0.816682,0.748154,0.706321,0.713731
2,0.0555,1.138281,0.809349,0.719749,0.706505,0.697338
3,0.0271,1.338008,0.819432,0.799845,0.713511,0.735519
4,0.0209,1.426667,0.823098,0.771481,0.713097,0.728778
5,0.0198,1.586177,0.817599,0.784119,0.725762,0.744388
6,0.0123,1.644521,0.817599,0.788489,0.699582,0.727477
7,0.0085,1.623507,0.828598,0.791943,0.721112,0.737528
8,0.006,1.760876,0.830431,0.765746,0.720477,0.7287
9,0.0026,1.778075,0.831347,0.779332,0.728276,0.739732
10,0.0022,1.868946,0.831347,0.784774,0.730273,0.742073


[I 2025-03-23 15:02:01,240] Trial 125 pruned. 


Trial 126 with params: {'learning_rate': 0.002092378752080202, 'weight_decay': 0.01, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8643,1.021083,0.800183,0.750479,0.673366,0.697273
2,0.0697,1.171876,0.811182,0.799538,0.711124,0.735875
3,0.0232,1.231934,0.822181,0.777908,0.729114,0.73626
4,0.0121,1.352533,0.809349,0.762015,0.693706,0.711504
5,0.0079,1.381764,0.824931,0.784094,0.713533,0.730205
6,0.0075,1.481608,0.820348,0.783023,0.70853,0.732542
7,0.0077,1.394529,0.821265,0.79543,0.72874,0.742261
8,0.0049,1.501333,0.816682,0.796906,0.731052,0.747923
9,0.0028,1.576534,0.816682,0.775598,0.720491,0.734142
10,0.0019,1.641051,0.826764,0.811002,0.741597,0.757694


[I 2025-03-23 15:06:49,229] Trial 126 finished with value: 0.7619209395447574 and parameters: {'learning_rate': 0.002092378752080202, 'weight_decay': 0.01, 'warmup_steps': 53}. Best is trial 107 with value: 0.7879411361007308.


Trial 127 with params: {'learning_rate': 0.0021446942356796415, 'weight_decay': 0.007, 'warmup_steps': 32}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8293,1.008501,0.809349,0.744918,0.691891,0.706464
2,0.0682,1.178598,0.813932,0.785002,0.711404,0.73197
3,0.0225,1.328469,0.809349,0.777304,0.698404,0.720932
4,0.0141,1.24307,0.809349,0.753415,0.701151,0.71304
5,0.0107,1.384218,0.811182,0.767273,0.71042,0.724038


[I 2025-03-23 15:08:18,196] Trial 127 pruned. 


Trial 128 with params: {'learning_rate': 0.003779936684129658, 'weight_decay': 0.006, 'warmup_steps': 38}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6865,1.004394,0.805683,0.775521,0.678534,0.708476
2,0.0548,1.059783,0.816682,0.74813,0.697528,0.702336
3,0.0199,1.225008,0.823098,0.770781,0.703826,0.720023
4,0.0181,1.290809,0.825848,0.752455,0.71658,0.711897
5,0.0115,1.53777,0.824931,0.793727,0.730941,0.742726
6,0.0117,1.472869,0.83593,0.813576,0.743264,0.756311
7,0.0091,1.46446,0.824931,0.781981,0.724392,0.736292
8,0.0054,1.558876,0.833181,0.816686,0.746898,0.764701
9,0.0014,1.593519,0.83868,0.814976,0.754631,0.770113
10,0.0016,1.651879,0.835014,0.807095,0.750802,0.763902


[I 2025-03-23 15:12:54,588] Trial 128 finished with value: 0.7514251164509631 and parameters: {'learning_rate': 0.003779936684129658, 'weight_decay': 0.006, 'warmup_steps': 38}. Best is trial 107 with value: 0.7879411361007308.


Trial 129 with params: {'learning_rate': 0.004428460594443204, 'weight_decay': 0.006, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6748,0.956647,0.821265,0.755923,0.713609,0.715629
2,0.0528,1.174981,0.815765,0.778958,0.72193,0.732616
3,0.0233,1.242897,0.810266,0.758142,0.728572,0.730348
4,0.0214,1.408337,0.820348,0.764222,0.745852,0.739216
5,0.0175,1.485379,0.821265,0.781926,0.743503,0.745737
6,0.0091,1.559745,0.83593,0.793684,0.721644,0.734505
7,0.0066,1.677599,0.824015,0.761878,0.69816,0.710765
8,0.0037,1.754039,0.823098,0.769544,0.723487,0.724493
9,0.0035,1.830252,0.830431,0.759631,0.732435,0.731049
10,0.0044,1.778464,0.829514,0.772926,0.732513,0.737651


[I 2025-03-23 15:16:20,266] Trial 129 pruned. 


Trial 130 with params: {'learning_rate': 0.002619246127862628, 'weight_decay': 0.002, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8007,1.062043,0.8011,0.741393,0.666951,0.685255
2,0.0613,1.205618,0.802933,0.758618,0.683829,0.70357
3,0.0216,1.238145,0.815765,0.75934,0.714233,0.722305
4,0.016,1.366255,0.813016,0.776983,0.712496,0.732672
5,0.01,1.425801,0.824015,0.787615,0.719858,0.735483
6,0.008,1.562569,0.821265,0.751591,0.678263,0.700382
7,0.0043,1.49383,0.815765,0.765689,0.711981,0.722819
8,0.0045,1.722894,0.813932,0.780788,0.716079,0.733161
9,0.003,1.717804,0.819432,0.768053,0.713358,0.723327
10,0.0013,1.735508,0.832264,0.795073,0.715239,0.737432


[I 2025-03-23 15:19:26,362] Trial 130 pruned. 


Trial 131 with params: {'learning_rate': 0.0013420102030541094, 'weight_decay': 0.009000000000000001, 'warmup_steps': 49}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0129,0.988917,0.772686,0.633149,0.567329,0.584737
2,0.1074,1.205904,0.800183,0.751726,0.668699,0.68778
3,0.0314,1.215069,0.816682,0.794664,0.7145,0.740053
4,0.018,1.347729,0.806599,0.789243,0.705724,0.729828
5,0.0085,1.325776,0.814849,0.791825,0.709723,0.735261
6,0.0042,1.508375,0.818515,0.799867,0.718551,0.740273
7,0.0069,1.542796,0.808433,0.79215,0.702349,0.728925
8,0.0059,1.572452,0.807516,0.791244,0.715229,0.736403
9,0.0022,1.513387,0.828598,0.788136,0.733349,0.745749
10,0.0007,1.601951,0.817599,0.79884,0.719157,0.743393


[I 2025-03-23 15:22:28,092] Trial 131 pruned. 


Trial 132 with params: {'learning_rate': 0.0030318052973670093, 'weight_decay': 0.009000000000000001, 'warmup_steps': 48}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7528,0.954365,0.821265,0.804389,0.711126,0.741051
2,0.0539,1.192534,0.810266,0.766985,0.706724,0.719161
3,0.0221,1.269849,0.820348,0.781271,0.713096,0.732961
4,0.0149,1.379271,0.821265,0.790762,0.727433,0.743541
5,0.0093,1.492551,0.822181,0.753297,0.728546,0.725844
6,0.0107,1.489957,0.823098,0.769897,0.711864,0.726145
7,0.004,1.460751,0.825848,0.793057,0.746946,0.757431
8,0.0051,1.569591,0.813016,0.768188,0.689423,0.707005
9,0.0049,1.637225,0.819432,0.771654,0.741314,0.740157
10,0.0019,1.607889,0.827681,0.796119,0.72558,0.747897


[I 2025-03-23 15:27:43,393] Trial 132 finished with value: 0.7539958735699318 and parameters: {'learning_rate': 0.0030318052973670093, 'weight_decay': 0.009000000000000001, 'warmup_steps': 48}. Best is trial 107 with value: 0.7879411361007308.


Trial 133 with params: {'learning_rate': 8.153014791034117e-05, 'weight_decay': 0.0, 'warmup_steps': 40}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5168,1.97187,0.500458,0.103538,0.131295,0.105476
2,1.6227,1.586986,0.593951,0.260909,0.208185,0.193732
3,1.2632,1.382784,0.643446,0.281607,0.270436,0.264229
4,1.0177,1.249345,0.67736,0.387963,0.318572,0.323972
5,0.845,1.177355,0.692942,0.413128,0.367318,0.376132


[I 2025-03-23 15:29:24,704] Trial 133 pruned. 


Trial 134 with params: {'learning_rate': 0.0017284104826616178, 'weight_decay': 0.01, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9192,0.962477,0.794684,0.747118,0.647957,0.68063
2,0.0801,1.335868,0.784601,0.771896,0.655429,0.685785
3,0.0265,1.412535,0.793767,0.77503,0.698103,0.716825
4,0.0141,1.381216,0.819432,0.798722,0.709352,0.736632
5,0.0116,1.35282,0.820348,0.800749,0.731173,0.749923
6,0.0068,1.51419,0.821265,0.811547,0.720821,0.746466
7,0.0048,1.588675,0.809349,0.796717,0.694574,0.724362
8,0.0039,1.601976,0.800183,0.726553,0.673045,0.686767
9,0.0028,1.557414,0.819432,0.772395,0.727896,0.736881
10,0.002,1.631072,0.824015,0.799979,0.727246,0.745211


[I 2025-03-23 15:34:55,075] Trial 134 finished with value: 0.7436546750849393 and parameters: {'learning_rate': 0.0017284104826616178, 'weight_decay': 0.01, 'warmup_steps': 52}. Best is trial 107 with value: 0.7879411361007308.


Trial 135 with params: {'learning_rate': 0.0027475566229486132, 'weight_decay': 0.0, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7157,1.017996,0.792851,0.72578,0.665013,0.67932
2,0.0613,1.130738,0.815765,0.788728,0.709027,0.729005
3,0.0257,1.237554,0.811182,0.787299,0.707476,0.728198
4,0.0149,1.241671,0.826764,0.765834,0.722938,0.732696
5,0.011,1.422935,0.826764,0.789329,0.716095,0.734769
6,0.007,1.407098,0.820348,0.782678,0.724718,0.73826
7,0.0043,1.456001,0.832264,0.76843,0.723046,0.734774
8,0.007,1.541755,0.826764,0.773902,0.70517,0.727185
9,0.0041,1.601394,0.828598,0.79187,0.730662,0.743938
10,0.0012,1.564664,0.823098,0.739894,0.713298,0.712245


[I 2025-03-23 15:38:37,148] Trial 135 pruned. 


Trial 136 with params: {'learning_rate': 0.001829197984094007, 'weight_decay': 0.009000000000000001, 'warmup_steps': 50}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8818,0.968157,0.789184,0.73671,0.635704,0.666983
2,0.0781,1.190758,0.814849,0.770859,0.707836,0.723368
3,0.0244,1.315167,0.806599,0.772046,0.676081,0.707978
4,0.0153,1.293595,0.814849,0.734522,0.709078,0.704196
5,0.0106,1.433708,0.823098,0.764013,0.71868,0.728447
6,0.0066,1.345397,0.818515,0.750528,0.709027,0.714588
7,0.0033,1.438395,0.830431,0.782569,0.717957,0.737735
8,0.0042,1.558267,0.813932,0.74427,0.706837,0.712055
9,0.0046,1.485052,0.826764,0.770374,0.726721,0.73719
10,0.0019,1.52761,0.835014,0.795368,0.726493,0.747787


[I 2025-03-23 15:43:57,679] Trial 136 finished with value: 0.7599119570001439 and parameters: {'learning_rate': 0.001829197984094007, 'weight_decay': 0.009000000000000001, 'warmup_steps': 50}. Best is trial 107 with value: 0.7879411361007308.


Trial 137 with params: {'learning_rate': 0.0023139916771222828, 'weight_decay': 0.01, 'warmup_steps': 52}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8205,0.994651,0.789184,0.731673,0.680364,0.693691
2,0.0648,1.17159,0.804766,0.756309,0.704857,0.71548
3,0.0231,1.290333,0.817599,0.775748,0.732837,0.741739
4,0.0159,1.273848,0.818515,0.762179,0.71695,0.723705
5,0.0078,1.382894,0.824931,0.783844,0.720474,0.738857
6,0.0077,1.357687,0.824931,0.80216,0.738841,0.755508
7,0.0041,1.509488,0.829514,0.807547,0.725204,0.74933
8,0.0027,1.567314,0.828598,0.788515,0.730553,0.744526
9,0.0039,1.561642,0.819432,0.781975,0.739287,0.747582
10,0.0013,1.685768,0.821265,0.792083,0.7249,0.742785


[I 2025-03-23 15:47:40,313] Trial 137 pruned. 


Trial 138 with params: {'learning_rate': 0.0003504111700780832, 'weight_decay': 0.006, 'warmup_steps': 35}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6512,1.123667,0.704858,0.423532,0.361871,0.368398
2,0.5657,0.980025,0.749771,0.582597,0.491502,0.516361
3,0.2598,1.081669,0.754354,0.630354,0.546655,0.566837
4,0.1282,1.13638,0.779102,0.684081,0.627332,0.643933
5,0.0731,1.2202,0.779102,0.681913,0.648004,0.654797


[I 2025-03-23 15:49:36,965] Trial 138 pruned. 


Trial 139 with params: {'learning_rate': 0.004879962171867373, 'weight_decay': 0.008, 'warmup_steps': 37}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6461,1.024178,0.813016,0.734473,0.696205,0.704868
2,0.0535,1.193692,0.819432,0.786553,0.716801,0.731639
3,0.0273,1.220459,0.821265,0.737941,0.678464,0.695431
4,0.0196,1.32904,0.833181,0.769812,0.718456,0.725162
5,0.0173,1.531214,0.824931,0.764793,0.679103,0.702782
6,0.0179,1.661401,0.823098,0.789678,0.703803,0.718453
7,0.0101,1.600966,0.824931,0.781769,0.718343,0.732369
8,0.0049,1.700517,0.842346,0.769615,0.724029,0.726287
9,0.0032,1.631582,0.84143,0.794479,0.747407,0.755074
10,0.002,1.713573,0.842346,0.806524,0.745255,0.759699


[I 2025-03-23 15:55:18,592] Trial 139 finished with value: 0.7314604570133324 and parameters: {'learning_rate': 0.004879962171867373, 'weight_decay': 0.008, 'warmup_steps': 37}. Best is trial 107 with value: 0.7879411361007308.


Trial 140 with params: {'learning_rate': 0.0023326306389688605, 'weight_decay': 0.008, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8274,1.034456,0.796517,0.690969,0.642692,0.650057
2,0.0631,1.153441,0.815765,0.817601,0.716133,0.743031
3,0.022,1.353447,0.814849,0.745632,0.706597,0.711318
4,0.0149,1.374805,0.806599,0.710018,0.701603,0.693237
5,0.0086,1.4822,0.809349,0.760779,0.724323,0.7317
6,0.0086,1.547805,0.826764,0.808736,0.706748,0.737843
7,0.0026,1.665374,0.820348,0.738329,0.70996,0.710949
8,0.0023,1.725679,0.821265,0.788998,0.727059,0.743324
9,0.002,1.762514,0.806599,0.755159,0.700692,0.716332
10,0.0018,1.738836,0.823098,0.790586,0.737602,0.746425


[I 2025-03-23 16:00:53,635] Trial 140 finished with value: 0.7344309309419447 and parameters: {'learning_rate': 0.0023326306389688605, 'weight_decay': 0.008, 'warmup_steps': 53}. Best is trial 107 with value: 0.7879411361007308.


Trial 141 with params: {'learning_rate': 0.0022272198872003834, 'weight_decay': 0.006, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8419,1.003587,0.799267,0.717138,0.674201,0.684152
2,0.0673,1.120266,0.808433,0.800883,0.711709,0.732988
3,0.0218,1.297339,0.815765,0.768943,0.705537,0.723127
4,0.0144,1.334443,0.821265,0.784649,0.728025,0.737994
5,0.0112,1.470372,0.819432,0.800468,0.729052,0.746759
6,0.0061,1.431845,0.816682,0.736001,0.710491,0.710251
7,0.0068,1.510112,0.821265,0.775153,0.709831,0.73031
8,0.0031,1.570874,0.827681,0.787724,0.734167,0.743225
9,0.0019,1.485509,0.835014,0.78713,0.747043,0.75425
10,0.0007,1.602084,0.83593,0.799098,0.742138,0.75716


[I 2025-03-23 16:06:28,264] Trial 141 finished with value: 0.7501048083889073 and parameters: {'learning_rate': 0.0022272198872003834, 'weight_decay': 0.006, 'warmup_steps': 53}. Best is trial 107 with value: 0.7879411361007308.


Trial 142 with params: {'learning_rate': 0.0024029073405858717, 'weight_decay': 0.007, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8172,1.004906,0.792851,0.729592,0.658825,0.681023
2,0.0626,1.143657,0.809349,0.768111,0.69183,0.710767
3,0.0232,1.206588,0.812099,0.756018,0.724987,0.725482
4,0.0137,1.303357,0.815765,0.773202,0.719236,0.731059
5,0.0084,1.442378,0.815765,0.769356,0.709551,0.721714


[I 2025-03-23 16:08:25,060] Trial 142 pruned. 


Trial 143 with params: {'learning_rate': 0.002257845909650475, 'weight_decay': 0.004, 'warmup_steps': 46}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8231,0.968661,0.802933,0.73218,0.674724,0.690285
2,0.0642,1.158529,0.813932,0.781523,0.708727,0.724402
3,0.0239,1.191368,0.820348,0.785371,0.728284,0.742461
4,0.0127,1.328788,0.827681,0.785341,0.735303,0.748277
5,0.0098,1.357967,0.833181,0.812393,0.732457,0.757834
6,0.0087,1.368462,0.834097,0.803209,0.727685,0.744749
7,0.0061,1.325057,0.834097,0.81305,0.74215,0.762668
8,0.0025,1.506288,0.837764,0.806105,0.744116,0.76135
9,0.001,1.516289,0.833181,0.797694,0.728117,0.747317
10,0.0009,1.484467,0.837764,0.815753,0.728737,0.7543


[I 2025-03-23 16:14:23,873] Trial 143 finished with value: 0.7575299138540111 and parameters: {'learning_rate': 0.002257845909650475, 'weight_decay': 0.004, 'warmup_steps': 46}. Best is trial 107 with value: 0.7879411361007308.


Trial 144 with params: {'learning_rate': 0.002887074052362417, 'weight_decay': 0.005, 'warmup_steps': 23}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7303,1.019531,0.814849,0.784621,0.680065,0.707833
2,0.0584,1.116451,0.820348,0.773112,0.701733,0.722071
3,0.0209,1.288038,0.812099,0.775232,0.730876,0.740842
4,0.0143,1.269399,0.812099,0.7713,0.716787,0.729561
5,0.0121,1.33148,0.834097,0.81642,0.739423,0.76111
6,0.0111,1.413049,0.829514,0.764173,0.711193,0.7229
7,0.0057,1.452643,0.824015,0.788208,0.7209,0.733548
8,0.0049,1.428944,0.827681,0.807352,0.723608,0.752969
9,0.0033,1.509982,0.823098,0.779257,0.721878,0.73337
10,0.0008,1.473536,0.827681,0.791427,0.732176,0.743791


[I 2025-03-23 16:18:20,390] Trial 144 pruned. 


Trial 145 with params: {'learning_rate': 0.0017593404097967994, 'weight_decay': 0.008, 'warmup_steps': 40}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.892,0.971736,0.792851,0.715243,0.615279,0.645091
2,0.0839,1.142847,0.819432,0.804212,0.692672,0.727117
3,0.0247,1.211455,0.819432,0.746533,0.700929,0.708978
4,0.0143,1.347856,0.800183,0.754649,0.696905,0.709621
5,0.0106,1.351056,0.819432,0.759998,0.698736,0.715608
6,0.0064,1.46141,0.824015,0.798474,0.704933,0.735755
7,0.0052,1.454284,0.820348,0.797493,0.720589,0.743192
8,0.0021,1.521114,0.817599,0.790829,0.713813,0.734003
9,0.0034,1.558686,0.819432,0.77612,0.722478,0.732034
10,0.0013,1.607029,0.821265,0.793487,0.725473,0.742757


[I 2025-03-23 16:24:07,284] Trial 145 finished with value: 0.7480180664374511 and parameters: {'learning_rate': 0.0017593404097967994, 'weight_decay': 0.008, 'warmup_steps': 40}. Best is trial 107 with value: 0.7879411361007308.


Trial 146 with params: {'learning_rate': 0.004637144921431228, 'weight_decay': 0.007, 'warmup_steps': 51}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6679,0.992384,0.810266,0.744519,0.697542,0.70466
2,0.0518,1.078229,0.824015,0.818499,0.744444,0.765851
3,0.0257,1.347428,0.824015,0.789326,0.72325,0.73748
4,0.0229,1.45859,0.826764,0.828883,0.770992,0.784624
5,0.0139,1.487753,0.823098,0.810151,0.735527,0.755931
6,0.011,1.68408,0.832264,0.799377,0.745875,0.758639
7,0.0074,1.699561,0.826764,0.812952,0.760126,0.772167
8,0.0083,1.665868,0.828598,0.786411,0.75976,0.763654
9,0.0032,1.740359,0.827681,0.794719,0.76167,0.769144
10,0.0012,1.846053,0.834097,0.807424,0.758189,0.771806


[I 2025-03-23 16:30:07,680] Trial 146 finished with value: 0.7771405648576384 and parameters: {'learning_rate': 0.004637144921431228, 'weight_decay': 0.007, 'warmup_steps': 51}. Best is trial 107 with value: 0.7879411361007308.


Trial 147 with params: {'learning_rate': 0.004768673184134939, 'weight_decay': 0.004, 'warmup_steps': 53}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6704,1.058848,0.806599,0.751993,0.655357,0.68454
2,0.0515,1.174175,0.813932,0.815554,0.693304,0.726057
3,0.025,1.265192,0.816682,0.779248,0.729859,0.73907
4,0.0232,1.369502,0.816682,0.798044,0.725007,0.7417
5,0.0149,1.563665,0.815765,0.762282,0.705225,0.717362
6,0.0101,1.705793,0.821265,0.77219,0.70389,0.722015
7,0.0099,1.602784,0.827681,0.81943,0.730414,0.755461
8,0.0075,1.749292,0.829514,0.787273,0.732873,0.74552
9,0.0044,1.914529,0.826764,0.803059,0.739623,0.758544
10,0.0031,1.930036,0.824931,0.792826,0.741425,0.751967


[I 2025-03-23 16:35:57,215] Trial 147 finished with value: 0.7640192773334502 and parameters: {'learning_rate': 0.004768673184134939, 'weight_decay': 0.004, 'warmup_steps': 53}. Best is trial 107 with value: 0.7879411361007308.


Trial 148 with params: {'learning_rate': 0.00300677469965397, 'weight_decay': 0.003, 'warmup_steps': 48}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.757,1.010443,0.807516,0.781515,0.700111,0.722714
2,0.0563,1.110456,0.825848,0.818385,0.728808,0.758874
3,0.0229,1.221233,0.816682,0.772324,0.703619,0.725131
4,0.0173,1.262079,0.813016,0.794622,0.724658,0.742528
5,0.009,1.454345,0.818515,0.7743,0.720013,0.732202
6,0.0061,1.491798,0.819432,0.785125,0.716998,0.736722
7,0.0072,1.563513,0.809349,0.791529,0.710223,0.728185
8,0.0067,1.556836,0.812099,0.772482,0.702906,0.721896
9,0.003,1.570837,0.816682,0.770857,0.719335,0.730453
10,0.0016,1.554241,0.821265,0.781651,0.717075,0.73484


[I 2025-03-23 16:39:44,836] Trial 148 pruned. 


Trial 149 with params: {'learning_rate': 0.0039687155845044425, 'weight_decay': 0.005, 'warmup_steps': 49}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7006,0.976059,0.817599,0.775128,0.698339,0.718287
2,0.0508,1.179364,0.821265,0.768623,0.719402,0.731652
3,0.0249,1.290448,0.813932,0.791752,0.736863,0.749726
4,0.0159,1.441395,0.818515,0.83914,0.718445,0.746368
5,0.0119,1.53018,0.817599,0.783817,0.702146,0.724443


[I 2025-03-23 16:41:41,808] Trial 149 pruned. 


In [40]:
# Show the best run found by the previous search: its run id, objective value and hyperparameters.
print(best_trial3)

BestRun(run_id='107', objective=0.7879411361007308, hyperparameters={'learning_rate': 0.004876931770314574, 'weight_decay': 0.009000000000000001, 'warmup_steps': 49}, run_summary=None)


In [41]:
# Call the project's seed-reset helper before configuring the next search.
# NOTE(review): presumably re-seeds the Python/NumPy/torch RNGs so this search is reproducible — confirm in base.reset_seed.
base.reset_seed()

In [42]:
# Training arguments for the distillation + augmentation hyperparameter search.
# Results and logs go to per-dataset directories; epochs and batch size come from
# the notebook-level settings defined earlier.
# NOTE(review): remove_unused_columns=False presumably keeps the extra dataset columns
# (e.g. teacher logits) available to the trainer — confirm in base.get_training_args.
training_args = base.get_training_args(
    output_dir=f"~/results/{DATASET}/bilstm-distill-aug_fine_hp-search",
    logging_dir=f"~/logs/{DATASET}/bilstm-distill-aug_fine_hp-search",
    remove_unused_columns=False,
    epochs=num_epochs,
    batch_size=batch_size,
)

In [43]:
def hp_space(trial):
    """Draw one point of the search space for a hyperparameter-search trial.

    Five values are requested from ``trial``, always in the same order:
    learning rate (log scale), weight decay, warm-up steps (upper bound taken
    from the notebook-level ``warm_up`` variable), ``lambda_param`` and
    ``temperature``. The drawn values are printed and returned.

    Args:
        trial: object exposing ``number``, ``suggest_float`` and
            ``suggest_int`` (an Optuna trial when used with the optuna backend).

    Returns:
        dict: hyperparameter name -> value suggested for this trial.
    """
    # Draw the suggestions one by one; the dict below keeps the same key order.
    learning_rate = trial.suggest_float("learning_rate", 5e-5, 5e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0, 1e-2, step=1e-3)
    warmup_steps = trial.suggest_int("warmup_steps", 0, warm_up)
    # NOTE(review): lambda_param / temperature are presumably consumed by
    # base.DistilTrainer's loss — confirm against its implementation.
    lambda_param = trial.suggest_float("lambda_param", 0, 1, step=.1)
    temperature = trial.suggest_float("temperature", 2, 7, step=.5)

    search_point = {
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "warmup_steps": warmup_steps,
        "lambda_param": lambda_param,
        "temperature": temperature,
    }
    # Echo the sampled configuration so it appears next to the trial's log output.
    print(f"Trial {trial.number} with params: {search_point}")
    return search_point

In [44]:
# Fresh pruner and sampler objects for this study.
# The resource bounds come from the notebook-level `min_r` / `max_r` variables.
pruner = optuna.pruners.HyperbandPruner(
    min_resource=min_r,
    max_resource=max_r,
    reduction_factor=2,
    bootstrap_count=2,
)
# Fixed seed so the sequence of sampled hyperparameters can be reproduced.
sampler = optuna.samplers.TPESampler(
    seed=42,
    multivariate=True,
)



In [45]:
# Trainer for the distillation search: trains on `all_train_data`, evaluates on `eval_data`.
trainer = base.DistilTrainer(
    # Zero-argument factory wrapping get_BiLSTM.
    # NOTE(review): presumably invoked to build a fresh model per trial — confirm.
    model_init=lambda: get_BiLSTM(),
    compute_metrics=base.compute_metrics,
    eval_dataset=eval_data,
    train_dataset=all_train_data,
    args=training_args,
)
  

In [46]:
# Run the hyperparameter search for the distillation setup and keep the best run.
best_trial4 = trainer.hyperparameter_search(
    # Study set-up.
    backend="optuna",
    study_name="Test-Distill-aug",
    n_trials=150,
    # Search strategy: the space defined by `hp_space`, explored with the
    # sampler and pruner created in the previous cell.
    hp_space=hp_space,
    sampler=sampler,
    pruner=pruner,
    # Objective: maximise the "eval_f1" entry of the evaluation metrics.
    direction="maximize",
    compute_objective=lambda metrics: metrics["eval_f1"],
)

[I 2025-03-23 16:41:42,184] A new study created in memory with name: Test-Distill-aug


Trial 0 with params: {'learning_rate': 0.0002805758207667253, 'weight_decay': 0.01, 'warmup_steps': 39, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2265,0.889369,0.675527,0.31836,0.275223,0.268805
2,0.5956,0.715353,0.747021,0.460645,0.410523,0.417359
3,0.395,0.654787,0.767186,0.528512,0.463471,0.477897
4,0.2857,0.640508,0.776352,0.592815,0.504493,0.527473
5,0.2188,0.61125,0.799267,0.67751,0.589453,0.611789


[I 2025-03-23 16:43:34,822] Trial 0 pruned. 


Trial 1 with params: {'learning_rate': 0.00010255552094216992, 'weight_decay': 0.0, 'warmup_steps': 46, 'lambda_param': 0.6000000000000001, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.576,1.245115,0.526123,0.130308,0.146182,0.121622
2,1.0356,1.026319,0.626031,0.258532,0.227365,0.208391
3,0.8206,0.907336,0.668194,0.319624,0.278159,0.270992
4,0.6861,0.845556,0.694775,0.34531,0.310233,0.307792
5,0.5936,0.803251,0.711274,0.34921,0.332563,0.32966
6,0.5268,0.773473,0.714024,0.372267,0.349466,0.350523
7,0.4732,0.753113,0.736022,0.417786,0.381474,0.38264
8,0.4328,0.736117,0.743355,0.497162,0.414128,0.431649
9,0.4008,0.726955,0.757104,0.48567,0.440403,0.450911
10,0.3747,0.727817,0.749771,0.492375,0.43255,0.447944


[I 2025-03-23 16:47:22,174] Trial 1 pruned. 


Trial 2 with params: {'learning_rate': 5.497167787383099e-05, 'weight_decay': 0.01, 'warmup_steps': 44, 'lambda_param': 0.2, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7679,1.448165,0.448213,0.073147,0.103475,0.075039
2,1.2731,1.244699,0.531622,0.157011,0.15085,0.130937
3,1.1021,1.136169,0.587534,0.170189,0.1972,0.174411
4,0.9738,1.04066,0.621448,0.218341,0.219623,0.20183
5,0.8715,0.978076,0.64528,0.262876,0.24496,0.23236


[I 2025-03-23 16:49:17,314] Trial 2 pruned. 


Trial 3 with params: {'learning_rate': 0.00011635338541918901, 'weight_decay': 0.003, 'warmup_steps': 28, 'lambda_param': 0.4, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5234,1.195154,0.551787,0.140473,0.16567,0.139475
2,0.9761,0.972458,0.638863,0.256475,0.239094,0.223348
3,0.7587,0.864427,0.687443,0.342364,0.30022,0.298155
4,0.6292,0.810546,0.704858,0.344622,0.321708,0.320151
5,0.5372,0.771018,0.72594,0.401378,0.354013,0.356148
6,0.472,0.741191,0.731439,0.448523,0.381726,0.394276
7,0.4195,0.725433,0.741522,0.461403,0.415481,0.425985
8,0.3813,0.712257,0.753437,0.502656,0.44628,0.458657
9,0.3509,0.704771,0.759853,0.510781,0.453855,0.467597
10,0.3259,0.706691,0.757104,0.535666,0.463713,0.483856


[I 2025-03-23 16:53:02,131] Trial 3 pruned. 


Trial 4 with params: {'learning_rate': 0.0008369042894376068, 'weight_decay': 0.001, 'warmup_steps': 15, 'lambda_param': 0.4, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8374,0.640095,0.767186,0.489605,0.448499,0.458394
2,0.2442,0.57085,0.807516,0.676538,0.615113,0.630703
3,0.1356,0.536201,0.819432,0.740389,0.648842,0.680432
4,0.1017,0.530481,0.815765,0.772403,0.705214,0.726201
5,0.0867,0.527287,0.817599,0.796584,0.694511,0.72549
6,0.0783,0.518024,0.819432,0.810251,0.7069,0.740798
7,0.0734,0.516256,0.824015,0.788076,0.696022,0.729461
8,0.0711,0.512969,0.826764,0.822975,0.703068,0.742876
9,0.0683,0.513875,0.829514,0.822188,0.713272,0.74954
10,0.0669,0.523842,0.824015,0.808633,0.70957,0.740937


[I 2025-03-23 16:56:45,464] Trial 4 pruned. 


Trial 5 with params: {'learning_rate': 0.0018591820902866042, 'weight_decay': 0.002, 'warmup_steps': 27, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6542,0.560435,0.796517,0.654619,0.57878,0.601528
2,0.139,0.505894,0.829514,0.783781,0.702627,0.72961
3,0.0905,0.492677,0.833181,0.808829,0.731124,0.754902
4,0.0773,0.494512,0.839597,0.804168,0.735113,0.75106
5,0.0709,0.484283,0.840513,0.837408,0.736395,0.766104
6,0.0686,0.491206,0.834097,0.830148,0.749654,0.772756
7,0.0667,0.482775,0.83868,0.825779,0.747132,0.770546
8,0.0656,0.472465,0.842346,0.837586,0.753386,0.777277
9,0.0641,0.475171,0.843263,0.843104,0.753256,0.78053
10,0.0632,0.472767,0.840513,0.83154,0.746052,0.772101


[I 2025-03-23 17:02:25,347] Trial 5 finished with value: 0.7790968070911515 and parameters: {'learning_rate': 0.0018591820902866042, 'weight_decay': 0.002, 'warmup_steps': 27, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}. Best is trial 5 with value: 0.7790968070911515.


Trial 6 with params: {'learning_rate': 0.0008204643365323959, 'weight_decay': 0.001, 'warmup_steps': 3, 'lambda_param': 1.0, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8339,0.646435,0.762603,0.501524,0.453487,0.46447
2,0.2544,0.571264,0.79835,0.641747,0.588953,0.603482
3,0.1385,0.53656,0.812099,0.741589,0.63715,0.673736
4,0.1036,0.527092,0.824931,0.803175,0.714652,0.742282
5,0.0879,0.526598,0.817599,0.794914,0.68489,0.716924
6,0.0788,0.524498,0.819432,0.792861,0.682115,0.715947
7,0.0749,0.533109,0.818515,0.806755,0.682643,0.723476
8,0.0715,0.525439,0.817599,0.806998,0.677362,0.717137
9,0.0688,0.514032,0.827681,0.815238,0.706368,0.740964
10,0.0675,0.52365,0.822181,0.798708,0.692978,0.724267


[I 2025-03-23 17:06:20,217] Trial 6 pruned. 


Trial 7 with params: {'learning_rate': 0.0020690200562805084, 'weight_decay': 0.003, 'warmup_steps': 5, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6113,0.548763,0.810266,0.716299,0.604745,0.636484
2,0.134,0.511967,0.825848,0.804448,0.709397,0.738919
3,0.0887,0.505101,0.83593,0.750644,0.690482,0.709042
4,0.0787,0.497396,0.839597,0.796828,0.72138,0.74258
5,0.0728,0.50331,0.830431,0.770632,0.707543,0.724057
6,0.069,0.497829,0.831347,0.785073,0.721433,0.740447
7,0.0666,0.490032,0.832264,0.825373,0.736954,0.766292
8,0.0652,0.492555,0.829514,0.802734,0.719374,0.743646
9,0.0645,0.483984,0.83868,0.801432,0.735717,0.753679
10,0.0642,0.488145,0.831347,0.78756,0.72087,0.738619


[I 2025-03-23 17:12:16,296] Trial 7 finished with value: 0.7420116678238454 and parameters: {'learning_rate': 0.0020690200562805084, 'weight_decay': 0.003, 'warmup_steps': 5, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}. Best is trial 5 with value: 0.7790968070911515.


Trial 8 with params: {'learning_rate': 8.770946743725407e-05, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5943,1.296216,0.505958,0.107268,0.133706,0.107423
2,1.0994,1.087478,0.59945,0.187187,0.205832,0.180319
3,0.8963,0.960065,0.649863,0.280476,0.254135,0.24033
4,0.7595,0.888417,0.67736,0.291939,0.286837,0.279575
5,0.6649,0.84357,0.687443,0.331136,0.303993,0.30033
6,0.5963,0.808202,0.703025,0.342506,0.322878,0.319919
7,0.5421,0.787252,0.724106,0.399663,0.352374,0.355639
8,0.5005,0.7706,0.718607,0.375035,0.350439,0.350792
9,0.467,0.759031,0.729606,0.412731,0.376884,0.380884
10,0.4403,0.75486,0.72594,0.444491,0.382694,0.39445


[I 2025-03-23 17:15:59,363] Trial 8 pruned. 


Trial 9 with params: {'learning_rate': 0.0010568529720322872, 'weight_decay': 0.003, 'warmup_steps': 28, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7815,0.621158,0.774519,0.547685,0.486232,0.500938
2,0.202,0.550704,0.813932,0.710498,0.644562,0.661248
3,0.1154,0.520257,0.822181,0.798876,0.681462,0.71822
4,0.0899,0.523049,0.820348,0.805397,0.709836,0.739874
5,0.079,0.513601,0.831347,0.822377,0.72123,0.753959
6,0.0738,0.51835,0.824931,0.810681,0.724927,0.754707
7,0.0709,0.512984,0.832264,0.832941,0.739976,0.769684
8,0.0682,0.502448,0.827681,0.826955,0.724192,0.759903
9,0.0658,0.503155,0.834097,0.824785,0.742826,0.770521
10,0.0648,0.505479,0.829514,0.816082,0.742607,0.767123


[I 2025-03-23 17:21:49,478] Trial 9 finished with value: 0.7634195823039167 and parameters: {'learning_rate': 0.0010568529720322872, 'weight_decay': 0.003, 'warmup_steps': 28, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}. Best is trial 5 with value: 0.7790968070911515.


Trial 10 with params: {'learning_rate': 0.003553256925699131, 'weight_decay': 0.003, 'warmup_steps': 32, 'lambda_param': 0.1, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5506,0.519786,0.831347,0.777531,0.694679,0.722755
2,0.114,0.501934,0.834097,0.816558,0.716796,0.751187
3,0.0817,0.482163,0.84143,0.807756,0.72936,0.752075
4,0.0733,0.468844,0.845096,0.8366,0.74746,0.777335
5,0.0706,0.510066,0.835014,0.828946,0.736352,0.768346
6,0.0691,0.463933,0.846929,0.842223,0.739656,0.773179
7,0.066,0.462596,0.84418,0.828764,0.733796,0.766472
8,0.0657,0.499954,0.83868,0.821419,0.750525,0.772339
9,0.0656,0.468119,0.849679,0.855428,0.756729,0.790819
10,0.0636,0.474391,0.83868,0.829847,0.742194,0.771894


[I 2025-03-23 17:27:51,400] Trial 10 finished with value: 0.7760820273287785 and parameters: {'learning_rate': 0.003553256925699131, 'weight_decay': 0.003, 'warmup_steps': 32, 'lambda_param': 0.1, 'temperature': 2.0}. Best is trial 5 with value: 0.7790968070911515.


Trial 11 with params: {'learning_rate': 0.0036979694616670403, 'weight_decay': 0.006, 'warmup_steps': 46, 'lambda_param': 0.1, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5508,0.508034,0.824015,0.771134,0.67722,0.708772
2,0.1123,0.492475,0.832264,0.808095,0.722917,0.746648
3,0.0827,0.485128,0.842346,0.833989,0.736789,0.768039
4,0.0731,0.487633,0.835014,0.817223,0.735561,0.759077
5,0.0718,0.507236,0.830431,0.820983,0.732176,0.75839
6,0.0681,0.49299,0.83868,0.835795,0.745545,0.772719
7,0.0666,0.497746,0.831347,0.841367,0.736817,0.77082
8,0.065,0.493591,0.840513,0.851689,0.749978,0.783204
9,0.0647,0.482725,0.846929,0.847822,0.75246,0.78353
10,0.066,0.482741,0.846929,0.837129,0.750138,0.777968


[I 2025-03-23 17:33:54,732] Trial 11 finished with value: 0.7947738310704299 and parameters: {'learning_rate': 0.0036979694616670403, 'weight_decay': 0.006, 'warmup_steps': 46, 'lambda_param': 0.1, 'temperature': 2.5}. Best is trial 11 with value: 0.7947738310704299.


Trial 12 with params: {'learning_rate': 0.0025830086627210576, 'weight_decay': 0.008, 'warmup_steps': 39, 'lambda_param': 0.30000000000000004, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6047,0.528611,0.823098,0.728778,0.649412,0.671675
2,0.1233,0.495207,0.828598,0.825044,0.734323,0.76099
3,0.0848,0.502795,0.83868,0.803289,0.740288,0.759091
4,0.0754,0.500766,0.834097,0.794615,0.733365,0.75232
5,0.0718,0.470603,0.845096,0.841653,0.747929,0.774755
6,0.0687,0.471171,0.83593,0.815468,0.736854,0.758967
7,0.0678,0.489693,0.831347,0.813426,0.728753,0.756821
8,0.0646,0.492719,0.83593,0.825615,0.748628,0.770815
9,0.0638,0.481865,0.84143,0.808886,0.749467,0.76414
10,0.0626,0.486429,0.83868,0.822446,0.749013,0.769559


[I 2025-03-23 17:39:53,995] Trial 12 finished with value: 0.7696883634411662 and parameters: {'learning_rate': 0.0025830086627210576, 'weight_decay': 0.008, 'warmup_steps': 39, 'lambda_param': 0.30000000000000004, 'temperature': 3.5}. Best is trial 11 with value: 0.7947738310704299.


Trial 13 with params: {'learning_rate': 0.0017081697191730389, 'weight_decay': 0.0, 'warmup_steps': 53, 'lambda_param': 0.30000000000000004, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7032,0.583073,0.797434,0.592088,0.563765,0.565209
2,0.1461,0.528426,0.825848,0.769503,0.700784,0.72284
3,0.0926,0.504541,0.830431,0.778187,0.719065,0.73698
4,0.0777,0.512853,0.826764,0.809041,0.714821,0.744935
5,0.0715,0.507779,0.829514,0.823039,0.723157,0.75663
6,0.0682,0.499949,0.833181,0.830689,0.73991,0.767517
7,0.0659,0.495051,0.83593,0.826293,0.742316,0.769273
8,0.0649,0.509321,0.828598,0.804629,0.720421,0.744696
9,0.0661,0.519285,0.825848,0.80484,0.734418,0.755578
10,0.0636,0.497365,0.835014,0.818558,0.747143,0.768935


[I 2025-03-23 17:45:50,479] Trial 13 finished with value: 0.7671940507712304 and parameters: {'learning_rate': 0.0017081697191730389, 'weight_decay': 0.0, 'warmup_steps': 53, 'lambda_param': 0.30000000000000004, 'temperature': 4.5}. Best is trial 11 with value: 0.7947738310704299.


Trial 14 with params: {'learning_rate': 0.003022132406781635, 'weight_decay': 0.005, 'warmup_steps': 53, 'lambda_param': 0.1, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5932,0.514663,0.823098,0.746186,0.664433,0.690612
2,0.1184,0.474658,0.829514,0.792377,0.716479,0.737492
3,0.0828,0.479993,0.845096,0.782907,0.740649,0.752598
4,0.0733,0.468771,0.848763,0.830644,0.75239,0.7772
5,0.069,0.477872,0.840513,0.805026,0.752502,0.767694
6,0.0675,0.459708,0.852429,0.833535,0.752662,0.775168
7,0.0661,0.45998,0.84418,0.833422,0.74427,0.774266
8,0.0641,0.459742,0.840513,0.834273,0.747618,0.775015
9,0.0638,0.458243,0.855179,0.841282,0.762389,0.787077
10,0.0633,0.461156,0.845096,0.825963,0.75426,0.774427


[I 2025-03-23 17:51:46,451] Trial 14 finished with value: 0.7723494764696076 and parameters: {'learning_rate': 0.003022132406781635, 'weight_decay': 0.005, 'warmup_steps': 53, 'lambda_param': 0.1, 'temperature': 3.0}. Best is trial 11 with value: 0.7947738310704299.


Trial 15 with params: {'learning_rate': 0.0003965725452330662, 'weight_decay': 0.001, 'warmup_steps': 7, 'lambda_param': 0.5, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.085,0.790606,0.705775,0.321245,0.322464,0.311568
2,0.4709,0.646952,0.779102,0.49927,0.476,0.478159
3,0.2824,0.608626,0.782768,0.600135,0.530108,0.550282
4,0.1933,0.590083,0.797434,0.657377,0.595583,0.60998
5,0.1466,0.577426,0.802016,0.695413,0.618923,0.641098
6,0.1218,0.564579,0.800183,0.707079,0.628668,0.650758
7,0.1068,0.556824,0.811182,0.724265,0.650367,0.674062
8,0.0978,0.547604,0.808433,0.754905,0.64402,0.676755
9,0.0911,0.544027,0.810266,0.72782,0.666149,0.686381
10,0.0863,0.561276,0.805683,0.761856,0.666203,0.695877


[I 2025-03-23 17:55:45,483] Trial 15 pruned. 


Trial 16 with params: {'learning_rate': 0.0008659141228624079, 'weight_decay': 0.007, 'warmup_steps': 27, 'lambda_param': 0.30000000000000004, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8366,0.633969,0.765353,0.497657,0.455283,0.466768
2,0.2359,0.554693,0.813932,0.672841,0.636413,0.642552
3,0.1322,0.529487,0.823098,0.73522,0.671917,0.688554
4,0.0991,0.533683,0.820348,0.78768,0.704789,0.733664
5,0.0847,0.525317,0.822181,0.78467,0.71245,0.732431


[I 2025-03-23 17:57:46,708] Trial 16 pruned. 


Trial 17 with params: {'learning_rate': 0.004744714416072387, 'weight_decay': 0.001, 'warmup_steps': 14, 'lambda_param': 0.7000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5032,0.49339,0.829514,0.746889,0.674988,0.69507
2,0.1088,0.472133,0.845096,0.843867,0.734868,0.770299
3,0.0824,0.462859,0.84418,0.843658,0.729404,0.766188
4,0.0748,0.471585,0.842346,0.817845,0.729355,0.756719
5,0.0748,0.476291,0.840513,0.834611,0.728378,0.763809
6,0.0719,0.43993,0.851512,0.844202,0.742291,0.773344
7,0.0684,0.446175,0.853346,0.856736,0.755917,0.788242
8,0.0659,0.44914,0.852429,0.852887,0.754456,0.785878
9,0.0658,0.44199,0.853346,0.85585,0.745821,0.781619
10,0.0641,0.442179,0.848763,0.857201,0.751026,0.781366


[I 2025-03-23 18:03:29,571] Trial 17 finished with value: 0.7868925657139707 and parameters: {'learning_rate': 0.004744714416072387, 'weight_decay': 0.001, 'warmup_steps': 14, 'lambda_param': 0.7000000000000001, 'temperature': 2.5}. Best is trial 11 with value: 0.7947738310704299.


Trial 18 with params: {'learning_rate': 0.004645734057580748, 'weight_decay': 0.002, 'warmup_steps': 19, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5065,0.503681,0.824015,0.765577,0.694704,0.716001
2,0.1102,0.473508,0.839597,0.815498,0.744673,0.761703
3,0.0833,0.480434,0.84143,0.815734,0.732824,0.760915
4,0.0748,0.469756,0.856095,0.842027,0.772501,0.792161
5,0.0703,0.462283,0.846929,0.827439,0.757195,0.776847
6,0.0694,0.466037,0.840513,0.817744,0.740256,0.76504
7,0.0707,0.475788,0.842346,0.831687,0.741576,0.769951
8,0.0692,0.485563,0.840513,0.841325,0.759675,0.785847
9,0.0658,0.479391,0.846013,0.841645,0.753019,0.781719
10,0.0645,0.465587,0.84418,0.844619,0.75723,0.785739


[I 2025-03-23 18:09:26,497] Trial 18 finished with value: 0.7908020795634835 and parameters: {'learning_rate': 0.004645734057580748, 'weight_decay': 0.002, 'warmup_steps': 19, 'lambda_param': 1.0, 'temperature': 4.0}. Best is trial 11 with value: 0.7947738310704299.


Trial 19 with params: {'learning_rate': 0.0025745723287984657, 'weight_decay': 0.001, 'warmup_steps': 27, 'lambda_param': 1.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5982,0.545346,0.816682,0.757342,0.647199,0.681584
2,0.1226,0.49583,0.839597,0.82374,0.725613,0.757275
3,0.085,0.475119,0.839597,0.840234,0.734937,0.768507
4,0.0762,0.485209,0.84418,0.813521,0.739601,0.762235
5,0.07,0.471385,0.843263,0.796404,0.739281,0.753214
6,0.0676,0.475543,0.846013,0.805446,0.745047,0.76336
7,0.0664,0.471507,0.842346,0.790796,0.735529,0.751489
8,0.0664,0.504915,0.832264,0.791891,0.722795,0.739567
9,0.0659,0.476273,0.84418,0.806024,0.738,0.754982
10,0.0647,0.463243,0.849679,0.826637,0.757639,0.776454


[I 2025-03-23 18:15:05,216] Trial 19 finished with value: 0.770077653277765 and parameters: {'learning_rate': 0.0025745723287984657, 'weight_decay': 0.001, 'warmup_steps': 27, 'lambda_param': 1.0, 'temperature': 5.5}. Best is trial 11 with value: 0.7947738310704299.


Trial 20 with params: {'learning_rate': 0.002151030638055308, 'weight_decay': 0.005, 'warmup_steps': 24, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.624,0.55841,0.809349,0.6955,0.608004,0.633181
2,0.1321,0.508995,0.828598,0.809405,0.714159,0.741768
3,0.0881,0.495287,0.829514,0.781827,0.698564,0.72604
4,0.0752,0.489712,0.83593,0.77515,0.697585,0.722955
5,0.0709,0.467847,0.839597,0.78772,0.713493,0.738871
6,0.0677,0.473821,0.83868,0.825444,0.727927,0.756123
7,0.0659,0.471777,0.842346,0.801371,0.736226,0.755295
8,0.0661,0.480169,0.835014,0.809163,0.734189,0.758782
9,0.0661,0.489524,0.840513,0.812701,0.73259,0.756831
10,0.0643,0.485601,0.836847,0.812172,0.726021,0.754954


[I 2025-03-23 18:18:57,433] Trial 20 pruned. 


Trial 21 with params: {'learning_rate': 0.002693993764698559, 'weight_decay': 0.0, 'warmup_steps': 10, 'lambda_param': 0.9, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5653,0.52294,0.830431,0.74911,0.684561,0.703876
2,0.1197,0.488317,0.833181,0.823465,0.71689,0.752566
3,0.0836,0.500836,0.836847,0.791146,0.730562,0.749134
4,0.0746,0.483134,0.842346,0.844066,0.732792,0.76581
5,0.0718,0.502396,0.834097,0.809029,0.723553,0.748364


[I 2025-03-23 18:20:55,770] Trial 21 pruned. 


Trial 22 with params: {'learning_rate': 0.004274099104009217, 'weight_decay': 0.006, 'warmup_steps': 48, 'lambda_param': 0.4, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5325,0.50022,0.829514,0.771081,0.682244,0.710398
2,0.1109,0.448991,0.846929,0.821641,0.737191,0.766003
3,0.0809,0.453252,0.849679,0.812503,0.744694,0.761581
4,0.0741,0.455176,0.848763,0.827954,0.757966,0.777661
5,0.0716,0.470499,0.846929,0.814746,0.73995,0.761349
6,0.0722,0.466657,0.845096,0.827151,0.747836,0.770931
7,0.0725,0.466913,0.851512,0.840226,0.749938,0.777728
8,0.0668,0.474064,0.843263,0.822147,0.740786,0.761884
9,0.0653,0.454085,0.849679,0.833393,0.751066,0.775442
10,0.0642,0.45615,0.849679,0.837967,0.749953,0.775302


[I 2025-03-23 18:26:55,422] Trial 22 finished with value: 0.7760442892365707 and parameters: {'learning_rate': 0.004274099104009217, 'weight_decay': 0.006, 'warmup_steps': 48, 'lambda_param': 0.4, 'temperature': 2.0}. Best is trial 11 with value: 0.7947738310704299.


Trial 23 with params: {'learning_rate': 0.0034216601612476194, 'weight_decay': 0.003, 'warmup_steps': 15, 'lambda_param': 0.9, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5385,0.511499,0.824015,0.762404,0.667354,0.699328
2,0.1132,0.473792,0.839597,0.832314,0.735528,0.764406
3,0.0818,0.478414,0.840513,0.805011,0.729109,0.750424
4,0.0747,0.469817,0.846929,0.839588,0.744387,0.773188
5,0.0711,0.492608,0.835014,0.788371,0.74287,0.752478
6,0.068,0.478849,0.843263,0.843105,0.75634,0.782263
7,0.067,0.477547,0.84143,0.836772,0.743988,0.772743
8,0.066,0.494046,0.832264,0.804028,0.730786,0.752155
9,0.065,0.476981,0.843263,0.829048,0.744651,0.769344
10,0.0633,0.477863,0.84418,0.831901,0.741975,0.769416


[I 2025-03-23 18:30:51,412] Trial 23 pruned. 


Trial 24 with params: {'learning_rate': 0.004313782480766188, 'weight_decay': 0.003, 'warmup_steps': 15, 'lambda_param': 0.4, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5126,0.511948,0.827681,0.761701,0.672223,0.699799
2,0.1091,0.492573,0.84143,0.82385,0.723739,0.752397
3,0.0824,0.473364,0.840513,0.806928,0.719169,0.745196
4,0.0741,0.486389,0.845096,0.831448,0.735479,0.763159
5,0.0716,0.463907,0.846013,0.852223,0.741431,0.773012
6,0.0691,0.476283,0.840513,0.838997,0.740607,0.766358
7,0.0681,0.472262,0.850596,0.86623,0.746136,0.782374
8,0.0656,0.478712,0.843263,0.85369,0.736058,0.770463
9,0.0653,0.47884,0.839597,0.860137,0.733274,0.770307
10,0.0662,0.462208,0.845096,0.855143,0.740317,0.775667


[I 2025-03-23 18:36:54,660] Trial 24 finished with value: 0.7736751378169799 and parameters: {'learning_rate': 0.004313782480766188, 'weight_decay': 0.003, 'warmup_steps': 15, 'lambda_param': 0.4, 'temperature': 3.0}. Best is trial 11 with value: 0.7947738310704299.


Trial 25 with params: {'learning_rate': 0.0009546626473434354, 'weight_decay': 0.005, 'warmup_steps': 47, 'lambda_param': 0.1, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8398,0.629469,0.768103,0.512401,0.468971,0.480875
2,0.2213,0.566547,0.796517,0.655606,0.590418,0.608188
3,0.1234,0.515012,0.820348,0.742544,0.671579,0.691453
4,0.0945,0.515921,0.820348,0.819196,0.700645,0.737104
5,0.0824,0.507784,0.823098,0.766268,0.697437,0.71729


[I 2025-03-23 18:38:46,479] Trial 25 pruned. 


Trial 26 with params: {'learning_rate': 9.951368192159822e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 12, 'lambda_param': 0.4, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5618,1.254707,0.517874,0.131972,0.140583,0.11623
2,1.0446,1.034802,0.623281,0.257443,0.224218,0.206188
3,0.8316,0.913923,0.665445,0.305829,0.272593,0.262806
4,0.6981,0.850164,0.696609,0.354466,0.310167,0.307916
5,0.6041,0.807022,0.706691,0.354187,0.32318,0.320665
6,0.5386,0.774503,0.713107,0.372953,0.340296,0.341368
7,0.4854,0.754626,0.736939,0.440479,0.378791,0.385253
8,0.4455,0.738489,0.739688,0.466651,0.390868,0.403045
9,0.4137,0.731011,0.753437,0.502063,0.417796,0.437819
10,0.3878,0.726884,0.752521,0.510955,0.429838,0.45232


[I 2025-03-23 18:42:32,156] Trial 26 pruned. 


Trial 27 with params: {'learning_rate': 0.0044758354571493965, 'weight_decay': 0.007, 'warmup_steps': 29, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5124,0.486442,0.84143,0.793268,0.69435,0.724059
2,0.1112,0.462855,0.846013,0.839605,0.733273,0.764557
3,0.0834,0.468234,0.845096,0.818509,0.74615,0.763121
4,0.0739,0.473952,0.851512,0.833006,0.735866,0.764002
5,0.0715,0.467488,0.851512,0.821577,0.762569,0.78014
6,0.0703,0.479727,0.851512,0.841942,0.751428,0.77727
7,0.0685,0.480554,0.846013,0.844666,0.738766,0.768809
8,0.0653,0.469029,0.843263,0.824967,0.737207,0.761592
9,0.0642,0.454704,0.851512,0.85373,0.758415,0.788576
10,0.0636,0.456271,0.851512,0.832395,0.756797,0.778801


[I 2025-03-23 18:48:39,181] Trial 27 finished with value: 0.7868572940282402 and parameters: {'learning_rate': 0.0044758354571493965, 'weight_decay': 0.007, 'warmup_steps': 29, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 11 with value: 0.7947738310704299.


Trial 28 with params: {'learning_rate': 0.0021332502049505155, 'weight_decay': 0.002, 'warmup_steps': 23, 'lambda_param': 0.8, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6269,0.556982,0.813016,0.709156,0.623039,0.645027
2,0.1323,0.504451,0.828598,0.814793,0.724106,0.753677
3,0.088,0.498419,0.835014,0.779345,0.70659,0.730322
4,0.0756,0.502069,0.831347,0.766317,0.697958,0.719301
5,0.0716,0.501584,0.830431,0.810479,0.722301,0.7489


[I 2025-03-23 18:50:41,425] Trial 28 pruned. 


Trial 29 with params: {'learning_rate': 0.004465858399905994, 'weight_decay': 0.002, 'warmup_steps': 49, 'lambda_param': 0.9, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5324,0.479893,0.846013,0.786914,0.699877,0.725962
2,0.1101,0.478811,0.839597,0.835164,0.723597,0.759662
3,0.082,0.446517,0.865261,0.856824,0.757062,0.788707
4,0.075,0.476903,0.846013,0.837828,0.740592,0.772632
5,0.0716,0.457671,0.848763,0.834012,0.739169,0.766291
6,0.0673,0.453534,0.849679,0.845696,0.735599,0.7697
7,0.066,0.453747,0.855179,0.851995,0.756996,0.785821
8,0.0673,0.480195,0.852429,0.8505,0.753421,0.780987
9,0.0673,0.475521,0.836847,0.824522,0.731413,0.757457
10,0.0648,0.465174,0.849679,0.81306,0.756827,0.769437


[I 2025-03-23 18:56:21,871] Trial 29 finished with value: 0.7869826633227364 and parameters: {'learning_rate': 0.004465858399905994, 'weight_decay': 0.002, 'warmup_steps': 49, 'lambda_param': 0.9, 'temperature': 2.0}. Best is trial 11 with value: 0.7947738310704299.


Trial 30 with params: {'learning_rate': 0.0029791687652579685, 'weight_decay': 0.002, 'warmup_steps': 51, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5926,0.527988,0.815765,0.7289,0.662106,0.68017
2,0.1187,0.488102,0.839597,0.785086,0.724547,0.741006
3,0.0839,0.479712,0.835014,0.810455,0.745125,0.764674
4,0.0751,0.483092,0.839597,0.827915,0.744811,0.769142
5,0.0705,0.490866,0.84143,0.820575,0.750502,0.771511
6,0.068,0.472601,0.840513,0.829172,0.742809,0.770009
7,0.0661,0.474135,0.845096,0.833914,0.745686,0.773181
8,0.0648,0.47259,0.847846,0.83176,0.753143,0.777696
9,0.0641,0.468119,0.84418,0.825906,0.745354,0.769629
10,0.0632,0.470001,0.84143,0.840352,0.746825,0.775157


[I 2025-03-23 19:02:07,912] Trial 30 finished with value: 0.778851444091005 and parameters: {'learning_rate': 0.0029791687652579685, 'weight_decay': 0.002, 'warmup_steps': 51, 'lambda_param': 1.0, 'temperature': 3.0}. Best is trial 11 with value: 0.7947738310704299.


Trial 31 with params: {'learning_rate': 0.004713299459621647, 'weight_decay': 0.004, 'warmup_steps': 49, 'lambda_param': 0.7000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5262,0.504519,0.835014,0.799473,0.698363,0.729508
2,0.1105,0.451064,0.847846,0.830368,0.729446,0.75797
3,0.0821,0.482076,0.854262,0.849429,0.756763,0.787489
4,0.0735,0.46844,0.848763,0.830433,0.741143,0.769279
5,0.0717,0.466514,0.849679,0.817154,0.757997,0.771355
6,0.0727,0.464814,0.848763,0.852965,0.748084,0.781708
7,0.0712,0.479723,0.83593,0.830129,0.742852,0.767578
8,0.0684,0.468984,0.847846,0.828569,0.758292,0.778741
9,0.0652,0.465263,0.850596,0.830535,0.765846,0.783614
10,0.0636,0.463493,0.852429,0.835602,0.759469,0.781814


[I 2025-03-23 19:08:12,357] Trial 31 finished with value: 0.7784858773473884 and parameters: {'learning_rate': 0.004713299459621647, 'weight_decay': 0.004, 'warmup_steps': 49, 'lambda_param': 0.7000000000000001, 'temperature': 2.0}. Best is trial 11 with value: 0.7947738310704299.


Trial 32 with params: {'learning_rate': 0.0036581606870925974, 'weight_decay': 0.0, 'warmup_steps': 45, 'lambda_param': 0.8, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5522,0.51816,0.824015,0.752362,0.675798,0.697709
2,0.1128,0.469566,0.845096,0.832889,0.753429,0.776783
3,0.0822,0.474385,0.840513,0.790954,0.739464,0.754024
4,0.0744,0.468609,0.846013,0.809025,0.737265,0.75962
5,0.0699,0.477736,0.840513,0.809022,0.74134,0.76084
6,0.069,0.498863,0.832264,0.784694,0.704024,0.73035
7,0.0691,0.47709,0.845096,0.794516,0.738136,0.756155
8,0.0651,0.47749,0.842346,0.807748,0.742263,0.762072
9,0.0638,0.472512,0.849679,0.81043,0.746718,0.766724
10,0.063,0.48828,0.842346,0.81772,0.738591,0.763576


[I 2025-03-23 19:12:08,785] Trial 32 pruned. 


Trial 33 with params: {'learning_rate': 0.003991246290620648, 'weight_decay': 0.003, 'warmup_steps': 40, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5422,0.49686,0.831347,0.779444,0.706688,0.728208
2,0.112,0.474562,0.842346,0.804167,0.742828,0.757429
3,0.0816,0.493389,0.843263,0.802477,0.746972,0.760024
4,0.0731,0.490122,0.84418,0.834359,0.752324,0.776466
5,0.0692,0.480003,0.845096,0.830433,0.752356,0.771581
6,0.0675,0.46432,0.848763,0.842731,0.752406,0.780772
7,0.0684,0.498923,0.84143,0.839833,0.74751,0.774304
8,0.0717,0.505683,0.843263,0.841105,0.744876,0.773991
9,0.0644,0.487315,0.84418,0.844429,0.755438,0.781851
10,0.063,0.480069,0.843263,0.85182,0.749875,0.7811


[I 2025-03-23 19:18:03,714] Trial 33 finished with value: 0.7906992303171952 and parameters: {'learning_rate': 0.003991246290620648, 'weight_decay': 0.003, 'warmup_steps': 40, 'lambda_param': 1.0, 'temperature': 2.0}. Best is trial 11 with value: 0.7947738310704299.


Trial 34 with params: {'learning_rate': 0.004627885903099892, 'weight_decay': 0.004, 'warmup_steps': 37, 'lambda_param': 0.9, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5241,0.492078,0.835014,0.770543,0.71578,0.730422
2,0.1084,0.480466,0.847846,0.844526,0.760722,0.785597
3,0.0825,0.476196,0.845096,0.832664,0.746452,0.773186
4,0.0755,0.465033,0.849679,0.849713,0.741721,0.779099
5,0.0711,0.45321,0.858845,0.843222,0.777469,0.79603
6,0.0693,0.463009,0.853346,0.837034,0.763345,0.783395
7,0.0683,0.464769,0.842346,0.834003,0.740652,0.769145
8,0.0667,0.482449,0.845096,0.826893,0.749554,0.772731
9,0.0657,0.47652,0.845096,0.842279,0.75717,0.784439
10,0.0641,0.452089,0.857929,0.847973,0.773758,0.795644


[I 2025-03-23 19:24:01,794] Trial 34 finished with value: 0.7974771651115893 and parameters: {'learning_rate': 0.004627885903099892, 'weight_decay': 0.004, 'warmup_steps': 37, 'lambda_param': 0.9, 'temperature': 2.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 35 with params: {'learning_rate': 0.0036922018584183047, 'weight_decay': 0.009000000000000001, 'warmup_steps': 52, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5625,0.53376,0.820348,0.774623,0.668098,0.701525
2,0.1133,0.472152,0.843263,0.824154,0.738715,0.765215
3,0.0824,0.507453,0.833181,0.813034,0.721838,0.746982
4,0.0749,0.491502,0.830431,0.821363,0.741031,0.763559
5,0.0718,0.482899,0.837764,0.831542,0.735065,0.764055
6,0.0681,0.466449,0.849679,0.846801,0.743437,0.774044
7,0.0676,0.481321,0.84143,0.851065,0.737376,0.774104
8,0.0663,0.475497,0.845096,0.832401,0.74117,0.769189
9,0.0646,0.460711,0.843263,0.82565,0.742483,0.766725
10,0.0644,0.471822,0.84143,0.84361,0.73618,0.768265


[I 2025-03-23 19:27:45,232] Trial 35 pruned. 


Trial 36 with params: {'learning_rate': 0.003428912761761403, 'weight_decay': 0.006, 'warmup_steps': 37, 'lambda_param': 0.9, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5566,0.501245,0.832264,0.746764,0.659118,0.688142
2,0.1131,0.488361,0.84418,0.834592,0.730029,0.762626
3,0.0816,0.483804,0.842346,0.818752,0.727156,0.757488
4,0.0749,0.467608,0.846013,0.853832,0.738676,0.776047
5,0.0705,0.463622,0.852429,0.825316,0.758992,0.778539
6,0.0681,0.479537,0.840513,0.81349,0.730321,0.755852
7,0.0689,0.478672,0.846929,0.817802,0.743287,0.768604
8,0.0679,0.468763,0.846013,0.813192,0.742287,0.766101
9,0.0641,0.450365,0.853346,0.809097,0.746664,0.766102
10,0.0628,0.453,0.854262,0.823064,0.745922,0.769831


[I 2025-03-23 19:33:37,322] Trial 36 finished with value: 0.7863641848277774 and parameters: {'learning_rate': 0.003428912761761403, 'weight_decay': 0.006, 'warmup_steps': 37, 'lambda_param': 0.9, 'temperature': 2.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 37 with params: {'learning_rate': 0.0014498345168343387, 'weight_decay': 0.003, 'warmup_steps': 35, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7192,0.585863,0.791934,0.572219,0.543164,0.549604
2,0.1614,0.548119,0.815765,0.731427,0.673093,0.691946
3,0.0984,0.515061,0.832264,0.801916,0.721599,0.744717
4,0.0815,0.526168,0.822181,0.795002,0.689133,0.722676
5,0.0744,0.518323,0.824015,0.806631,0.701432,0.736747


[I 2025-03-23 19:35:32,777] Trial 37 pruned. 


Trial 38 with params: {'learning_rate': 0.004576270073895496, 'weight_decay': 0.003, 'warmup_steps': 29, 'lambda_param': 1.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5104,0.493876,0.840513,0.796506,0.713083,0.73704
2,0.1107,0.457147,0.842346,0.82277,0.732769,0.76205
3,0.082,0.467058,0.848763,0.807038,0.745671,0.7625
4,0.0746,0.463002,0.847846,0.833547,0.741682,0.773188
5,0.0728,0.490157,0.837764,0.814225,0.737252,0.761817
6,0.0724,0.460432,0.857012,0.854197,0.760943,0.789765
7,0.0685,0.461312,0.854262,0.834548,0.755351,0.781009
8,0.0658,0.47147,0.843263,0.846628,0.756211,0.78583
9,0.0646,0.460353,0.850596,0.851776,0.768415,0.794991
10,0.0637,0.468002,0.850596,0.835139,0.757103,0.781351


[I 2025-03-23 19:41:17,307] Trial 38 finished with value: 0.7784627466593542 and parameters: {'learning_rate': 0.004576270073895496, 'weight_decay': 0.003, 'warmup_steps': 29, 'lambda_param': 1.0, 'temperature': 2.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 39 with params: {'learning_rate': 0.001395039612162253, 'weight_decay': 0.001, 'warmup_steps': 25, 'lambda_param': 0.2, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7195,0.5805,0.791934,0.577541,0.526684,0.536323
2,0.1659,0.549045,0.810266,0.732327,0.669263,0.688051
3,0.0996,0.538923,0.819432,0.800335,0.715406,0.746276
4,0.0822,0.526511,0.821265,0.816862,0.711507,0.747795
5,0.0728,0.522988,0.829514,0.826888,0.716089,0.755413
6,0.07,0.510375,0.827681,0.816445,0.718434,0.749503
7,0.0697,0.532979,0.820348,0.817358,0.719496,0.750828
8,0.0668,0.511022,0.820348,0.816112,0.715227,0.748372
9,0.0647,0.50396,0.828598,0.812516,0.716103,0.747791
10,0.0642,0.5052,0.831347,0.821497,0.723544,0.754479


[I 2025-03-23 19:45:16,888] Trial 39 pruned. 


Trial 40 with params: {'learning_rate': 0.002162197530671439, 'weight_decay': 0.005, 'warmup_steps': 40, 'lambda_param': 0.1, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6456,0.561459,0.811182,0.691646,0.59722,0.624376
2,0.132,0.520763,0.827681,0.793244,0.719935,0.740311
3,0.0862,0.489397,0.833181,0.81549,0.727021,0.755168
4,0.0756,0.487199,0.836847,0.829702,0.736734,0.764761
5,0.0702,0.490571,0.839597,0.817263,0.74063,0.766084
6,0.0682,0.501434,0.83593,0.831502,0.738714,0.765601
7,0.0672,0.493974,0.834097,0.829681,0.740501,0.771434
8,0.067,0.507643,0.824931,0.817531,0.728447,0.759347
9,0.0643,0.48311,0.832264,0.819478,0.731471,0.759606
10,0.0627,0.483708,0.834097,0.834206,0.749771,0.777107


[I 2025-03-23 19:51:02,226] Trial 40 finished with value: 0.7755883561077309 and parameters: {'learning_rate': 0.002162197530671439, 'weight_decay': 0.005, 'warmup_steps': 40, 'lambda_param': 0.1, 'temperature': 2.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 41 with params: {'learning_rate': 0.0018867347495657375, 'weight_decay': 0.005, 'warmup_steps': 52, 'lambda_param': 1.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.674,0.55458,0.809349,0.702765,0.608099,0.633649
2,0.1385,0.517307,0.828598,0.807471,0.720841,0.747566
3,0.0904,0.490042,0.825848,0.788712,0.72325,0.742657
4,0.077,0.500685,0.834097,0.81961,0.724401,0.753477
5,0.0715,0.495745,0.83593,0.790519,0.72513,0.746077
6,0.0689,0.482744,0.837764,0.804113,0.729505,0.7517
7,0.068,0.504515,0.835014,0.814948,0.720924,0.752549
8,0.0653,0.485014,0.83868,0.803337,0.737334,0.756896
9,0.0635,0.51986,0.829514,0.797056,0.739526,0.755988
10,0.0639,0.479854,0.83868,0.831038,0.734402,0.765702


[I 2025-03-23 19:54:51,354] Trial 41 pruned. 


Trial 42 with params: {'learning_rate': 0.003024674930799064, 'weight_decay': 0.001, 'warmup_steps': 44, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5767,0.522522,0.828598,0.768856,0.679563,0.705663
2,0.1153,0.486163,0.831347,0.813334,0.744526,0.766804
3,0.0822,0.48786,0.83868,0.825449,0.754579,0.774404
4,0.0747,0.477686,0.843263,0.830491,0.750095,0.774282
5,0.0705,0.47026,0.846929,0.844197,0.752606,0.784489
6,0.0681,0.46339,0.848763,0.83966,0.751856,0.777975
7,0.0682,0.473263,0.84418,0.835979,0.750697,0.778682
8,0.065,0.464737,0.847846,0.841325,0.766032,0.789713
9,0.0635,0.47086,0.845096,0.830888,0.764679,0.784274
10,0.0635,0.467336,0.849679,0.826642,0.760671,0.781435


[I 2025-03-23 20:00:27,147] Trial 42 finished with value: 0.7943249540580402 and parameters: {'learning_rate': 0.003024674930799064, 'weight_decay': 0.001, 'warmup_steps': 44, 'lambda_param': 1.0, 'temperature': 2.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 43 with params: {'learning_rate': 0.004010284628691219, 'weight_decay': 0.008, 'warmup_steps': 10, 'lambda_param': 0.30000000000000004, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5056,0.503572,0.831347,0.769367,0.686395,0.715543
2,0.1104,0.484978,0.846013,0.839645,0.734318,0.767442
3,0.0819,0.483234,0.83593,0.832116,0.74007,0.770606
4,0.0741,0.483036,0.842346,0.84397,0.751309,0.777476
5,0.0704,0.474077,0.842346,0.832875,0.758139,0.781492
6,0.0675,0.472838,0.842346,0.843369,0.739052,0.772154
7,0.0661,0.478356,0.843263,0.835964,0.744618,0.770671
8,0.0685,0.502614,0.839597,0.816081,0.735431,0.758552
9,0.0694,0.492324,0.843263,0.853687,0.742262,0.779138
10,0.0645,0.483305,0.836847,0.843926,0.744066,0.77562


[I 2025-03-23 20:06:14,675] Trial 43 finished with value: 0.7832884555226832 and parameters: {'learning_rate': 0.004010284628691219, 'weight_decay': 0.008, 'warmup_steps': 10, 'lambda_param': 0.30000000000000004, 'temperature': 6.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 44 with params: {'learning_rate': 0.004702225101762641, 'weight_decay': 0.001, 'warmup_steps': 23, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5103,0.500713,0.832264,0.775826,0.678956,0.711138
2,0.1107,0.469085,0.84418,0.832867,0.749743,0.777948
3,0.083,0.474532,0.847846,0.845944,0.756779,0.78372
4,0.075,0.476216,0.84143,0.847359,0.753418,0.783008
5,0.0737,0.486001,0.843263,0.838942,0.751164,0.779481
6,0.0692,0.47716,0.851512,0.857728,0.752404,0.785038
7,0.0683,0.478417,0.835014,0.84597,0.74363,0.777307
8,0.0655,0.468581,0.840513,0.844423,0.754119,0.78395
9,0.0673,0.483209,0.840513,0.828394,0.741921,0.76773
10,0.0651,0.48153,0.839597,0.845263,0.739712,0.771642


[I 2025-03-23 20:10:17,642] Trial 44 pruned. 


Trial 45 with params: {'learning_rate': 0.004141356364609583, 'weight_decay': 0.002, 'warmup_steps': 37, 'lambda_param': 0.9, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.535,0.511361,0.834097,0.775169,0.698243,0.722931
2,0.1113,0.471835,0.831347,0.829856,0.730352,0.762271
3,0.0814,0.46021,0.847846,0.826342,0.744435,0.768285
4,0.073,0.449906,0.851512,0.84288,0.761992,0.788025
5,0.071,0.488346,0.850596,0.852082,0.755162,0.786091
6,0.0705,0.479054,0.83593,0.818762,0.716593,0.749468
7,0.0674,0.464833,0.845096,0.845053,0.754347,0.78106
8,0.0661,0.475438,0.84143,0.832831,0.761478,0.78383
9,0.0641,0.467214,0.842346,0.841329,0.761777,0.786746
10,0.063,0.470764,0.84418,0.815811,0.741993,0.762278


[I 2025-03-23 20:14:03,337] Trial 45 pruned. 


Trial 46 with params: {'learning_rate': 0.00035209578167894637, 'weight_decay': 0.01, 'warmup_steps': 38, 'lambda_param': 0.8, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1395,0.819403,0.701192,0.314224,0.315481,0.304062
2,0.5031,0.662734,0.768103,0.500326,0.46231,0.468007
3,0.3109,0.611604,0.782768,0.571563,0.516296,0.529769
4,0.2163,0.598147,0.790101,0.659116,0.583472,0.603625
5,0.1638,0.582559,0.813016,0.736311,0.640731,0.671174


[I 2025-03-23 20:15:55,774] Trial 46 pruned. 


Trial 47 with params: {'learning_rate': 0.00017209337253776082, 'weight_decay': 0.007, 'warmup_steps': 32, 'lambda_param': 0.9, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3972,1.061751,0.604033,0.191935,0.206468,0.178574
2,0.8085,0.850803,0.692942,0.310822,0.308456,0.298331
3,0.5991,0.761203,0.72044,0.382315,0.343536,0.346111
4,0.4721,0.72728,0.736939,0.485765,0.391959,0.411507
5,0.3889,0.697235,0.754354,0.513119,0.441772,0.459672
6,0.3278,0.68136,0.758937,0.49676,0.460102,0.467809
7,0.282,0.66743,0.771769,0.525654,0.490747,0.494916
8,0.2492,0.657625,0.778185,0.620297,0.548449,0.567162
9,0.2228,0.65242,0.791934,0.654745,0.578974,0.597784
10,0.2031,0.657559,0.778185,0.638742,0.55751,0.57924


[I 2025-03-23 20:19:37,030] Trial 47 pruned. 


Trial 48 with params: {'learning_rate': 0.0006731973980059859, 'weight_decay': 0.002, 'warmup_steps': 45, 'lambda_param': 0.7000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9275,0.668862,0.756187,0.460081,0.426738,0.43267
2,0.2905,0.57991,0.805683,0.656097,0.584324,0.604191
3,0.1591,0.556923,0.799267,0.741722,0.627273,0.662497
4,0.1151,0.532515,0.825848,0.783513,0.684308,0.718823
5,0.0945,0.53043,0.819432,0.770054,0.685912,0.713754
6,0.0851,0.52311,0.824931,0.77994,0.684222,0.713618
7,0.0787,0.52187,0.821265,0.779289,0.681196,0.715782
8,0.0757,0.520888,0.820348,0.783458,0.671838,0.710286
9,0.0712,0.523149,0.831347,0.807932,0.716133,0.74693
10,0.0693,0.524471,0.821265,0.775662,0.687252,0.716614


[I 2025-03-23 20:23:32,132] Trial 48 pruned. 


Trial 49 with params: {'learning_rate': 0.0045761805835038555, 'weight_decay': 0.005, 'warmup_steps': 27, 'lambda_param': 1.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5078,0.501696,0.835014,0.793978,0.682669,0.720092
2,0.1092,0.47721,0.840513,0.804765,0.706906,0.735735
3,0.0808,0.477325,0.845096,0.845803,0.746858,0.778478
4,0.0744,0.480536,0.846929,0.839753,0.744921,0.775216
5,0.0724,0.486787,0.83868,0.837612,0.745107,0.77454
6,0.071,0.495744,0.83593,0.825666,0.731163,0.762424
7,0.0691,0.474939,0.847846,0.848819,0.739215,0.774811
8,0.0664,0.472374,0.847846,0.842452,0.748247,0.780145
9,0.0648,0.473104,0.847846,0.861063,0.750662,0.788647
10,0.0637,0.470273,0.846013,0.847905,0.740791,0.776376


[I 2025-03-23 20:29:24,815] Trial 49 finished with value: 0.7910770610847294 and parameters: {'learning_rate': 0.0045761805835038555, 'weight_decay': 0.005, 'warmup_steps': 27, 'lambda_param': 1.0, 'temperature': 5.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 50 with params: {'learning_rate': 0.0021133792752108674, 'weight_decay': 0.005, 'warmup_steps': 24, 'lambda_param': 1.0, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6276,0.559476,0.809349,0.69221,0.611211,0.633947
2,0.1323,0.503577,0.827681,0.780691,0.709442,0.731312
3,0.0877,0.494794,0.834097,0.792239,0.717859,0.742813
4,0.0765,0.48496,0.829514,0.812306,0.705504,0.738995
5,0.0703,0.48313,0.83593,0.801091,0.710574,0.739972


[I 2025-03-23 20:31:15,956] Trial 50 pruned. 


Trial 51 with params: {'learning_rate': 0.004280780836425894, 'weight_decay': 0.006, 'warmup_steps': 36, 'lambda_param': 1.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5284,0.506098,0.823098,0.760279,0.686079,0.706745
2,0.1108,0.473956,0.84143,0.844491,0.738598,0.772179
3,0.0824,0.470222,0.848763,0.8356,0.755405,0.781896
4,0.074,0.468333,0.846013,0.845372,0.745962,0.779729
5,0.0712,0.471882,0.842346,0.844503,0.740249,0.774194
6,0.0684,0.463585,0.843263,0.822107,0.754017,0.774285
7,0.0666,0.467985,0.845096,0.8382,0.745043,0.774933
8,0.0661,0.483854,0.840513,0.833778,0.739513,0.76866
9,0.0652,0.458525,0.84418,0.839899,0.746435,0.77519
10,0.0669,0.464191,0.846013,0.836403,0.756014,0.781908


[I 2025-03-23 20:37:13,856] Trial 51 finished with value: 0.7817667993168841 and parameters: {'learning_rate': 0.004280780836425894, 'weight_decay': 0.006, 'warmup_steps': 36, 'lambda_param': 1.0, 'temperature': 5.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 52 with params: {'learning_rate': 0.002257071934193931, 'weight_decay': 0.005, 'warmup_steps': 29, 'lambda_param': 0.9, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6203,0.558431,0.806599,0.701613,0.604974,0.631467
2,0.1275,0.508032,0.832264,0.79304,0.733513,0.750163
3,0.0863,0.506588,0.826764,0.787455,0.719253,0.741261
4,0.0755,0.506592,0.835014,0.789594,0.725819,0.746443
5,0.0709,0.501457,0.83593,0.780885,0.730185,0.746332


[I 2025-03-23 20:39:10,700] Trial 52 pruned. 


Trial 53 with params: {'learning_rate': 0.0012926195143360029, 'weight_decay': 0.001, 'warmup_steps': 39, 'lambda_param': 1.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7512,0.604085,0.784601,0.565449,0.519565,0.531218
2,0.175,0.536196,0.818515,0.72409,0.680258,0.694112
3,0.1029,0.507345,0.830431,0.798957,0.719476,0.742904
4,0.0845,0.504183,0.829514,0.787734,0.732405,0.749191
5,0.075,0.50408,0.832264,0.801815,0.726052,0.75016
6,0.0706,0.494351,0.829514,0.783287,0.718515,0.738849
7,0.0676,0.505676,0.828598,0.818372,0.709571,0.741722
8,0.0657,0.495017,0.825848,0.816654,0.721527,0.751811
9,0.0646,0.487607,0.829514,0.785143,0.714914,0.737681
10,0.0645,0.499905,0.834097,0.774157,0.712562,0.731474


[I 2025-03-23 20:43:03,704] Trial 53 pruned. 


Trial 54 with params: {'learning_rate': 0.00018591100871980046, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.30000000000000004, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3434,1.036812,0.606783,0.190038,0.209419,0.180423
2,0.7774,0.828933,0.703941,0.338017,0.323363,0.316829
3,0.5673,0.736038,0.734189,0.387956,0.364522,0.365445


[I 2025-03-23 20:48:38,818] Trial 55 pruned. 


Trial 56 with params: {'learning_rate': 0.0001413812546509425, 'weight_decay': 0.003, 'warmup_steps': 50, 'lambda_param': 0.8, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.4779,1.132187,0.576535,0.149434,0.185932,0.156705
2,0.8961,0.909448,0.666361,0.278333,0.279143,0.268626
3,0.6789,0.812178,0.701192,0.341326,0.316282,0.313298
4,0.5485,0.768965,0.72044,0.399264,0.348416,0.352767
5,0.4609,0.735767,0.740605,0.465242,0.40502,0.416987


[I 2025-03-23 20:50:25,692] Trial 56 pruned. 


Trial 57 with params: {'learning_rate': 0.00012862788348576466, 'weight_decay': 0.003, 'warmup_steps': 45, 'lambda_param': 0.2, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5042,1.168554,0.56462,0.14619,0.175713,0.149501
2,0.9383,0.941828,0.657195,0.276802,0.262799,0.251418
3,0.7186,0.840039,0.690192,0.353296,0.304504,0.302394
4,0.5909,0.793977,0.713107,0.340103,0.332266,0.328577
5,0.5027,0.755722,0.733272,0.415382,0.37002,0.372347
6,0.438,0.728013,0.743355,0.509715,0.418782,0.435392
7,0.387,0.714302,0.752521,0.496927,0.438247,0.450955
8,0.3487,0.703076,0.75527,0.504239,0.455339,0.466048
9,0.3194,0.695513,0.765353,0.505276,0.472376,0.475007
10,0.2954,0.701841,0.756187,0.512872,0.460925,0.473736


[I 2025-03-23 20:54:05,079] Trial 57 pruned. 


Trial 58 with params: {'learning_rate': 0.002563120018954777, 'weight_decay': 0.003, 'warmup_steps': 12, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5849,0.542231,0.810266,0.665812,0.621069,0.633402
2,0.1228,0.48713,0.833181,0.794121,0.713013,0.736038
3,0.0844,0.48455,0.84418,0.81164,0.726711,0.754546
4,0.0745,0.480956,0.839597,0.806862,0.723558,0.749738
5,0.0699,0.484608,0.842346,0.810826,0.741236,0.76034
6,0.0692,0.483874,0.848763,0.818543,0.750718,0.769134
7,0.0705,0.488414,0.83593,0.817493,0.736062,0.762161
8,0.0657,0.486982,0.837764,0.834501,0.74823,0.775813
9,0.0645,0.474567,0.84418,0.840341,0.74931,0.778751
10,0.0632,0.487302,0.83868,0.814229,0.736889,0.760349


[I 2025-03-23 20:57:55,013] Trial 58 pruned. 


Trial 59 with params: {'learning_rate': 0.004550584745770884, 'weight_decay': 0.003, 'warmup_steps': 41, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5239,0.499926,0.830431,0.758975,0.695948,0.713433
2,0.1092,0.487002,0.836847,0.818296,0.722252,0.756488
3,0.0822,0.465951,0.84143,0.788194,0.744477,0.752722
4,0.0746,0.465256,0.855179,0.832007,0.763614,0.781762
5,0.0695,0.456321,0.849679,0.826999,0.768624,0.782459
6,0.0688,0.470906,0.853346,0.816742,0.754339,0.768015
7,0.0682,0.467332,0.846013,0.814407,0.741814,0.761424
8,0.0669,0.471744,0.851512,0.815711,0.759268,0.77171
9,0.0649,0.461957,0.854262,0.814056,0.767314,0.778093
10,0.0638,0.454239,0.851512,0.827335,0.75687,0.774607


[I 2025-03-23 21:03:19,925] Trial 59 finished with value: 0.7845375416454283 and parameters: {'learning_rate': 0.004550584745770884, 'weight_decay': 0.003, 'warmup_steps': 41, 'lambda_param': 1.0, 'temperature': 2.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 60 with params: {'learning_rate': 0.00017559280388301614, 'weight_decay': 0.0, 'warmup_steps': 7, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3753,1.063632,0.598533,0.16361,0.20225,0.171678
2,0.8072,0.850506,0.692026,0.312634,0.30817,0.298972
3,0.5943,0.753981,0.725023,0.38072,0.355472,0.357013
4,0.4689,0.721391,0.736939,0.472819,0.39759,0.414961
5,0.3838,0.685358,0.75802,0.495892,0.444209,0.4577
6,0.323,0.668066,0.768103,0.53295,0.471292,0.486392
7,0.2767,0.665138,0.771769,0.520882,0.483061,0.491749
8,0.2447,0.64446,0.783685,0.653556,0.551744,0.579371
9,0.2195,0.645998,0.787351,0.627604,0.559421,0.579454
10,0.1998,0.653526,0.780018,0.60678,0.546881,0.566267


[I 2025-03-23 21:07:15,579] Trial 60 pruned. 


Trial 61 with params: {'learning_rate': 0.00432172380795687, 'weight_decay': 0.004, 'warmup_steps': 53, 'lambda_param': 0.6000000000000001, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.537,0.496815,0.840513,0.798739,0.72005,0.745965
2,0.1099,0.466892,0.843263,0.834701,0.747626,0.777668
3,0.0807,0.466287,0.847846,0.825462,0.754628,0.77727
4,0.0752,0.479889,0.843263,0.815976,0.745942,0.764654
5,0.0706,0.485444,0.839597,0.815995,0.748895,0.769994
6,0.0689,0.47949,0.837764,0.820641,0.748061,0.769919
7,0.0681,0.490481,0.849679,0.834881,0.753196,0.780263
8,0.0668,0.485631,0.846013,0.842207,0.749418,0.779014
9,0.0643,0.475275,0.849679,0.833239,0.754675,0.778941
10,0.0634,0.483799,0.846929,0.808998,0.756132,0.769756


[I 2025-03-23 21:10:59,065] Trial 61 pruned. 


Trial 62 with params: {'learning_rate': 0.004929134981980148, 'weight_decay': 0.006, 'warmup_steps': 50, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5236,0.491141,0.836847,0.780899,0.679552,0.71124
2,0.1089,0.472342,0.849679,0.827059,0.755397,0.776761
3,0.082,0.475934,0.846929,0.811811,0.742146,0.762167
4,0.0742,0.491874,0.83868,0.84673,0.75446,0.782188
5,0.0721,0.468858,0.853346,0.824299,0.757821,0.77665
6,0.0686,0.466484,0.853346,0.855543,0.766781,0.794168
7,0.0712,0.494993,0.836847,0.819634,0.745375,0.767343
8,0.0681,0.471409,0.846929,0.831563,0.750811,0.776532
9,0.0654,0.468798,0.84143,0.842187,0.754006,0.781968
10,0.0639,0.465279,0.851512,0.846986,0.767709,0.792133


[I 2025-03-23 21:18:35,328] Trial 63 pruned. 


Trial 64 with params: {'learning_rate': 0.004365025302124463, 'weight_decay': 0.006, 'warmup_steps': 52, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5358,0.500322,0.834097,0.776713,0.700832,0.724015
2,0.1088,0.494286,0.830431,0.815117,0.707483,0.738615
3,0.0816,0.490241,0.83593,0.799736,0.727084,0.746025
4,0.074,0.472691,0.855179,0.848883,0.753499,0.778417
5,0.0725,0.483821,0.839597,0.826087,0.739162,0.765182
6,0.0718,0.49827,0.825848,0.825148,0.706184,0.740808
7,0.0703,0.49215,0.842346,0.836627,0.751557,0.77566
8,0.0653,0.489957,0.836847,0.82453,0.74403,0.76875
9,0.0647,0.502719,0.834097,0.834197,0.738752,0.768144
10,0.0632,0.494079,0.835014,0.828244,0.741189,0.767594


[I 2025-03-23 21:24:18,058] Trial 64 finished with value: 0.7738947849534097 and parameters: {'learning_rate': 0.004365025302124463, 'weight_decay': 0.006, 'warmup_steps': 52, 'lambda_param': 0.0, 'temperature': 2.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 65 with params: {'learning_rate': 0.0026459092782361825, 'weight_decay': 0.009000000000000001, 'warmup_steps': 26, 'lambda_param': 0.9, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5943,0.536566,0.824931,0.7373,0.66275,0.684313
2,0.122,0.49771,0.840513,0.802497,0.728354,0.7515
3,0.084,0.488859,0.839597,0.828178,0.732929,0.764846
4,0.0747,0.480031,0.842346,0.817671,0.747373,0.767426
5,0.0705,0.482273,0.846929,0.79755,0.743341,0.757715
6,0.0674,0.473383,0.845096,0.804381,0.742686,0.761347
7,0.0661,0.480106,0.84418,0.833881,0.750343,0.774124
8,0.0655,0.488102,0.846929,0.83484,0.75246,0.775238
9,0.0648,0.477044,0.849679,0.800564,0.746713,0.761571
10,0.0631,0.473149,0.855179,0.833713,0.757169,0.779589


[I 2025-03-23 21:30:06,458] Trial 65 finished with value: 0.7775325543259547 and parameters: {'learning_rate': 0.0026459092782361825, 'weight_decay': 0.009000000000000001, 'warmup_steps': 26, 'lambda_param': 0.9, 'temperature': 4.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 66 with params: {'learning_rate': 0.004853319142064979, 'weight_decay': 0.006, 'warmup_steps': 6, 'lambda_param': 1.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.492,0.507708,0.832264,0.780352,0.705142,0.727547
2,0.112,0.477597,0.83868,0.830312,0.725195,0.75969
3,0.0834,0.464561,0.850596,0.829246,0.755341,0.774806
4,0.0761,0.469691,0.846013,0.851907,0.761901,0.788482
5,0.0742,0.475821,0.846929,0.834124,0.765396,0.784387
6,0.0698,0.471536,0.84418,0.84932,0.753055,0.781502
7,0.0683,0.47254,0.843263,0.832392,0.758839,0.778942
8,0.0669,0.49061,0.846013,0.847589,0.762207,0.787501
9,0.067,0.497075,0.840513,0.839336,0.749881,0.780369
10,0.0659,0.483789,0.845096,0.855404,0.75273,0.784807


[I 2025-03-23 21:35:58,968] Trial 66 finished with value: 0.7929069389422247 and parameters: {'learning_rate': 0.004853319142064979, 'weight_decay': 0.006, 'warmup_steps': 6, 'lambda_param': 1.0, 'temperature': 5.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 67 with params: {'learning_rate': 0.004095230800167461, 'weight_decay': 0.007, 'warmup_steps': 6, 'lambda_param': 0.9, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5019,0.501698,0.836847,0.746787,0.680536,0.699735
2,0.1113,0.493885,0.832264,0.809702,0.719612,0.747027
3,0.084,0.502895,0.837764,0.800908,0.723495,0.750126
4,0.0761,0.486702,0.845096,0.841024,0.765805,0.789111
5,0.071,0.47816,0.84418,0.838693,0.750426,0.777266
6,0.0691,0.485166,0.84143,0.851977,0.760024,0.789714
7,0.0682,0.505888,0.829514,0.815331,0.730331,0.755513
8,0.0698,0.50836,0.834097,0.77273,0.734012,0.741336
9,0.0678,0.481272,0.839597,0.794709,0.731745,0.751905
10,0.0654,0.481636,0.842346,0.832631,0.744515,0.774852


[I 2025-03-23 21:41:38,372] Trial 67 finished with value: 0.7679948804601235 and parameters: {'learning_rate': 0.004095230800167461, 'weight_decay': 0.007, 'warmup_steps': 6, 'lambda_param': 0.9, 'temperature': 5.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 68 with params: {'learning_rate': 0.00045839533376088303, 'weight_decay': 0.007, 'warmup_steps': 52, 'lambda_param': 0.2, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.061,0.748693,0.727773,0.454746,0.367145,0.377178
2,0.4025,0.617492,0.789184,0.538008,0.526383,0.524418
3,0.2302,0.579306,0.791017,0.6725,0.58142,0.606506
4,0.1572,0.563637,0.792851,0.655579,0.601388,0.613684
5,0.1234,0.557612,0.804766,0.733769,0.649898,0.676473


[I 2025-03-23 21:43:28,224] Trial 68 pruned. 


Trial 69 with params: {'learning_rate': 0.000602120422227975, 'weight_decay': 0.005, 'warmup_steps': 6, 'lambda_param': 0.7000000000000001, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9322,0.695622,0.745188,0.464539,0.41195,0.418328
2,0.3202,0.601666,0.788268,0.590999,0.546564,0.553881
3,0.1768,0.568538,0.799267,0.690019,0.607564,0.632492
4,0.125,0.550904,0.805683,0.724667,0.656165,0.676481
5,0.102,0.544551,0.806599,0.788209,0.681765,0.718287
6,0.0904,0.534327,0.809349,0.77805,0.6766,0.711076
7,0.0826,0.528573,0.825848,0.786084,0.691051,0.724896
8,0.0779,0.530541,0.822181,0.789282,0.690166,0.723682
9,0.074,0.530607,0.815765,0.767581,0.680593,0.709187
10,0.0717,0.536598,0.817599,0.816935,0.712613,0.74806


[I 2025-03-23 21:47:21,536] Trial 69 pruned. 


Trial 70 with params: {'learning_rate': 0.004196604512230375, 'weight_decay': 0.005, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5118,0.498291,0.833181,0.760204,0.660003,0.693234
2,0.1105,0.466185,0.846013,0.830916,0.740263,0.768448
3,0.0827,0.485526,0.830431,0.830444,0.728797,0.756821
4,0.0743,0.457038,0.846929,0.839411,0.733752,0.768287
5,0.0708,0.479679,0.845096,0.843241,0.748447,0.776428
6,0.0699,0.471177,0.849679,0.853803,0.742425,0.775909
7,0.0706,0.485031,0.847846,0.840288,0.746465,0.773866
8,0.0674,0.473863,0.843263,0.831794,0.733782,0.763583
9,0.0653,0.469239,0.846929,0.846633,0.747306,0.779101
10,0.0639,0.465474,0.849679,0.844832,0.758294,0.784018


[I 2025-03-23 21:53:04,438] Trial 70 finished with value: 0.7796296163554214 and parameters: {'learning_rate': 0.004196604512230375, 'weight_decay': 0.005, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 5.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 71 with params: {'learning_rate': 0.004360872402637638, 'weight_decay': 0.005, 'warmup_steps': 37, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5287,0.500783,0.835014,0.719864,0.654729,0.670682
2,0.1108,0.4782,0.843263,0.839086,0.732781,0.76914
3,0.0828,0.486565,0.842346,0.834269,0.756068,0.779931
4,0.0747,0.454804,0.855179,0.847327,0.746282,0.780747
5,0.0702,0.458692,0.848763,0.832763,0.758374,0.782474
6,0.0688,0.460182,0.847846,0.841602,0.7518,0.780666
7,0.0674,0.457757,0.857012,0.853196,0.749657,0.783584
8,0.0678,0.482219,0.84418,0.836103,0.751616,0.774054
9,0.0665,0.456135,0.849679,0.848195,0.749436,0.781868
10,0.0642,0.457698,0.853346,0.860537,0.752884,0.786944


[I 2025-03-23 21:58:46,580] Trial 71 finished with value: 0.7883905987475964 and parameters: {'learning_rate': 0.004360872402637638, 'weight_decay': 0.005, 'warmup_steps': 37, 'lambda_param': 0.0, 'temperature': 3.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 72 with params: {'learning_rate': 0.002937068785971872, 'weight_decay': 0.006, 'warmup_steps': 5, 'lambda_param': 0.7000000000000001, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5378,0.532519,0.821265,0.759465,0.648904,0.684067
2,0.116,0.487601,0.835014,0.821967,0.72979,0.754967
3,0.0828,0.477508,0.832264,0.803721,0.72198,0.748615
4,0.0745,0.474682,0.846929,0.854903,0.751609,0.784956
5,0.0694,0.471307,0.842346,0.82649,0.750383,0.773608
6,0.0685,0.487159,0.835014,0.79955,0.726122,0.747079
7,0.0658,0.478018,0.839597,0.814929,0.736372,0.758966
8,0.0668,0.51349,0.831347,0.816393,0.731597,0.7564
9,0.0655,0.491973,0.836847,0.81831,0.738637,0.763135
10,0.0635,0.479701,0.84143,0.830718,0.74013,0.768729


[I 2025-03-23 22:04:23,398] Trial 72 finished with value: 0.7709958366576288 and parameters: {'learning_rate': 0.002937068785971872, 'weight_decay': 0.006, 'warmup_steps': 5, 'lambda_param': 0.7000000000000001, 'temperature': 6.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 73 with params: {'learning_rate': 0.0030600626774721397, 'weight_decay': 0.005, 'warmup_steps': 0, 'lambda_param': 1.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.534,0.524384,0.818515,0.68773,0.646942,0.655038
2,0.1185,0.48984,0.83593,0.801707,0.725942,0.752483
3,0.0846,0.470567,0.83868,0.777683,0.722045,0.740299
4,0.0752,0.482252,0.843263,0.831678,0.731864,0.765493
5,0.0706,0.454837,0.847846,0.822424,0.736926,0.765818
6,0.0682,0.451198,0.853346,0.81202,0.740591,0.765015
7,0.0683,0.480004,0.836847,0.792207,0.715399,0.741806
8,0.0696,0.480224,0.843263,0.796523,0.71355,0.741183
9,0.0648,0.468759,0.84143,0.827864,0.727408,0.761091
10,0.064,0.46076,0.845096,0.819275,0.728162,0.757548


[I 2025-03-23 22:08:07,004] Trial 73 pruned. 


Trial 74 with params: {'learning_rate': 0.0002952710041203322, 'weight_decay': 0.01, 'warmup_steps': 47, 'lambda_param': 0.30000000000000004, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2158,0.877331,0.683776,0.306505,0.290231,0.283968
2,0.5764,0.702499,0.746104,0.455689,0.417074,0.424463
3,0.3738,0.644312,0.771769,0.534012,0.473491,0.48956
4,0.2656,0.619615,0.784601,0.619773,0.532491,0.55453
5,0.2021,0.599634,0.80385,0.674457,0.608643,0.629343
6,0.1628,0.590709,0.8011,0.693797,0.607888,0.634159
7,0.1383,0.573066,0.812099,0.728024,0.641681,0.666761
8,0.1225,0.555632,0.808433,0.73646,0.624342,0.659037
9,0.1117,0.56025,0.818515,0.766324,0.669636,0.699134
10,0.1042,0.574471,0.808433,0.783513,0.67249,0.705751


[I 2025-03-23 22:12:11,497] Trial 74 pruned. 


Trial 75 with params: {'learning_rate': 0.0022947406791405923, 'weight_decay': 0.0, 'warmup_steps': 47, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6425,0.552796,0.80385,0.727397,0.625857,0.655033
2,0.1288,0.511624,0.831347,0.783767,0.718938,0.738313
3,0.0871,0.500204,0.824015,0.770518,0.714205,0.730514
4,0.0747,0.493158,0.837764,0.814742,0.741426,0.763481
5,0.0707,0.484442,0.840513,0.823672,0.743467,0.766466
6,0.0669,0.491059,0.831347,0.810474,0.738224,0.758044
7,0.0659,0.499121,0.832264,0.807452,0.743543,0.761699
8,0.0662,0.503948,0.826764,0.821011,0.729678,0.757629
9,0.0646,0.489064,0.83593,0.825236,0.747655,0.770658
10,0.0642,0.505867,0.83593,0.823401,0.759985,0.778252


[I 2025-03-23 22:18:01,156] Trial 75 finished with value: 0.7878490455549 and parameters: {'learning_rate': 0.0022947406791405923, 'weight_decay': 0.0, 'warmup_steps': 47, 'lambda_param': 1.0, 'temperature': 2.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 76 with params: {'learning_rate': 0.0036508781374717377, 'weight_decay': 0.004, 'warmup_steps': 31, 'lambda_param': 0.6000000000000001, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5441,0.515227,0.827681,0.780341,0.688571,0.716324
2,0.1124,0.47414,0.84418,0.824201,0.718903,0.752019
3,0.0832,0.470586,0.837764,0.820945,0.730202,0.75532
4,0.0758,0.479664,0.84418,0.836377,0.740186,0.769756
5,0.0718,0.474886,0.842346,0.823966,0.748212,0.768622
6,0.0676,0.453356,0.846929,0.821688,0.733021,0.756732
7,0.0667,0.462982,0.849679,0.83128,0.755303,0.776229
8,0.0669,0.460404,0.848763,0.840107,0.747374,0.774979
9,0.0643,0.455969,0.849679,0.83749,0.74492,0.771359
10,0.0633,0.458291,0.847846,0.829861,0.744157,0.768813


[I 2025-03-23 22:23:32,269] Trial 76 finished with value: 0.7889670184852632 and parameters: {'learning_rate': 0.0036508781374717377, 'weight_decay': 0.004, 'warmup_steps': 31, 'lambda_param': 0.6000000000000001, 'temperature': 4.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 77 with params: {'learning_rate': 0.002846839352151477, 'weight_decay': 0.006, 'warmup_steps': 5, 'lambda_param': 1.0, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5444,0.527718,0.829514,0.73605,0.655602,0.678006
2,0.1183,0.473599,0.842346,0.838023,0.743981,0.773357
3,0.0842,0.499667,0.845096,0.839053,0.753646,0.777757
4,0.0748,0.475968,0.843263,0.848293,0.752977,0.785043
5,0.0719,0.486561,0.83593,0.823756,0.742572,0.770082
6,0.0676,0.475091,0.833181,0.85046,0.735292,0.772535
7,0.0657,0.459028,0.84418,0.844062,0.745192,0.77492
8,0.0648,0.496654,0.839597,0.84196,0.747207,0.778034
9,0.0646,0.479886,0.84143,0.838665,0.747219,0.776006
10,0.064,0.474067,0.851512,0.847701,0.750988,0.781743


[I 2025-03-23 22:29:15,492] Trial 77 finished with value: 0.780734629004694 and parameters: {'learning_rate': 0.002846839352151477, 'weight_decay': 0.006, 'warmup_steps': 5, 'lambda_param': 1.0, 'temperature': 6.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 78 with params: {'learning_rate': 0.0026216410323573677, 'weight_decay': 0.004, 'warmup_steps': 32, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5919,0.554259,0.813016,0.71226,0.643223,0.66408
2,0.1208,0.506238,0.837764,0.797995,0.728566,0.752405
3,0.0845,0.490591,0.831347,0.777489,0.730121,0.742908
4,0.0753,0.482014,0.846929,0.819879,0.732283,0.761856
5,0.0693,0.478396,0.846013,0.785227,0.728487,0.746927


[I 2025-03-23 22:31:11,965] Trial 78 pruned. 


Trial 79 with params: {'learning_rate': 0.004053744970159354, 'weight_decay': 0.004, 'warmup_steps': 30, 'lambda_param': 0.8, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5436,0.500046,0.831347,0.774181,0.685295,0.711303
2,0.113,0.484732,0.837764,0.818483,0.735943,0.761333
3,0.0844,0.494107,0.832264,0.800866,0.716388,0.740709
4,0.0752,0.471521,0.842346,0.839963,0.736506,0.770069
5,0.0703,0.460497,0.851512,0.837251,0.753147,0.778101
6,0.0698,0.458936,0.848763,0.837717,0.740889,0.767572
7,0.0701,0.480954,0.845096,0.842581,0.740297,0.773184
8,0.0684,0.459082,0.849679,0.850083,0.740509,0.777288
9,0.0666,0.478314,0.832264,0.831857,0.734554,0.767807
10,0.0641,0.464941,0.84143,0.828978,0.740663,0.770648


[I 2025-03-23 22:36:52,344] Trial 79 finished with value: 0.7774065187885846 and parameters: {'learning_rate': 0.004053744970159354, 'weight_decay': 0.004, 'warmup_steps': 30, 'lambda_param': 0.8, 'temperature': 2.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 80 with params: {'learning_rate': 0.0029063834285411286, 'weight_decay': 0.01, 'warmup_steps': 11, 'lambda_param': 0.5, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5553,0.517132,0.834097,0.738869,0.685501,0.698036
2,0.1181,0.483053,0.832264,0.795482,0.70241,0.732791
3,0.0834,0.479722,0.840513,0.819737,0.739731,0.765478
4,0.0745,0.486951,0.836847,0.825669,0.732913,0.759992
5,0.0704,0.498043,0.832264,0.81574,0.718768,0.75038


[I 2025-03-23 22:38:41,219] Trial 80 pruned. 


Trial 81 with params: {'learning_rate': 0.0027386906817520235, 'weight_decay': 0.005, 'warmup_steps': 31, 'lambda_param': 0.5, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5879,0.537502,0.816682,0.731784,0.645746,0.673641
2,0.1204,0.479681,0.831347,0.822694,0.705572,0.744152
3,0.0837,0.47307,0.835014,0.770579,0.704976,0.726409
4,0.0753,0.476664,0.84143,0.809213,0.732822,0.757353
5,0.0708,0.466443,0.847846,0.79315,0.734838,0.752241
6,0.0686,0.459579,0.845096,0.802139,0.74458,0.759753
7,0.0664,0.468134,0.845096,0.824447,0.735889,0.765662
8,0.0652,0.476035,0.842346,0.786765,0.740049,0.750574
9,0.0641,0.467581,0.842346,0.758879,0.726755,0.732642
10,0.0639,0.464633,0.846929,0.801758,0.734412,0.754043


[I 2025-03-23 22:42:23,103] Trial 81 pruned. 


Trial 82 with params: {'learning_rate': 0.0032089748841581515, 'weight_decay': 0.005, 'warmup_steps': 25, 'lambda_param': 0.6000000000000001, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5668,0.510766,0.825848,0.784166,0.686203,0.715566
2,0.116,0.480557,0.837764,0.821317,0.731391,0.75908
3,0.0823,0.48516,0.836847,0.803373,0.726662,0.74932
4,0.0743,0.469602,0.852429,0.825759,0.741283,0.770484
5,0.0704,0.476483,0.84418,0.827753,0.749562,0.775449
6,0.0681,0.487938,0.84143,0.814909,0.727401,0.75481
7,0.0683,0.471547,0.84143,0.802377,0.728978,0.750333
8,0.0665,0.472689,0.845096,0.81158,0.750231,0.769041
9,0.0646,0.481646,0.832264,0.822459,0.741683,0.763466
10,0.0635,0.482319,0.84418,0.842976,0.758026,0.785315


[I 2025-03-23 22:48:00,269] Trial 82 finished with value: 0.787515828496196 and parameters: {'learning_rate': 0.0032089748841581515, 'weight_decay': 0.005, 'warmup_steps': 25, 'lambda_param': 0.6000000000000001, 'temperature': 3.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 83 with params: {'learning_rate': 0.004415440531757877, 'weight_decay': 0.007, 'warmup_steps': 47, 'lambda_param': 0.2, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5352,0.490676,0.846013,0.796941,0.693728,0.72554
2,0.111,0.48044,0.84143,0.828436,0.744244,0.771035
3,0.0819,0.464318,0.854262,0.823912,0.770416,0.783668
4,0.0749,0.46718,0.856095,0.848745,0.769574,0.794559
5,0.0703,0.465922,0.852429,0.845286,0.756067,0.781915
6,0.0677,0.451432,0.851512,0.830732,0.755382,0.778253
7,0.067,0.470445,0.850596,0.837137,0.754214,0.778525
8,0.0669,0.47168,0.851512,0.832326,0.75531,0.777718
9,0.0657,0.458271,0.848763,0.827389,0.756084,0.777744
10,0.0646,0.468599,0.846929,0.823668,0.753096,0.773785


[I 2025-03-23 22:53:40,349] Trial 83 finished with value: 0.7769307522030653 and parameters: {'learning_rate': 0.004415440531757877, 'weight_decay': 0.007, 'warmup_steps': 47, 'lambda_param': 0.2, 'temperature': 2.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 84 with params: {'learning_rate': 5.286423289644344e-05, 'weight_decay': 0.008, 'warmup_steps': 31, 'lambda_param': 0.9, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7706,1.458973,0.444546,0.068499,0.101838,0.073045
2,1.2878,1.257875,0.52154,0.13346,0.145596,0.124188
3,1.1188,1.152004,0.580202,0.171389,0.192448,0.169047
4,0.9946,1.059339,0.615949,0.215034,0.215063,0.195863
5,0.8936,0.995148,0.637947,0.258262,0.239246,0.224967


[I 2025-03-23 22:55:30,327] Trial 84 pruned. 


Trial 85 with params: {'learning_rate': 0.0031842479310475206, 'weight_decay': 0.004, 'warmup_steps': 40, 'lambda_param': 0.7000000000000001, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5661,0.524041,0.816682,0.73778,0.654724,0.679713
2,0.1158,0.495324,0.831347,0.816196,0.725531,0.753592
3,0.0832,0.489376,0.835014,0.798819,0.725296,0.746782
4,0.0731,0.477638,0.843263,0.819684,0.749578,0.771719
5,0.0697,0.470036,0.846929,0.825503,0.755632,0.773793
6,0.0686,0.466002,0.850596,0.820733,0.750277,0.772935
7,0.0667,0.476626,0.845096,0.831584,0.752709,0.774958
8,0.0672,0.490758,0.846929,0.788024,0.741996,0.749987
9,0.0647,0.471137,0.848763,0.833173,0.752843,0.776735
10,0.0628,0.470961,0.846929,0.820092,0.753956,0.772685


[I 2025-03-23 23:01:01,205] Trial 85 finished with value: 0.7814249122797632 and parameters: {'learning_rate': 0.0031842479310475206, 'weight_decay': 0.004, 'warmup_steps': 40, 'lambda_param': 0.7000000000000001, 'temperature': 3.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 86 with params: {'learning_rate': 0.0044240010312868614, 'weight_decay': 0.004, 'warmup_steps': 28, 'lambda_param': 1.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5136,0.502091,0.829514,0.795695,0.678771,0.713555
2,0.1105,0.459536,0.84143,0.812392,0.738832,0.757557
3,0.0812,0.47542,0.845096,0.834369,0.743251,0.772183
4,0.0748,0.474563,0.847846,0.823933,0.737219,0.767219
5,0.071,0.470725,0.845096,0.839233,0.748429,0.77618
6,0.0694,0.474527,0.845096,0.834389,0.755646,0.778862
7,0.0685,0.478131,0.83868,0.80721,0.726575,0.747277
8,0.0679,0.473115,0.843263,0.838116,0.76007,0.785646
9,0.0651,0.460865,0.846013,0.832921,0.752539,0.778445
10,0.0635,0.466589,0.846013,0.849134,0.751728,0.782919


[I 2025-03-23 23:07:04,076] Trial 86 finished with value: 0.7867732109270049 and parameters: {'learning_rate': 0.0044240010312868614, 'weight_decay': 0.004, 'warmup_steps': 28, 'lambda_param': 1.0, 'temperature': 5.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 87 with params: {'learning_rate': 0.001883701782955438, 'weight_decay': 0.006, 'warmup_steps': 52, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6763,0.553796,0.809349,0.667564,0.592365,0.608455
2,0.1391,0.515349,0.828598,0.811282,0.706837,0.742439
3,0.0902,0.502065,0.827681,0.786165,0.718183,0.741814
4,0.0775,0.498231,0.835014,0.828919,0.723986,0.757032
5,0.0699,0.485058,0.83868,0.792449,0.728808,0.748796
6,0.0668,0.482009,0.84143,0.822514,0.737034,0.765262
7,0.0655,0.487527,0.839597,0.795976,0.730271,0.748748
8,0.0642,0.495649,0.83593,0.811259,0.733187,0.75807
9,0.0643,0.488521,0.840513,0.818103,0.74454,0.76881
10,0.066,0.486652,0.840513,0.803193,0.728373,0.750473


[I 2025-03-23 23:10:43,193] Trial 87 pruned. 


Trial 88 with params: {'learning_rate': 0.002247972675444571, 'weight_decay': 0.003, 'warmup_steps': 41, 'lambda_param': 0.9, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6395,0.554811,0.809349,0.718801,0.627696,0.657195
2,0.129,0.514216,0.821265,0.778099,0.71625,0.727595
3,0.0871,0.483472,0.83593,0.825159,0.739955,0.763911
4,0.0758,0.485029,0.836847,0.816067,0.737798,0.756334
5,0.0705,0.484938,0.83593,0.83428,0.742981,0.770559
6,0.0673,0.471244,0.837764,0.830002,0.748229,0.771993
7,0.0658,0.468685,0.835014,0.822866,0.737545,0.762554
8,0.0695,0.489444,0.839597,0.834421,0.741385,0.76586
9,0.0647,0.474647,0.84418,0.836714,0.747746,0.771368
10,0.0631,0.474869,0.839597,0.833043,0.749367,0.771158


[I 2025-03-23 23:16:19,078] Trial 88 finished with value: 0.7714502418779527 and parameters: {'learning_rate': 0.002247972675444571, 'weight_decay': 0.003, 'warmup_steps': 41, 'lambda_param': 0.9, 'temperature': 2.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 89 with params: {'learning_rate': 0.0027082860758057105, 'weight_decay': 0.002, 'warmup_steps': 33, 'lambda_param': 0.6000000000000001, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5855,0.545599,0.820348,0.752711,0.66547,0.692463
2,0.1192,0.48853,0.837764,0.77783,0.718408,0.734198
3,0.083,0.471994,0.847846,0.818115,0.735891,0.760023
4,0.075,0.494659,0.835014,0.814065,0.721227,0.75085
5,0.0716,0.487492,0.835014,0.78274,0.713144,0.734943


[I 2025-03-23 23:18:11,669] Trial 89 pruned. 


Trial 90 with params: {'learning_rate': 0.0011115662517499805, 'weight_decay': 0.004, 'warmup_steps': 40, 'lambda_param': 0.6000000000000001, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7821,0.610857,0.777269,0.555611,0.519276,0.52637
2,0.1913,0.549407,0.810266,0.705346,0.649054,0.66304
3,0.1113,0.511601,0.833181,0.812922,0.686167,0.728098
4,0.0877,0.516531,0.831347,0.806256,0.73038,0.750157
5,0.0773,0.51165,0.828598,0.799377,0.695718,0.730225


[I 2025-03-23 23:20:02,772] Trial 90 pruned. 


Trial 91 with params: {'learning_rate': 0.0036214255732385, 'weight_decay': 0.006, 'warmup_steps': 41, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5468,0.493944,0.832264,0.789565,0.697737,0.726538
2,0.1124,0.473252,0.839597,0.799428,0.722289,0.745136
3,0.0832,0.466177,0.847846,0.8133,0.748236,0.767404
4,0.0725,0.46099,0.845096,0.832281,0.744617,0.770251
5,0.0694,0.455207,0.848763,0.830344,0.74522,0.772265
6,0.068,0.46542,0.848763,0.838305,0.737166,0.766368
7,0.0668,0.461603,0.84143,0.844584,0.740999,0.777466
8,0.0652,0.462114,0.846929,0.83416,0.74414,0.773895
9,0.0682,0.452782,0.850596,0.838606,0.754392,0.781619
10,0.0641,0.464745,0.846929,0.839249,0.751854,0.779764


[I 2025-03-23 23:26:02,541] Trial 91 finished with value: 0.7822283380414009 and parameters: {'learning_rate': 0.0036214255732385, 'weight_decay': 0.006, 'warmup_steps': 41, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 92 with params: {'learning_rate': 0.00430869489862612, 'weight_decay': 0.003, 'warmup_steps': 20, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5156,0.510422,0.820348,0.718158,0.662858,0.679762
2,0.1127,0.484495,0.83593,0.833069,0.72624,0.755785
3,0.0821,0.475783,0.836847,0.808776,0.7417,0.761588
4,0.0743,0.476369,0.837764,0.819555,0.745117,0.764967
5,0.0731,0.477659,0.83868,0.803038,0.735939,0.756456
6,0.0714,0.482025,0.836847,0.834654,0.729006,0.765323
7,0.0693,0.474802,0.84418,0.837154,0.751814,0.777558
8,0.0661,0.464909,0.846929,0.84534,0.759844,0.786096
9,0.0649,0.473522,0.846013,0.839939,0.75025,0.779191
10,0.064,0.474584,0.840513,0.834603,0.749717,0.776002


[I 2025-03-23 23:29:50,399] Trial 92 pruned. 


Trial 93 with params: {'learning_rate': 0.004222613671891532, 'weight_decay': 0.005, 'warmup_steps': 35, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5307,0.502755,0.835014,0.780884,0.72094,0.737988
2,0.1111,0.47689,0.837764,0.798331,0.723631,0.745155
3,0.081,0.474317,0.83868,0.7818,0.732155,0.743877
4,0.0748,0.478508,0.853346,0.830834,0.754933,0.777636
5,0.0738,0.4848,0.83593,0.826106,0.73706,0.76456
6,0.0713,0.46927,0.846013,0.790452,0.732104,0.746056
7,0.0671,0.506315,0.833181,0.832129,0.735956,0.766748
8,0.0666,0.473077,0.840513,0.788152,0.730849,0.746743
9,0.0647,0.475143,0.84418,0.784009,0.74133,0.752073
10,0.0632,0.466729,0.84418,0.795248,0.74152,0.754368


[I 2025-03-23 23:33:34,467] Trial 93 pruned. 


Trial 94 with params: {'learning_rate': 0.0038726610714625485, 'weight_decay': 0.009000000000000001, 'warmup_steps': 36, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5434,0.515906,0.828598,0.753582,0.675839,0.702388
2,0.1124,0.472006,0.842346,0.802734,0.723061,0.74914
3,0.0825,0.469673,0.846013,0.834335,0.741085,0.768661
4,0.074,0.46647,0.846929,0.840182,0.745234,0.7717
5,0.07,0.482835,0.84418,0.817469,0.746788,0.767292
6,0.068,0.457499,0.851512,0.838453,0.744501,0.77073
7,0.069,0.477489,0.84143,0.823919,0.741785,0.76701
8,0.0669,0.475721,0.847846,0.830773,0.741476,0.771273
9,0.0656,0.455874,0.846013,0.838262,0.74921,0.775231
10,0.0639,0.460778,0.854262,0.836855,0.745586,0.773423


[I 2025-03-23 23:37:24,368] Trial 94 pruned. 


Trial 95 with params: {'learning_rate': 0.00473854602311833, 'weight_decay': 0.006, 'warmup_steps': 24, 'lambda_param': 0.6000000000000001, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5075,0.510387,0.826764,0.759374,0.671713,0.69915
2,0.1107,0.474077,0.843263,0.8311,0.742784,0.77022
3,0.0842,0.506199,0.834097,0.837413,0.743211,0.773383
4,0.077,0.467789,0.845096,0.847371,0.744101,0.774202
5,0.0711,0.477084,0.847846,0.812645,0.739494,0.759996
6,0.069,0.492675,0.83593,0.817375,0.756506,0.767499
7,0.0696,0.472679,0.839597,0.823448,0.735765,0.76329
8,0.0664,0.4869,0.840513,0.841732,0.736054,0.767709
9,0.0663,0.479645,0.843263,0.831789,0.752059,0.773124
10,0.064,0.467243,0.857012,0.848398,0.761001,0.785257


[I 2025-03-23 23:42:59,230] Trial 95 finished with value: 0.7829867222611644 and parameters: {'learning_rate': 0.00473854602311833, 'weight_decay': 0.006, 'warmup_steps': 24, 'lambda_param': 0.6000000000000001, 'temperature': 6.0}. Best is trial 34 with value: 0.7974771651115893.


Trial 96 with params: {'learning_rate': 0.00015972356535382792, 'weight_decay': 0.01, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.404,1.096568,0.593951,0.158153,0.198108,0.168048
2,0.8473,0.875457,0.68011,0.306407,0.292723,0.284657
3,0.6316,0.773874,0.713107,0.350502,0.333941,0.331847
4,0.5042,0.736734,0.733272,0.458126,0.383044,0.397641
5,0.4171,0.703258,0.747021,0.491964,0.423951,0.441022


[I 2025-03-23 23:44:51,379] Trial 96 pruned. 


Trial 97 with params: {'learning_rate': 0.0011001486486898098, 'weight_decay': 0.007, 'warmup_steps': 43, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7922,0.606838,0.773602,0.542552,0.495386,0.505458
2,0.197,0.538318,0.818515,0.770284,0.670247,0.699068
3,0.1139,0.509096,0.824931,0.81545,0.715893,0.748926
4,0.0887,0.516492,0.825848,0.815971,0.736621,0.760296
5,0.079,0.514071,0.830431,0.798445,0.713496,0.738797
6,0.0729,0.503197,0.828598,0.801078,0.697948,0.729853
7,0.0699,0.500716,0.833181,0.804794,0.713557,0.744085
8,0.067,0.50998,0.827681,0.829085,0.721827,0.756711
9,0.0671,0.499954,0.834097,0.835416,0.738903,0.768803
10,0.0645,0.505874,0.829514,0.82247,0.731689,0.758868


[I 2025-03-23 23:48:36,009] Trial 97 pruned. 


Trial 98 with params: {'learning_rate': 0.0030143050778263865, 'weight_decay': 0.001, 'warmup_steps': 5, 'lambda_param': 1.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5402,0.518442,0.827681,0.751374,0.674528,0.696613
2,0.1156,0.483257,0.839597,0.822009,0.720753,0.754363
3,0.0813,0.478022,0.846929,0.823755,0.739992,0.767104
4,0.0741,0.472717,0.851512,0.822714,0.749545,0.770334
5,0.0713,0.474679,0.83868,0.819125,0.748009,0.766649
6,0.0716,0.480764,0.839597,0.834897,0.741786,0.770845
7,0.0687,0.468985,0.848763,0.830181,0.746532,0.769275
8,0.065,0.453451,0.853346,0.849808,0.763661,0.792388
9,0.0637,0.449848,0.857012,0.854328,0.756231,0.788185
10,0.0626,0.450291,0.853346,0.846171,0.750983,0.782612


[I 2025-03-23 23:56:13,616] Trial 99 pruned. 


Trial 100 with params: {'learning_rate': 0.004348836125793389, 'weight_decay': 0.007, 'warmup_steps': 44, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5303,0.4954,0.836847,0.775407,0.684562,0.712531
2,0.1113,0.477863,0.84418,0.824097,0.751427,0.771269
3,0.0828,0.465018,0.842346,0.822544,0.764578,0.781092
4,0.0735,0.475338,0.84418,0.818168,0.750752,0.76939
5,0.0712,0.47448,0.852429,0.830698,0.770914,0.787782
6,0.0698,0.480466,0.833181,0.813717,0.728786,0.755123
7,0.0687,0.47898,0.846013,0.837142,0.753339,0.778389
8,0.0656,0.471576,0.840513,0.830774,0.755413,0.78032
9,0.0644,0.467572,0.839597,0.836396,0.758414,0.7825
10,0.0633,0.470244,0.84143,0.829224,0.75785,0.779461


[I 2025-03-24 00:02:01,916] Trial 100 finished with value: 0.7863960462549161 and parameters: {'learning_rate': 0.004348836125793389, 'weight_decay': 0.007, 'warmup_steps': 44, 'lambda_param': 0.0, 'temperature': 2.5}. Best is trial 34 with value: 0.7974771651115893.


Trial 101 with params: {'learning_rate': 0.004536832866449745, 'weight_decay': 0.0, 'warmup_steps': 42, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5303,0.48493,0.83593,0.79282,0.698203,0.727778
2,0.111,0.476151,0.857929,0.854983,0.774455,0.800562
3,0.0809,0.463883,0.851512,0.853037,0.773741,0.798673
4,0.0744,0.46447,0.846929,0.837339,0.749643,0.772795
5,0.073,0.460072,0.852429,0.832227,0.75585,0.776101
6,0.0696,0.456573,0.858845,0.841211,0.759808,0.784811
7,0.0688,0.453026,0.855179,0.84335,0.75181,0.779389
8,0.0667,0.462178,0.856095,0.842804,0.770464,0.791418
9,0.065,0.436163,0.858845,0.846613,0.771156,0.793355
10,0.0633,0.433672,0.866178,0.847315,0.776035,0.798919


[I 2025-03-24 00:07:52,662] Trial 101 finished with value: 0.8059585066857037 and parameters: {'learning_rate': 0.004536832866449745, 'weight_decay': 0.0, 'warmup_steps': 42, 'lambda_param': 1.0, 'temperature': 2.0}. Best is trial 101 with value: 0.8059585066857037.


Trial 102 with params: {'learning_rate': 0.004400238362225207, 'weight_decay': 0.001, 'warmup_steps': 32, 'lambda_param': 0.9, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5252,0.500028,0.847846,0.774237,0.71276,0.729475
2,0.1097,0.4702,0.842346,0.836768,0.735186,0.767301
3,0.0829,0.47316,0.840513,0.81808,0.735041,0.763895
4,0.0742,0.474507,0.848763,0.82132,0.743335,0.764235
5,0.073,0.471745,0.843263,0.817697,0.73689,0.762546
6,0.0685,0.454846,0.852429,0.853384,0.748352,0.780038
7,0.067,0.469993,0.836847,0.795494,0.722757,0.746632
8,0.066,0.469886,0.846013,0.820823,0.754323,0.773773
9,0.0666,0.456864,0.850596,0.846324,0.744596,0.778083
10,0.0644,0.450831,0.851512,0.843559,0.754989,0.782746


[I 2025-03-24 00:13:51,928] Trial 102 finished with value: 0.7880673386280296 and parameters: {'learning_rate': 0.004400238362225207, 'weight_decay': 0.001, 'warmup_steps': 32, 'lambda_param': 0.9, 'temperature': 2.0}. Best is trial 101 with value: 0.8059585066857037.


Trial 103 with params: {'learning_rate': 0.0036015512832142556, 'weight_decay': 0.0, 'warmup_steps': 40, 'lambda_param': 0.9, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5471,0.487051,0.839597,0.79712,0.69467,0.726361
2,0.1112,0.471375,0.850596,0.836067,0.748683,0.777525
3,0.0813,0.474672,0.846013,0.810636,0.755517,0.773855
4,0.0734,0.460261,0.846013,0.846721,0.758355,0.787191
5,0.0694,0.450429,0.855179,0.832847,0.75586,0.77982
6,0.0678,0.461841,0.842346,0.822386,0.749251,0.771914
7,0.0683,0.453054,0.849679,0.852089,0.758712,0.787094
8,0.066,0.460591,0.845096,0.829778,0.750384,0.776883
9,0.0649,0.457771,0.846929,0.833081,0.763188,0.783544
10,0.0633,0.459482,0.849679,0.835698,0.755594,0.778896


[I 2025-03-24 00:19:32,223] Trial 103 finished with value: 0.7839459643782314 and parameters: {'learning_rate': 0.0036015512832142556, 'weight_decay': 0.0, 'warmup_steps': 40, 'lambda_param': 0.9, 'temperature': 2.5}. Best is trial 101 with value: 0.8059585066857037.


Trial 104 with params: {'learning_rate': 0.004734140457579948, 'weight_decay': 0.003, 'warmup_steps': 48, 'lambda_param': 0.2, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5252,0.506774,0.832264,0.767178,0.662554,0.695137
2,0.1101,0.47335,0.846929,0.849218,0.74038,0.775348
3,0.082,0.481135,0.83868,0.84041,0.748087,0.774172
4,0.0759,0.482833,0.843263,0.838491,0.75046,0.779221
5,0.0714,0.479918,0.852429,0.841878,0.758912,0.78397
6,0.0692,0.471562,0.845096,0.8397,0.740037,0.769476
7,0.0672,0.475453,0.847846,0.841133,0.73673,0.770166
8,0.0677,0.507096,0.835014,0.818578,0.738976,0.761302
9,0.0671,0.470049,0.84418,0.866283,0.763998,0.796676
10,0.064,0.470205,0.842346,0.843402,0.737385,0.770243


[I 2025-03-24 00:25:29,812] Trial 104 finished with value: 0.7816927166139871 and parameters: {'learning_rate': 0.004734140457579948, 'weight_decay': 0.003, 'warmup_steps': 48, 'lambda_param': 0.2, 'temperature': 2.5}. Best is trial 101 with value: 0.8059585066857037.


Trial 105 with params: {'learning_rate': 0.004262297239362222, 'weight_decay': 0.002, 'warmup_steps': 34, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5289,0.509368,0.827681,0.786915,0.687878,0.716465
2,0.1109,0.496363,0.830431,0.820415,0.720019,0.747775
3,0.0807,0.472751,0.846929,0.84308,0.74343,0.777017
4,0.0722,0.481868,0.849679,0.843369,0.748487,0.778961
5,0.0739,0.484794,0.842346,0.838338,0.743541,0.774646
6,0.071,0.484804,0.83593,0.828569,0.740949,0.768413
7,0.0678,0.475187,0.837764,0.847715,0.751789,0.784501
8,0.0676,0.484251,0.836847,0.83365,0.740725,0.769754
9,0.0653,0.48089,0.84143,0.845516,0.746076,0.779187
10,0.0636,0.476926,0.846929,0.841944,0.750837,0.780653


[I 2025-03-24 00:31:13,337] Trial 105 finished with value: 0.7814559903392839 and parameters: {'learning_rate': 0.004262297239362222, 'weight_decay': 0.002, 'warmup_steps': 34, 'lambda_param': 0.0, 'temperature': 4.5}. Best is trial 101 with value: 0.8059585066857037.


Trial 106 with params: {'learning_rate': 0.00467977917923701, 'weight_decay': 0.003, 'warmup_steps': 14, 'lambda_param': 0.9, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5036,0.497343,0.827681,0.783663,0.713628,0.73461
2,0.109,0.470985,0.83593,0.802441,0.729354,0.749506
3,0.0847,0.461286,0.837764,0.823126,0.739665,0.76314
4,0.0752,0.458448,0.842346,0.831668,0.753024,0.775481
5,0.0737,0.468343,0.83868,0.837276,0.758051,0.781922
6,0.0705,0.470916,0.842346,0.842572,0.764716,0.784871
7,0.0694,0.484962,0.83868,0.838052,0.746103,0.771965
8,0.0674,0.475949,0.84143,0.840702,0.76047,0.784146
9,0.0656,0.465702,0.83868,0.842974,0.74935,0.777026
10,0.0643,0.463171,0.839597,0.841255,0.751694,0.77944


[I 2025-03-24 00:37:14,626] Trial 106 finished with value: 0.7725280436243157 and parameters: {'learning_rate': 0.00467977917923701, 'weight_decay': 0.003, 'warmup_steps': 14, 'lambda_param': 0.9, 'temperature': 4.5}. Best is trial 101 with value: 0.8059585066857037.


Trial 107 with params: {'learning_rate': 0.0046542822901996265, 'weight_decay': 0.001, 'warmup_steps': 37, 'lambda_param': 0.5, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5211,0.501261,0.83593,0.749489,0.669784,0.695268
2,0.1099,0.48686,0.845096,0.82511,0.744462,0.767584
3,0.0823,0.47569,0.849679,0.829792,0.761,0.779235
4,0.0741,0.465136,0.845096,0.813844,0.740094,0.764506
5,0.07,0.445363,0.851512,0.826816,0.762397,0.782382
6,0.0678,0.452571,0.847846,0.844265,0.748704,0.781322
7,0.0683,0.487288,0.84143,0.820772,0.765123,0.780357
8,0.0694,0.479038,0.84418,0.842386,0.749049,0.779154
9,0.0654,0.467389,0.843263,0.839869,0.755411,0.781613
10,0.0637,0.454092,0.856095,0.852569,0.760519,0.790461


[I 2025-03-24 00:43:04,393] Trial 107 finished with value: 0.7976917318221173 and parameters: {'learning_rate': 0.0046542822901996265, 'weight_decay': 0.001, 'warmup_steps': 37, 'lambda_param': 0.5, 'temperature': 2.5}. Best is trial 101 with value: 0.8059585066857037.


Trial 108 with params: {'learning_rate': 0.004272746453574432, 'weight_decay': 0.006, 'warmup_steps': 20, 'lambda_param': 0.8, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5138,0.514172,0.829514,0.761052,0.684703,0.706988
2,0.1114,0.478704,0.846013,0.840384,0.73643,0.767462
3,0.082,0.48245,0.840513,0.829237,0.725688,0.758151
4,0.0762,0.493837,0.835014,0.81554,0.725468,0.754676
5,0.0712,0.490572,0.843263,0.843628,0.751955,0.78285
6,0.0682,0.482738,0.83868,0.839291,0.739841,0.769884
7,0.0671,0.480357,0.842346,0.843624,0.744059,0.774111
8,0.0691,0.459652,0.850596,0.849044,0.748074,0.777886
9,0.0664,0.469883,0.850596,0.842103,0.755118,0.782501
10,0.064,0.473393,0.84418,0.834826,0.755497,0.77931


[I 2025-03-24 00:48:43,310] Trial 108 finished with value: 0.7950135768056764 and parameters: {'learning_rate': 0.004272746453574432, 'weight_decay': 0.006, 'warmup_steps': 20, 'lambda_param': 0.8, 'temperature': 4.0}. Best is trial 101 with value: 0.8059585066857037.


Trial 109 with params: {'learning_rate': 0.002095225598060413, 'weight_decay': 0.002, 'warmup_steps': 33, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6407,0.558256,0.809349,0.734187,0.628382,0.660027
2,0.133,0.497695,0.827681,0.838906,0.735214,0.769377
3,0.0872,0.503227,0.827681,0.816776,0.724347,0.755372
4,0.0762,0.490495,0.831347,0.82618,0.730058,0.761732
5,0.0711,0.488666,0.839597,0.841391,0.753305,0.782218
6,0.0674,0.483889,0.833181,0.832449,0.741358,0.77073
7,0.0661,0.480605,0.836847,0.823704,0.735873,0.765506
8,0.0677,0.490294,0.835014,0.831344,0.737151,0.768161
9,0.0645,0.490319,0.834097,0.834582,0.732563,0.766911
10,0.0647,0.482314,0.840513,0.825567,0.73786,0.767188


[I 2025-03-24 00:52:32,358] Trial 109 pruned. 


Trial 110 with params: {'learning_rate': 0.0018195437435353106, 'weight_decay': 0.0, 'warmup_steps': 36, 'lambda_param': 0.2, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6786,0.567466,0.812099,0.692303,0.595928,0.621389
2,0.1434,0.526441,0.813016,0.76659,0.667167,0.695605
3,0.0917,0.495426,0.834097,0.789492,0.714134,0.73878
4,0.0772,0.500254,0.835014,0.828777,0.712543,0.746354
5,0.0713,0.506497,0.828598,0.824188,0.718741,0.750634
6,0.0697,0.489525,0.835014,0.841065,0.750074,0.777199
7,0.0668,0.49151,0.836847,0.838698,0.738884,0.766017
8,0.065,0.494207,0.827681,0.828633,0.713419,0.7494
9,0.064,0.483641,0.839597,0.835331,0.74342,0.769204
10,0.0638,0.484326,0.845096,0.844243,0.744951,0.774449


[I 2025-03-24 00:58:18,389] Trial 110 finished with value: 0.7686705634357262 and parameters: {'learning_rate': 0.0018195437435353106, 'weight_decay': 0.0, 'warmup_steps': 36, 'lambda_param': 0.2, 'temperature': 2.5}. Best is trial 101 with value: 0.8059585066857037.


Trial 111 with params: {'learning_rate': 0.002066756897783046, 'weight_decay': 0.007, 'warmup_steps': 21, 'lambda_param': 0.8, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6337,0.559638,0.80385,0.683355,0.591441,0.614208
2,0.134,0.504014,0.833181,0.786281,0.721997,0.74274
3,0.0883,0.506251,0.832264,0.776315,0.726433,0.740216
4,0.0762,0.492274,0.831347,0.81194,0.711417,0.740772
5,0.0707,0.472833,0.839597,0.794319,0.715812,0.740102


[I 2025-03-24 01:00:16,769] Trial 111 pruned. 


Trial 112 with params: {'learning_rate': 0.004415119212034352, 'weight_decay': 0.001, 'warmup_steps': 42, 'lambda_param': 0.5, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5353,0.501596,0.839597,0.776162,0.688222,0.714784
2,0.113,0.47317,0.839597,0.831769,0.753711,0.772764
3,0.0834,0.471523,0.84143,0.832777,0.756106,0.777224
4,0.0746,0.458673,0.849679,0.840833,0.754424,0.779969
5,0.0713,0.448001,0.860678,0.850934,0.781217,0.804373
6,0.0696,0.453639,0.856095,0.855668,0.778465,0.801255
7,0.0666,0.443773,0.853346,0.851281,0.773751,0.795724
8,0.0664,0.471034,0.843263,0.847354,0.764568,0.791706
9,0.0647,0.461575,0.850596,0.851901,0.778608,0.801198
10,0.0638,0.458342,0.849679,0.851526,0.770445,0.796747


[I 2025-03-24 01:05:54,578] Trial 112 finished with value: 0.8063353096417676 and parameters: {'learning_rate': 0.004415119212034352, 'weight_decay': 0.001, 'warmup_steps': 42, 'lambda_param': 0.5, 'temperature': 2.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 113 with params: {'learning_rate': 0.0027698030897307293, 'weight_decay': 0.001, 'warmup_steps': 36, 'lambda_param': 0.5, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5856,0.513588,0.829514,0.713299,0.662647,0.677235
2,0.1199,0.486242,0.83593,0.787381,0.731096,0.746462
3,0.0831,0.489053,0.835014,0.813588,0.732641,0.757261
4,0.074,0.492462,0.832264,0.831665,0.734776,0.764737
5,0.0702,0.481809,0.836847,0.828216,0.736366,0.765632
6,0.0673,0.488545,0.835014,0.826257,0.736909,0.762693
7,0.0666,0.470644,0.842346,0.836566,0.745877,0.773303
8,0.065,0.478143,0.843263,0.84134,0.749355,0.779062
9,0.0642,0.482122,0.846013,0.83637,0.751095,0.778881
10,0.0632,0.480221,0.846013,0.843189,0.744264,0.772571


[I 2025-03-24 01:10:01,124] Trial 113 pruned. 


Trial 114 with params: {'learning_rate': 0.0028725847007688935, 'weight_decay': 0.0, 'warmup_steps': 31, 'lambda_param': 0.5, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5802,0.525591,0.819432,0.762721,0.656949,0.685807
2,0.1198,0.487614,0.832264,0.809474,0.714334,0.743107
3,0.0833,0.469019,0.847846,0.831678,0.741152,0.769516
4,0.0746,0.470256,0.84418,0.829336,0.746423,0.772665
5,0.0703,0.471214,0.846929,0.820202,0.744434,0.76929
6,0.0677,0.469788,0.84143,0.828514,0.73702,0.762576
7,0.0673,0.48617,0.840513,0.834841,0.732858,0.767173
8,0.0665,0.480047,0.842346,0.828038,0.751964,0.775601
9,0.0642,0.470937,0.84418,0.830655,0.747522,0.773394
10,0.0629,0.461531,0.850596,0.843834,0.761482,0.786634


[I 2025-03-24 01:15:43,772] Trial 114 finished with value: 0.7873142620659457 and parameters: {'learning_rate': 0.0028725847007688935, 'weight_decay': 0.0, 'warmup_steps': 31, 'lambda_param': 0.5, 'temperature': 2.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 115 with params: {'learning_rate': 0.001550621979711997, 'weight_decay': 0.0, 'warmup_steps': 48, 'lambda_param': 0.4, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.72,0.582863,0.797434,0.587596,0.5585,0.560802
2,0.1565,0.526166,0.829514,0.773745,0.702705,0.72564
3,0.096,0.501465,0.831347,0.804306,0.717638,0.74642
4,0.0804,0.501251,0.831347,0.838872,0.73256,0.766882
5,0.0719,0.495888,0.834097,0.813032,0.735086,0.760387
6,0.0686,0.483895,0.842346,0.829043,0.739883,0.76973
7,0.0686,0.498145,0.836847,0.830748,0.724214,0.760555
8,0.0669,0.517137,0.819432,0.789525,0.706727,0.731526
9,0.0649,0.49136,0.830431,0.806683,0.730006,0.753417
10,0.0631,0.496742,0.832264,0.811015,0.739674,0.761222


[I 2025-03-24 01:19:21,569] Trial 115 pruned. 


Trial 116 with params: {'learning_rate': 0.004235768957186816, 'weight_decay': 0.001, 'warmup_steps': 48, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5344,0.497741,0.829514,0.763336,0.696819,0.715811
2,0.1122,0.478079,0.846929,0.839081,0.749397,0.776271
3,0.0819,0.475444,0.845096,0.816666,0.745876,0.766925
4,0.0741,0.479684,0.843263,0.830445,0.73789,0.764242
5,0.0705,0.470273,0.849679,0.833269,0.745643,0.770485
6,0.0678,0.462378,0.850596,0.809543,0.740554,0.758417
7,0.0687,0.458643,0.851512,0.817163,0.744167,0.762589
8,0.0658,0.463843,0.846013,0.819345,0.74667,0.769707
9,0.0647,0.469965,0.839597,0.813065,0.737308,0.755577
10,0.0634,0.467584,0.843263,0.834061,0.742045,0.769742


[I 2025-03-24 01:24:51,391] Trial 116 finished with value: 0.7779132219523801 and parameters: {'learning_rate': 0.004235768957186816, 'weight_decay': 0.001, 'warmup_steps': 48, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 117 with params: {'learning_rate': 0.00012050092247739796, 'weight_decay': 0.003, 'warmup_steps': 37, 'lambda_param': 0.5, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.517,1.18995,0.558203,0.139117,0.169832,0.141899
2,0.9667,0.965553,0.647113,0.253141,0.24877,0.233049
3,0.749,0.86233,0.684693,0.351495,0.299332,0.297498
4,0.6202,0.813051,0.704858,0.345022,0.323597,0.322399
5,0.5307,0.771545,0.722273,0.413949,0.358824,0.363525


[I 2025-03-24 01:26:41,876] Trial 117 pruned. 


Trial 118 with params: {'learning_rate': 0.004999618304828799, 'weight_decay': 0.0, 'warmup_steps': 42, 'lambda_param': 0.30000000000000004, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5125,0.508385,0.831347,0.806878,0.716582,0.74699
2,0.1098,0.472688,0.846013,0.845751,0.736689,0.769626
3,0.0827,0.465175,0.852429,0.820718,0.743581,0.76659
4,0.0752,0.477252,0.840513,0.834886,0.746251,0.772421
5,0.0733,0.451365,0.854262,0.853538,0.757825,0.788872
6,0.0694,0.445608,0.854262,0.82963,0.755551,0.778309
7,0.0684,0.454686,0.849679,0.843588,0.748568,0.776869
8,0.0671,0.471579,0.850596,0.821395,0.748188,0.767275
9,0.0663,0.464635,0.842346,0.81653,0.736659,0.762417
10,0.0644,0.455804,0.851512,0.841234,0.75161,0.778782


[I 2025-03-24 01:32:32,810] Trial 118 finished with value: 0.7784922870226199 and parameters: {'learning_rate': 0.004999618304828799, 'weight_decay': 0.0, 'warmup_steps': 42, 'lambda_param': 0.30000000000000004, 'temperature': 3.5}. Best is trial 112 with value: 0.8063353096417676.


Trial 119 with params: {'learning_rate': 0.0035048035393293498, 'weight_decay': 0.0, 'warmup_steps': 33, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5572,0.498579,0.837764,0.783468,0.702141,0.725414
2,0.1141,0.47384,0.83868,0.821228,0.729012,0.754452
3,0.0832,0.482495,0.836847,0.826169,0.750994,0.769583
4,0.0743,0.473316,0.839597,0.838386,0.753016,0.778634
5,0.0708,0.455348,0.845096,0.83552,0.745286,0.772153
6,0.0705,0.483623,0.83593,0.827372,0.738581,0.763345
7,0.0685,0.472186,0.83593,0.810725,0.730316,0.755805
8,0.0652,0.462619,0.846929,0.834782,0.756553,0.77809
9,0.0638,0.455869,0.848763,0.826764,0.756967,0.775558
10,0.0633,0.481778,0.83868,0.806968,0.73096,0.752438


[I 2025-03-24 01:36:14,877] Trial 119 pruned. 


Trial 120 with params: {'learning_rate': 0.00016104904333464902, 'weight_decay': 0.009000000000000001, 'warmup_steps': 26, 'lambda_param': 0.2, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.4173,1.092865,0.595784,0.16864,0.201488,0.170668
2,0.8463,0.877716,0.68561,0.307595,0.300838,0.291867
3,0.633,0.780385,0.714024,0.350765,0.334787,0.334103
4,0.5064,0.742972,0.736939,0.467529,0.386671,0.403786
5,0.421,0.708191,0.754354,0.493752,0.437783,0.452151


[I 2025-03-24 01:38:03,850] Trial 120 pruned. 


Trial 121 with params: {'learning_rate': 8.532115701682182e-05, 'weight_decay': 0.003, 'warmup_steps': 34, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6283,1.305413,0.502291,0.113585,0.131216,0.105784
2,1.1089,1.095469,0.59945,0.190206,0.20693,0.181741
3,0.9067,0.969276,0.648029,0.279375,0.25104,0.238662
4,0.7691,0.896432,0.676444,0.321524,0.28217,0.278449
5,0.6738,0.849347,0.689276,0.354331,0.306429,0.305275
6,0.605,0.815166,0.703941,0.350373,0.323936,0.324254
7,0.5506,0.794162,0.712191,0.346241,0.335121,0.331983
8,0.5093,0.775039,0.71494,0.389222,0.347075,0.349363
9,0.476,0.765304,0.729606,0.414155,0.369588,0.370101
10,0.4489,0.759702,0.727773,0.446526,0.377953,0.388833


[I 2025-03-24 01:41:44,883] Trial 121 pruned. 


Trial 122 with params: {'learning_rate': 0.0031907523622934524, 'weight_decay': 0.004, 'warmup_steps': 40, 'lambda_param': 0.5, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5667,0.512404,0.831347,0.763342,0.698513,0.718454
2,0.1152,0.477543,0.84418,0.833246,0.749618,0.774916
3,0.0809,0.483735,0.84418,0.811749,0.74281,0.762796
4,0.0734,0.482243,0.84143,0.836788,0.74423,0.773014
5,0.069,0.470365,0.846929,0.836566,0.751071,0.776731
6,0.069,0.481688,0.843263,0.808008,0.746215,0.764828
7,0.0668,0.464122,0.853346,0.840303,0.768421,0.792765
8,0.0644,0.461772,0.848763,0.837823,0.758027,0.784539
9,0.0635,0.458495,0.847846,0.782522,0.742725,0.752234
10,0.0629,0.454397,0.852429,0.836011,0.756756,0.782715


[I 2025-03-24 01:47:19,076] Trial 122 finished with value: 0.7772922675638035 and parameters: {'learning_rate': 0.0031907523622934524, 'weight_decay': 0.004, 'warmup_steps': 40, 'lambda_param': 0.5, 'temperature': 2.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 123 with params: {'learning_rate': 0.004789433618611509, 'weight_decay': 0.006, 'warmup_steps': 22, 'lambda_param': 0.8, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5029,0.496663,0.829514,0.795665,0.699807,0.72762
2,0.1094,0.482317,0.836847,0.828773,0.722559,0.758735
3,0.0823,0.494115,0.829514,0.818012,0.731274,0.757392
4,0.0767,0.485468,0.843263,0.826439,0.734468,0.764679
5,0.0739,0.507071,0.836847,0.846887,0.734605,0.772693
6,0.0702,0.477465,0.843263,0.849925,0.732302,0.770619
7,0.0677,0.472016,0.846013,0.857786,0.749861,0.784214
8,0.0662,0.478275,0.84418,0.861415,0.741355,0.780117
9,0.065,0.465755,0.846013,0.861295,0.754239,0.788028
10,0.0639,0.478427,0.84143,0.858282,0.750318,0.784265


[I 2025-03-24 01:53:08,249] Trial 123 finished with value: 0.7881982345719765 and parameters: {'learning_rate': 0.004789433618611509, 'weight_decay': 0.006, 'warmup_steps': 22, 'lambda_param': 0.8, 'temperature': 3.5}. Best is trial 112 with value: 0.8063353096417676.


Trial 124 with params: {'learning_rate': 0.002496652742182962, 'weight_decay': 0.005, 'warmup_steps': 41, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6151,0.529394,0.815765,0.712031,0.6412,0.661958
2,0.1236,0.48138,0.839597,0.808676,0.725247,0.749668
3,0.0846,0.48006,0.835014,0.797967,0.728649,0.752977
4,0.074,0.48075,0.83593,0.827971,0.741498,0.768872
5,0.0697,0.473327,0.842346,0.816662,0.742912,0.764144
6,0.0672,0.463153,0.846929,0.820328,0.766069,0.781463
7,0.0658,0.463719,0.847846,0.825382,0.757587,0.777151
8,0.0646,0.478039,0.845096,0.811576,0.755408,0.773001
9,0.0661,0.467133,0.83868,0.810185,0.73752,0.760906
10,0.0636,0.462362,0.848763,0.820794,0.764612,0.78162


[I 2025-03-24 01:58:39,658] Trial 124 finished with value: 0.7798086085051277 and parameters: {'learning_rate': 0.002496652742182962, 'weight_decay': 0.005, 'warmup_steps': 41, 'lambda_param': 1.0, 'temperature': 2.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 125 with params: {'learning_rate': 0.003317741415449306, 'weight_decay': 0.006, 'warmup_steps': 18, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5482,0.532946,0.824015,0.747737,0.661451,0.687284
2,0.1153,0.515731,0.829514,0.809441,0.733071,0.752519
3,0.0836,0.468879,0.848763,0.812326,0.737727,0.761234
4,0.0736,0.465715,0.843263,0.841793,0.748605,0.779912
5,0.0703,0.468873,0.840513,0.822984,0.740481,0.765137
6,0.068,0.478504,0.84143,0.831847,0.750959,0.776191
7,0.068,0.486361,0.836847,0.822449,0.725342,0.754964
8,0.0654,0.479393,0.843263,0.832446,0.743347,0.768761
9,0.0639,0.475726,0.842346,0.838496,0.754113,0.781248
10,0.0633,0.478112,0.84418,0.830723,0.735592,0.766052


[I 2025-03-24 02:02:19,518] Trial 125 pruned. 


Trial 126 with params: {'learning_rate': 0.004625406287617865, 'weight_decay': 0.003, 'warmup_steps': 30, 'lambda_param': 0.5, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5106,0.49981,0.83868,0.796427,0.718425,0.740148
2,0.11,0.458223,0.853346,0.83065,0.750337,0.774102
3,0.0814,0.467175,0.846929,0.831678,0.759157,0.7807
4,0.0738,0.484043,0.845096,0.829802,0.76567,0.784261
5,0.0727,0.474693,0.848763,0.834576,0.775803,0.793211
6,0.0681,0.456676,0.851512,0.842249,0.762608,0.784119
7,0.0694,0.465913,0.851512,0.839741,0.760137,0.786119
8,0.0674,0.462144,0.851512,0.855547,0.765229,0.790926
9,0.0654,0.451896,0.857929,0.85529,0.762377,0.789205
10,0.0636,0.459305,0.861595,0.871524,0.783793,0.81154


[I 2025-03-24 02:08:00,473] Trial 126 finished with value: 0.8043682654696674 and parameters: {'learning_rate': 0.004625406287617865, 'weight_decay': 0.003, 'warmup_steps': 30, 'lambda_param': 0.5, 'temperature': 2.5}. Best is trial 112 with value: 0.8063353096417676.


Trial 127 with params: {'learning_rate': 0.004702810800953262, 'weight_decay': 0.002, 'warmup_steps': 39, 'lambda_param': 0.5, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5218,0.490115,0.839597,0.778249,0.694628,0.721062
2,0.1109,0.465996,0.83868,0.830908,0.729976,0.762143
3,0.0825,0.475336,0.845096,0.82124,0.74679,0.767439
4,0.0747,0.473168,0.846929,0.840754,0.734035,0.769314
5,0.075,0.490624,0.83868,0.834366,0.742502,0.772832
6,0.0727,0.45898,0.853346,0.839357,0.747846,0.775188
7,0.0685,0.464153,0.855179,0.85905,0.767499,0.79604
8,0.0658,0.457605,0.848763,0.833349,0.750242,0.775375
9,0.0649,0.453169,0.855179,0.853018,0.761802,0.788528
10,0.0652,0.45457,0.856095,0.850562,0.762118,0.787649


[I 2025-03-24 02:13:39,215] Trial 127 finished with value: 0.7915440881627639 and parameters: {'learning_rate': 0.004702810800953262, 'weight_decay': 0.002, 'warmup_steps': 39, 'lambda_param': 0.5, 'temperature': 3.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 128 with params: {'learning_rate': 0.004611317942825121, 'weight_decay': 0.001, 'warmup_steps': 31, 'lambda_param': 0.6000000000000001, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5155,0.511268,0.828598,0.743413,0.657456,0.685005
2,0.1106,0.467782,0.840513,0.828636,0.735889,0.763921
3,0.0824,0.480762,0.846929,0.836653,0.743107,0.774209
4,0.0765,0.468697,0.850596,0.834792,0.756709,0.780667
5,0.0718,0.457516,0.846013,0.848499,0.751072,0.78308
6,0.0686,0.464087,0.846929,0.837006,0.75284,0.777971
7,0.0665,0.472284,0.84143,0.835427,0.750053,0.777692
8,0.0673,0.491489,0.840513,0.826093,0.756936,0.778752
9,0.0675,0.467115,0.839597,0.828285,0.755336,0.776564
10,0.0642,0.466716,0.846013,0.847603,0.753101,0.783617


[I 2025-03-24 02:19:35,773] Trial 128 finished with value: 0.7848598296181579 and parameters: {'learning_rate': 0.004611317942825121, 'weight_decay': 0.001, 'warmup_steps': 31, 'lambda_param': 0.6000000000000001, 'temperature': 4.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 129 with params: {'learning_rate': 0.004136835397350295, 'weight_decay': 0.008, 'warmup_steps': 14, 'lambda_param': 1.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5199,0.493085,0.831347,0.772987,0.678656,0.706359
2,0.1093,0.457915,0.853346,0.827516,0.741063,0.76832
3,0.0835,0.479682,0.843263,0.824426,0.74339,0.767569
4,0.0764,0.470528,0.848763,0.849119,0.751238,0.780616
5,0.0706,0.457014,0.853346,0.839617,0.758674,0.781419
6,0.069,0.462359,0.846013,0.831559,0.742642,0.771279
7,0.0677,0.451356,0.859762,0.863738,0.762107,0.793173
8,0.0657,0.461221,0.848763,0.855326,0.755777,0.787308
9,0.0648,0.447629,0.857929,0.862481,0.769517,0.801289
10,0.0636,0.459266,0.849679,0.851438,0.757162,0.783786


[I 2025-03-24 02:25:32,167] Trial 129 finished with value: 0.7884057723184554 and parameters: {'learning_rate': 0.004136835397350295, 'weight_decay': 0.008, 'warmup_steps': 14, 'lambda_param': 1.0, 'temperature': 5.5}. Best is trial 112 with value: 0.8063353096417676.


Trial 130 with params: {'learning_rate': 0.0028694012003961217, 'weight_decay': 0.002, 'warmup_steps': 47, 'lambda_param': 0.5, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.595,0.51613,0.817599,0.742321,0.668388,0.689468
2,0.119,0.484249,0.83593,0.789654,0.736489,0.749711
3,0.0833,0.47656,0.843263,0.823715,0.74326,0.768133
4,0.0745,0.479976,0.846929,0.825956,0.752958,0.775591
5,0.0696,0.468846,0.850596,0.836834,0.758225,0.783566
6,0.0685,0.473364,0.83868,0.811238,0.743019,0.764776
7,0.0687,0.494254,0.836847,0.825651,0.734376,0.760892
8,0.0669,0.495175,0.829514,0.796249,0.727617,0.744756
9,0.0649,0.474646,0.840513,0.825634,0.768418,0.784758
10,0.0631,0.465193,0.846929,0.827181,0.756401,0.777894


[I 2025-03-24 02:29:32,933] Trial 130 pruned. 


Trial 131 with params: {'learning_rate': 0.0033461980885856947, 'weight_decay': 0.001, 'warmup_steps': 49, 'lambda_param': 1.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.568,0.515603,0.822181,0.752872,0.661814,0.69162
2,0.1141,0.479783,0.831347,0.817271,0.707725,0.741816
3,0.0821,0.493017,0.831347,0.807974,0.7396,0.759066
4,0.0736,0.466408,0.839597,0.827703,0.739057,0.764304
5,0.07,0.461292,0.848763,0.833242,0.744514,0.773081
6,0.0683,0.472814,0.84418,0.822287,0.742401,0.761107
7,0.0667,0.485523,0.834097,0.820458,0.731265,0.75244
8,0.0647,0.465,0.839597,0.832026,0.743763,0.768014
9,0.0637,0.458137,0.846929,0.831651,0.753338,0.775028
10,0.0634,0.461829,0.846013,0.842781,0.757026,0.782572


[I 2025-03-24 02:35:31,601] Trial 131 finished with value: 0.7856191892296971 and parameters: {'learning_rate': 0.0033461980885856947, 'weight_decay': 0.001, 'warmup_steps': 49, 'lambda_param': 1.0, 'temperature': 2.5}. Best is trial 112 with value: 0.8063353096417676.


Trial 132 with params: {'learning_rate': 0.0032170711379117074, 'weight_decay': 0.002, 'warmup_steps': 32, 'lambda_param': 0.30000000000000004, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.568,0.52428,0.819432,0.748452,0.642633,0.675689
2,0.1149,0.492581,0.835014,0.819819,0.725292,0.754688
3,0.0828,0.480184,0.843263,0.831259,0.746585,0.7719
4,0.0744,0.480806,0.846013,0.817484,0.744292,0.765959
5,0.0716,0.485712,0.842346,0.823901,0.743097,0.766712
6,0.069,0.482799,0.842346,0.839013,0.747334,0.773505
7,0.0669,0.480394,0.846929,0.83071,0.754033,0.778217
8,0.0661,0.489313,0.84143,0.81844,0.75886,0.773311
9,0.0643,0.468439,0.849679,0.832702,0.750789,0.775718
10,0.0637,0.466699,0.851512,0.847489,0.756589,0.783496


[I 2025-03-24 02:41:07,597] Trial 132 finished with value: 0.7855127709822601 and parameters: {'learning_rate': 0.0032170711379117074, 'weight_decay': 0.002, 'warmup_steps': 32, 'lambda_param': 0.30000000000000004, 'temperature': 2.5}. Best is trial 112 with value: 0.8063353096417676.


Trial 133 with params: {'learning_rate': 0.004255934020341269, 'weight_decay': 0.002, 'warmup_steps': 24, 'lambda_param': 0.4, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5143,0.501429,0.832264,0.784097,0.709491,0.73432
2,0.1108,0.479359,0.83868,0.826063,0.736455,0.766645
3,0.0826,0.479277,0.840513,0.836369,0.737078,0.770172
4,0.076,0.488983,0.835014,0.831301,0.725053,0.757776
5,0.0729,0.476744,0.840513,0.837726,0.72977,0.765121
6,0.0689,0.454824,0.850596,0.857516,0.753173,0.7869
7,0.0689,0.468886,0.847846,0.848709,0.744989,0.776195
8,0.0657,0.460864,0.851512,0.857155,0.750529,0.786536
9,0.0645,0.449882,0.857012,0.856258,0.754788,0.786533
10,0.0636,0.454439,0.849679,0.843315,0.760918,0.784593


[I 2025-03-24 02:46:52,145] Trial 133 finished with value: 0.7825652945041495 and parameters: {'learning_rate': 0.004255934020341269, 'weight_decay': 0.002, 'warmup_steps': 24, 'lambda_param': 0.4, 'temperature': 2.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 134 with params: {'learning_rate': 0.0007078235644524678, 'weight_decay': 0.005, 'warmup_steps': 17, 'lambda_param': 0.2, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8878,0.656474,0.756187,0.442772,0.428407,0.430248
2,0.2782,0.573879,0.8011,0.629874,0.587059,0.598727
3,0.1538,0.54258,0.809349,0.715592,0.632692,0.657355
4,0.1125,0.534575,0.806599,0.757378,0.663088,0.691459
5,0.0943,0.532259,0.815765,0.782708,0.699346,0.726839


[I 2025-03-24 02:48:46,827] Trial 134 pruned. 


Trial 135 with params: {'learning_rate': 0.004990332367174081, 'weight_decay': 0.003, 'warmup_steps': 28, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4994,0.524806,0.818515,0.775552,0.681556,0.708014
2,0.1093,0.476831,0.842346,0.814183,0.730268,0.756689
3,0.0818,0.448967,0.854262,0.828218,0.755826,0.777181
4,0.075,0.46349,0.854262,0.846534,0.773515,0.795351
5,0.0723,0.453208,0.855179,0.845161,0.775171,0.797545
6,0.0705,0.474953,0.846013,0.837459,0.752872,0.779163
7,0.0679,0.460371,0.849679,0.855782,0.754564,0.790372
8,0.0667,0.503236,0.836847,0.837712,0.725661,0.762828
9,0.0673,0.474832,0.845096,0.846004,0.745165,0.779274
10,0.064,0.467903,0.852429,0.85151,0.751753,0.784627


[I 2025-03-24 02:54:16,638] Trial 135 finished with value: 0.7939615163167681 and parameters: {'learning_rate': 0.004990332367174081, 'weight_decay': 0.003, 'warmup_steps': 28, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}. Best is trial 112 with value: 0.8063353096417676.


Trial 136 with params: {'learning_rate': 0.003169431906301606, 'weight_decay': 0.004, 'warmup_steps': 33, 'lambda_param': 0.4, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5687,0.510389,0.827681,0.735395,0.660654,0.682405
2,0.1154,0.476439,0.84143,0.817591,0.739862,0.76088
3,0.0819,0.473334,0.853346,0.837017,0.762149,0.780136
4,0.0762,0.474278,0.84418,0.829674,0.75171,0.774583
5,0.0701,0.474104,0.845096,0.804685,0.734664,0.753266


[I 2025-03-24 02:56:10,152] Trial 136 pruned. 


Trial 137 with params: {'learning_rate': 0.004031821430205959, 'weight_decay': 0.002, 'warmup_steps': 36, 'lambda_param': 0.5, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5413,0.509106,0.83593,0.75822,0.689156,0.709983
2,0.1112,0.478026,0.833181,0.809541,0.714048,0.747568
3,0.0815,0.453854,0.847846,0.834276,0.756313,0.777428
4,0.074,0.45668,0.851512,0.847906,0.748656,0.77944
5,0.0704,0.472481,0.845096,0.820936,0.751235,0.773158
6,0.0688,0.458859,0.846929,0.826774,0.738357,0.765971
7,0.0681,0.47615,0.847846,0.823077,0.737056,0.761822
8,0.0672,0.465269,0.848763,0.841724,0.758468,0.784747
9,0.0658,0.451041,0.846013,0.835154,0.750536,0.775847
10,0.0638,0.443977,0.854262,0.842898,0.769381,0.793986


[I 2025-03-24 03:01:53,416] Trial 137 finished with value: 0.7955481463817776 and parameters: {'learning_rate': 0.004031821430205959, 'weight_decay': 0.002, 'warmup_steps': 36, 'lambda_param': 0.5, 'temperature': 2.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 138 with params: {'learning_rate': 0.004483491574986904, 'weight_decay': 0.002, 'warmup_steps': 25, 'lambda_param': 0.5, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5093,0.518204,0.827681,0.765029,0.670834,0.703637
2,0.1098,0.483904,0.837764,0.831198,0.721435,0.759598
3,0.0815,0.469042,0.845096,0.795401,0.722274,0.742488
4,0.0746,0.479217,0.84143,0.836054,0.744507,0.775663
5,0.0721,0.455419,0.837764,0.809938,0.722133,0.748487
6,0.0686,0.462906,0.851512,0.852215,0.756673,0.788004
7,0.0698,0.478036,0.84418,0.823136,0.72668,0.756058
8,0.0665,0.473242,0.847846,0.843437,0.752909,0.782417
9,0.0645,0.463706,0.84418,0.841616,0.748436,0.780439
10,0.063,0.463956,0.845096,0.837614,0.751222,0.780031


[I 2025-03-24 03:07:23,886] Trial 138 finished with value: 0.7781286659326964 and parameters: {'learning_rate': 0.004483491574986904, 'weight_decay': 0.002, 'warmup_steps': 25, 'lambda_param': 0.5, 'temperature': 3.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 139 with params: {'learning_rate': 0.004813032515512858, 'weight_decay': 0.005, 'warmup_steps': 29, 'lambda_param': 0.4, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5031,0.490951,0.84418,0.790599,0.70961,0.734243
2,0.1093,0.480644,0.836847,0.823402,0.750045,0.772351
3,0.0818,0.468614,0.846929,0.832195,0.753408,0.778501
4,0.0754,0.469297,0.847846,0.840918,0.750903,0.780422
5,0.072,0.47981,0.839597,0.830814,0.74504,0.770831
6,0.069,0.478294,0.836847,0.795235,0.725931,0.74729
7,0.0691,0.468542,0.84418,0.844765,0.767043,0.791724
8,0.067,0.466002,0.845096,0.8384,0.750499,0.77544
9,0.0649,0.461801,0.84418,0.826171,0.750133,0.771795
10,0.0635,0.468054,0.839597,0.833749,0.748103,0.772782


[I 2025-03-24 03:11:18,036] Trial 139 pruned. 


Trial 140 with params: {'learning_rate': 0.004283264216032976, 'weight_decay': 0.001, 'warmup_steps': 36, 'lambda_param': 0.5, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5314,0.500539,0.828598,0.74675,0.687671,0.703978
2,0.1113,0.467183,0.842346,0.823365,0.731182,0.762874
3,0.0826,0.463815,0.84418,0.83773,0.747167,0.77372
4,0.0737,0.462536,0.845096,0.832615,0.732941,0.764008
5,0.0709,0.474283,0.843263,0.837533,0.740598,0.771303
6,0.0715,0.480494,0.84143,0.832774,0.736878,0.767951
7,0.0673,0.467492,0.84143,0.836667,0.729398,0.764397
8,0.0654,0.456031,0.851512,0.845767,0.747002,0.777387
9,0.0643,0.463575,0.84418,0.827358,0.73734,0.764933
10,0.0637,0.467507,0.852429,0.847821,0.759418,0.786917


[I 2025-03-24 03:16:59,053] Trial 140 finished with value: 0.7886900190348453 and parameters: {'learning_rate': 0.004283264216032976, 'weight_decay': 0.001, 'warmup_steps': 36, 'lambda_param': 0.5, 'temperature': 2.5}. Best is trial 112 with value: 0.8063353096417676.


Trial 141 with params: {'learning_rate': 0.002197067410974609, 'weight_decay': 0.001, 'warmup_steps': 23, 'lambda_param': 0.9, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6209,0.556582,0.812099,0.699393,0.612876,0.637068
2,0.1312,0.503419,0.825848,0.806493,0.725653,0.753462
3,0.086,0.484355,0.837764,0.826609,0.723393,0.755155
4,0.0751,0.473482,0.842346,0.832558,0.738854,0.76928
5,0.0699,0.481888,0.834097,0.798679,0.726548,0.746583


[I 2025-03-24 03:18:53,301] Trial 141 pruned. 


Trial 142 with params: {'learning_rate': 0.0026996505638788378, 'weight_decay': 0.002, 'warmup_steps': 44, 'lambda_param': 0.5, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6046,0.535826,0.809349,0.722497,0.620628,0.650677
2,0.1218,0.498696,0.833181,0.828098,0.731454,0.762848
3,0.0853,0.487035,0.836847,0.823293,0.741527,0.76642
4,0.0747,0.47897,0.845096,0.827325,0.746881,0.77153
5,0.0698,0.47665,0.835014,0.802276,0.731774,0.752734
6,0.0684,0.503783,0.831347,0.798195,0.728192,0.747198
7,0.0673,0.486594,0.831347,0.82024,0.717709,0.750711
8,0.0652,0.486438,0.832264,0.813844,0.735084,0.755568
9,0.0644,0.484989,0.83593,0.799291,0.735838,0.751956
10,0.0638,0.479513,0.837764,0.806298,0.747578,0.761395


[I 2025-03-24 03:22:39,272] Trial 142 pruned. 


Trial 143 with params: {'learning_rate': 0.003501374323685498, 'weight_decay': 0.004, 'warmup_steps': 15, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5352,0.506362,0.821265,0.773136,0.655943,0.695391
2,0.1143,0.477992,0.839597,0.820695,0.725728,0.757666
3,0.083,0.468793,0.852429,0.825585,0.743031,0.768585
4,0.0745,0.466575,0.842346,0.816563,0.72983,0.756786
5,0.0708,0.486482,0.83593,0.81723,0.708064,0.743586


[I 2025-03-24 03:24:28,998] Trial 143 pruned. 


Trial 144 with params: {'learning_rate': 0.0020694471934371078, 'weight_decay': 0.005, 'warmup_steps': 53, 'lambda_param': 0.1, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6567,0.554145,0.811182,0.730627,0.641501,0.666646
2,0.1333,0.520562,0.821265,0.793681,0.715191,0.738616
3,0.0878,0.482461,0.83868,0.816918,0.741745,0.765072
4,0.0765,0.494414,0.835014,0.801558,0.737927,0.753062
5,0.0707,0.478451,0.839597,0.820469,0.737265,0.762038
6,0.0673,0.474718,0.84418,0.825094,0.747929,0.769366
7,0.0658,0.479905,0.839597,0.826679,0.731541,0.757323
8,0.0649,0.472972,0.840513,0.812896,0.738965,0.757163
9,0.0637,0.471631,0.837764,0.826498,0.741843,0.765058
10,0.063,0.476545,0.843263,0.820102,0.746907,0.765708


[I 2025-03-24 03:28:13,438] Trial 144 pruned. 


Trial 145 with params: {'learning_rate': 0.004511459311461613, 'weight_decay': 0.001, 'warmup_steps': 19, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5064,0.512399,0.834097,0.796615,0.707289,0.736096
2,0.1102,0.461962,0.852429,0.831058,0.742954,0.770808
3,0.0829,0.461199,0.837764,0.798849,0.724985,0.748204
4,0.0753,0.461282,0.849679,0.823181,0.739111,0.765007
5,0.0737,0.459111,0.846929,0.837141,0.748514,0.777049
6,0.0696,0.473075,0.848763,0.84785,0.750196,0.782329
7,0.0689,0.446513,0.856095,0.838988,0.74686,0.778305
8,0.0668,0.445481,0.849679,0.830011,0.744121,0.771158
9,0.0659,0.453225,0.847846,0.826267,0.742311,0.769236
10,0.0651,0.457227,0.846929,0.83335,0.749319,0.77657


[I 2025-03-24 03:32:06,153] Trial 145 pruned. 


Trial 146 with params: {'learning_rate': 0.0030027166857921443, 'weight_decay': 0.003, 'warmup_steps': 32, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5771,0.518134,0.814849,0.725704,0.65122,0.673091
2,0.1173,0.471965,0.843263,0.814469,0.743019,0.76359
3,0.0839,0.48082,0.84143,0.786439,0.740544,0.752558
4,0.0756,0.457427,0.850596,0.823639,0.747556,0.769859
5,0.0706,0.472058,0.856095,0.843825,0.767541,0.792407
6,0.0677,0.465431,0.847846,0.840682,0.746918,0.778399
7,0.0665,0.465438,0.850596,0.811012,0.738226,0.761044
8,0.0648,0.456148,0.850596,0.834209,0.751932,0.775946
9,0.0643,0.452502,0.84418,0.795244,0.73369,0.753118
10,0.0639,0.463878,0.845096,0.805895,0.738307,0.758432


[I 2025-03-24 03:35:48,713] Trial 146 pruned. 


Trial 147 with params: {'learning_rate': 0.004549556532209056, 'weight_decay': 0.007, 'warmup_steps': 8, 'lambda_param': 0.9, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4827,0.491728,0.824015,0.749048,0.647551,0.6819
2,0.1102,0.474324,0.851512,0.847435,0.751144,0.780985
3,0.0835,0.451606,0.853346,0.831341,0.758556,0.780251
4,0.0746,0.466357,0.846013,0.830061,0.756132,0.777663
5,0.0707,0.458956,0.850596,0.848434,0.764196,0.791443
6,0.0697,0.485507,0.839597,0.845591,0.746711,0.77932
7,0.0728,0.475222,0.846013,0.835402,0.747754,0.775635
8,0.0691,0.488349,0.83868,0.826977,0.735684,0.765768
9,0.0656,0.458566,0.852429,0.834438,0.761457,0.783516
10,0.064,0.454414,0.857929,0.850547,0.75876,0.785378


[I 2025-03-24 03:41:42,960] Trial 147 finished with value: 0.785476781529609 and parameters: {'learning_rate': 0.004549556532209056, 'weight_decay': 0.007, 'warmup_steps': 8, 'lambda_param': 0.9, 'temperature': 6.5}. Best is trial 112 with value: 0.8063353096417676.


Trial 148 with params: {'learning_rate': 0.0042506356964603685, 'weight_decay': 0.002, 'warmup_steps': 38, 'lambda_param': 0.5, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5342,0.498172,0.833181,0.767483,0.659936,0.692283
2,0.1107,0.457325,0.847846,0.841221,0.73362,0.767663
3,0.0821,0.479839,0.837764,0.806066,0.724855,0.747748
4,0.0752,0.454842,0.857929,0.857505,0.75542,0.789894
5,0.0716,0.45227,0.853346,0.831655,0.753267,0.779244
6,0.0685,0.451903,0.857012,0.849252,0.760571,0.791227
7,0.0689,0.463028,0.847846,0.823569,0.743404,0.769123
8,0.0674,0.455583,0.851512,0.842261,0.744639,0.774648
9,0.065,0.454302,0.851512,0.846649,0.745709,0.779661
10,0.0639,0.44601,0.857929,0.847025,0.751479,0.782446


[I 2025-03-24 03:47:28,906] Trial 148 finished with value: 0.7895455727268724 and parameters: {'learning_rate': 0.0042506356964603685, 'weight_decay': 0.002, 'warmup_steps': 38, 'lambda_param': 0.5, 'temperature': 2.0}. Best is trial 112 with value: 0.8063353096417676.


Trial 149 with params: {'learning_rate': 0.004107457138097337, 'weight_decay': 0.0, 'warmup_steps': 43, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5376,0.489716,0.839597,0.766958,0.683255,0.70779
2,0.1121,0.477846,0.836847,0.823583,0.725228,0.756889
3,0.0823,0.463325,0.848763,0.833048,0.75178,0.775071
4,0.0731,0.466491,0.850596,0.824513,0.75556,0.775859
5,0.0719,0.471439,0.849679,0.830971,0.757666,0.778676
6,0.0702,0.466212,0.846929,0.828829,0.740987,0.764847
7,0.0678,0.489708,0.833181,0.817446,0.742846,0.76538
8,0.0671,0.46407,0.846929,0.831791,0.752522,0.77784
9,0.0647,0.467468,0.846013,0.831211,0.749077,0.775004
10,0.064,0.477152,0.83868,0.814517,0.742685,0.763817


[I 2025-03-24 03:51:18,633] Trial 149 pruned. 


In [47]:
# Report the selected run of this search: its run id, objective value and
# hyperparameters, printed as plain text.
# NOTE(review): `best_trial4` is assigned in an earlier cell not visible here —
# presumably the result of the hyperparameter search logged above; confirm.
print(str(best_trial4))

BestRun(run_id='112', objective=0.8063353096417676, hyperparameters={'learning_rate': 0.004415119212034352, 'weight_decay': 0.001, 'warmup_steps': 42, 'lambda_param': 0.5, 'temperature': 2.0}, run_summary=None)
