In [1]:
from transformers import BasicTokenizer, Trainer
from datasets import concatenate_datasets, load_from_disk
import kagglehub
import optuna
import torch
import math
import base

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [2]:
# Reset the random seeds through the project helper before any data or model work.
# NOTE(review): which RNGs are seeded (python / numpy / torch) is decided inside
# base.reset_seed, which is not visible from this notebook — confirm there.
base.reset_seed()

In [3]:
# Select the compute device: CUDA when a GPU is visible to torch, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report the choice (including the GPU model name) so it is recorded in the cell output.
if device.type == "cuda":
    print("GPU is available and will be used:", torch.cuda.get_device_name(0))
else:
    print("GPU is not available, using CPU.")

GPU is available and will be used: NVIDIA A100 80GB PCIe MIG 2g.20gb


In [4]:
# Download the GloVe dataset through kagglehub. The returned value is printed so the
# local location is recorded with the notebook (in the recorded run it is a path
# inside the kagglehub cache directory).
my_glove = kagglehub.dataset_download("thanakomsn/glove6b300dtxt")
print(my_glove)

/home/jovyan/.cache/kagglehub/datasets/thanakomsn/glove6b300dtxt/versions/1


In [5]:
# Path of the GloVe vector file inside the downloaded dataset directory.
GLOVE_FILE = f"{my_glove}/glove.6B.300d.txt"
# Dataset identifier; it is interpolated into the data, results and log paths below.
DATASET = "trec"

In [6]:
# Directory that holds every pre-processed split of DATASET; kept in one place so the
# individual paths below cannot drift apart.
data_root = f"~/data/{DATASET}"

# Splits used for training, validation during the search, and final testing.
train_data = load_from_disk(f"{data_root}/train-logits_coarse")
eval_data = load_from_disk(f"{data_root}/eval-logits_coarse")
test_data = load_from_disk(f"{data_root}/test-logits_coarse")

# Augmented variant of the training split.
all_train_data = load_from_disk(f"{data_root}/train-logits-augmented_coarse")

# Combined corpus (eval + test + augmented train, in that order) that the tokenisation
# and vocabulary cells consume. The already loaded splits are reused instead of being
# read from disk a second time; the resulting rows and their order are unchanged.
# NOTE(review): the plain train split is not part of this corpus, so every token of
# train_data must also occur in one of these three splits — confirm.
all_data = concatenate_datasets([eval_data, test_data, all_train_data])

# Lower-casing basic tokenizer applied to the "sentence" column of every split.
tokenizer = BasicTokenizer(do_lower_case=True)

In [7]:
# Tokenise the "sentence" field of every example, one token list per example.
train_data_tokens = [tokenizer.tokenize(row["sentence"]) for row in train_data]
eval_data_tokens = [tokenizer.tokenize(row["sentence"]) for row in eval_data]
test_data_tokens = [tokenizer.tokenize(row["sentence"]) for row in test_data]

# Augmented training split.
all_train_data_tokens = [tokenizer.tokenize(row["sentence"]) for row in all_train_data]

# Combined corpus; its tokens are the input of the vocabulary construction.
all_data_tokens = [tokenizer.tokenize(row["sentence"]) for row in all_data]

In [8]:
# Build the vocabulary from the token lists of the combined corpus.
# NOTE(review): the ordering of the result and whether it contains reserved entries
# (e.g. padding / unknown) is decided inside base.get_vocab — not visible here.
vocab = base.get_vocab(all_data_tokens)

In [9]:
# Map every vocabulary entry to its position in `vocab` (positions start at 0).
word_index = {word: position for position, word in enumerate(vocab)}

In [10]:
# Read the GloVe file through the project helper (the recorded run reports
# 400000 word vectors found).
# NOTE(review): presumably a word -> vector mapping; confirm in base.get_embeddings_indeces.
embeddings_index = base.get_embeddings_indeces(GLOVE_FILE)

Found 400000 word vectors.


In [11]:
# Width of one embedding vector; matches the 300d GloVe file selected in GLOVE_FILE.
embedding_dim = 300
# Number of rows of the embedding matrix: the vocabulary plus two extra rows.
# NOTE(review): the two extra rows are presumably reserved for padding / unknown
# tokens — confirm against base.get_embedding_matrix and base.padd.
num_tokens = len(vocab) + 2
# Record the vocabulary size in the cell output.
print(len(vocab))

8766


In [12]:
# Assemble the embedding matrix handed to the model from the vocabulary index and the
# GloVe lookup. The helper reports how many words were converted and how many were
# missed (8551 converted, 215 misses in the recorded run).
# NOTE(review): presumably of shape (num_tokens, embedding_dim) with missed words left
# at a default value — confirm in base.get_embedding_matrix.
embedding_matrix = base.get_embedding_matrix(num_tokens, embedding_dim, word_index, embeddings_index)

Converted 8551 words (215) misses


In [13]:
# Replace every token by its vocabulary index, keeping one list per example.
# A token missing from word_index raises KeyError, i.e. the vocabulary must cover
# every split converted here.
train_data_index = [[word_index[token] for token in sentence] for sentence in train_data_tokens]
eval_data_index = [[word_index[token] for token in sentence] for sentence in eval_data_tokens]
test_data_index = [[word_index[token] for token in sentence] for sentence in test_data_tokens]

# Augmented training split.
all_train_data_index = [[word_index[token] for token in sentence] for sentence in all_train_data_tokens]

In [14]:
# Target length handed to base.padd for every split. Defined once so the four splits
# are guaranteed to be processed with the same length.
MAX_SEQ_LEN = 60

# Bring every index list to the common length through the project helper.
# NOTE(review): presumably pads short sequences and truncates long ones — the exact
# behaviour (pad value, side, truncation) is defined in base.padd; confirm there.
train_padded_data = [base.padd(ids, MAX_SEQ_LEN) for ids in train_data_index]
eval_padded_data = [base.padd(ids, MAX_SEQ_LEN) for ids in eval_data_index]
test_padded_data = [base.padd(ids, MAX_SEQ_LEN) for ids in test_data_index]

# Augmented training split.
all_train_padded_data = [base.padd(ids, MAX_SEQ_LEN) for ids in all_train_data_index]

In [15]:
# Attach the padded index sequences to each split as the "input_ids" column.
# The dataset variables are rebound to the extended datasets.
# NOTE(review): because the names are rebound, running this cell a second time without
# re-running the loading cell will likely fail on the already existing "input_ids"
# column — confirm, and re-run from the loading cell when in doubt.
train_data = train_data.add_column("input_ids", train_padded_data)
eval_data = eval_data.add_column("input_ids", eval_padded_data)
test_data = test_data.add_column("input_ids", test_padded_data)

# Augmented training split.
all_train_data = all_train_data.add_column("input_ids", all_train_padded_data)

In [16]:
# Number of training epochs and batch size; both are passed to base.get_training_args
# and are used to convert epochs into step counts for the pruner.
num_epochs = 15
batch_size = 128

In [17]:
# Conversion of epochs to steps
data_length = len(train_data)
# Number of batches needed to go through the training split once.
steps_per_epoch = math.ceil(data_length / batch_size)
# Pruner budget: at least 5 epochs' worth of steps, at most num_epochs' worth.
# NOTE(review): equals the Trainer's step count only for one batch per step on a single
# device (no gradient accumulation) — confirm in base.get_training_args.
min_r = steps_per_epoch * 5
max_r = steps_per_epoch * num_epochs
# Upper bound of the warm-up search range: a tenth of one epoch, rounded up.
warm_up = math.ceil(data_length / batch_size / 10)

In [18]:
def hp_space(trial):
    """Sample one hyperparameter configuration from the given Optuna trial.

    Search space:
      * learning_rate: float in [5e-5, 5e-3], sampled on a log scale
      * weight_decay:  float in [0, 1e-2], in steps of 1e-3
      * warmup_steps:  integer in [0, warm_up] (warm_up is a notebook-level value)

    The sampled values are printed together with the trial number and returned
    as a dict keyed by the parameter names.
    """
    learning_rate = trial.suggest_float("learning_rate", 5e-5, 5e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0, 1e-2, step=1e-3)
    warmup_steps = trial.suggest_int("warmup_steps", 0, warm_up)

    params = {
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "warmup_steps": warmup_steps,
    }
    # Echo the configuration so each trial's log starts with its parameters.
    print(f"Trial {trial.number} with params: {params}")
    return params

In [19]:
# Hyperband pruner; its resource bounds are the step counts min_r / max_r computed
# from the epoch budget.
pruner = optuna.pruners.HyperbandPruner(
    min_resource=min_r,
    max_resource=max_r,
    reduction_factor=2,
    bootstrap_count=2,
)
# Multivariate TPE sampler with a fixed seed so the sequence of suggestions is repeatable.
sampler = optuna.samplers.TPESampler(
    multivariate=True,
    seed=42,
)



In [20]:
def get_BiLSTM():
    """Create a fresh BiLSTM classifier for one training run.

    The model is built from the notebook-level `embedding_matrix` and
    `embedding_dim`, with a hidden size of 300, a fully connected size of 400,
    6 output classes and frozen embeddings (`freeze_embed=True`).
    """
    model_kwargs = {
        "embedding_matrix": embedding_matrix,
        "embedding_dim": embedding_dim,
        "fc_dim": 400,
        "hidden_dim": 300,
        "output_dim": 6,
        "freeze_embed": True,
    }
    return base.BiLSTMClassifier(**model_kwargs)

In [21]:
# Reset the seeds again right before the training arguments and the trainer are
# created, so the search does not depend on random state consumed by earlier cells.
# NOTE(review): the exact set of seeded RNGs is defined in base.reset_seed — confirm there.
base.reset_seed()

In [22]:
# Training arguments for the hyperparameter search; checkpoints and logs go to
# dataset-specific directories under ~/results and ~/logs.
# NOTE(review): everything not passed here (evaluation / save strategy, optimizer, ...)
# comes from the defaults inside base.get_training_args — confirm there.
training_args = base.get_training_args(
    output_dir=f"~/results/{DATASET}/bilstm-base_coarse_hp-search",
    logging_dir=f"~/logs/{DATASET}/bilstm-base_coarse_hp-search",
    epochs=num_epochs,
    batch_size=batch_size,
)

In [23]:
# Trainer used for the search. No model instance is passed: `model_init` supplies a
# zero-argument factory that returns a new BiLSTM each time it is called.
trainer = Trainer(
    model_init=get_BiLSTM,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
)
  

In [24]:
# Run the Optuna-backed search: 150 trials that maximise the validation F1 score.
# The pruner, sampler and study name are passed along with the search call; the
# recorded log shows the study "Base" being created in memory, so its history is
# not persisted by this call.
best_trial = trainer.hyperparameter_search(
    backend="optuna",
    direction="maximize",
    n_trials=150,
    hp_space=hp_space,
    # Objective of a trial: the "eval_f1" entry of the evaluation metrics.
    compute_objective=lambda metrics: metrics["eval_f1"],
    study_name="Base",
    sampler=sampler,
    pruner=pruner,
)

[I 2025-03-21 23:19:55,926] A new study created in memory with name: Base


Trial 0 with params: {'learning_rate': 0.0002805758207667253, 'weight_decay': 0.01, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5864,1.352458,0.472044,0.556011,0.364247,0.328637
2,1.1564,0.997047,0.615949,0.554059,0.514145,0.522096
3,0.8511,0.799823,0.710357,0.610599,0.604893,0.601129
4,0.6639,0.705013,0.754354,0.65511,0.63906,0.640721
5,0.5839,0.636923,0.770852,0.654833,0.658059,0.654533
6,0.5345,0.61149,0.776352,0.65475,0.663254,0.657938
7,0.4742,0.603255,0.776352,0.657674,0.663115,0.658393
8,0.4374,0.58606,0.788268,0.665197,0.675951,0.667878
9,0.4079,0.596886,0.791017,0.662209,0.678182,0.667026
10,0.3756,0.595865,0.787351,0.662146,0.671639,0.665349


[I 2025-03-21 23:21:09,272] Trial 0 pruned. 


Trial 1 with params: {'learning_rate': 0.0007875660249889869, 'weight_decay': 0.001, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3307,0.956324,0.630614,0.544702,0.539155,0.527445
2,0.7564,0.66228,0.75527,0.644864,0.645115,0.642781
3,0.5193,0.565588,0.799267,0.680888,0.682727,0.678165
4,0.4304,0.555603,0.810266,0.697021,0.689212,0.689526
5,0.3792,0.523995,0.820348,0.693755,0.699802,0.696083


[I 2025-03-21 23:21:30,424] Trial 1 pruned. 


Trial 2 with params: {'learning_rate': 6.533369619026643e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7224,1.655066,0.335472,0.206457,0.245605,0.175467
2,1.6059,1.552188,0.399633,0.195159,0.303339,0.204764
3,1.4815,1.397503,0.428048,0.377042,0.324721,0.257099
4,1.3188,1.247179,0.51879,0.504744,0.4123,0.393018
5,1.1962,1.141454,0.597617,0.52815,0.498297,0.5004
6,1.0931,1.069948,0.609533,0.52161,0.516088,0.506763
7,1.0311,1.016412,0.632447,0.545871,0.53269,0.534778
8,0.977,0.980062,0.63703,0.538992,0.543279,0.538685
9,0.9425,0.952596,0.641613,0.54476,0.545352,0.541994
10,0.9043,0.937535,0.655362,0.550071,0.559979,0.551824


[I 2025-03-21 23:22:27,339] Trial 2 pruned. 


Trial 3 with params: {'learning_rate': 0.0013035123791853842, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2496,0.808476,0.710357,0.624384,0.605975,0.606865
2,0.6307,0.610163,0.782768,0.659995,0.670503,0.662586
3,0.4569,0.549212,0.814849,0.700566,0.695785,0.692901
4,0.3795,0.524969,0.816682,0.697418,0.695298,0.693315
5,0.3051,0.520395,0.836847,0.853583,0.775915,0.800152


[I 2025-03-21 23:22:51,062] Trial 3 pruned. 


Trial 4 with params: {'learning_rate': 0.002311294500510415, 'weight_decay': 0.002, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0783,0.762843,0.744271,0.657601,0.630208,0.631119
2,0.5444,0.571311,0.7956,0.669498,0.682131,0.673954
3,0.3824,0.565799,0.808433,0.850005,0.72058,0.732464
4,0.2921,0.511165,0.840513,0.856319,0.784358,0.809735
5,0.2011,0.53126,0.846929,0.871723,0.801709,0.826375
6,0.1328,0.518657,0.850596,0.807099,0.818196,0.811328
7,0.0602,0.606565,0.865261,0.856898,0.836539,0.84545
8,0.0353,0.640073,0.861595,0.85374,0.824125,0.836835
9,0.0193,0.712197,0.855179,0.849576,0.82828,0.837429
10,0.0124,0.743814,0.860678,0.861734,0.814039,0.832364


[I 2025-03-21 23:24:33,084] Trial 4 finished with value: 0.8584306339739497 and parameters: {'learning_rate': 0.002311294500510415, 'weight_decay': 0.002, 'warmup_steps': 0}. Best is trial 4 with value: 0.8584306339739497.


Trial 5 with params: {'learning_rate': 0.00011635338541918901, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6813,1.585558,0.342805,0.370882,0.251271,0.185421
2,1.4837,1.34754,0.454629,0.352699,0.350941,0.301473
3,1.2295,1.12449,0.591201,0.527538,0.490984,0.494509
4,1.0393,0.989022,0.63703,0.532876,0.543562,0.535374
5,0.9264,0.899267,0.675527,0.5728,0.576362,0.573517


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-21 23:25:30,369] Trial 5 pruned. 


Trial 6 with params: {'learning_rate': 0.0003654769917956456, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5384,1.256206,0.535289,0.556438,0.42808,0.429191
2,1.0422,0.891121,0.670944,0.602899,0.564894,0.575807
3,0.7219,0.699909,0.749771,0.633311,0.6453,0.635335
4,0.5817,0.662742,0.770852,0.673937,0.654656,0.657407
5,0.5285,0.601153,0.783685,0.664918,0.670949,0.664869
6,0.4826,0.580027,0.790101,0.661358,0.676737,0.667691
7,0.4208,0.58129,0.788268,0.665359,0.674406,0.667476
8,0.3857,0.561213,0.802016,0.672962,0.686366,0.677532
9,0.3488,0.575739,0.79835,0.667989,0.684336,0.673258
10,0.3174,0.570436,0.807516,0.84965,0.705964,0.718162


[I 2025-03-21 23:26:42,609] Trial 6 finished with value: 0.7347196497737056 and parameters: {'learning_rate': 0.0003654769917956456, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 4 with value: 0.8584306339739497.


Trial 7 with params: {'learning_rate': 9.505122659935192e-05, 'weight_decay': 0.003, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6938,1.607674,0.335472,0.206457,0.245605,0.175467
2,1.5382,1.429643,0.413382,0.366009,0.313215,0.229069
3,1.3172,1.218513,0.52154,0.511779,0.414514,0.398011
4,1.1365,1.07446,0.610449,0.515211,0.517976,0.513937
5,1.0208,0.985158,0.635197,0.538236,0.541495,0.53494


[I 2025-03-21 23:27:08,671] Trial 7 pruned. 


Trial 8 with params: {'learning_rate': 0.00040842279473800845, 'weight_decay': 0.008, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.4853,1.198941,0.540788,0.543559,0.435641,0.43616
2,1.0002,0.838979,0.709441,0.610135,0.607345,0.606509
3,0.6831,0.667826,0.75527,0.6346,0.648921,0.637863
4,0.5477,0.634716,0.779102,0.677015,0.662754,0.664539
5,0.497,0.579848,0.790101,0.667707,0.67693,0.669984


[I 2025-03-21 23:27:33,527] Trial 8 pruned. 


Trial 9 with params: {'learning_rate': 0.0005338741354740678, 'weight_decay': 0.006, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.4187,1.092555,0.584785,0.550312,0.483608,0.485282
2,0.8834,0.763453,0.72044,0.627906,0.613133,0.615332
3,0.6003,0.614857,0.775435,0.653287,0.663164,0.655859
4,0.4925,0.57537,0.799267,0.67964,0.681535,0.678108
5,0.4431,0.552677,0.802016,0.678107,0.686409,0.67986
6,0.3934,0.564479,0.802016,0.668037,0.688019,0.676068
7,0.3285,0.552053,0.813016,0.680575,0.695956,0.687073
8,0.2992,0.528221,0.824931,0.832183,0.759023,0.776909
9,0.2531,0.567078,0.814849,0.823419,0.741734,0.758827
10,0.2119,0.57247,0.824015,0.833121,0.757402,0.777232


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-21 23:28:54,187] Trial 9 finished with value: 0.8137883832328874 and parameters: {'learning_rate': 0.0005338741354740678, 'weight_decay': 0.006, 'warmup_steps': 0}. Best is trial 4 with value: 0.8584306339739497.


Trial 10 with params: {'learning_rate': 0.004518165681587256, 'weight_decay': 0.002, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9854,0.653521,0.759853,0.654044,0.647462,0.646452
2,0.4718,0.70244,0.780018,0.779079,0.736817,0.739612
3,0.3016,0.447198,0.856095,0.859328,0.809947,0.82888
4,0.1809,0.49234,0.861595,0.871238,0.831859,0.848483
5,0.1012,0.587219,0.855179,0.867408,0.828832,0.843986
6,0.0508,0.601599,0.870761,0.859858,0.842188,0.849474
7,0.0292,0.714744,0.877177,0.87237,0.844153,0.856276
8,0.0202,0.696085,0.871677,0.853952,0.841345,0.847091
9,0.0057,0.751116,0.879927,0.889973,0.838734,0.858502
10,0.0014,0.794981,0.877177,0.888581,0.845212,0.862751


[I 2025-03-21 23:30:15,598] Trial 10 pruned. 


Trial 11 with params: {'learning_rate': 0.0020056372842325635, 'weight_decay': 0.006, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1014,0.784704,0.726856,0.670509,0.618475,0.622445
2,0.5501,0.619408,0.769019,0.648262,0.664435,0.646602
3,0.3845,0.536433,0.830431,0.870147,0.735105,0.751407
4,0.293,0.503701,0.840513,0.866034,0.787001,0.813074
5,0.208,0.521521,0.843263,0.867767,0.808674,0.830659
6,0.1496,0.515508,0.853346,0.852883,0.820151,0.832179
7,0.0777,0.595345,0.857929,0.87084,0.821663,0.840417
8,0.057,0.653007,0.851512,0.854543,0.817522,0.831641
9,0.0422,0.693688,0.852429,0.858937,0.803728,0.825309
10,0.0154,0.715482,0.861595,0.86071,0.835474,0.845293


[I 2025-03-21 23:31:28,680] Trial 11 finished with value: 0.8463298875430686 and parameters: {'learning_rate': 0.0020056372842325635, 'weight_decay': 0.006, 'warmup_steps': 0}. Best is trial 4 with value: 0.8584306339739497.


Trial 12 with params: {'learning_rate': 0.0033049565193748773, 'weight_decay': 0.007, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9914,0.692095,0.762603,0.685615,0.650338,0.654464
2,0.5108,0.588476,0.787351,0.675912,0.67391,0.668362
3,0.3398,0.532822,0.824931,0.840417,0.790448,0.8029
4,0.2281,0.484883,0.848763,0.860525,0.81214,0.831306
5,0.1442,0.530737,0.857012,0.871449,0.831044,0.846497
6,0.0792,0.551755,0.864345,0.86755,0.837537,0.849078
7,0.0383,0.611196,0.871677,0.8584,0.840723,0.848706
8,0.0279,0.644454,0.872594,0.870966,0.82501,0.842209
9,0.009,0.694149,0.872594,0.864849,0.833482,0.846822
10,0.004,0.744311,0.873511,0.857121,0.833724,0.844112


[I 2025-03-21 23:32:24,314] Trial 12 pruned. 


Trial 13 with params: {'learning_rate': 0.0018997871267974278, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1396,0.806876,0.718607,0.658475,0.61105,0.61337
2,0.5663,0.597456,0.780018,0.656954,0.670924,0.660159
3,0.4121,0.525098,0.823098,0.859908,0.756551,0.779368
4,0.3178,0.500812,0.832264,0.846375,0.78034,0.803021
5,0.2313,0.514468,0.83593,0.833269,0.783863,0.801991
6,0.1678,0.502565,0.857929,0.866304,0.813992,0.831511
7,0.0947,0.578666,0.847846,0.849468,0.814519,0.827931
8,0.0686,0.624198,0.84418,0.848321,0.812,0.825636
9,0.0481,0.668234,0.855179,0.865398,0.826029,0.841923
10,0.0193,0.686434,0.858845,0.858986,0.831949,0.842889


[I 2025-03-21 23:33:55,637] Trial 13 finished with value: 0.8453646914065941 and parameters: {'learning_rate': 0.0018997871267974278, 'weight_decay': 0.005, 'warmup_steps': 2}. Best is trial 4 with value: 0.8584306339739497.


Trial 14 with params: {'learning_rate': 0.0028927493446863814, 'weight_decay': 0.01, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0761,0.715219,0.757104,0.669628,0.642563,0.646276
2,0.5393,0.557167,0.802016,0.675636,0.686634,0.679536
3,0.3818,0.502724,0.831347,0.832022,0.78529,0.79819
4,0.2576,0.520757,0.83868,0.85889,0.79396,0.816811
5,0.1667,0.519434,0.855179,0.822982,0.819084,0.820087
6,0.1185,0.508507,0.842346,0.810581,0.820235,0.813631
7,0.0571,0.643343,0.84418,0.84785,0.800887,0.818819
8,0.0326,0.739454,0.851512,0.83875,0.81527,0.825184
9,0.0145,0.692681,0.874427,0.884891,0.824872,0.846714
10,0.0059,0.727235,0.872594,0.869511,0.824421,0.841435


[I 2025-03-21 23:34:44,435] Trial 14 pruned. 


Trial 15 with params: {'learning_rate': 0.0013668811947394382, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2242,0.79425,0.711274,0.622088,0.607455,0.608059
2,0.6289,0.612187,0.778185,0.655215,0.667448,0.659557
3,0.4621,0.557092,0.804766,0.687226,0.688284,0.678814
4,0.3631,0.534024,0.821265,0.697892,0.699706,0.694876
5,0.2987,0.523516,0.83593,0.856653,0.773055,0.799016
6,0.2335,0.50225,0.846929,0.858368,0.796011,0.815785
7,0.1528,0.555991,0.842346,0.844478,0.820468,0.829271
8,0.1088,0.564904,0.856095,0.833543,0.828589,0.830503
9,0.0748,0.598601,0.855179,0.858533,0.827533,0.84053
10,0.041,0.713754,0.84143,0.851764,0.81544,0.830003


[I 2025-03-21 23:36:26,779] Trial 15 finished with value: 0.8415098825219275 and parameters: {'learning_rate': 0.0013668811947394382, 'weight_decay': 0.0, 'warmup_steps': 2}. Best is trial 4 with value: 0.8584306339739497.


Trial 16 with params: {'learning_rate': 0.0014810709963883602, 'weight_decay': 0.004, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.194,0.783779,0.718607,0.62386,0.613734,0.612283
2,0.6219,0.610886,0.782768,0.661754,0.669784,0.664315
3,0.4586,0.557552,0.810266,0.694163,0.692084,0.68623
4,0.3599,0.531161,0.815765,0.698664,0.695016,0.692241
5,0.2872,0.513672,0.843263,0.865093,0.807845,0.829662
6,0.2269,0.530913,0.83593,0.849555,0.796975,0.813546
7,0.1487,0.5523,0.846013,0.857437,0.814382,0.829174
8,0.0967,0.590389,0.854262,0.834661,0.828219,0.830623
9,0.0647,0.624503,0.854262,0.848781,0.827359,0.836587
10,0.0338,0.701495,0.842346,0.824617,0.818045,0.819854


[I 2025-03-21 23:37:33,897] Trial 16 pruned. 


Trial 17 with params: {'learning_rate': 0.0020085822314002493, 'weight_decay': 0.008, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1723,0.77284,0.71494,0.623882,0.610899,0.608403
2,0.5746,0.63491,0.765353,0.642268,0.661583,0.643806
3,0.4127,0.58226,0.802016,0.683979,0.687894,0.679778
4,0.3279,0.506607,0.840513,0.866439,0.795275,0.819749
5,0.2384,0.502694,0.856095,0.86247,0.820121,0.836864
6,0.1634,0.49703,0.860678,0.833183,0.825269,0.828418
7,0.0956,0.584042,0.854262,0.859801,0.818419,0.834124
8,0.0672,0.614357,0.846013,0.846721,0.814109,0.826079
9,0.0373,0.670224,0.857012,0.861438,0.819814,0.836291
10,0.0233,0.662172,0.857012,0.855832,0.812476,0.827821


[I 2025-03-21 23:38:32,836] Trial 17 pruned. 


Trial 18 with params: {'learning_rate': 0.0017085233295811128, 'weight_decay': 0.005, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1367,0.758746,0.724106,0.63218,0.617939,0.617955
2,0.5763,0.6865,0.752521,0.631997,0.652507,0.631535
3,0.4215,0.563812,0.802933,0.684989,0.686632,0.681231
4,0.3424,0.500417,0.829514,0.877632,0.731862,0.753557
5,0.2652,0.48139,0.847846,0.840499,0.81374,0.824365
6,0.1955,0.517986,0.84143,0.84348,0.81019,0.82187
7,0.1126,0.608907,0.84418,0.846287,0.811746,0.824017
8,0.0847,0.60682,0.853346,0.844547,0.819006,0.829074
9,0.0547,0.642332,0.851512,0.846815,0.824982,0.83416
10,0.0279,0.732594,0.842346,0.847421,0.808757,0.823876


[I 2025-03-21 23:39:35,932] Trial 18 pruned. 


Trial 19 with params: {'learning_rate': 0.003727216734386374, 'weight_decay': 0.0, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0642,0.689416,0.743355,0.639714,0.636004,0.633976
2,0.5281,0.61765,0.789184,0.763344,0.751812,0.750216
3,0.3217,0.49909,0.835014,0.848726,0.796941,0.812941
4,0.2228,0.48038,0.852429,0.87585,0.815712,0.83861
5,0.128,0.512129,0.861595,0.873783,0.825181,0.843422
6,0.0969,0.555514,0.856095,0.804396,0.850532,0.821505
7,0.0438,0.688228,0.870761,0.882207,0.830737,0.850409
8,0.0167,0.718539,0.869844,0.870271,0.831215,0.846735
9,0.0059,0.755058,0.87901,0.879361,0.837668,0.854749
10,0.003,0.763975,0.877177,0.876132,0.836304,0.852406


[I 2025-03-21 23:40:53,038] Trial 19 finished with value: 0.8526120636605846 and parameters: {'learning_rate': 0.003727216734386374, 'weight_decay': 0.0, 'warmup_steps': 1}. Best is trial 4 with value: 0.8584306339739497.


Trial 20 with params: {'learning_rate': 0.004391486310509663, 'weight_decay': 0.001, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0682,0.750272,0.744271,0.663365,0.631961,0.635345
2,0.5247,0.584735,0.784601,0.663129,0.675909,0.660928
3,0.3252,0.466339,0.83868,0.849508,0.780082,0.799254
4,0.1987,0.493815,0.854262,0.845526,0.827061,0.83525
5,0.1141,0.510615,0.865261,0.837388,0.847425,0.840586
6,0.081,0.59348,0.860678,0.811069,0.835902,0.821438
7,0.0473,0.646665,0.874427,0.876166,0.842677,0.856991
8,0.0252,0.673377,0.867094,0.864974,0.82953,0.843566
9,0.0043,0.737691,0.875344,0.874753,0.834829,0.851124
10,0.0028,0.766623,0.871677,0.871984,0.832502,0.848543


[I 2025-03-21 23:42:08,850] Trial 20 finished with value: 0.8487654921195581 and parameters: {'learning_rate': 0.004391486310509663, 'weight_decay': 0.001, 'warmup_steps': 2}. Best is trial 4 with value: 0.8584306339739497.


Trial 21 with params: {'learning_rate': 0.004279483560254982, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0608,0.802372,0.730522,0.665417,0.618775,0.6262
2,0.5419,0.57816,0.806599,0.840916,0.710856,0.715237
3,0.3347,0.467986,0.835014,0.849596,0.796353,0.813991
4,0.2162,0.512799,0.854262,0.876466,0.79918,0.824507
5,0.1258,0.572239,0.856095,0.850501,0.819939,0.832455
6,0.0817,0.571356,0.868011,0.867559,0.839083,0.850965
7,0.0339,0.632629,0.875344,0.88738,0.834143,0.854824
8,0.0116,0.67004,0.87901,0.854587,0.847814,0.850933
9,0.0105,0.683314,0.87901,0.889497,0.838875,0.85817
10,0.0039,0.723336,0.874427,0.885642,0.835156,0.854161


[I 2025-03-21 23:44:04,939] Trial 21 finished with value: 0.8614978057589228 and parameters: {'learning_rate': 0.004279483560254982, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 21 with value: 0.8614978057589228.


Trial 22 with params: {'learning_rate': 0.004827531108315613, 'weight_decay': 0.004, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0534,0.700369,0.753437,0.668939,0.639902,0.645268
2,0.513,0.580499,0.797434,0.781629,0.758012,0.759065
3,0.3006,0.474208,0.849679,0.866878,0.823986,0.839336
4,0.1872,0.457388,0.87626,0.890395,0.835202,0.856107
5,0.1092,0.581989,0.863428,0.846594,0.825647,0.834419
6,0.0717,0.602599,0.862511,0.864349,0.826362,0.840046
7,0.0356,0.659321,0.875344,0.868156,0.834682,0.848824
8,0.014,0.726478,0.87901,0.878695,0.837169,0.854068
9,0.0043,0.773283,0.878093,0.889518,0.837079,0.857253
10,0.0022,0.824411,0.875344,0.887408,0.834564,0.85473


[I 2025-03-21 23:45:36,096] Trial 22 finished with value: 0.8581205100494064 and parameters: {'learning_rate': 0.004827531108315613, 'weight_decay': 0.004, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 23 with params: {'learning_rate': 0.0029185535384892917, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0983,0.716269,0.746104,0.641385,0.639306,0.634784
2,0.5523,0.609803,0.791017,0.679029,0.674652,0.674396
3,0.381,0.496968,0.834097,0.79095,0.784117,0.784645
4,0.2501,0.523005,0.842346,0.871602,0.788588,0.816363
5,0.1759,0.495034,0.866178,0.85192,0.837447,0.843852
6,0.117,0.504753,0.860678,0.835913,0.834556,0.834767
7,0.0574,0.623255,0.864345,0.877271,0.835708,0.852512
8,0.0379,0.706253,0.851512,0.856797,0.815764,0.831621
9,0.0152,0.695266,0.868928,0.879083,0.830725,0.849072
10,0.0088,0.75227,0.868011,0.881729,0.828459,0.849091


[I 2025-03-21 23:46:22,466] Trial 23 pruned. 


Trial 24 with params: {'learning_rate': 0.004823662083442585, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0543,0.752047,0.733272,0.649246,0.627088,0.625969
2,0.5208,0.520923,0.821265,0.831509,0.747579,0.765839
3,0.3116,0.470064,0.846929,0.847627,0.795862,0.812338
4,0.1934,0.491193,0.861595,0.870302,0.832825,0.848542
5,0.0958,0.54174,0.866178,0.858724,0.828316,0.840517
6,0.0722,0.639705,0.851512,0.861625,0.818848,0.831838
7,0.0369,0.67535,0.87626,0.878997,0.833291,0.851986
8,0.0149,0.742065,0.87626,0.864635,0.835461,0.847991
9,0.004,0.801914,0.885426,0.886894,0.842095,0.860184
10,0.0055,0.798129,0.879927,0.877228,0.839988,0.85487


[I 2025-03-21 23:47:50,839] Trial 24 finished with value: 0.8585728040040231 and parameters: {'learning_rate': 0.004823662083442585, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 21 with value: 0.8614978057589228.


Trial 25 with params: {'learning_rate': 0.0037563660458696632, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0574,0.825084,0.71769,0.659566,0.604875,0.60666
2,0.5391,0.63335,0.780935,0.798322,0.727598,0.738506
3,0.3441,0.499558,0.824931,0.860971,0.759335,0.781577
4,0.2335,0.471819,0.852429,0.872329,0.787537,0.813854
5,0.1423,0.532833,0.863428,0.877776,0.825808,0.845426
6,0.0865,0.552459,0.857929,0.858417,0.822768,0.835759
7,0.0308,0.692334,0.873511,0.888536,0.831168,0.853204
8,0.0128,0.75347,0.865261,0.853224,0.81703,0.831717
9,0.0077,0.80586,0.863428,0.874776,0.807655,0.82933
10,0.0067,0.771088,0.868011,0.866985,0.819329,0.837837


[I 2025-03-21 23:48:37,058] Trial 25 pruned. 


Trial 26 with params: {'learning_rate': 0.0013760348167273764, 'weight_decay': 0.004, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.236,0.80118,0.705775,0.618807,0.602934,0.603792
2,0.6281,0.609916,0.777269,0.652973,0.666945,0.657855
3,0.4593,0.555872,0.806599,0.687699,0.689577,0.680846
4,0.3643,0.533105,0.823098,0.700154,0.700541,0.69611
5,0.2958,0.510813,0.840513,0.860095,0.79622,0.819407
6,0.2311,0.502217,0.849679,0.861512,0.806829,0.82545
7,0.149,0.549223,0.842346,0.855953,0.820402,0.833522
8,0.109,0.562704,0.858845,0.823924,0.831637,0.827201
9,0.0724,0.640452,0.845096,0.837762,0.811379,0.821515
10,0.0408,0.707995,0.846013,0.853758,0.820459,0.833727


[I 2025-03-21 23:49:55,299] Trial 26 finished with value: 0.8282485225359011 and parameters: {'learning_rate': 0.0013760348167273764, 'weight_decay': 0.004, 'warmup_steps': 3}. Best is trial 21 with value: 0.8614978057589228.


Trial 27 with params: {'learning_rate': 0.004953521126687416, 'weight_decay': 0.005, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0314,0.697632,0.746104,0.655967,0.635071,0.638674
2,0.4995,0.492039,0.835014,0.870938,0.765216,0.791742
3,0.292,0.467694,0.842346,0.856886,0.792359,0.811151
4,0.1805,0.514197,0.860678,0.883245,0.819674,0.843949
5,0.0989,0.570174,0.864345,0.845927,0.835618,0.838772
6,0.0586,0.63297,0.859762,0.850283,0.813254,0.827987
7,0.0357,0.662631,0.87626,0.881368,0.832478,0.852331
8,0.0174,0.684332,0.877177,0.862016,0.843818,0.85211
9,0.0064,0.735152,0.874427,0.887291,0.8344,0.854829
10,0.002,0.74725,0.879927,0.878891,0.838362,0.854689


[I 2025-03-21 23:51:35,653] Trial 27 finished with value: 0.8561278041181115 and parameters: {'learning_rate': 0.004953521126687416, 'weight_decay': 0.005, 'warmup_steps': 3}. Best is trial 21 with value: 0.8614978057589228.


Trial 28 with params: {'learning_rate': 0.002953666986018182, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0773,0.707917,0.75802,0.659718,0.646749,0.645553
2,0.5194,0.692497,0.75802,0.636409,0.65705,0.636369
3,0.3548,0.505595,0.831347,0.851809,0.800796,0.818069
4,0.2464,0.525722,0.832264,0.863617,0.771519,0.80004
5,0.1536,0.531452,0.858845,0.876197,0.822607,0.842454
6,0.1005,0.530046,0.855179,0.802702,0.838357,0.816534
7,0.0482,0.616191,0.861595,0.856724,0.843048,0.848899
8,0.0266,0.660212,0.867094,0.854734,0.829253,0.839742
9,0.0133,0.645981,0.875344,0.865809,0.84536,0.854419
10,0.0047,0.723954,0.871677,0.862333,0.832143,0.845078


[I 2025-03-21 23:52:24,322] Trial 28 pruned. 


Trial 29 with params: {'learning_rate': 0.004086694036890403, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0578,0.802103,0.731439,0.661155,0.620413,0.625019
2,0.5462,0.53994,0.820348,0.808668,0.746313,0.762014
3,0.3422,0.472222,0.83593,0.831885,0.787906,0.801081
4,0.2239,0.494444,0.858845,0.878728,0.801969,0.827317
5,0.1349,0.524103,0.859762,0.864392,0.832318,0.845188
6,0.0912,0.527151,0.864345,0.837003,0.83564,0.835744
7,0.0375,0.613344,0.874427,0.868328,0.843157,0.854474
8,0.0132,0.655889,0.87626,0.855961,0.837192,0.845337
9,0.0066,0.72363,0.880843,0.880048,0.83913,0.855846
10,0.0035,0.762307,0.879927,0.878954,0.83943,0.855577


[I 2025-03-21 23:53:41,299] Trial 29 finished with value: 0.8550880066893455 and parameters: {'learning_rate': 0.004086694036890403, 'weight_decay': 0.0, 'warmup_steps': 3}. Best is trial 21 with value: 0.8614978057589228.


Trial 30 with params: {'learning_rate': 0.0018888408065567675, 'weight_decay': 0.0, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1095,0.792948,0.72044,0.663681,0.612243,0.616164
2,0.5584,0.651273,0.76352,0.648946,0.660021,0.640825
3,0.4017,0.545867,0.811182,0.8597,0.702033,0.706179
4,0.3105,0.506898,0.832264,0.860837,0.790207,0.814293
5,0.2271,0.496477,0.848763,0.86873,0.813298,0.834318
6,0.1689,0.532658,0.843263,0.802527,0.812414,0.806419
7,0.0997,0.55362,0.854262,0.869016,0.819219,0.837799
8,0.0654,0.567056,0.866178,0.862208,0.845096,0.852878
9,0.0295,0.68258,0.852429,0.868116,0.796895,0.821111
10,0.0206,0.737727,0.847846,0.863124,0.792357,0.816539


[I 2025-03-21 23:54:56,930] Trial 30 pruned. 


Trial 31 with params: {'learning_rate': 0.0035671574088844246, 'weight_decay': 0.005, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0605,0.679141,0.759853,0.673454,0.642998,0.650762
2,0.5251,0.620333,0.778185,0.827913,0.687978,0.691299
3,0.3575,0.475809,0.835014,0.805071,0.805152,0.802245
4,0.2187,0.539456,0.830431,0.856324,0.761436,0.787328
5,0.1427,0.534198,0.867094,0.869961,0.828632,0.844497
6,0.0984,0.52316,0.855179,0.806726,0.848824,0.821582
7,0.0476,0.609975,0.877177,0.887005,0.837306,0.856314
8,0.018,0.662038,0.868928,0.881668,0.830316,0.849128
9,0.0079,0.707225,0.872594,0.884814,0.834316,0.852578
10,0.007,0.721099,0.880843,0.892785,0.839467,0.859892


[I 2025-03-21 23:56:09,078] Trial 31 finished with value: 0.8561012918698491 and parameters: {'learning_rate': 0.0035671574088844246, 'weight_decay': 0.005, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 32 with params: {'learning_rate': 0.004200313800706393, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0431,0.712744,0.76352,0.685358,0.652501,0.655764
2,0.5085,0.551758,0.817599,0.78061,0.762478,0.767483
3,0.3208,0.467805,0.84143,0.842517,0.789498,0.806891
4,0.1997,0.505445,0.857929,0.86676,0.831374,0.844595
5,0.125,0.509198,0.859762,0.861433,0.831107,0.843163
6,0.0819,0.580502,0.858845,0.862249,0.822351,0.837951
7,0.0389,0.626532,0.877177,0.879353,0.84559,0.860101
8,0.0185,0.754317,0.860678,0.85846,0.830125,0.842314
9,0.0069,0.775202,0.872594,0.872975,0.833616,0.849344
10,0.0036,0.794469,0.874427,0.876012,0.844663,0.857986


[I 2025-03-21 23:57:19,731] Trial 32 finished with value: 0.859739839687817 and parameters: {'learning_rate': 0.004200313800706393, 'weight_decay': 0.002, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 33 with params: {'learning_rate': 0.004352002852686966, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0569,0.693876,0.76077,0.687932,0.645232,0.653328
2,0.5113,0.531275,0.813932,0.805732,0.766815,0.780337
3,0.3179,0.477532,0.845096,0.857646,0.794752,0.813208
4,0.1959,0.457696,0.858845,0.875333,0.804209,0.828127
5,0.1128,0.561174,0.860678,0.886156,0.814283,0.838989
6,0.0995,0.542178,0.867094,0.878932,0.83047,0.847863
7,0.0303,0.673225,0.861595,0.864635,0.814143,0.833209
8,0.0124,0.734944,0.869844,0.859993,0.829633,0.8426
9,0.0078,0.765692,0.868928,0.885051,0.827866,0.850188
10,0.0044,0.77739,0.870761,0.884823,0.831454,0.851766


[I 2025-03-21 23:59:16,654] Trial 33 finished with value: 0.8536843835158621 and parameters: {'learning_rate': 0.004352002852686966, 'weight_decay': 0.002, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 34 with params: {'learning_rate': 0.003512390454796949, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0831,0.691073,0.75802,0.67469,0.643662,0.649202
2,0.5394,0.584775,0.791017,0.667946,0.679628,0.66763
3,0.3629,0.48788,0.831347,0.831174,0.783306,0.798103
4,0.2412,0.510153,0.850596,0.861395,0.804099,0.825543
5,0.1487,0.518963,0.856095,0.872714,0.819347,0.839367
6,0.1079,0.485202,0.867094,0.828073,0.838733,0.832873
7,0.0402,0.648291,0.868928,0.858479,0.831325,0.842029
8,0.0251,0.646433,0.874427,0.874979,0.833862,0.850772
9,0.0094,0.637657,0.882676,0.892749,0.840359,0.860528
10,0.0038,0.713144,0.871677,0.875441,0.839481,0.854838


[I 2025-03-22 00:00:27,530] Trial 34 finished with value: 0.8562063211479026 and parameters: {'learning_rate': 0.003512390454796949, 'weight_decay': 0.004, 'warmup_steps': 2}. Best is trial 21 with value: 0.8614978057589228.


Trial 35 with params: {'learning_rate': 0.0013931247287235543, 'weight_decay': 0.002, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1777,0.787103,0.714024,0.624067,0.609488,0.609302
2,0.6273,0.609131,0.786434,0.662646,0.67277,0.667144
3,0.458,0.526752,0.819432,0.697499,0.698325,0.695159
4,0.3567,0.524094,0.824931,0.701833,0.700848,0.698203
5,0.2846,0.522035,0.835014,0.853361,0.773452,0.798461


[I 2025-03-22 00:00:53,408] Trial 35 pruned. 


Trial 36 with params: {'learning_rate': 5.370203809578854e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7295,1.673907,0.334555,0.20639,0.244925,0.174792
2,1.6286,1.58451,0.441797,0.217672,0.332675,0.26045
3,1.543,1.481439,0.448213,0.225933,0.337198,0.26918
4,1.4159,1.346899,0.460128,0.53833,0.353929,0.31321
5,1.293,1.237188,0.532539,0.523963,0.427475,0.418713
6,1.1901,1.15998,0.588451,0.531596,0.488619,0.489116
7,1.1238,1.10248,0.60495,0.535628,0.504119,0.507761
8,1.0665,1.05897,0.615032,0.523316,0.52086,0.516219
9,1.0297,1.029029,0.624198,0.532451,0.528176,0.525886
10,0.9953,1.010386,0.624198,0.521675,0.532147,0.52332


[I 2025-03-22 00:01:43,013] Trial 36 pruned. 


Trial 37 with params: {'learning_rate': 0.0038057740033223767, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0536,0.681057,0.756187,0.668817,0.641434,0.648192
2,0.5258,0.59148,0.789184,0.841578,0.701341,0.717221
3,0.3373,0.478775,0.842346,0.806029,0.801062,0.801822
4,0.2096,0.50965,0.845096,0.872398,0.809952,0.833234
5,0.1283,0.520733,0.857929,0.823535,0.842073,0.831178


[I 2025-03-22 00:02:08,222] Trial 37 pruned. 


Trial 38 with params: {'learning_rate': 0.000466016775972213, 'weight_decay': 0.001, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.4714,1.183516,0.544455,0.563605,0.438173,0.440298
2,0.9584,0.792345,0.722273,0.618567,0.61875,0.616199
3,0.6387,0.64535,0.76077,0.641269,0.653633,0.642295
4,0.5178,0.600456,0.794684,0.68388,0.676237,0.676404
5,0.4646,0.56682,0.790101,0.668708,0.677181,0.669919


[I 2025-03-22 00:02:34,231] Trial 38 pruned. 


Trial 39 with params: {'learning_rate': 5.7801019639330395e-05, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7304,1.670668,0.332722,0.206254,0.243565,0.173431
2,1.623,1.576212,0.425298,0.211151,0.321487,0.241408
3,1.526,1.453809,0.44363,0.390899,0.334867,0.267729
4,1.3824,1.310351,0.482126,0.533067,0.376254,0.347919
5,1.2568,1.20167,0.557287,0.529364,0.453897,0.454327
6,1.152,1.122088,0.606783,0.521048,0.510648,0.505395
7,1.0863,1.066815,0.622365,0.545314,0.521333,0.524886
8,1.0308,1.027085,0.621448,0.524567,0.529437,0.522713
9,0.9955,0.998561,0.627864,0.532692,0.532303,0.528936
10,0.9601,0.981287,0.636114,0.533867,0.543595,0.534895


[I 2025-03-22 00:03:54,786] Trial 39 pruned. 


Trial 40 with params: {'learning_rate': 0.0002081476747934512, 'weight_decay': 0.006, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6309,1.454892,0.48396,0.434677,0.371332,0.347929
2,1.2745,1.105335,0.598533,0.54204,0.496123,0.501526
3,0.9759,0.910898,0.668194,0.562348,0.575618,0.563487
4,0.7824,0.768764,0.729606,0.623679,0.620799,0.619437
5,0.6811,0.704372,0.751604,0.643933,0.639764,0.638401


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-22 00:04:31,452] Trial 40 pruned. 


Trial 41 with params: {'learning_rate': 6.459897452290429e-05, 'weight_decay': 0.0, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7159,1.650014,0.334555,0.20639,0.244925,0.174792
2,1.6031,1.550638,0.399633,0.190698,0.303193,0.207731
3,1.481,1.398571,0.428964,0.377834,0.325339,0.258319
4,1.3212,1.250382,0.520623,0.509382,0.414013,0.395662
5,1.2003,1.146849,0.597617,0.527603,0.497186,0.499015
6,1.0989,1.07593,0.609533,0.524455,0.515589,0.506618
7,1.0377,1.022924,0.635197,0.547803,0.53455,0.536328
8,0.9841,0.986961,0.637947,0.540158,0.54298,0.538381
9,0.9491,0.959113,0.643446,0.546828,0.546022,0.543494
10,0.9129,0.943594,0.655362,0.550657,0.559504,0.551465


[I 2025-03-22 00:05:23,315] Trial 41 pruned. 


Trial 42 with params: {'learning_rate': 0.003790872833342546, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0464,0.847191,0.71769,0.661166,0.602223,0.604801
2,0.5407,0.609656,0.780018,0.781732,0.744738,0.749837
3,0.3419,0.474944,0.84143,0.829965,0.790264,0.803958
4,0.2259,0.467832,0.856095,0.860496,0.800574,0.82182
5,0.1461,0.531962,0.858845,0.860405,0.822485,0.837315
6,0.0856,0.561055,0.861595,0.862761,0.835952,0.846196
7,0.0271,0.653657,0.873511,0.862493,0.832923,0.845559
8,0.0183,0.713082,0.872594,0.872767,0.832753,0.848947
9,0.0077,0.719793,0.874427,0.886263,0.825366,0.847502
10,0.004,0.747648,0.875344,0.876825,0.835202,0.852076


[I 2025-03-22 00:07:24,124] Trial 42 finished with value: 0.8461676074063441 and parameters: {'learning_rate': 0.003790872833342546, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 21 with value: 0.8614978057589228.


Trial 43 with params: {'learning_rate': 0.0017538121707045189, 'weight_decay': 0.004, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1989,0.758369,0.71769,0.624783,0.613448,0.61254
2,0.5985,0.622601,0.770852,0.652809,0.662669,0.652483
3,0.4372,0.520729,0.818515,0.690703,0.69939,0.691264
4,0.329,0.512622,0.829514,0.87344,0.742355,0.764773
5,0.2609,0.511117,0.840513,0.860728,0.795857,0.819436
6,0.1805,0.497526,0.853346,0.856241,0.818237,0.833318
7,0.1123,0.555741,0.850596,0.864223,0.816624,0.834271
8,0.0767,0.646706,0.845096,0.845613,0.813553,0.825081
9,0.0522,0.621944,0.855179,0.862827,0.826676,0.841794
10,0.0237,0.653974,0.856095,0.850219,0.82828,0.838009


[I 2025-03-22 00:08:40,536] Trial 43 finished with value: 0.8318571282965319 and parameters: {'learning_rate': 0.0017538121707045189, 'weight_decay': 0.004, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 44 with params: {'learning_rate': 0.004986587105437094, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0298,0.663801,0.766269,0.675373,0.650764,0.656611
2,0.488,0.557424,0.80385,0.793076,0.75418,0.761468
3,0.2997,0.470313,0.839597,0.859997,0.816267,0.830692
4,0.1739,0.49804,0.862511,0.871582,0.823789,0.842347
5,0.1048,0.566303,0.869844,0.889141,0.82857,0.851521
6,0.0653,0.560264,0.870761,0.881106,0.833814,0.850541
7,0.0244,0.753637,0.878093,0.892918,0.836556,0.857804
8,0.0166,0.700253,0.875344,0.875571,0.835395,0.851114
9,0.0104,0.740645,0.87901,0.888776,0.837531,0.857244
10,0.0025,0.765453,0.880843,0.891047,0.838822,0.858985


[I 2025-03-22 00:10:10,302] Trial 44 finished with value: 0.8591569308490077 and parameters: {'learning_rate': 0.004986587105437094, 'weight_decay': 0.003, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 45 with params: {'learning_rate': 0.004215348626750819, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0581,0.705984,0.761687,0.693426,0.648302,0.654183
2,0.5074,0.542056,0.804766,0.803925,0.75706,0.77361
3,0.3181,0.478157,0.83868,0.854041,0.797614,0.816384
4,0.1985,0.475445,0.854262,0.892292,0.772452,0.798854
5,0.1184,0.596293,0.857929,0.866497,0.814002,0.83139
6,0.0976,0.575432,0.865261,0.875715,0.829516,0.845237
7,0.0381,0.632489,0.870761,0.882719,0.822447,0.844362
8,0.011,0.65837,0.874427,0.858055,0.84439,0.850641
9,0.0049,0.738617,0.878093,0.888071,0.838206,0.856859
10,0.0024,0.779852,0.868928,0.882159,0.830272,0.849925


[I 2025-03-22 00:11:34,775] Trial 45 finished with value: 0.857765605531958 and parameters: {'learning_rate': 0.004215348626750819, 'weight_decay': 0.003, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 46 with params: {'learning_rate': 0.000996764125365047, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.311,0.901443,0.659945,0.609664,0.553669,0.566367
2,0.72,0.651317,0.773602,0.669385,0.658205,0.659689
3,0.5001,0.583738,0.797434,0.682379,0.682142,0.67381
4,0.4159,0.549023,0.815765,0.693508,0.694794,0.691357
5,0.3529,0.509547,0.827681,0.867083,0.714521,0.719688


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-22 00:12:49,821] Trial 46 pruned. 


Trial 47 with params: {'learning_rate': 0.004737095611977738, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0433,0.692947,0.759853,0.675256,0.645218,0.652007
2,0.5092,0.535661,0.821265,0.821889,0.765626,0.781758
3,0.3038,0.451246,0.848763,0.840753,0.814448,0.823642
4,0.1876,0.488064,0.865261,0.885176,0.835169,0.854287
5,0.1035,0.533968,0.872594,0.874535,0.833304,0.848922
6,0.0641,0.539347,0.868011,0.84221,0.83822,0.839991
7,0.0185,0.704475,0.874427,0.888588,0.833769,0.854922
8,0.0078,0.723859,0.872594,0.865585,0.832384,0.846684
9,0.0042,0.759742,0.871677,0.880684,0.832731,0.850931
10,0.0036,0.772827,0.880843,0.891434,0.83905,0.859286


[I 2025-03-22 00:14:43,503] Trial 47 finished with value: 0.8598275451797589 and parameters: {'learning_rate': 0.004737095611977738, 'weight_decay': 0.001, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 48 with params: {'learning_rate': 0.004045445599292136, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0515,0.644211,0.769019,0.673668,0.652739,0.658242
2,0.4985,0.549417,0.818515,0.788989,0.774393,0.776988
3,0.3145,0.52062,0.823098,0.846145,0.775607,0.796008
4,0.2047,0.507524,0.848763,0.894096,0.810303,0.840532
5,0.11,0.535564,0.859762,0.839898,0.822634,0.830109
6,0.0741,0.579371,0.863428,0.873669,0.818041,0.837319
7,0.0332,0.634063,0.868011,0.858384,0.830379,0.842311
8,0.0321,0.668412,0.865261,0.858079,0.827537,0.839822
9,0.0163,0.694867,0.869844,0.861478,0.830408,0.843329
10,0.0063,0.72342,0.87626,0.887245,0.826723,0.848623


[I 2025-03-22 00:15:55,895] Trial 48 finished with value: 0.8446159269253041 and parameters: {'learning_rate': 0.004045445599292136, 'weight_decay': 0.001, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 49 with params: {'learning_rate': 0.004488582705806834, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.053,0.661557,0.767186,0.675915,0.650698,0.655904
2,0.5103,0.550429,0.802933,0.782912,0.760383,0.765203
3,0.3071,0.449872,0.857012,0.830093,0.820975,0.824244
4,0.1814,0.460038,0.857929,0.876457,0.820813,0.841427
5,0.1054,0.516656,0.873511,0.885853,0.824303,0.846202
6,0.0617,0.701153,0.855179,0.869018,0.811107,0.830785
7,0.027,0.62961,0.87901,0.87971,0.835919,0.853689
8,0.0115,0.708397,0.861595,0.845503,0.822031,0.832465
9,0.0058,0.733197,0.872594,0.883287,0.832374,0.851927
10,0.0074,0.732877,0.872594,0.873595,0.8323,0.849044


[I 2025-03-22 00:17:13,584] Trial 49 pruned. 


Trial 50 with params: {'learning_rate': 0.004706490493446825, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0536,0.852716,0.705775,0.655267,0.599788,0.60429
2,0.5344,0.576514,0.79835,0.840103,0.704305,0.708034
3,0.3255,0.476273,0.84143,0.813301,0.791843,0.797414
4,0.2044,0.46081,0.864345,0.881312,0.816665,0.839875
5,0.1095,0.522691,0.871677,0.881597,0.833607,0.851672
6,0.0755,0.553974,0.866178,0.855731,0.82039,0.833183
7,0.0276,0.689401,0.874427,0.886297,0.814224,0.838945
8,0.0109,0.729144,0.874427,0.87176,0.833521,0.848821
9,0.0083,0.744035,0.874427,0.882172,0.816258,0.838426
10,0.0042,0.757503,0.875344,0.850821,0.844068,0.847036


[I 2025-03-22 00:18:09,582] Trial 50 pruned. 


Trial 51 with params: {'learning_rate': 0.004883934376485026, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0538,0.713613,0.752521,0.670614,0.637111,0.642787
2,0.5148,0.551614,0.802933,0.812292,0.752654,0.767044
3,0.3082,0.464252,0.845096,0.853391,0.821413,0.832597
4,0.1928,0.493789,0.868011,0.894465,0.826693,0.851625
5,0.1168,0.590927,0.862511,0.864779,0.825081,0.840196
6,0.0616,0.629188,0.877177,0.888628,0.835042,0.85591
7,0.0243,0.718249,0.874427,0.890457,0.840657,0.860826
8,0.0177,0.746781,0.868011,0.868504,0.828549,0.844609
9,0.0102,0.76417,0.872594,0.873701,0.833529,0.849636
10,0.0053,0.762505,0.879927,0.890834,0.83874,0.85871


[I 2025-03-22 00:19:22,569] Trial 51 finished with value: 0.8504181664572664 and parameters: {'learning_rate': 0.004883934376485026, 'weight_decay': 0.002, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 52 with params: {'learning_rate': 0.004907424139266804, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0456,0.747166,0.759853,0.674657,0.645675,0.649798
2,0.5232,0.519284,0.811182,0.679774,0.694945,0.685201
3,0.3091,0.479604,0.845096,0.843464,0.794187,0.809375
4,0.201,0.479679,0.869844,0.861782,0.838522,0.848711
5,0.1163,0.497663,0.88451,0.883751,0.841449,0.85845
6,0.0621,0.59412,0.874427,0.867492,0.843189,0.853783
7,0.0323,0.637088,0.872594,0.875488,0.832102,0.849308
8,0.0115,0.653122,0.871677,0.861527,0.83347,0.845326
9,0.0043,0.699451,0.872594,0.871393,0.833614,0.848971
10,0.0016,0.743536,0.882676,0.88226,0.840886,0.857463


[I 2025-03-22 00:21:22,365] Trial 52 finished with value: 0.8543584598874577 and parameters: {'learning_rate': 0.004907424139266804, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 21 with value: 0.8614978057589228.


Trial 53 with params: {'learning_rate': 0.004939732994567213, 'weight_decay': 0.004, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9866,0.679385,0.759853,0.659031,0.647421,0.644842
2,0.4682,0.560923,0.804766,0.762325,0.762683,0.757519
3,0.2727,0.455983,0.850596,0.851586,0.808456,0.823911
4,0.1649,0.535879,0.856095,0.865185,0.819434,0.83673
5,0.0813,0.599159,0.865261,0.883877,0.827178,0.848853
6,0.0518,0.654843,0.863428,0.862423,0.827708,0.840974
7,0.0305,0.671378,0.87626,0.890575,0.835194,0.856686
8,0.0137,0.725212,0.868011,0.862266,0.838474,0.84895
9,0.0064,0.725792,0.873511,0.865901,0.833162,0.847244
10,0.003,0.748187,0.87626,0.870166,0.844855,0.856052


[I 2025-03-22 00:22:43,967] Trial 53 finished with value: 0.8483917657058831 and parameters: {'learning_rate': 0.004939732994567213, 'weight_decay': 0.004, 'warmup_steps': 0}. Best is trial 21 with value: 0.8614978057589228.


Trial 54 with params: {'learning_rate': 0.0048565101381452215, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0529,0.715417,0.746104,0.663526,0.634061,0.638212
2,0.5155,0.592678,0.7956,0.790363,0.77416,0.771361
3,0.2992,0.468084,0.848763,0.862547,0.814163,0.83181
4,0.184,0.477395,0.869844,0.886004,0.82126,0.844502
5,0.1099,0.591989,0.864345,0.876928,0.827563,0.845568
6,0.07,0.598377,0.867094,0.878632,0.829989,0.847276
7,0.0338,0.697862,0.878093,0.876511,0.827829,0.846651
8,0.0176,0.721238,0.870761,0.883596,0.822023,0.844444
9,0.0062,0.725687,0.87626,0.884457,0.827459,0.847769
10,0.0049,0.771951,0.882676,0.891439,0.831551,0.853219


[I 2025-03-22 00:24:40,124] Trial 54 finished with value: 0.8541969867513268 and parameters: {'learning_rate': 0.0048565101381452215, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 21 with value: 0.8614978057589228.


Trial 55 with params: {'learning_rate': 0.002398133780663588, 'weight_decay': 0.002, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0772,0.766901,0.752521,0.67578,0.634797,0.641601
2,0.5495,0.580532,0.792851,0.671534,0.679214,0.672592
3,0.3825,0.519442,0.824015,0.863688,0.737828,0.758694
4,0.278,0.485322,0.845096,0.872868,0.800981,0.825845
5,0.1879,0.534918,0.857929,0.879584,0.810925,0.835625
6,0.1297,0.546892,0.84143,0.776524,0.81092,0.789373
7,0.0634,0.625838,0.860678,0.858579,0.815198,0.831674
8,0.0352,0.681023,0.862511,0.854814,0.825026,0.837364
9,0.0223,0.723633,0.861595,0.863537,0.815293,0.833798
10,0.0121,0.79618,0.857012,0.875028,0.810355,0.833651


[I 2025-03-22 00:25:57,372] Trial 55 pruned. 


Trial 56 with params: {'learning_rate': 0.0015660241907992083, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2144,0.784944,0.712191,0.620807,0.60883,0.607107
2,0.612,0.610458,0.770852,0.649422,0.662225,0.653136
3,0.453,0.546779,0.809349,0.688733,0.692283,0.683257
4,0.3472,0.519882,0.823098,0.863687,0.71033,0.713733
5,0.2783,0.51107,0.843263,0.859896,0.78911,0.813089


[I 2025-03-22 00:26:21,496] Trial 56 pruned. 


Trial 57 with params: {'learning_rate': 0.0018056034702201, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1879,0.758957,0.710357,0.624229,0.607174,0.606517
2,0.5832,0.626969,0.770852,0.647614,0.66425,0.650131
3,0.4268,0.505244,0.834097,0.868048,0.720713,0.722949
4,0.3202,0.519096,0.828598,0.851726,0.778351,0.800541
5,0.2491,0.52217,0.840513,0.863106,0.795898,0.820249
6,0.1729,0.488149,0.852429,0.839822,0.816427,0.826542
7,0.1041,0.594879,0.853346,0.866994,0.808957,0.828896
8,0.0727,0.636607,0.84143,0.834007,0.810257,0.819567
9,0.0407,0.654287,0.851512,0.850259,0.823848,0.835004
10,0.0196,0.699377,0.851512,0.840812,0.807494,0.820522


[I 2025-03-22 00:27:12,939] Trial 57 pruned. 


Trial 58 with params: {'learning_rate': 0.00021771047684957567, 'weight_decay': 0.01, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6255,1.437147,0.485793,0.425856,0.372996,0.346643
2,1.2572,1.086598,0.604033,0.544773,0.500499,0.505266
3,0.9584,0.896856,0.671861,0.565073,0.57863,0.566196
4,0.7675,0.754529,0.740605,0.636531,0.628951,0.629942
5,0.6643,0.691212,0.751604,0.643594,0.639702,0.637986
6,0.6025,0.656689,0.766269,0.648697,0.653679,0.649523
7,0.5387,0.639511,0.772686,0.66089,0.658601,0.657252
8,0.498,0.617236,0.773602,0.654181,0.663035,0.656595
9,0.4716,0.619622,0.775435,0.652655,0.663149,0.655392
10,0.4372,0.616897,0.771769,0.647325,0.659724,0.652134


[I 2025-03-22 00:28:18,045] Trial 58 pruned. 


Trial 59 with params: {'learning_rate': 0.0048602160405686, 'weight_decay': 0.01, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9912,0.723147,0.739688,0.651854,0.633064,0.629179
2,0.4853,0.519318,0.827681,0.808108,0.762457,0.774935
3,0.2848,0.486688,0.845096,0.85859,0.802394,0.820901
4,0.1701,0.507893,0.867094,0.869145,0.835081,0.849349
5,0.0868,0.567324,0.865261,0.859884,0.835696,0.846449
6,0.048,0.658218,0.857929,0.831961,0.836545,0.832545
7,0.0299,0.679034,0.872594,0.87352,0.840889,0.854634
8,0.0153,0.696239,0.879927,0.879728,0.838064,0.854773
9,0.0096,0.779603,0.875344,0.893963,0.822752,0.848577
10,0.0044,0.717761,0.887259,0.875449,0.843833,0.857361


[I 2025-03-22 00:30:18,579] Trial 59 finished with value: 0.847994758457093 and parameters: {'learning_rate': 0.0048602160405686, 'weight_decay': 0.01, 'warmup_steps': 0}. Best is trial 21 with value: 0.8614978057589228.


Trial 60 with params: {'learning_rate': 0.004763014943682799, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0591,0.692235,0.750687,0.653935,0.640713,0.640736
2,0.5112,0.51682,0.821265,0.81454,0.745533,0.76431
3,0.303,0.463869,0.848763,0.863866,0.805939,0.824443
4,0.1814,0.487003,0.861595,0.863518,0.83226,0.845281
5,0.0904,0.569547,0.854262,0.820797,0.837381,0.827192
6,0.0872,0.568471,0.869844,0.879948,0.821566,0.842316
7,0.0354,0.654911,0.87626,0.87202,0.8437,0.856163
8,0.0096,0.671043,0.88451,0.874809,0.851993,0.862225
9,0.002,0.749346,0.886343,0.875771,0.843945,0.857515
10,0.0025,0.802839,0.869844,0.858964,0.832534,0.843274


[I 2025-03-22 00:31:48,068] Trial 60 finished with value: 0.8633534618076598 and parameters: {'learning_rate': 0.004763014943682799, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 60 with value: 0.8633534618076598.


Trial 61 with params: {'learning_rate': 0.00390905479059359, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.062,0.72786,0.739688,0.656623,0.628324,0.630725
2,0.5274,0.569625,0.804766,0.844534,0.727085,0.74151
3,0.3365,0.470485,0.845096,0.875196,0.775933,0.798339
4,0.2199,0.47259,0.864345,0.882371,0.815537,0.839521
5,0.1353,0.51038,0.858845,0.878321,0.821704,0.842958
6,0.0847,0.543673,0.864345,0.880803,0.834401,0.852165
7,0.0332,0.617194,0.878093,0.888771,0.836409,0.856553
8,0.0116,0.747108,0.866178,0.867404,0.827387,0.843364
9,0.0108,0.753348,0.868011,0.881883,0.829013,0.849266
10,0.006,0.722459,0.869844,0.868409,0.821457,0.839643


[I 2025-03-22 00:32:42,176] Trial 61 pruned. 


Trial 62 with params: {'learning_rate': 0.004788140783448126, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0584,0.698621,0.750687,0.655429,0.640666,0.640272
2,0.5127,0.527663,0.820348,0.832618,0.745918,0.765903
3,0.3085,0.471818,0.850596,0.865272,0.807699,0.826814
4,0.1878,0.491244,0.868011,0.868944,0.837324,0.850681
5,0.1066,0.538483,0.849679,0.806019,0.834786,0.814035
6,0.0732,0.53648,0.877177,0.864816,0.810492,0.827738
7,0.0336,0.63166,0.871677,0.866658,0.842248,0.853159
8,0.0147,0.68552,0.872594,0.86001,0.834419,0.844699
9,0.005,0.74736,0.875344,0.863256,0.827124,0.841774
10,0.0031,0.77393,0.873511,0.859176,0.825789,0.83919


[I 2025-03-22 00:33:55,751] Trial 62 finished with value: 0.8406415116718825 and parameters: {'learning_rate': 0.004788140783448126, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 60 with value: 0.8633534618076598.


Trial 63 with params: {'learning_rate': 0.004668453956432682, 'weight_decay': 0.004, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.039,0.693212,0.766269,0.683242,0.649418,0.656982
2,0.5064,0.525545,0.824015,0.809192,0.7671,0.780013
3,0.3016,0.428918,0.853346,0.864205,0.808332,0.827969
4,0.1777,0.474863,0.860678,0.864764,0.804868,0.825641
5,0.1142,0.59668,0.864345,0.865167,0.827317,0.84194
6,0.0743,0.537985,0.868011,0.865118,0.830474,0.844021
7,0.0314,0.606848,0.87626,0.848205,0.817787,0.830077
8,0.0142,0.716372,0.865261,0.853856,0.826522,0.838068
9,0.0067,0.718922,0.866178,0.853037,0.819694,0.832432
10,0.0025,0.740967,0.868928,0.856056,0.821247,0.835303


[I 2025-03-22 00:34:58,027] Trial 63 pruned. 


Trial 64 with params: {'learning_rate': 0.002714927870688111, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0842,0.7314,0.747021,0.666802,0.634252,0.640673
2,0.546,0.604838,0.782768,0.826569,0.709201,0.723528
3,0.3721,0.534985,0.815765,0.852751,0.746549,0.763289
4,0.2684,0.512389,0.83868,0.853478,0.784894,0.809033
5,0.1771,0.497427,0.854262,0.870898,0.808171,0.830809
6,0.1267,0.506391,0.855179,0.799463,0.839825,0.814609
7,0.0594,0.595089,0.864345,0.865759,0.826371,0.842426
8,0.0262,0.650822,0.857929,0.852857,0.820599,0.834136
9,0.0119,0.716132,0.862511,0.863294,0.825606,0.840622
10,0.0087,0.725867,0.867094,0.858229,0.829027,0.841381


[I 2025-03-22 00:36:09,870] Trial 64 finished with value: 0.8402248146703016 and parameters: {'learning_rate': 0.002714927870688111, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 60 with value: 0.8633534618076598.


Trial 65 with params: {'learning_rate': 0.00453084233337998, 'weight_decay': 0.004, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0588,0.827636,0.709441,0.659461,0.603499,0.611102
2,0.5395,0.544444,0.812099,0.820119,0.740376,0.757305
3,0.3221,0.457774,0.842346,0.823194,0.80183,0.809285
4,0.1989,0.485598,0.865261,0.884827,0.81737,0.84146
5,0.1198,0.497086,0.871677,0.875488,0.840835,0.85535
6,0.0764,0.559461,0.873511,0.884325,0.834371,0.853457
7,0.0306,0.619453,0.871677,0.855586,0.832196,0.841939
8,0.0211,0.700623,0.874427,0.86429,0.834491,0.847307
9,0.0095,0.717718,0.870761,0.872522,0.832283,0.848131
10,0.0036,0.765194,0.879927,0.867178,0.839249,0.851176


[I 2025-03-22 00:37:29,054] Trial 65 pruned. 


Trial 66 with params: {'learning_rate': 0.004850818985486912, 'weight_decay': 0.001, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0494,0.784981,0.745188,0.666848,0.633518,0.637782
2,0.5245,0.516567,0.820348,0.687729,0.701511,0.692899
3,0.3115,0.481594,0.843263,0.83379,0.801191,0.81292
4,0.2016,0.484948,0.871677,0.860066,0.839712,0.848806
5,0.1149,0.494701,0.872594,0.861044,0.833188,0.844079
6,0.0844,0.493694,0.863428,0.837176,0.837203,0.836562
7,0.0414,0.600827,0.874427,0.858791,0.816179,0.83246
8,0.0116,0.66228,0.883593,0.86221,0.859871,0.86031
9,0.0076,0.719192,0.88176,0.879683,0.830753,0.849717
10,0.0074,0.735101,0.878093,0.862761,0.829369,0.842116


[I 2025-03-22 00:38:46,129] Trial 66 finished with value: 0.8550039518623506 and parameters: {'learning_rate': 0.004850818985486912, 'weight_decay': 0.001, 'warmup_steps': 3}. Best is trial 60 with value: 0.8633534618076598.


Trial 67 with params: {'learning_rate': 0.001930535718685673, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1601,0.780036,0.714024,0.628996,0.609902,0.607949
2,0.5639,0.644269,0.759853,0.63783,0.65766,0.638208
3,0.4014,0.530402,0.816682,0.856784,0.716486,0.726029
4,0.3059,0.529438,0.835014,0.860122,0.792389,0.814732
5,0.239,0.522547,0.847846,0.857068,0.801847,0.822812


[I 2025-03-22 00:39:15,491] Trial 67 pruned. 


Trial 68 with params: {'learning_rate': 0.003472007262329788, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0582,0.706702,0.753437,0.670298,0.640541,0.647211
2,0.5232,0.555069,0.805683,0.840807,0.727595,0.742473
3,0.3364,0.556968,0.815765,0.841629,0.768562,0.790666
4,0.2314,0.535629,0.84143,0.871568,0.778458,0.807364
5,0.1449,0.521578,0.865261,0.87594,0.819086,0.838374


[I 2025-03-22 00:39:52,121] Trial 68 pruned. 


Trial 69 with params: {'learning_rate': 0.0021135709051425568, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1563,0.784539,0.715857,0.630913,0.611483,0.609125
2,0.5597,0.630159,0.770852,0.644131,0.664652,0.649036
3,0.3938,0.530063,0.831347,0.864203,0.756976,0.775941
4,0.2944,0.520033,0.833181,0.834992,0.791339,0.806505
5,0.2246,0.513155,0.853346,0.851959,0.81696,0.831031
6,0.1745,0.548766,0.830431,0.83618,0.8031,0.812156
7,0.0961,0.56378,0.861595,0.863335,0.824221,0.839895
8,0.0554,0.606092,0.863428,0.838743,0.836912,0.837397
9,0.0271,0.700054,0.855179,0.86995,0.808541,0.830385
10,0.017,0.717208,0.866178,0.877498,0.828447,0.846818


[I 2025-03-22 00:41:06,183] Trial 69 finished with value: 0.8517944249460269 and parameters: {'learning_rate': 0.0021135709051425568, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 70 with params: {'learning_rate': 0.004845146724504942, 'weight_decay': 0.003, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0529,0.751566,0.76077,0.681118,0.641629,0.650519
2,0.4987,0.518849,0.819432,0.854088,0.70977,0.710661
3,0.2997,0.453203,0.850596,0.8353,0.798914,0.810476
4,0.1827,0.539052,0.854262,0.877735,0.825445,0.84628
5,0.101,0.505687,0.869844,0.883083,0.830827,0.850303
6,0.0629,0.55665,0.870761,0.880397,0.823496,0.843787
7,0.0272,0.6927,0.873511,0.874885,0.834583,0.850883
8,0.0161,0.715834,0.863428,0.854057,0.836524,0.843914
9,0.0039,0.803952,0.868928,0.86794,0.830858,0.845857
10,0.0017,0.831603,0.868928,0.869349,0.830328,0.845988


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-22 00:44:14,027] Trial 70 finished with value: 0.8466462964665157 and parameters: {'learning_rate': 0.004845146724504942, 'weight_decay': 0.003, 'warmup_steps': 2}. Best is trial 60 with value: 0.8633534618076598.


Trial 71 with params: {'learning_rate': 0.0046414061042716415, 'weight_decay': 0.004, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0383,0.712281,0.75802,0.679657,0.642463,0.651428
2,0.505,0.540229,0.814849,0.807717,0.760087,0.774268
3,0.3048,0.455151,0.851512,0.853624,0.826554,0.836095
4,0.1861,0.461157,0.867094,0.86808,0.837154,0.850334
5,0.1072,0.577369,0.870761,0.883956,0.831884,0.851252
6,0.0702,0.603092,0.877177,0.883229,0.818943,0.839895
7,0.0323,0.647897,0.872594,0.867992,0.822103,0.839772
8,0.0163,0.656623,0.882676,0.878546,0.83153,0.849778
9,0.0037,0.712536,0.88176,0.878187,0.8313,0.849375
10,0.0021,0.74961,0.877177,0.874967,0.826563,0.844908


[I 2025-03-22 00:45:16,164] Trial 71 pruned. 


Trial 72 with params: {'learning_rate': 0.00010295616529943657, 'weight_decay': 0.005, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6827,1.59457,0.336389,0.206524,0.246286,0.176138
2,1.5124,1.392047,0.419798,0.359824,0.320943,0.25299
3,1.2754,1.173507,0.557287,0.527533,0.453613,0.454113
4,1.089,1.032712,0.622365,0.520427,0.530102,0.523102
5,0.9764,0.945785,0.647113,0.548075,0.551566,0.547282
6,0.8892,0.885194,0.683776,0.581527,0.581697,0.579813
7,0.8242,0.839778,0.700275,0.597925,0.595201,0.595457
8,0.7663,0.80696,0.714024,0.60052,0.612862,0.606182
9,0.7311,0.784107,0.729606,0.617521,0.620573,0.616681
10,0.6888,0.769375,0.728689,0.609345,0.624236,0.615826


[I 2025-03-22 00:46:21,592] Trial 72 pruned. 


Trial 73 with params: {'learning_rate': 5.953168512495511e-05, 'weight_decay': 0.01, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.729,1.667471,0.334555,0.20639,0.244925,0.174792
2,1.6193,1.57134,0.407883,0.198844,0.308966,0.223142
3,1.5168,1.440883,0.43538,0.386237,0.329501,0.263375
4,1.3668,1.294433,0.489459,0.529803,0.383931,0.359347
5,1.2414,1.186278,0.571036,0.532453,0.469118,0.472571


[I 2025-03-22 00:46:49,013] Trial 73 pruned. 


Trial 74 with params: {'learning_rate': 0.00486623669227327, 'weight_decay': 0.005, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.05,0.72269,0.750687,0.659976,0.642331,0.640865
2,0.5183,0.488052,0.813932,0.687446,0.69703,0.691362
3,0.2981,0.445411,0.847846,0.84231,0.786358,0.804181
4,0.1868,0.529462,0.853346,0.862131,0.824517,0.839792
5,0.1036,0.539417,0.875344,0.850412,0.845013,0.846253
6,0.0763,0.531769,0.860678,0.836491,0.798365,0.810515
7,0.0328,0.632164,0.879927,0.893422,0.828504,0.852177
8,0.0106,0.671853,0.873511,0.871401,0.834736,0.849248
9,0.0039,0.725137,0.877177,0.886661,0.827983,0.849109
10,0.0027,0.779903,0.875344,0.874155,0.825653,0.844133


[I 2025-03-22 00:48:25,516] Trial 74 finished with value: 0.8453280989332312 and parameters: {'learning_rate': 0.00486623669227327, 'weight_decay': 0.005, 'warmup_steps': 3}. Best is trial 60 with value: 0.8633534618076598.


Trial 75 with params: {'learning_rate': 0.004303160690801534, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0622,0.697982,0.766269,0.687262,0.647901,0.655797
2,0.5112,0.57253,0.8011,0.804441,0.767433,0.778003
3,0.3205,0.476313,0.840513,0.856447,0.7991,0.817535
4,0.192,0.469872,0.855179,0.87786,0.808014,0.833501
5,0.1032,0.543144,0.864345,0.86932,0.826996,0.843445
6,0.0768,0.587508,0.871677,0.882324,0.8338,0.851829
7,0.0325,0.619704,0.877177,0.887317,0.827749,0.849052
8,0.0122,0.690756,0.879927,0.888701,0.839179,0.857744
9,0.0047,0.70156,0.871677,0.884992,0.83274,0.852977
10,0.0018,0.761923,0.879927,0.889943,0.839474,0.858476


[I 2025-03-22 00:49:40,629] Trial 75 finished with value: 0.855324323841287 and parameters: {'learning_rate': 0.004303160690801534, 'weight_decay': 0.003, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 76 with params: {'learning_rate': 0.004729937350554726, 'weight_decay': 0.007, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.043,0.694046,0.76352,0.679515,0.647062,0.654292
2,0.506,0.526313,0.823098,0.858882,0.739634,0.757926
3,0.305,0.455229,0.849679,0.845226,0.797277,0.812132
4,0.1882,0.476499,0.867094,0.885093,0.828985,0.849577
5,0.1106,0.527552,0.868011,0.877858,0.801711,0.824831
6,0.0568,0.576459,0.872594,0.882151,0.825992,0.845532
7,0.0223,0.726756,0.873511,0.888091,0.823676,0.846818
8,0.0065,0.743905,0.88451,0.882064,0.842534,0.858674
9,0.0052,0.735156,0.87626,0.885316,0.827705,0.848276
10,0.0019,0.769541,0.87901,0.890608,0.828417,0.850921


[I 2025-03-22 00:51:37,537] Trial 76 finished with value: 0.8484367479486589 and parameters: {'learning_rate': 0.004729937350554726, 'weight_decay': 0.007, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 77 with params: {'learning_rate': 0.001007761125954244, 'weight_decay': 0.01, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3107,0.900071,0.660862,0.614448,0.55436,0.568811
2,0.7164,0.646394,0.773602,0.665242,0.658904,0.658972
3,0.4979,0.578763,0.8011,0.682715,0.684975,0.676577
4,0.411,0.551841,0.816682,0.696621,0.694959,0.69244
5,0.3493,0.506119,0.830431,0.867023,0.716775,0.720982


[I 2025-03-22 00:52:03,953] Trial 77 pruned. 


Trial 78 with params: {'learning_rate': 0.0012905812025664177, 'weight_decay': 0.008, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2372,0.814281,0.702108,0.619347,0.599212,0.600953
2,0.6359,0.618158,0.778185,0.653226,0.667762,0.658421
3,0.4607,0.543131,0.814849,0.691377,0.696091,0.689119
4,0.3639,0.52966,0.829514,0.708406,0.70485,0.702461
5,0.3035,0.521477,0.831347,0.8554,0.77922,0.804802


[I 2025-03-22 00:52:27,918] Trial 78 pruned. 


Trial 79 with params: {'learning_rate': 0.0017949182987154404, 'weight_decay': 0.005, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1911,0.757355,0.72044,0.625293,0.615793,0.613485
2,0.5853,0.619215,0.778185,0.653295,0.669402,0.657119
3,0.4265,0.511105,0.832264,0.701248,0.710009,0.703597
4,0.3206,0.524542,0.830431,0.853884,0.780329,0.802646
5,0.2487,0.531506,0.83868,0.862324,0.793139,0.818201
6,0.1789,0.489306,0.851512,0.839521,0.816506,0.826442
7,0.1067,0.570777,0.857929,0.870111,0.812638,0.832717
8,0.0739,0.647974,0.83868,0.841479,0.808767,0.820256
9,0.0468,0.650555,0.860678,0.867394,0.821024,0.839325
10,0.0272,0.661283,0.867094,0.86445,0.819508,0.836521


[I 2025-03-22 00:53:17,447] Trial 79 pruned. 


Trial 80 with params: {'learning_rate': 0.0002094413048824941, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6303,1.45376,0.488543,0.435311,0.375966,0.35454
2,1.2732,1.103585,0.59945,0.538767,0.497717,0.50234
3,0.9738,0.912153,0.666361,0.558505,0.574832,0.561211
4,0.7813,0.767088,0.728689,0.622981,0.620486,0.618919
5,0.6792,0.701598,0.754354,0.646253,0.642077,0.640885


[I 2025-03-22 00:53:43,953] Trial 80 pruned. 


Trial 81 with params: {'learning_rate': 0.0045525256796740046, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0527,0.664352,0.764436,0.671186,0.647677,0.653089
2,0.5131,0.52352,0.819432,0.813447,0.770944,0.786521
3,0.3066,0.455171,0.847846,0.822912,0.814043,0.816569
4,0.1856,0.460649,0.860678,0.880644,0.824021,0.845167
5,0.1056,0.546894,0.871677,0.885767,0.832263,0.852428
6,0.0742,0.563991,0.868928,0.879668,0.832051,0.849336
7,0.0337,0.588329,0.88176,0.875548,0.839174,0.854107
8,0.0148,0.716159,0.868928,0.869339,0.830447,0.846051
9,0.0039,0.762372,0.875344,0.874089,0.835736,0.851102
10,0.003,0.763254,0.879927,0.889402,0.839252,0.85852


[I 2025-03-22 00:55:03,271] Trial 81 finished with value: 0.8580224988417423 and parameters: {'learning_rate': 0.0045525256796740046, 'weight_decay': 0.003, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 82 with params: {'learning_rate': 0.004064435491058705, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.039,0.668924,0.772686,0.685022,0.656229,0.661986
2,0.5081,0.535005,0.822181,0.856925,0.720206,0.729601
3,0.3167,0.457075,0.839597,0.832486,0.806681,0.817354
4,0.1989,0.472506,0.866178,0.883931,0.827118,0.848782
5,0.1014,0.605344,0.855179,0.873469,0.810119,0.832521


[I 2025-03-22 00:55:29,393] Trial 82 pruned. 


Trial 83 with params: {'learning_rate': 0.004286748217882578, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.058,0.68426,0.768103,0.688683,0.651218,0.658937
2,0.5088,0.556632,0.810266,0.803758,0.762461,0.776742
3,0.3242,0.475415,0.840513,0.843951,0.798897,0.813683
4,0.1993,0.481467,0.860678,0.896703,0.77608,0.803309
5,0.1145,0.513506,0.871677,0.873759,0.831558,0.848565
6,0.0947,0.56871,0.865261,0.876752,0.83795,0.852857
7,0.045,0.65114,0.871677,0.873426,0.822262,0.841815
8,0.0099,0.751609,0.863428,0.864291,0.826185,0.841171
9,0.0084,0.706359,0.87626,0.875523,0.836218,0.852254
10,0.0038,0.761353,0.87901,0.877061,0.838538,0.853977


[I 2025-03-22 00:57:45,471] Trial 83 finished with value: 0.8534473594653779 and parameters: {'learning_rate': 0.004286748217882578, 'weight_decay': 0.003, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 84 with params: {'learning_rate': 0.001033755732956399, 'weight_decay': 0.003, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2567,0.857237,0.68286,0.588824,0.583199,0.581903
2,0.6882,0.640663,0.766269,0.65316,0.654256,0.651963
3,0.4954,0.556573,0.806599,0.686213,0.689287,0.680867
4,0.4061,0.54333,0.819432,0.702796,0.697219,0.696395
5,0.3459,0.500734,0.840513,0.70771,0.716459,0.711502


[I 2025-03-22 00:58:11,316] Trial 84 pruned. 


Trial 85 with params: {'learning_rate': 0.004436359388442898, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0502,0.699563,0.76077,0.681941,0.647588,0.651303
2,0.5067,0.528331,0.820348,0.791761,0.771054,0.778501
3,0.3061,0.453538,0.850596,0.851661,0.804744,0.822565
4,0.195,0.467384,0.859762,0.871476,0.830437,0.846495
5,0.103,0.526873,0.867094,0.88545,0.829045,0.84978
6,0.0799,0.560081,0.869844,0.883511,0.831324,0.850802
7,0.0232,0.716064,0.868011,0.882891,0.821001,0.843051
8,0.0138,0.642006,0.877177,0.877643,0.83684,0.853191
9,0.0041,0.709864,0.87626,0.887791,0.836218,0.856107
10,0.0059,0.698073,0.878093,0.879334,0.838001,0.854807


[I 2025-03-22 01:00:10,847] Trial 85 finished with value: 0.856376902344139 and parameters: {'learning_rate': 0.004436359388442898, 'weight_decay': 0.003, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 86 with params: {'learning_rate': 0.0002597113179487162, 'weight_decay': 0.01, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5877,1.358038,0.477544,0.398101,0.366477,0.327424
2,1.1772,1.02176,0.603116,0.547151,0.500949,0.5097
3,0.8806,0.834088,0.689276,0.590277,0.589853,0.581631
4,0.6927,0.726943,0.747021,0.65138,0.634392,0.636227
5,0.6068,0.654773,0.765353,0.650624,0.65401,0.650276


[I 2025-03-22 01:00:33,672] Trial 86 pruned. 


Trial 87 with params: {'learning_rate': 0.0022006383833072054, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1485,0.801299,0.712191,0.642207,0.607336,0.606463
2,0.5613,0.60511,0.779102,0.654013,0.671022,0.657262
3,0.3923,0.529768,0.830431,0.864282,0.755614,0.774792
4,0.2893,0.497999,0.839597,0.866364,0.795118,0.819726
5,0.2096,0.497428,0.857012,0.840412,0.8287,0.833553
6,0.1588,0.515923,0.852429,0.861064,0.81044,0.826506
7,0.0824,0.585438,0.855179,0.854389,0.820381,0.833617
8,0.0546,0.633524,0.865261,0.863495,0.827985,0.842086
9,0.0223,0.728239,0.864345,0.860514,0.817817,0.833195
10,0.016,0.685795,0.861595,0.860691,0.814759,0.832478


[I 2025-03-22 01:01:22,726] Trial 87 pruned. 


Trial 88 with params: {'learning_rate': 0.0005004376561176635, 'weight_decay': 0.004, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.4636,1.162992,0.548121,0.565186,0.441065,0.441654
2,0.9328,0.768631,0.732356,0.622253,0.628051,0.623699
3,0.6146,0.638845,0.764436,0.649134,0.654201,0.646263
4,0.5033,0.581787,0.79835,0.681816,0.680448,0.678583
5,0.4527,0.555889,0.794684,0.670117,0.68045,0.673271
6,0.4048,0.571331,0.8011,0.667172,0.687646,0.675443
7,0.3436,0.54714,0.811182,0.680316,0.694236,0.685687
8,0.3126,0.535377,0.816682,0.808215,0.725327,0.735695
9,0.2706,0.569987,0.812099,0.847754,0.721872,0.734242
10,0.2304,0.578515,0.824015,0.837174,0.7477,0.768578


[I 2025-03-22 01:02:49,499] Trial 88 pruned. 


Trial 89 with params: {'learning_rate': 0.0022907448654774757, 'weight_decay': 0.002, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.117,0.720211,0.750687,0.667031,0.640127,0.641608
2,0.5464,0.624062,0.778185,0.658199,0.668971,0.657619
3,0.3774,0.524043,0.832264,0.865579,0.757988,0.777526
4,0.2749,0.475186,0.846013,0.858441,0.821664,0.835886
5,0.1966,0.509208,0.856095,0.843961,0.8193,0.82977


[I 2025-03-22 01:03:29,543] Trial 89 pruned. 


Trial 90 with params: {'learning_rate': 0.004935311626704155, 'weight_decay': 0.001, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0368,0.734364,0.733272,0.655313,0.622103,0.629261
2,0.5244,0.506073,0.822181,0.861377,0.719277,0.731992
3,0.3044,0.460543,0.846013,0.853858,0.822344,0.832933
4,0.1915,0.486116,0.863428,0.873342,0.814954,0.836864
5,0.1088,0.54257,0.864345,0.857156,0.835225,0.844983
6,0.0886,0.569489,0.868011,0.870604,0.829607,0.845722
7,0.0287,0.65453,0.883593,0.873672,0.841155,0.854579
8,0.0115,0.69793,0.878093,0.875634,0.837875,0.853024
9,0.0052,0.73849,0.87626,0.87647,0.835014,0.851986
10,0.0013,0.765671,0.879927,0.869363,0.838357,0.851585


[I 2025-03-22 01:04:48,232] Trial 90 pruned. 


Trial 91 with params: {'learning_rate': 0.004936773918313235, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.038,0.681651,0.76077,0.677097,0.64394,0.651687
2,0.4848,0.589007,0.789184,0.808417,0.735004,0.743535
3,0.3027,0.468135,0.845096,0.860444,0.793006,0.813007
4,0.1794,0.511112,0.862511,0.881941,0.824072,0.845218
5,0.1087,0.581532,0.862511,0.877513,0.824546,0.844703
6,0.0567,0.61715,0.873511,0.888191,0.832859,0.854418
7,0.0231,0.685529,0.858845,0.874935,0.821333,0.841692
8,0.008,0.708756,0.871677,0.884409,0.832631,0.852672
9,0.003,0.753959,0.87901,0.887744,0.839277,0.857679
10,0.0017,0.797274,0.878093,0.888439,0.838221,0.85738


[I 2025-03-22 01:07:25,458] Trial 91 finished with value: 0.8571982062558038 and parameters: {'learning_rate': 0.004936773918313235, 'weight_decay': 0.002, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 92 with params: {'learning_rate': 0.0030558199173995222, 'weight_decay': 0.005, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0811,0.746593,0.750687,0.665602,0.637591,0.640165
2,0.5356,0.62685,0.783685,0.783437,0.745824,0.753671
3,0.3591,0.503469,0.83593,0.84754,0.787383,0.80454
4,0.2414,0.527225,0.834097,0.84943,0.781111,0.805236
5,0.1634,0.532088,0.857012,0.823657,0.811583,0.815159


[I 2025-03-22 01:07:47,533] Trial 92 pruned. 


Trial 93 with params: {'learning_rate': 0.004631926940808317, 'weight_decay': 0.004, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0368,0.702942,0.759853,0.683283,0.64274,0.652387
2,0.5173,0.523182,0.821265,0.800278,0.720913,0.72761
3,0.3116,0.437545,0.849679,0.843203,0.814371,0.825285
4,0.1886,0.46223,0.856095,0.879074,0.808827,0.833985
5,0.1043,0.53725,0.874427,0.874174,0.833838,0.849918
6,0.0603,0.588457,0.873511,0.885768,0.824761,0.846882
7,0.0282,0.702409,0.870761,0.86603,0.841371,0.851174
8,0.0183,0.654555,0.87626,0.867422,0.836744,0.849699
9,0.008,0.733785,0.872594,0.886757,0.831869,0.853143
10,0.0034,0.753664,0.875344,0.886104,0.835236,0.854812


[I 2025-03-22 01:08:57,001] Trial 93 finished with value: 0.8539876430522629 and parameters: {'learning_rate': 0.004631926940808317, 'weight_decay': 0.004, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 94 with params: {'learning_rate': 0.004878470746858751, 'weight_decay': 0.004, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0538,0.724231,0.743355,0.65224,0.636914,0.63373
2,0.5119,0.494699,0.819432,0.692769,0.701375,0.694817
3,0.3011,0.459258,0.851512,0.847183,0.798713,0.814897
4,0.1882,0.480653,0.865261,0.864178,0.826534,0.841928
5,0.1075,0.536732,0.871677,0.854107,0.842296,0.847309
6,0.0657,0.533341,0.875344,0.874226,0.845392,0.857392
7,0.0323,0.652478,0.870761,0.870802,0.831307,0.847332
8,0.0126,0.646788,0.878093,0.860175,0.837363,0.847574
9,0.0069,0.72897,0.870761,0.884042,0.821121,0.844115
10,0.0053,0.729137,0.880843,0.875443,0.830795,0.847455


[I 2025-03-22 01:09:47,854] Trial 94 pruned. 


Trial 95 with params: {'learning_rate': 0.001443467510590515, 'weight_decay': 0.003, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2331,0.790676,0.722273,0.62499,0.616586,0.615327
2,0.6192,0.607414,0.776352,0.655844,0.665138,0.658349
3,0.4555,0.559259,0.808433,0.694429,0.691026,0.684052
4,0.3649,0.525376,0.814849,0.693463,0.695013,0.69043
5,0.2894,0.5105,0.843263,0.863165,0.806772,0.828479
6,0.2304,0.497525,0.848763,0.862269,0.797422,0.818035
7,0.1462,0.560702,0.846929,0.84732,0.824447,0.832115
8,0.1029,0.571552,0.857929,0.830651,0.840725,0.834943
9,0.0697,0.625016,0.850596,0.847975,0.823696,0.834055
10,0.0374,0.722945,0.837764,0.836365,0.813374,0.822164


[I 2025-03-22 01:10:40,461] Trial 95 pruned. 


Trial 96 with params: {'learning_rate': 0.004866368721276928, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0467,0.733876,0.743355,0.668807,0.629025,0.635339
2,0.5094,0.562501,0.806599,0.798516,0.755884,0.766123
3,0.3093,0.466762,0.845096,0.850741,0.821407,0.832077
4,0.1914,0.516093,0.862511,0.876037,0.823788,0.843558
5,0.1208,0.556729,0.871677,0.879138,0.833639,0.850057
6,0.0611,0.572411,0.877177,0.886982,0.836847,0.856044
7,0.0327,0.65717,0.863428,0.86622,0.824926,0.841401
8,0.0133,0.748701,0.87901,0.877027,0.836821,0.853262
9,0.009,0.776896,0.87626,0.88927,0.834226,0.855696
10,0.0025,0.760462,0.880843,0.877485,0.839495,0.854904


[I 2025-03-22 01:12:03,936] Trial 96 finished with value: 0.85729896315223 and parameters: {'learning_rate': 0.004866368721276928, 'weight_decay': 0.001, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 97 with params: {'learning_rate': 0.002650810292413887, 'weight_decay': 0.001, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0671,0.677749,0.767186,0.662077,0.651367,0.654253
2,0.5478,0.607436,0.787351,0.66323,0.675575,0.666098
3,0.394,0.503876,0.820348,0.864484,0.726291,0.74483
4,0.2696,0.488,0.84418,0.861341,0.772609,0.797884
5,0.1725,0.552709,0.853346,0.874753,0.806394,0.830995
6,0.1144,0.541871,0.852429,0.796176,0.819906,0.805869
7,0.0588,0.641313,0.868011,0.848452,0.830177,0.838136
8,0.0354,0.694635,0.866178,0.851756,0.809386,0.825606
9,0.02,0.660401,0.867094,0.861356,0.828063,0.841677
10,0.0095,0.741044,0.863428,0.874715,0.816628,0.837567


[I 2025-03-22 01:12:51,043] Trial 97 pruned. 


Trial 98 with params: {'learning_rate': 0.0019628213663286974, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1578,0.78758,0.709441,0.630011,0.605637,0.603885
2,0.5607,0.642298,0.762603,0.641187,0.659799,0.640384
3,0.4007,0.531295,0.816682,0.856482,0.705893,0.708669
4,0.3055,0.520672,0.834097,0.854899,0.773457,0.797143
5,0.2336,0.543838,0.843263,0.856985,0.806688,0.826252
6,0.1899,0.536856,0.83868,0.841706,0.808275,0.819194
7,0.1002,0.543613,0.863428,0.867324,0.834606,0.848636
8,0.0639,0.623499,0.860678,0.849063,0.823966,0.834227
9,0.0366,0.65494,0.856095,0.870208,0.809617,0.831581
10,0.0206,0.678674,0.862511,0.854055,0.824249,0.836894


[I 2025-03-22 01:13:39,711] Trial 98 pruned. 


Trial 99 with params: {'learning_rate': 0.002208923542566908, 'weight_decay': 0.001, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1321,0.784184,0.741522,0.668542,0.626999,0.633884
2,0.5553,0.624526,0.778185,0.655757,0.669018,0.657715
3,0.3984,0.522203,0.821265,0.860552,0.719607,0.729364
4,0.2959,0.485922,0.837764,0.858848,0.776878,0.801619
5,0.2017,0.546623,0.847846,0.847627,0.802792,0.820245


[I 2025-03-22 01:14:02,549] Trial 99 pruned. 


Trial 100 with params: {'learning_rate': 0.002384444777664535, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1344,0.78291,0.718607,0.633024,0.613787,0.610041
2,0.5561,0.585125,0.791017,0.668849,0.678802,0.669619
3,0.3971,0.506764,0.824015,0.863236,0.767908,0.789767
4,0.2795,0.475793,0.842346,0.854111,0.817413,0.832108
5,0.2024,0.476927,0.861595,0.845278,0.824475,0.833015
6,0.1384,0.500353,0.857929,0.85624,0.832404,0.841398
7,0.0687,0.610277,0.866178,0.877482,0.827847,0.846846
8,0.041,0.654962,0.855179,0.855274,0.831272,0.839598
9,0.0245,0.671678,0.864345,0.881483,0.814545,0.839141
10,0.0128,0.719733,0.870761,0.883284,0.820994,0.843883


[I 2025-03-22 01:15:20,873] Trial 100 finished with value: 0.8543832577197127 and parameters: {'learning_rate': 0.002384444777664535, 'weight_decay': 0.002, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 101 with params: {'learning_rate': 0.004575990738701344, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.04,0.69174,0.768103,0.684387,0.651993,0.658389
2,0.5086,0.528885,0.821265,0.812068,0.774208,0.787041
3,0.3024,0.442184,0.855179,0.865999,0.799472,0.821609
4,0.1868,0.503632,0.84418,0.838225,0.790863,0.80822
5,0.1137,0.54481,0.867094,0.870382,0.828102,0.844743
6,0.0679,0.606167,0.87626,0.871848,0.817564,0.837021
7,0.0261,0.701491,0.877177,0.890408,0.827698,0.850236
8,0.0088,0.701731,0.872594,0.870473,0.823201,0.841568
9,0.0051,0.743668,0.88176,0.890028,0.84054,0.859462
10,0.0069,0.785362,0.87626,0.886045,0.826766,0.848217


[I 2025-03-22 01:16:16,591] Trial 101 pruned. 


Trial 102 with params: {'learning_rate': 0.003402614695301284, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0649,0.709458,0.756187,0.664781,0.643151,0.646666
2,0.5323,0.617335,0.792851,0.805622,0.753439,0.766694
3,0.3632,0.477341,0.833181,0.830368,0.785487,0.798589
4,0.2327,0.505827,0.846929,0.867116,0.793334,0.81763
5,0.1518,0.543796,0.855179,0.872096,0.810934,0.831343
6,0.1176,0.541007,0.856095,0.843616,0.813017,0.823431
7,0.0416,0.630187,0.862511,0.864693,0.825907,0.841342
8,0.019,0.656028,0.864345,0.864704,0.827706,0.841752
9,0.0086,0.69673,0.878093,0.889741,0.827432,0.850248
10,0.0052,0.773509,0.871677,0.886081,0.82305,0.845698


[I 2025-03-22 01:17:29,205] Trial 102 finished with value: 0.8500322099091896 and parameters: {'learning_rate': 0.003402614695301284, 'weight_decay': 0.001, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 103 with params: {'learning_rate': 0.004397187064809733, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0509,0.693027,0.754354,0.679993,0.641894,0.647141
2,0.5078,0.530584,0.813932,0.833727,0.756402,0.77915
3,0.3072,0.453608,0.849679,0.841226,0.802865,0.817898
4,0.1864,0.458389,0.852429,0.884331,0.823847,0.847247
5,0.1028,0.563373,0.863428,0.859899,0.816434,0.833353


[I 2025-03-22 01:17:53,422] Trial 103 pruned. 


Trial 104 with params: {'learning_rate': 6.119956273045214e-05, 'weight_decay': 0.006, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.726,1.662635,0.335472,0.206457,0.245605,0.175467
2,1.6145,1.564904,0.404216,0.201754,0.306459,0.217371
3,1.5052,1.426399,0.433547,0.385298,0.328243,0.261246
4,1.351,1.279104,0.494959,0.52422,0.38898,0.363985
5,1.2275,1.17241,0.582035,0.535021,0.481333,0.486316
6,1.1231,1.097212,0.612282,0.524382,0.517519,0.509175
7,1.0605,1.043628,0.628781,0.545366,0.527445,0.529703
8,1.0061,1.006044,0.630614,0.533122,0.537148,0.531612
9,0.9713,0.977938,0.633364,0.536637,0.537031,0.533473
10,0.9348,0.961796,0.64253,0.538891,0.549749,0.540452


[I 2025-03-22 01:18:52,552] Trial 104 pruned. 


Trial 105 with params: {'learning_rate': 0.004800207282760222, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0499,0.798026,0.713107,0.653682,0.60373,0.609906
2,0.5269,0.548576,0.808433,0.795147,0.756118,0.76652
3,0.3105,0.488889,0.832264,0.834716,0.809813,0.817993
4,0.196,0.463601,0.865261,0.882892,0.817957,0.840596
5,0.1231,0.529346,0.873511,0.873217,0.833366,0.84904
6,0.0674,0.609128,0.879927,0.895301,0.836909,0.859478
7,0.0284,0.671205,0.872594,0.876273,0.830829,0.849341
8,0.0114,0.726065,0.870761,0.870119,0.8306,0.846216
9,0.0083,0.738982,0.878093,0.891749,0.836152,0.857611
10,0.0018,0.799157,0.875344,0.890124,0.83406,0.855529


[I 2025-03-22 01:20:26,652] Trial 105 finished with value: 0.859510017052134 and parameters: {'learning_rate': 0.004800207282760222, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 106 with params: {'learning_rate': 0.0036166419452346612, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0586,0.696952,0.76077,0.68151,0.648403,0.653487
2,0.5083,0.646574,0.792851,0.804655,0.727066,0.739086
3,0.3416,0.547122,0.812099,0.832653,0.760422,0.776872
4,0.2368,0.491755,0.856095,0.878584,0.810154,0.833707
5,0.1421,0.485995,0.857929,0.87391,0.821748,0.841743
6,0.0997,0.520608,0.869844,0.874625,0.839469,0.854474
7,0.0335,0.618972,0.87901,0.880829,0.847028,0.861448
8,0.0093,0.727916,0.870761,0.882473,0.840761,0.857614
9,0.0044,0.741984,0.88176,0.892791,0.850159,0.867234
10,0.0027,0.789149,0.871677,0.885207,0.842172,0.859485


[I 2025-03-22 01:21:46,451] Trial 106 finished with value: 0.8619648281041864 and parameters: {'learning_rate': 0.0036166419452346612, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 107 with params: {'learning_rate': 0.0021211981307182066, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1529,0.796881,0.722273,0.641734,0.61599,0.614753
2,0.5618,0.616889,0.769936,0.64485,0.664173,0.649065
3,0.3943,0.517393,0.835014,0.866597,0.759649,0.778638
4,0.2915,0.520229,0.83593,0.842122,0.802403,0.816719
5,0.2185,0.519245,0.84418,0.836316,0.808517,0.819756


[I 2025-03-22 01:22:24,978] Trial 107 pruned. 


Trial 108 with params: {'learning_rate': 0.0032334011093850274, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0793,0.706664,0.75802,0.659871,0.646904,0.64574
2,0.528,0.642942,0.780935,0.794624,0.74455,0.755573
3,0.3495,0.479368,0.842346,0.821853,0.802025,0.809095
4,0.2311,0.519451,0.846013,0.870273,0.792325,0.81796
5,0.1526,0.462768,0.867094,0.853413,0.848646,0.850267
6,0.1056,0.54967,0.859762,0.857225,0.826948,0.836821
7,0.0451,0.610016,0.863428,0.876423,0.82556,0.845047
8,0.0211,0.647387,0.866178,0.86764,0.828524,0.844428
9,0.0115,0.674929,0.868928,0.879075,0.822469,0.84262
10,0.006,0.705661,0.857012,0.849988,0.821427,0.833632


[I 2025-03-22 01:24:00,368] Trial 108 pruned. 


Trial 109 with params: {'learning_rate': 0.002953148657005172, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.09,0.703141,0.748854,0.647147,0.640827,0.637528
2,0.5384,0.628397,0.789184,0.671064,0.678071,0.668282
3,0.3726,0.497565,0.83593,0.824139,0.798391,0.805466
4,0.2515,0.565093,0.829514,0.845108,0.777969,0.801336
5,0.1777,0.495765,0.859762,0.845428,0.822913,0.832041
6,0.1085,0.517019,0.861595,0.872698,0.815872,0.836157
7,0.0547,0.675649,0.854262,0.860232,0.828777,0.839799
8,0.0363,0.686511,0.862511,0.867221,0.835636,0.848685
9,0.0171,0.753194,0.866178,0.868469,0.819141,0.83779
10,0.0082,0.746345,0.867094,0.867717,0.829006,0.844634


[I 2025-03-22 01:25:19,359] Trial 109 finished with value: 0.8431451895441383 and parameters: {'learning_rate': 0.002953148657005172, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 110 with params: {'learning_rate': 0.0036663935098773218, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0528,0.885862,0.700275,0.655579,0.589612,0.59189
2,0.5436,0.623503,0.778185,0.766054,0.743929,0.74434
3,0.3407,0.48622,0.828598,0.844202,0.780931,0.80019
4,0.2348,0.507257,0.851512,0.877742,0.795735,0.823047
5,0.1332,0.489293,0.861595,0.840905,0.833549,0.836778
6,0.0884,0.559019,0.857012,0.857035,0.822637,0.835011
7,0.0379,0.644332,0.862511,0.863751,0.823774,0.839686
8,0.0184,0.657433,0.868011,0.85059,0.829407,0.838733
9,0.0144,0.666613,0.868928,0.86939,0.830804,0.846354
10,0.0085,0.690138,0.870761,0.878891,0.833188,0.84977


[I 2025-03-22 01:26:46,932] Trial 110 finished with value: 0.8520271126439544 and parameters: {'learning_rate': 0.0036663935098773218, 'weight_decay': 0.0, 'warmup_steps': 3}. Best is trial 60 with value: 0.8633534618076598.


Trial 111 with params: {'learning_rate': 0.0040613651393643264, 'weight_decay': 0.003, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0492,0.79744,0.727773,0.658802,0.618327,0.62298
2,0.5439,0.537263,0.816682,0.860882,0.72338,0.742262
3,0.3363,0.485633,0.833181,0.808056,0.804386,0.801214
4,0.2232,0.489146,0.856095,0.877051,0.800506,0.825747
5,0.1386,0.515505,0.869844,0.871819,0.830534,0.846883
6,0.0845,0.542437,0.866178,0.844571,0.836172,0.840099
7,0.0301,0.682127,0.87901,0.879948,0.836379,0.854074
8,0.0083,0.745764,0.877177,0.878177,0.835783,0.853245
9,0.006,0.719379,0.880843,0.891233,0.838154,0.858843
10,0.0054,0.750795,0.879927,0.87978,0.838394,0.855175


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-22 01:28:49,533] Trial 111 finished with value: 0.8532970182572569 and parameters: {'learning_rate': 0.0040613651393643264, 'weight_decay': 0.003, 'warmup_steps': 3}. Best is trial 60 with value: 0.8633534618076598.


Trial 112 with params: {'learning_rate': 0.000915201931634133, 'weight_decay': 0.009000000000000001, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2819,0.895993,0.653529,0.575526,0.555954,0.556176
2,0.7231,0.660078,0.766269,0.656703,0.652374,0.652475
3,0.5053,0.556561,0.808433,0.692363,0.690015,0.686969
4,0.4164,0.557543,0.810266,0.695624,0.690428,0.688973
5,0.3617,0.521344,0.823098,0.696395,0.702162,0.698119
6,0.2949,0.514897,0.833181,0.863497,0.765301,0.788231
7,0.2207,0.530788,0.837764,0.848304,0.788234,0.80713
8,0.179,0.56473,0.83593,0.843873,0.814108,0.825324
9,0.1466,0.595392,0.840513,0.845053,0.816543,0.827574
10,0.0962,0.672624,0.83868,0.857089,0.814291,0.831354


[I 2025-03-22 01:30:31,732] Trial 112 pruned. 


Trial 113 with params: {'learning_rate': 0.0049978570436611395, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0284,0.660148,0.769019,0.675462,0.65374,0.658362
2,0.4906,0.534097,0.809349,0.826,0.748651,0.764074
3,0.298,0.453448,0.84418,0.845665,0.801241,0.81664
4,0.1742,0.497672,0.857012,0.879562,0.819263,0.841331
5,0.0988,0.576505,0.864345,0.885157,0.824712,0.847597
6,0.0716,0.589632,0.859762,0.876797,0.814127,0.836205
7,0.0264,0.626719,0.885426,0.895749,0.842529,0.863065
8,0.0079,0.737759,0.87626,0.888084,0.835351,0.855794
9,0.0038,0.761105,0.87626,0.887306,0.83558,0.855616
10,0.002,0.811662,0.880843,0.890602,0.839209,0.858907


[I 2025-03-22 01:31:57,870] Trial 113 finished with value: 0.8597312241717119 and parameters: {'learning_rate': 0.0049978570436611395, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 114 with params: {'learning_rate': 0.003685428204390865, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0573,0.666603,0.765353,0.667197,0.652819,0.654684
2,0.525,0.604306,0.800183,0.790824,0.749138,0.759586
3,0.3352,0.517728,0.827681,0.852392,0.774851,0.800269
4,0.2239,0.473239,0.857929,0.880631,0.802564,0.828652
5,0.1303,0.516623,0.870761,0.881188,0.832384,0.850689
6,0.0679,0.561603,0.862511,0.856706,0.836539,0.843983
7,0.0313,0.646321,0.871677,0.881206,0.814678,0.837009
8,0.0216,0.697112,0.874427,0.884455,0.826108,0.847133
9,0.0123,0.712819,0.877177,0.886777,0.828381,0.849404
10,0.0123,0.75824,0.875344,0.888394,0.835736,0.855521


[I 2025-03-22 01:33:14,920] Trial 114 finished with value: 0.852522367543517 and parameters: {'learning_rate': 0.003685428204390865, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 115 with params: {'learning_rate': 0.0036048620060859043, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0558,0.636088,0.767186,0.666854,0.65507,0.657372
2,0.5173,0.560411,0.817599,0.854139,0.7441,0.765532
3,0.3349,0.502641,0.840513,0.838972,0.795359,0.811615
4,0.2164,0.50632,0.84418,0.874946,0.808587,0.833617
5,0.1386,0.524222,0.866178,0.865309,0.828986,0.842919
6,0.0909,0.565304,0.861595,0.876558,0.834435,0.850802
7,0.0372,0.646335,0.862511,0.869079,0.824766,0.842756
8,0.0255,0.618948,0.877177,0.867698,0.846617,0.855831
9,0.0162,0.683029,0.882676,0.895684,0.851089,0.869186
10,0.0075,0.709469,0.879927,0.892241,0.838116,0.858989


[I 2025-03-22 01:34:36,161] Trial 115 finished with value: 0.8618134563658776 and parameters: {'learning_rate': 0.0036048620060859043, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 116 with params: {'learning_rate': 0.0034440890357421935, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0429,0.726149,0.749771,0.672117,0.636907,0.64396
2,0.5191,0.580515,0.799267,0.669196,0.685029,0.674557
3,0.3527,0.492359,0.827681,0.830649,0.780425,0.795329
4,0.2235,0.536383,0.84418,0.873336,0.791039,0.817684
5,0.1483,0.512745,0.861595,0.877788,0.814589,0.837138
6,0.0931,0.541853,0.852429,0.811688,0.828421,0.818798
7,0.0419,0.625214,0.865261,0.87798,0.81939,0.839453
8,0.025,0.667474,0.863428,0.838838,0.825979,0.831913
9,0.0106,0.687668,0.87901,0.88994,0.829005,0.8512
10,0.0069,0.699416,0.873511,0.885585,0.815441,0.839068


[I 2025-03-22 01:35:23,113] Trial 116 pruned. 


Trial 117 with params: {'learning_rate': 0.0023242206720919794, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1371,0.785869,0.724106,0.641406,0.617555,0.615385
2,0.5581,0.594534,0.788268,0.666059,0.676317,0.666565
3,0.3993,0.509423,0.834097,0.870489,0.766509,0.789516
4,0.2864,0.48842,0.84143,0.86736,0.815979,0.835452
5,0.2051,0.496781,0.852429,0.8366,0.815527,0.824082


[I 2025-03-22 01:35:51,040] Trial 117 pruned. 


Trial 118 with params: {'learning_rate': 0.004092484990218411, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0479,0.660706,0.767186,0.680975,0.650841,0.657661
2,0.5074,0.580815,0.797434,0.798628,0.755916,0.767428
3,0.3207,0.475832,0.836847,0.829675,0.78315,0.799861
4,0.2057,0.465494,0.859762,0.876799,0.804368,0.828816
5,0.1174,0.499939,0.868928,0.873472,0.829533,0.84659
6,0.0868,0.560366,0.871677,0.883098,0.833333,0.852156
7,0.0337,0.71435,0.862511,0.879273,0.815615,0.838206
8,0.0147,0.704507,0.866178,0.870719,0.835217,0.850321
9,0.0051,0.769332,0.868011,0.868376,0.828604,0.844777
10,0.0036,0.768158,0.864345,0.866387,0.826764,0.842803


[I 2025-03-22 01:36:40,024] Trial 118 pruned. 


Trial 119 with params: {'learning_rate': 0.003983207603375396, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0521,0.687038,0.76352,0.677356,0.647845,0.654888
2,0.5126,0.582564,0.781852,0.782333,0.744033,0.752959
3,0.3344,0.475176,0.84418,0.854187,0.803371,0.819095
4,0.207,0.505119,0.857012,0.87461,0.802188,0.825424
5,0.1264,0.559242,0.861595,0.878962,0.815004,0.836933
6,0.0744,0.546295,0.860678,0.873317,0.825573,0.842119
7,0.0293,0.622419,0.874427,0.885808,0.845138,0.861429
8,0.0158,0.663252,0.871677,0.875397,0.840424,0.855456
9,0.0057,0.715013,0.87626,0.885452,0.836998,0.855401
10,0.0046,0.747022,0.872594,0.871477,0.833217,0.848502


[I 2025-03-22 01:37:57,964] Trial 119 finished with value: 0.8585426259865501 and parameters: {'learning_rate': 0.003983207603375396, 'weight_decay': 0.001, 'warmup_steps': 4}. Best is trial 60 with value: 0.8633534618076598.


Trial 120 with params: {'learning_rate': 0.00022650159354999495, 'weight_decay': 0.004, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6245,1.426775,0.477544,0.418109,0.365025,0.332446
2,1.243,1.07167,0.603116,0.550885,0.497436,0.503529
3,0.9454,0.870887,0.681027,0.574086,0.585929,0.57643
4,0.7476,0.747932,0.743355,0.644411,0.630746,0.633006
5,0.6496,0.682328,0.758937,0.647524,0.645839,0.643134


[I 2025-03-22 01:38:21,541] Trial 120 pruned. 


Trial 121 with params: {'learning_rate': 0.004729824285245729, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0462,0.799911,0.726856,0.660763,0.619917,0.625562
2,0.5343,0.546174,0.807516,0.842811,0.701806,0.698772
3,0.3136,0.473804,0.843263,0.841998,0.793069,0.807852
4,0.2089,0.489741,0.868011,0.875292,0.82766,0.847002
5,0.1362,0.486673,0.869844,0.85908,0.838974,0.847231
6,0.0688,0.548684,0.882676,0.883338,0.85051,0.864488
7,0.0335,0.605232,0.882676,0.884403,0.839427,0.857383
8,0.0144,0.684109,0.87626,0.873415,0.835792,0.851015
9,0.0033,0.709021,0.879927,0.877608,0.83923,0.854614
10,0.0025,0.720223,0.889093,0.886292,0.846311,0.862453


[I 2025-03-22 01:39:37,006] Trial 121 finished with value: 0.8597682123845218 and parameters: {'learning_rate': 0.004729824285245729, 'weight_decay': 0.0, 'warmup_steps': 3}. Best is trial 60 with value: 0.8633534618076598.


Trial 122 with params: {'learning_rate': 0.004204983838719877, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0725,0.813889,0.740605,0.672992,0.627121,0.635264
2,0.5416,0.587636,0.790101,0.666226,0.677698,0.666899
3,0.3364,0.463147,0.840513,0.820636,0.787776,0.800722
4,0.2165,0.519453,0.854262,0.897008,0.787097,0.819216
5,0.1268,0.478767,0.872594,0.887914,0.830807,0.852908
6,0.0585,0.598434,0.859762,0.872982,0.823569,0.842152
7,0.0274,0.637537,0.878093,0.877346,0.847454,0.860115
8,0.0131,0.770742,0.854262,0.868882,0.829901,0.84416
9,0.0061,0.815582,0.851512,0.870136,0.826858,0.842798
10,0.0084,0.726195,0.868928,0.87017,0.839669,0.852641


[I 2025-03-22 01:40:54,474] Trial 122 finished with value: 0.8636126323169178 and parameters: {'learning_rate': 0.004204983838719877, 'weight_decay': 0.0, 'warmup_steps': 2}. Best is trial 122 with value: 0.8636126323169178.


Trial 123 with params: {'learning_rate': 0.004483170022431039, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0565,0.786056,0.733272,0.664479,0.623295,0.629716
2,0.5405,0.529631,0.822181,0.824392,0.738756,0.75541
3,0.3256,0.485502,0.835014,0.810397,0.797448,0.799029
4,0.2096,0.537791,0.847846,0.87234,0.791899,0.819286
5,0.1172,0.546719,0.864345,0.86939,0.826832,0.842939
6,0.0758,0.569792,0.859762,0.854086,0.833755,0.841129
7,0.0326,0.641768,0.885426,0.895718,0.843096,0.863319
8,0.0092,0.682734,0.87626,0.877597,0.844683,0.85882
9,0.0046,0.72832,0.87901,0.892771,0.837104,0.858795
10,0.0023,0.749681,0.885426,0.897015,0.851794,0.870078


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-22 01:42:29,767] Trial 123 finished with value: 0.8691821658235614 and parameters: {'learning_rate': 0.004483170022431039, 'weight_decay': 0.0, 'warmup_steps': 3}. Best is trial 123 with value: 0.8691821658235614.


Trial 124 with params: {'learning_rate': 0.0029731801707089775, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0759,0.753488,0.748854,0.663917,0.635879,0.637934
2,0.5256,0.664893,0.767186,0.647798,0.661374,0.645917
3,0.3532,0.498153,0.837764,0.849815,0.789479,0.807417
4,0.2387,0.523418,0.835014,0.865455,0.782909,0.810477
5,0.1548,0.533869,0.860678,0.87598,0.823998,0.843304
6,0.1064,0.575319,0.850596,0.815371,0.827574,0.819566
7,0.0562,0.601,0.865261,0.874855,0.819134,0.838921
8,0.0257,0.683216,0.863428,0.846283,0.836412,0.840496
9,0.0113,0.733821,0.861595,0.873311,0.815913,0.836325
10,0.0088,0.753509,0.866178,0.86425,0.818935,0.836249


[I 2025-03-22 01:43:27,519] Trial 124 pruned. 


Trial 125 with params: {'learning_rate': 0.003230646552105269, 'weight_decay': 0.0, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0617,0.771191,0.747938,0.678972,0.63122,0.640786
2,0.5421,0.637539,0.781852,0.824109,0.693809,0.689685
3,0.3656,0.491616,0.832264,0.847213,0.792924,0.810759
4,0.2483,0.4681,0.851512,0.861774,0.815518,0.833788
5,0.1524,0.495088,0.859762,0.843179,0.823378,0.831534
6,0.1085,0.52816,0.853346,0.835911,0.829974,0.831015
7,0.0478,0.627501,0.860678,0.856455,0.833705,0.843098
8,0.0225,0.662585,0.868011,0.8688,0.829033,0.84517
9,0.0095,0.702044,0.871677,0.887351,0.829827,0.852404
10,0.0081,0.727591,0.872594,0.870368,0.833827,0.848283


[I 2025-03-22 01:44:55,086] Trial 125 finished with value: 0.8525452488333726 and parameters: {'learning_rate': 0.003230646552105269, 'weight_decay': 0.0, 'warmup_steps': 1}. Best is trial 123 with value: 0.8691821658235614.


Trial 126 with params: {'learning_rate': 0.0037016130447764394, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0747,0.718629,0.750687,0.671668,0.637038,0.642866
2,0.5385,0.569734,0.802016,0.844213,0.760796,0.78098
3,0.3497,0.487313,0.83868,0.83983,0.799931,0.813672
4,0.2322,0.540792,0.847846,0.877594,0.789648,0.819744
5,0.1338,0.513798,0.862511,0.825001,0.81596,0.818995


[I 2025-03-22 01:45:52,252] Trial 126 pruned. 


Trial 127 with params: {'learning_rate': 0.0034639224191048276, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0816,0.722756,0.746104,0.671974,0.631895,0.640027
2,0.5463,0.595175,0.788268,0.666712,0.676825,0.665426
3,0.3702,0.494853,0.833181,0.826592,0.794015,0.803994
4,0.2429,0.489785,0.847846,0.86222,0.811882,0.831467
5,0.1466,0.505602,0.861595,0.87702,0.823286,0.84389
6,0.0934,0.534974,0.846013,0.788518,0.822872,0.800678
7,0.0418,0.64716,0.868011,0.861416,0.839261,0.848027
8,0.0272,0.662785,0.871677,0.862612,0.83278,0.845353
9,0.0088,0.687076,0.874427,0.858036,0.816768,0.832595
10,0.0093,0.715335,0.877177,0.86967,0.845452,0.85606


[I 2025-03-22 01:47:24,915] Trial 127 finished with value: 0.8507404067364064 and parameters: {'learning_rate': 0.0034639224191048276, 'weight_decay': 0.0, 'warmup_steps': 2}. Best is trial 123 with value: 0.8691821658235614.


Trial 128 with params: {'learning_rate': 0.004915050861068427, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0524,0.708651,0.76352,0.67412,0.647699,0.652369
2,0.4892,0.57713,0.797434,0.842534,0.71384,0.720706
3,0.2966,0.495668,0.830431,0.791309,0.795215,0.786523
4,0.1748,0.506208,0.865261,0.867168,0.806593,0.828657
5,0.0888,0.546127,0.868011,0.882011,0.83007,0.849318
6,0.0608,0.553507,0.872594,0.84853,0.844318,0.84585
7,0.0177,0.72337,0.874427,0.88887,0.824173,0.847983
8,0.0096,0.725832,0.870761,0.860987,0.83102,0.843783
9,0.0024,0.778468,0.868011,0.875471,0.811574,0.832683
10,0.0015,0.805317,0.879927,0.887298,0.83062,0.850682


[I 2025-03-22 01:48:54,516] Trial 128 finished with value: 0.8439552351428187 and parameters: {'learning_rate': 0.004915050861068427, 'weight_decay': 0.0, 'warmup_steps': 2}. Best is trial 123 with value: 0.8691821658235614.


Trial 129 with params: {'learning_rate': 0.004345024274104152, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.058,0.754485,0.737855,0.661831,0.625888,0.630585
2,0.525,0.612312,0.789184,0.827179,0.707393,0.714302
3,0.3219,0.480849,0.837764,0.8379,0.799196,0.811803
4,0.2099,0.485722,0.860678,0.879823,0.804077,0.829553
5,0.1201,0.523936,0.867094,0.872637,0.828403,0.845476
6,0.0675,0.537882,0.874427,0.867165,0.842929,0.853633
7,0.0355,0.61069,0.87901,0.855406,0.837051,0.845347
8,0.0101,0.693911,0.868928,0.835905,0.830023,0.832704
9,0.004,0.717856,0.878093,0.847212,0.835942,0.841098
10,0.002,0.750456,0.879927,0.853723,0.838407,0.84526


[I 2025-03-22 01:49:44,727] Trial 129 pruned. 


Trial 130 with params: {'learning_rate': 0.004803304746734306, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0539,0.727072,0.740605,0.651108,0.634021,0.632221
2,0.5178,0.503062,0.824931,0.861674,0.714648,0.716989
3,0.305,0.467089,0.849679,0.846421,0.797358,0.813583
4,0.1896,0.489776,0.865261,0.865571,0.834939,0.848224
5,0.0945,0.558746,0.859762,0.843145,0.833169,0.83609


[I 2025-03-22 01:50:09,375] Trial 130 pruned. 


Trial 131 with params: {'learning_rate': 0.004222912980430708, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0561,0.704579,0.751604,0.675789,0.641163,0.644197
2,0.5065,0.552245,0.805683,0.776601,0.759457,0.764244
3,0.3077,0.479481,0.845096,0.860398,0.800109,0.821369
4,0.1909,0.506344,0.854262,0.900587,0.797852,0.829332
5,0.1174,0.565808,0.866178,0.887684,0.826392,0.849514
6,0.0683,0.597147,0.864345,0.832006,0.836129,0.833241
7,0.0296,0.703257,0.859762,0.86174,0.824986,0.838912
8,0.0207,0.653288,0.882676,0.859979,0.849134,0.854005
9,0.0085,0.772959,0.871677,0.886536,0.830899,0.852489
10,0.0037,0.783015,0.87901,0.890071,0.838672,0.858121


[I 2025-03-22 01:51:24,029] Trial 131 finished with value: 0.8584206688119417 and parameters: {'learning_rate': 0.004222912980430708, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 123 with value: 0.8691821658235614.


Trial 132 with params: {'learning_rate': 0.004135037114415676, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0532,0.702828,0.761687,0.681222,0.647969,0.653483
2,0.5144,0.587393,0.8011,0.797019,0.759597,0.76957
3,0.3274,0.46772,0.83868,0.851339,0.789425,0.807645
4,0.2017,0.481631,0.857012,0.875066,0.819655,0.840279
5,0.115,0.527787,0.862511,0.852914,0.817054,0.830976


[I 2025-03-22 01:51:50,503] Trial 132 pruned. 


Trial 133 with params: {'learning_rate': 0.004611847549782694, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0367,0.698562,0.761687,0.679814,0.645174,0.653458
2,0.5169,0.534075,0.817599,0.810511,0.771826,0.78423
3,0.3115,0.436924,0.856095,0.856703,0.819308,0.833481
4,0.1859,0.46173,0.869844,0.887002,0.820167,0.84404
5,0.1055,0.510971,0.869844,0.876428,0.830953,0.849005
6,0.0663,0.595119,0.873511,0.882499,0.835671,0.852975
7,0.028,0.622165,0.873511,0.848035,0.824237,0.834822
8,0.0129,0.665878,0.870761,0.853235,0.831332,0.841182
9,0.007,0.694205,0.87626,0.87621,0.836949,0.852937
10,0.0022,0.725969,0.875344,0.887138,0.836249,0.855859


[I 2025-03-22 01:53:05,856] Trial 133 finished with value: 0.8504793358634255 and parameters: {'learning_rate': 0.004611847549782694, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 123 with value: 0.8691821658235614.


Trial 134 with params: {'learning_rate': 0.0016158514505531909, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2072,0.77741,0.714024,0.622999,0.610155,0.60904
2,0.6044,0.60993,0.773602,0.653113,0.66435,0.655524
3,0.4488,0.540262,0.808433,0.688363,0.69085,0.683477
4,0.3399,0.513238,0.827681,0.86749,0.722479,0.7336
5,0.2704,0.50245,0.850596,0.869047,0.812824,0.834573


[I 2025-03-22 01:53:30,404] Trial 134 pruned. 


Trial 135 with params: {'learning_rate': 0.004599155892559169, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0452,0.733718,0.764436,0.677998,0.645476,0.651974
2,0.4987,0.565839,0.802016,0.67575,0.688681,0.676902
3,0.3085,0.443986,0.846013,0.835465,0.806191,0.815854
4,0.1786,0.465196,0.862511,0.866275,0.81661,0.835298
5,0.112,0.495976,0.870761,0.876578,0.830561,0.848781
6,0.0671,0.508664,0.87626,0.834917,0.853099,0.841883
7,0.0277,0.626265,0.873511,0.87394,0.824636,0.843415
8,0.0068,0.705868,0.87901,0.869032,0.837435,0.850953
9,0.004,0.71253,0.875344,0.872414,0.826025,0.843997
10,0.0021,0.747212,0.878093,0.867638,0.837582,0.850257


[I 2025-03-22 01:55:21,216] Trial 135 finished with value: 0.8554554359094912 and parameters: {'learning_rate': 0.004599155892559169, 'weight_decay': 0.002, 'warmup_steps': 2}. Best is trial 123 with value: 0.8691821658235614.


Trial 136 with params: {'learning_rate': 0.004953780422383182, 'weight_decay': 0.001, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0387,0.696681,0.765353,0.670278,0.651885,0.655376
2,0.5023,0.559715,0.802933,0.786389,0.762601,0.766751
3,0.3003,0.497145,0.839597,0.856645,0.798105,0.816349
4,0.1963,0.48634,0.866178,0.883763,0.817436,0.841786
5,0.1167,0.518672,0.865261,0.850101,0.826615,0.836123


[I 2025-03-22 01:55:45,097] Trial 136 pruned. 


Trial 137 with params: {'learning_rate': 0.004910440285104477, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0355,0.673344,0.766269,0.673428,0.649984,0.655113
2,0.487,0.586318,0.784601,0.772332,0.741377,0.740104
3,0.3075,0.446068,0.853346,0.865034,0.79876,0.820183
4,0.189,0.488838,0.868011,0.885728,0.82859,0.849744
5,0.1098,0.532169,0.87626,0.877502,0.835346,0.852027
6,0.0606,0.576152,0.872594,0.886776,0.8323,0.852933
7,0.0274,0.686716,0.871677,0.889879,0.829392,0.852442
8,0.0144,0.690003,0.870761,0.873367,0.840214,0.854481
9,0.0048,0.756079,0.867094,0.883233,0.827296,0.849202
10,0.002,0.750617,0.878093,0.888479,0.837487,0.857175


[I 2025-03-22 01:57:01,288] Trial 137 finished with value: 0.8563176946004618 and parameters: {'learning_rate': 0.004910440285104477, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 123 with value: 0.8691821658235614.


Trial 138 with params: {'learning_rate': 0.004865490483268204, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0507,0.730524,0.750687,0.660524,0.64263,0.641114
2,0.5186,0.496114,0.820348,0.691237,0.702039,0.695515
3,0.3002,0.455034,0.846013,0.842646,0.794723,0.810319
4,0.1882,0.52253,0.851512,0.860306,0.823337,0.838327
5,0.1078,0.504499,0.877177,0.848664,0.845803,0.84619
6,0.0837,0.562778,0.857929,0.860925,0.831129,0.84344
7,0.0319,0.602964,0.883593,0.894421,0.841238,0.861906
8,0.0088,0.679919,0.867094,0.880724,0.828577,0.848752
9,0.0032,0.698734,0.875344,0.888268,0.826113,0.848911
10,0.0026,0.755938,0.875344,0.875915,0.835045,0.851624


[I 2025-03-22 01:58:20,930] Trial 138 finished with value: 0.8539389524925736 and parameters: {'learning_rate': 0.004865490483268204, 'weight_decay': 0.0, 'warmup_steps': 3}. Best is trial 123 with value: 0.8691821658235614.


Trial 139 with params: {'learning_rate': 0.0001772405333439467, 'weight_decay': 0.002, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6337,1.493217,0.444546,0.461002,0.335931,0.309192
2,1.3185,1.15826,0.579285,0.525118,0.475492,0.474842
3,1.0379,0.962667,0.648946,0.53802,0.55934,0.544279
4,0.854,0.831662,0.704858,0.596927,0.602346,0.596332
5,0.7491,0.75241,0.736939,0.629436,0.627569,0.626151


[I 2025-03-22 01:58:47,374] Trial 139 pruned. 


Trial 140 with params: {'learning_rate': 0.004949486167221329, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0304,0.693285,0.747938,0.660445,0.636046,0.641134
2,0.5025,0.493742,0.830431,0.866309,0.762474,0.788172
3,0.2908,0.467376,0.849679,0.865288,0.807148,0.825226
4,0.1786,0.487169,0.864345,0.874385,0.825456,0.844929
5,0.0943,0.585542,0.870761,0.866761,0.841506,0.851267
6,0.0555,0.571002,0.869844,0.868381,0.822645,0.839255
7,0.0219,0.711255,0.875344,0.868938,0.833337,0.848322
8,0.0159,0.709819,0.869844,0.821879,0.839572,0.82747
9,0.0069,0.722901,0.87626,0.865507,0.834732,0.84798
10,0.0027,0.747046,0.878093,0.870443,0.845476,0.856459


[I 2025-03-22 02:00:09,630] Trial 140 finished with value: 0.8503762668842959 and parameters: {'learning_rate': 0.004949486167221329, 'weight_decay': 0.002, 'warmup_steps': 3}. Best is trial 123 with value: 0.8691821658235614.


Trial 141 with params: {'learning_rate': 0.0019085739405293133, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1643,0.775983,0.711274,0.624796,0.607408,0.605123
2,0.567,0.639733,0.76077,0.638689,0.657988,0.639136
3,0.4031,0.529165,0.818515,0.857957,0.708807,0.711103
4,0.3092,0.53212,0.836847,0.8603,0.784634,0.808267
5,0.2429,0.541691,0.84143,0.844379,0.805824,0.821404


[I 2025-03-22 02:00:45,304] Trial 141 pruned. 


Trial 142 with params: {'learning_rate': 0.004738224156037737, 'weight_decay': 0.0, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.043,0.70163,0.766269,0.684092,0.650371,0.657392
2,0.5101,0.525974,0.823098,0.795193,0.776811,0.782398
3,0.3036,0.443396,0.853346,0.827604,0.818426,0.82062
4,0.1883,0.456754,0.868928,0.873778,0.82936,0.846304
5,0.1105,0.557926,0.867094,0.883749,0.818197,0.841902
6,0.0617,0.566944,0.868011,0.877941,0.830885,0.847564
7,0.0248,0.713578,0.874427,0.885166,0.815038,0.838937
8,0.0105,0.76048,0.865261,0.855199,0.827801,0.839018
9,0.0053,0.816977,0.872594,0.886511,0.832182,0.853224
10,0.0033,0.796237,0.870761,0.881885,0.83135,0.850814


[I 2025-03-22 02:02:01,330] Trial 142 finished with value: 0.855418976207987 and parameters: {'learning_rate': 0.004738224156037737, 'weight_decay': 0.0, 'warmup_steps': 4}. Best is trial 123 with value: 0.8691821658235614.


Trial 143 with params: {'learning_rate': 0.001533959854171756, 'weight_decay': 0.002, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1559,0.762275,0.722273,0.634206,0.614287,0.618012
2,0.6012,0.610088,0.771769,0.64674,0.66464,0.652459
3,0.4362,0.550864,0.812099,0.690022,0.693734,0.688107
4,0.347,0.518211,0.831347,0.875163,0.725118,0.738048
5,0.2757,0.492284,0.846929,0.861508,0.802026,0.823646


[I 2025-03-22 02:02:28,033] Trial 143 pruned. 


Trial 144 with params: {'learning_rate': 0.003291869532092159, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0482,0.714518,0.753437,0.677138,0.637516,0.64706
2,0.5303,0.553307,0.817599,0.853245,0.726868,0.740811
3,0.3626,0.498438,0.823098,0.814383,0.77742,0.787839
4,0.2305,0.514524,0.843263,0.854302,0.790112,0.812561
5,0.1587,0.514293,0.848763,0.832543,0.805809,0.81572
6,0.1122,0.585771,0.843263,0.846257,0.813827,0.823134
7,0.0376,0.6283,0.871677,0.863819,0.842238,0.851481
8,0.0269,0.640044,0.863428,0.845114,0.826763,0.834695
9,0.0202,0.639226,0.868928,0.862439,0.812665,0.83023
10,0.0156,0.64303,0.869844,0.854954,0.82275,0.835623


[I 2025-03-22 02:03:37,261] Trial 144 pruned. 


Trial 145 with params: {'learning_rate': 0.001640794804054911, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2071,0.774998,0.71769,0.617966,0.614112,0.609908
2,0.6098,0.614925,0.777269,0.660916,0.665542,0.660646
3,0.448,0.53877,0.807516,0.685648,0.69116,0.682239
4,0.3419,0.52203,0.822181,0.864526,0.718438,0.729897
5,0.2713,0.519941,0.847846,0.869523,0.810387,0.832967


[I 2025-03-22 02:04:04,731] Trial 145 pruned. 


Trial 146 with params: {'learning_rate': 0.0017049097129868964, 'weight_decay': 0.0, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1665,0.775663,0.730522,0.641072,0.621656,0.622309
2,0.5891,0.598561,0.787351,0.664903,0.674058,0.666579
3,0.4244,0.56992,0.802933,0.851064,0.69446,0.697848
4,0.3344,0.495073,0.833181,0.843901,0.744213,0.765919
5,0.2494,0.516176,0.846929,0.866916,0.812514,0.833195
6,0.1871,0.48482,0.848763,0.861027,0.796085,0.817352
7,0.1087,0.564959,0.855179,0.856803,0.819164,0.833845
8,0.0746,0.618488,0.852429,0.85335,0.818948,0.832376
9,0.0377,0.668182,0.853346,0.85882,0.816104,0.833251
10,0.0266,0.692452,0.857012,0.858949,0.821061,0.835731


[I 2025-03-22 02:04:49,122] Trial 146 pruned. 


Trial 147 with params: {'learning_rate': 0.0023763745106550977, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1334,0.784196,0.71769,0.634169,0.612937,0.610167
2,0.5583,0.591682,0.784601,0.664053,0.6735,0.663406
3,0.3991,0.507244,0.824931,0.861378,0.760365,0.781057
4,0.283,0.476752,0.836847,0.858807,0.79469,0.817285
5,0.2032,0.465728,0.865261,0.853645,0.836017,0.843053
6,0.1388,0.513002,0.853346,0.813044,0.820421,0.81575
7,0.0756,0.592212,0.863428,0.87498,0.835627,0.851077
8,0.0425,0.671221,0.849679,0.834165,0.815983,0.823585
9,0.0278,0.689231,0.847846,0.870687,0.802222,0.826263
10,0.0185,0.668085,0.868011,0.881592,0.829038,0.848696


[I 2025-03-22 02:05:51,125] Trial 147 pruned. 


Trial 148 with params: {'learning_rate': 0.004454810058571898, 'weight_decay': 0.002, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0388,0.626637,0.777269,0.677974,0.662374,0.662969
2,0.4942,0.515944,0.829514,0.831084,0.78097,0.796825
3,0.3004,0.486505,0.854262,0.826871,0.821324,0.820818
4,0.1851,0.476787,0.863428,0.874071,0.83272,0.849403
5,0.1066,0.503219,0.872594,0.861871,0.833131,0.845198
6,0.0667,0.574702,0.859762,0.834455,0.826322,0.828312
7,0.0418,0.680997,0.863428,0.860688,0.82517,0.839177
8,0.0204,0.763894,0.866178,0.859573,0.827334,0.840852
9,0.0051,0.755411,0.880843,0.878781,0.839831,0.855599
10,0.0022,0.791907,0.873511,0.875206,0.833517,0.850454


[I 2025-03-22 02:07:17,104] Trial 148 pruned. 


Trial 149 with params: {'learning_rate': 0.004203220195458997, 'weight_decay': 0.002, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0563,0.78867,0.735105,0.665197,0.62479,0.629811
2,0.5494,0.549,0.807516,0.790293,0.727738,0.740089
3,0.3336,0.477101,0.837764,0.812529,0.788987,0.796627
4,0.2204,0.487036,0.864345,0.879198,0.80611,0.830936
5,0.1298,0.522089,0.866178,0.871493,0.827085,0.844657
6,0.0851,0.55847,0.865261,0.853746,0.818877,0.832638
7,0.0364,0.647904,0.869844,0.88092,0.811967,0.835323
8,0.012,0.679467,0.872594,0.872758,0.822617,0.842209
9,0.0041,0.723174,0.878093,0.876774,0.828272,0.847002
10,0.007,0.723778,0.868011,0.867278,0.820652,0.838661


[I 2025-03-22 02:08:06,694] Trial 149 pruned. 


In [25]:
# Print the BestRun summary held in `best_trial` (run id, objective value and
# winning hyperparameters). `best_trial` is assigned in an earlier cell.
print(best_trial)

BestRun(run_id='123', objective=0.8691821658235614, hyperparameters={'learning_rate': 0.004483170022431039, 'weight_decay': 0.0, 'warmup_steps': 3}, run_summary=None)


In [26]:
# Call the project-local seed helper again before configuring the next search.
# NOTE(review): presumably re-seeds the Python/NumPy/torch RNGs — confirm in `base`.
base.reset_seed()

In [27]:
# Training arguments for the distillation hyperparameter search. Results and
# logs go to dataset-specific "bilstm-distill_coarse_hp-search" directories.
training_args = base.get_training_args(
    epochs=num_epochs,
    batch_size=batch_size,
    # NOTE(review): presumably keeps extra dataset columns (e.g. teacher logits)
    # available to the trainer — confirm against base.get_training_args.
    remove_unused_columns=False,
    output_dir=f"~/results/{DATASET}/bilstm-distill_coarse_hp-search",
    logging_dir=f"~/logs/{DATASET}/bilstm-distill_coarse_hp-search",
)

In [28]:
def hp_space(trial):
    """Sample one hyperparameter configuration for a distillation trial.

    Five values are requested from ``trial``, in this order: ``learning_rate``
    (log scale, 5e-5 to 5e-3), ``weight_decay`` (0 to 1e-2 in steps of 1e-3),
    ``warmup_steps`` (integer from 0 to the notebook-level ``warm_up``),
    ``lambda_param`` (0 to 1 in steps of 0.1) and ``temperature`` (2 to 7 in
    steps of 0.5).

    The sampled values are printed together with ``trial.number`` and returned
    as a dict keyed by hyperparameter name.

    NOTE(review): if an earlier cell also defines ``hp_space``, this definition
    replaces it for the rest of the kernel session.
    """
    params = {}
    # Optimizer-related settings.
    params["learning_rate"] = trial.suggest_float("learning_rate", 5e-5, 5e-3, log=True)
    params["weight_decay"] = trial.suggest_float("weight_decay", 0, 1e-2, step=1e-3)
    params["warmup_steps"] = trial.suggest_int("warmup_steps", 0, warm_up)
    # Distillation-specific settings.
    params["lambda_param"] = trial.suggest_float("lambda_param", 0, 1, step=.1)
    params["temperature"] = trial.suggest_float("temperature", 2, 7, step=.5)

    print(f"Trial {trial.number} with params: {params}")
    return params

In [29]:
# Pruner and sampler objects handed to the distillation study.
# `min_r` / `max_r` are the Hyperband resource bounds defined in an earlier cell.
pruner = optuna.pruners.HyperbandPruner(
    min_resource=min_r,
    max_resource=max_r,
    reduction_factor=2,
    bootstrap_count=2,
)
# Fixed seed so the sequence of sampled configurations is repeatable.
sampler = optuna.samplers.TPESampler(multivariate=True, seed=42)



In [30]:
# Trainer that drives the distillation hyperparameter search.
# NOTE(review): assumes base.DistilTrainer follows the transformers.Trainer
# interface, where `model_init` is invoked to build a new model for each trial.
trainer = base.DistilTrainer(
    # Zero-argument factory for the BiLSTM student (get_BiLSTM is defined earlier).
    model_init=lambda: get_BiLSTM(),
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
)
  

In [31]:
# Run the Optuna-backed search for the distillation setup and keep the best run.
# NOTE(review): assumes DistilTrainer inherits Trainer.hyperparameter_search, in
# which case study_name / sampler / pruner are forwarded to the Optuna study.
best_trial2 = trainer.hyperparameter_search(
    hp_space=hp_space,
    # Each trial is scored by its evaluation F1; larger is better.
    compute_objective=lambda eval_metrics: eval_metrics["eval_f1"],
    direction="maximize",
    backend="optuna",
    n_trials=150,
    study_name="Distill",
    sampler=sampler,
    pruner=pruner,
)

[I 2025-03-22 02:08:07,009] A new study created in memory with name: Distill


Trial 0 with params: {'learning_rate': 0.0002805758207667253, 'weight_decay': 0.01, 'warmup_steps': 3, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.6988,3.231866,0.442713,0.386394,0.334541,0.279822
2,2.8466,2.454531,0.582951,0.543396,0.477618,0.478625
3,2.2147,1.975077,0.684693,0.574947,0.587045,0.580133
4,1.7358,1.709124,0.739688,0.64252,0.628358,0.630368
5,1.5044,1.546215,0.76352,0.645572,0.653665,0.64706


[I 2025-03-22 02:08:29,737] Trial 0 pruned. 


Trial 1 with params: {'learning_rate': 0.00010255552094216992, 'weight_decay': 0.0, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.8755,3.69331,0.330889,0.206118,0.242204,0.172058
2,3.595,3.350563,0.4033,0.194889,0.305436,0.215401
3,3.1519,2.907972,0.479377,0.533188,0.371623,0.335959
4,2.7786,2.603425,0.586618,0.514077,0.488038,0.487529
5,2.5376,2.410557,0.614115,0.520474,0.519875,0.512323
6,2.3381,2.270393,0.626031,0.531479,0.528603,0.522345
7,2.2042,2.16226,0.64528,0.552879,0.543421,0.544071
8,2.0777,2.090389,0.655362,0.548667,0.561895,0.554456
9,1.9957,2.021676,0.668194,0.568895,0.567679,0.564346
10,1.9013,1.986551,0.687443,0.572418,0.590543,0.577739


[I 2025-03-22 02:09:22,584] Trial 1 pruned. 


Trial 2 with params: {'learning_rate': 5.497167787383099e-05, 'weight_decay': 0.01, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.9416,3.824145,0.330889,0.206118,0.242204,0.172058
2,3.7572,3.661469,0.455545,0.229638,0.340438,0.272464
3,3.6188,3.481139,0.447296,0.231463,0.335748,0.272943
4,3.3762,3.203867,0.431714,0.379326,0.326896,0.26377
5,3.1285,2.989608,0.466544,0.529727,0.359225,0.314172


[I 2025-03-22 02:09:49,169] Trial 2 pruned. 


Trial 3 with params: {'learning_rate': 0.00011635338541918901, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.4, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.8515,3.670019,0.331806,0.206186,0.242884,0.172746
2,3.5238,3.245598,0.394134,0.180383,0.298108,0.211161
3,3.0313,2.789249,0.515124,0.510075,0.407437,0.386862
4,2.6522,2.48456,0.60495,0.513107,0.509326,0.506538
5,2.4182,2.301668,0.617782,0.519232,0.525071,0.5168
6,2.2208,2.159763,0.640697,0.542439,0.542637,0.537878
7,2.0754,2.051568,0.660862,0.56661,0.558112,0.559753
8,1.944,1.978646,0.681943,0.571467,0.586476,0.578296
9,1.8545,1.910843,0.696609,0.590015,0.59349,0.587505
10,1.7572,1.878186,0.703025,0.585649,0.604163,0.592719


[I 2025-03-22 02:10:37,758] Trial 3 pruned. 


Trial 4 with params: {'learning_rate': 0.0008369042894376068, 'weight_decay': 0.001, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.1658,2.348479,0.615032,0.545902,0.517541,0.512972
2,1.8607,1.533984,0.744271,0.634257,0.635014,0.633394
3,1.2832,1.272643,0.797434,0.671249,0.681567,0.675495
4,1.048,1.20996,0.816682,0.698776,0.695333,0.694172
5,0.9116,1.125926,0.813016,0.681037,0.695072,0.687669
6,0.7836,1.050355,0.827681,0.695039,0.707483,0.700052
7,0.6452,1.0303,0.828598,0.695151,0.708518,0.701202
8,0.5557,1.004847,0.835014,0.70271,0.712565,0.706587
9,0.4924,1.000614,0.836847,0.705372,0.713752,0.708309
10,0.4058,0.972183,0.843263,0.875909,0.728619,0.732205


[I 2025-03-22 02:11:27,895] Trial 4 pruned. 


Trial 5 with params: {'learning_rate': 0.0018591820902866042, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7741,1.815635,0.692026,0.594162,0.595409,0.584454
2,1.4004,1.480177,0.751604,0.629308,0.650244,0.63306
3,0.996,1.141581,0.814849,0.687716,0.698551,0.689467
4,0.7673,0.993128,0.835014,0.711643,0.710984,0.708397
5,0.5915,0.94968,0.845096,0.88419,0.729323,0.734604
6,0.4546,0.858969,0.854262,0.860162,0.793235,0.812185
7,0.3034,0.857356,0.857012,0.872803,0.793558,0.816755
8,0.2279,0.854396,0.864345,0.880131,0.819181,0.840158
9,0.1668,0.799076,0.858845,0.874379,0.821955,0.842007
10,0.1192,0.811515,0.864345,0.87824,0.827709,0.846618


[I 2025-03-22 02:13:19,918] Trial 5 finished with value: 0.8510229972146656 and parameters: {'learning_rate': 0.0018591820902866042, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}. Best is trial 5 with value: 0.8510229972146656.


Trial 6 with params: {'learning_rate': 0.0008204643365323959, 'weight_decay': 0.001, 'warmup_steps': 0, 'lambda_param': 1.0, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.136,2.322045,0.610449,0.528897,0.51697,0.512453
2,1.8534,1.546579,0.743355,0.635621,0.635419,0.63328
3,1.2858,1.31121,0.787351,0.663842,0.673365,0.665469
4,1.0578,1.222855,0.814849,0.699391,0.693317,0.692917
5,0.9332,1.120169,0.815765,0.682553,0.696848,0.688998
6,0.7969,1.046115,0.833181,0.695233,0.712172,0.703072
7,0.6631,1.045535,0.825848,0.693153,0.706283,0.698974
8,0.575,1.008964,0.837764,0.704816,0.714603,0.708195
9,0.5093,1.013775,0.833181,0.703009,0.710838,0.705364
10,0.4221,0.973174,0.842346,0.707039,0.718788,0.712737


[I 2025-03-22 02:15:12,001] Trial 6 pruned. 


Trial 7 with params: {'learning_rate': 0.0020690200562805084, 'weight_decay': 0.003, 'warmup_steps': 0, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.644,1.723166,0.714024,0.613389,0.612922,0.604714
2,1.3482,1.389516,0.769936,0.642308,0.665369,0.648379
3,0.9264,1.032186,0.832264,0.70213,0.710823,0.704937
4,0.701,0.941982,0.84418,0.718496,0.717823,0.715158
5,0.5208,0.905908,0.859762,0.860602,0.768133,0.787129
6,0.389,0.831212,0.865261,0.87934,0.817607,0.840071
7,0.2594,0.8358,0.869844,0.887327,0.821302,0.844461
8,0.2001,0.807597,0.872594,0.868916,0.825636,0.841738
9,0.1497,0.819008,0.866178,0.881244,0.827513,0.848277
10,0.113,0.795618,0.866178,0.878945,0.828771,0.847951


[I 2025-03-22 02:17:06,446] Trial 7 finished with value: 0.8505390784495207 and parameters: {'learning_rate': 0.0020690200562805084, 'weight_decay': 0.003, 'warmup_steps': 0, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}. Best is trial 5 with value: 0.8510229972146656.


Trial 8 with params: {'learning_rate': 8.770946743725407e-05, 'weight_decay': 0.005, 'warmup_steps': 0, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.8753,3.707097,0.330889,0.206118,0.242204,0.172058
2,3.6385,3.454203,0.405133,0.197843,0.307013,0.216194
3,3.2699,3.04379,0.458295,0.388562,0.349462,0.300165
4,2.9221,2.757501,0.549038,0.513232,0.445384,0.438809
5,2.6884,2.55106,0.606783,0.512452,0.512572,0.506227
6,2.4908,2.418005,0.603116,0.510559,0.510277,0.499445
7,2.3687,2.305327,0.63428,0.544511,0.534316,0.533889
8,2.2571,2.231668,0.636114,0.5342,0.540862,0.535555
9,2.1803,2.164911,0.637947,0.544551,0.540615,0.538101
10,2.0922,2.131188,0.652612,0.545112,0.557114,0.546899


[I 2025-03-22 02:18:11,075] Trial 8 pruned. 


Trial 9 with params: {'learning_rate': 0.0010568529720322872, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.064,2.183717,0.624198,0.568282,0.525342,0.528146
2,1.7126,1.438073,0.772686,0.658286,0.659548,0.655513
3,1.162,1.230365,0.807516,0.686344,0.688227,0.683641
4,0.9401,1.129039,0.824931,0.700942,0.703222,0.699427
5,0.7927,1.09962,0.825848,0.702464,0.704383,0.700957
6,0.688,0.981265,0.835014,0.703552,0.7129,0.707315
7,0.5378,0.985171,0.830431,0.69611,0.710378,0.702223
8,0.4569,0.959486,0.84143,0.878646,0.736137,0.747217
9,0.3967,0.943524,0.845096,0.88051,0.729332,0.733982
10,0.3078,0.908966,0.856095,0.862992,0.783808,0.805354


[I 2025-03-22 02:19:44,898] Trial 9 finished with value: 0.8473313172542892 and parameters: {'learning_rate': 0.0010568529720322872, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}. Best is trial 5 with value: 0.8510229972146656.


Trial 10 with params: {'learning_rate': 0.003553256925699131, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.47,1.69105,0.729606,0.643723,0.622665,0.622106
2,1.1804,1.35666,0.774519,0.652472,0.668864,0.651352
3,0.7775,0.949401,0.840513,0.709028,0.717639,0.711776
4,0.5211,0.788918,0.871677,0.881184,0.795391,0.819263
5,0.3333,0.733714,0.87901,0.890826,0.829409,0.851854
6,0.2337,0.731706,0.878093,0.887757,0.838618,0.85715
7,0.1327,0.697755,0.87901,0.893657,0.837483,0.859009
8,0.1001,0.726873,0.87626,0.888898,0.846077,0.863522
9,0.0821,0.688666,0.882676,0.893708,0.831272,0.854083
10,0.0713,0.693459,0.880843,0.89205,0.830898,0.853127


[I 2025-03-22 02:21:28,829] Trial 10 finished with value: 0.8642486071510325 and parameters: {'learning_rate': 0.003553256925699131, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 2.0}. Best is trial 10 with value: 0.8642486071510325.


Trial 11 with params: {'learning_rate': 0.0036979694616670403, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.1, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5456,1.787175,0.745188,0.665377,0.629581,0.637659
2,1.2603,1.222683,0.7956,0.66698,0.684248,0.671637
3,0.7606,0.936195,0.84143,0.70764,0.717557,0.7113
4,0.4942,0.808872,0.870761,0.877671,0.7945,0.817128
5,0.3023,0.761989,0.87901,0.886158,0.830947,0.850245
6,0.1815,0.730705,0.87626,0.887237,0.826812,0.848268
7,0.13,0.69077,0.886343,0.896668,0.834881,0.857458
8,0.0994,0.703221,0.871677,0.869967,0.83341,0.847582
9,0.0811,0.682097,0.880843,0.891226,0.830312,0.852566
10,0.0726,0.672842,0.886343,0.894346,0.83471,0.856257


[I 2025-03-22 02:22:49,706] Trial 11 finished with value: 0.8531937327622643 and parameters: {'learning_rate': 0.0036979694616670403, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.1, 'temperature': 2.5}. Best is trial 10 with value: 0.8642486071510325.


Trial 12 with params: {'learning_rate': 0.0019274829133128666, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8361,1.799777,0.697525,0.597386,0.599158,0.588253
2,1.4041,1.458738,0.762603,0.640304,0.65878,0.643111
3,1.0056,1.045404,0.837764,0.701053,0.714805,0.707372
4,0.7565,0.986989,0.83868,0.712972,0.713023,0.709664
5,0.5903,0.902208,0.847846,0.877482,0.732764,0.734738
6,0.4232,0.867809,0.851512,0.858125,0.781475,0.802023
7,0.2852,0.85001,0.862511,0.877612,0.807388,0.829603
8,0.2078,0.776096,0.87626,0.870807,0.828507,0.844409
9,0.1607,0.77225,0.870761,0.885051,0.831832,0.852315
10,0.1133,0.769931,0.867094,0.866862,0.819457,0.837783


[I 2025-03-22 02:23:48,225] Trial 12 pruned. 


Trial 13 with params: {'learning_rate': 0.002354389238739818, 'weight_decay': 0.001, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7143,1.724618,0.716774,0.616441,0.613495,0.606542
2,1.3114,1.440574,0.777269,0.651275,0.670959,0.654448
3,0.9158,1.033363,0.829514,0.698997,0.709691,0.702354
4,0.6728,0.944858,0.839597,0.881236,0.732534,0.745474
5,0.5003,0.826692,0.868011,0.857365,0.802252,0.819919
6,0.3323,0.793575,0.870761,0.881832,0.8231,0.844112
7,0.2157,0.795607,0.874427,0.888582,0.82606,0.847694
8,0.1535,0.760691,0.870761,0.883953,0.822333,0.844844
9,0.1158,0.758599,0.872594,0.88676,0.833362,0.854076
10,0.0908,0.755973,0.868928,0.881317,0.830534,0.850049


[I 2025-03-22 02:25:10,894] Trial 13 finished with value: 0.854005305421063 and parameters: {'learning_rate': 0.002354389238739818, 'weight_decay': 0.001, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 4.5}. Best is trial 10 with value: 0.8642486071510325.


Trial 14 with params: {'learning_rate': 0.002571199242007248, 'weight_decay': 0.001, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.677,1.690744,0.740605,0.645536,0.630772,0.629737
2,1.2861,1.341565,0.784601,0.658908,0.675891,0.661758
3,0.8845,0.982151,0.832264,0.704229,0.710387,0.705224
4,0.6344,0.893379,0.846929,0.719558,0.721152,0.718387
5,0.4608,0.847106,0.865261,0.87192,0.800272,0.822044
6,0.2997,0.803845,0.863428,0.876652,0.817765,0.838906
7,0.1888,0.810177,0.871677,0.885215,0.822975,0.8454
8,0.134,0.78318,0.870761,0.880468,0.822956,0.843446
9,0.0995,0.759764,0.872594,0.886567,0.832713,0.853629
10,0.0865,0.759285,0.866178,0.878103,0.818521,0.840132


[I 2025-03-22 02:27:10,532] Trial 14 finished with value: 0.8564082089254419 and parameters: {'learning_rate': 0.002571199242007248, 'weight_decay': 0.001, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 4.5}. Best is trial 10 with value: 0.8642486071510325.


Trial 15 with params: {'learning_rate': 0.0030217420502757367, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5828,1.741976,0.732356,0.660973,0.617387,0.6291
2,1.2847,1.268359,0.793767,0.668003,0.680827,0.671476
3,0.8419,0.99725,0.829514,0.700105,0.709422,0.702656
4,0.5873,0.85331,0.858845,0.86049,0.767398,0.787086
5,0.4022,0.77291,0.87626,0.880527,0.799242,0.821847
6,0.2395,0.759068,0.869844,0.878745,0.815086,0.835516
7,0.1652,0.756483,0.87901,0.893511,0.828309,0.851705
8,0.1243,0.72657,0.88176,0.892786,0.841892,0.861371
9,0.0952,0.723841,0.87901,0.890973,0.838879,0.859032
10,0.0814,0.703743,0.890926,0.899177,0.848459,0.867839


[I 2025-03-22 02:28:39,481] Trial 15 finished with value: 0.8653670565585027 and parameters: {'learning_rate': 0.0030217420502757367, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 16 with params: {'learning_rate': 0.0038249914852321264, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.493,1.679795,0.737855,0.66416,0.622483,0.632944
2,1.2188,1.395151,0.771769,0.650955,0.666404,0.64878
3,0.7699,0.975859,0.833181,0.705238,0.712529,0.707269
4,0.5144,0.819917,0.868928,0.880938,0.79266,0.817366
5,0.3165,0.789897,0.870761,0.874667,0.805401,0.826067
6,0.1992,0.742775,0.874427,0.886085,0.835197,0.854663
7,0.1371,0.699106,0.87901,0.890697,0.837552,0.858195
8,0.1006,0.715479,0.88176,0.891175,0.840826,0.860158
9,0.0806,0.688904,0.88451,0.895451,0.842194,0.862844
10,0.0718,0.690723,0.883593,0.881677,0.841223,0.857759


[I 2025-03-22 02:30:02,116] Trial 16 finished with value: 0.8557459147932797 and parameters: {'learning_rate': 0.0038249914852321264, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 2.5}. Best is trial 15 with value: 0.8653670565585027.


Trial 17 with params: {'learning_rate': 0.004504578651801021, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.548,1.582783,0.745188,0.683819,0.633149,0.641491
2,1.1438,1.379941,0.777269,0.651691,0.671504,0.652052
3,0.7039,0.893207,0.852429,0.718554,0.726904,0.721698
4,0.4472,0.785661,0.877177,0.871088,0.809171,0.829347
5,0.2591,0.753302,0.875344,0.871803,0.827156,0.84431
6,0.1668,0.74666,0.877177,0.865791,0.837865,0.849556
7,0.1164,0.698574,0.88451,0.894277,0.833299,0.855563
8,0.0872,0.694451,0.885426,0.893991,0.844319,0.863163
9,0.0749,0.673906,0.890009,0.89872,0.847413,0.867114
10,0.0703,0.686149,0.87901,0.888528,0.82885,0.850559


[I 2025-03-22 02:31:39,611] Trial 17 finished with value: 0.8624725563036103 and parameters: {'learning_rate': 0.004504578651801021, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 18 with params: {'learning_rate': 0.0004032865535478877, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.548,2.967965,0.470211,0.377053,0.362241,0.328368
2,2.5115,2.108385,0.670027,0.593909,0.564345,0.572523
3,1.7824,1.601888,0.746104,0.626901,0.638705,0.632473
4,1.4109,1.528552,0.75802,0.654368,0.646674,0.644912
5,1.2569,1.38689,0.767186,0.639858,0.660243,0.648569


[I 2025-03-22 02:32:05,094] Trial 18 pruned. 


Trial 19 with params: {'learning_rate': 0.0023732094051342367, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6708,1.7293,0.703941,0.614775,0.605345,0.596262
2,1.3432,1.385822,0.771769,0.651715,0.664738,0.653028
3,0.9271,1.021424,0.831347,0.699739,0.710249,0.70401
4,0.6744,0.953286,0.83868,0.712489,0.713734,0.709991
5,0.4911,0.861814,0.859762,0.856997,0.769391,0.787001
6,0.3309,0.793573,0.868011,0.882011,0.811537,0.835345
7,0.2255,0.7562,0.877177,0.891127,0.817881,0.84255
8,0.157,0.744501,0.879927,0.89011,0.840209,0.858782
9,0.118,0.736132,0.874427,0.887624,0.834894,0.855219
10,0.096,0.733723,0.874427,0.886334,0.834707,0.854626


[I 2025-03-22 02:33:30,648] Trial 19 finished with value: 0.8603915471215112 and parameters: {'learning_rate': 0.0023732094051342367, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 20 with params: {'learning_rate': 0.0003001697306453937, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.6847,3.212402,0.43538,0.380142,0.329267,0.272506
2,2.8136,2.415873,0.588451,0.537518,0.484577,0.487312
3,2.1522,1.923787,0.691109,0.580984,0.592187,0.584248
4,1.6691,1.688065,0.740605,0.647235,0.627672,0.629901
5,1.4531,1.508724,0.766269,0.646755,0.656737,0.649573


[I 2025-03-22 02:34:04,985] Trial 20 pruned. 


Trial 21 with params: {'learning_rate': 0.004561134508141711, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.4, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.54,1.591421,0.738772,0.679932,0.627225,0.635831
2,1.1476,1.348123,0.780018,0.654763,0.673836,0.655293
3,0.7039,0.93544,0.847846,0.717165,0.722838,0.718271
4,0.454,0.843367,0.864345,0.863742,0.789946,0.811423
5,0.2668,0.732157,0.882676,0.893011,0.832561,0.854315
6,0.1811,0.722073,0.887259,0.879883,0.837213,0.853053
7,0.1271,0.764017,0.877177,0.890471,0.818634,0.842911
8,0.0968,0.715353,0.885426,0.893379,0.835304,0.856129
9,0.0812,0.711358,0.879927,0.887083,0.831427,0.851139
10,0.0735,0.710645,0.882676,0.890248,0.833402,0.853419


[I 2025-03-22 02:35:34,482] Trial 21 finished with value: 0.8510214374385022 and parameters: {'learning_rate': 0.004561134508141711, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.4, 'temperature': 4.5}. Best is trial 15 with value: 0.8653670565585027.


Trial 22 with params: {'learning_rate': 0.0023401745911146386, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7019,1.73218,0.698442,0.595273,0.602764,0.588682
2,1.3511,1.352224,0.781852,0.657229,0.672887,0.660705
3,0.9095,0.993402,0.834097,0.700065,0.711819,0.705266
4,0.6686,0.93546,0.840513,0.713876,0.715447,0.7118
5,0.4806,0.841716,0.864345,0.8751,0.80912,0.83087
6,0.3336,0.751312,0.875344,0.870445,0.827731,0.84367
7,0.2091,0.790497,0.874427,0.88987,0.824863,0.847297
8,0.1588,0.734704,0.874427,0.875362,0.835473,0.851612
9,0.1208,0.748464,0.872594,0.88543,0.823263,0.846022
10,0.0956,0.724584,0.877177,0.886975,0.837231,0.856167


[I 2025-03-22 02:37:07,704] Trial 22 finished with value: 0.853260361629674 and parameters: {'learning_rate': 0.0023401745911146386, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 5.5}. Best is trial 15 with value: 0.8653670565585027.


Trial 23 with params: {'learning_rate': 0.003063348713944044, 'weight_decay': 0.006, 'warmup_steps': 0, 'lambda_param': 0.1, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4757,1.555594,0.757104,0.64945,0.645256,0.641987
2,1.23,1.26969,0.792851,0.663287,0.680717,0.670268
3,0.8421,0.985673,0.837764,0.706733,0.715865,0.709669
4,0.5851,0.885743,0.842346,0.884026,0.743895,0.762978
5,0.4053,0.806834,0.870761,0.876881,0.80436,0.826069
6,0.2411,0.817048,0.868011,0.876944,0.822318,0.84095
7,0.1567,0.718458,0.88451,0.895582,0.834417,0.856542
8,0.1248,0.732306,0.869844,0.870145,0.831808,0.846992
9,0.0932,0.692512,0.887259,0.895766,0.836771,0.857959
10,0.0791,0.678551,0.885426,0.894877,0.834807,0.856597


[I 2025-03-22 02:39:15,334] Trial 23 finished with value: 0.8594956653605509 and parameters: {'learning_rate': 0.003063348713944044, 'weight_decay': 0.006, 'warmup_steps': 0, 'lambda_param': 0.1, 'temperature': 5.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 24 with params: {'learning_rate': 0.004882319945002036, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.4, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4786,1.493866,0.761687,0.654927,0.649191,0.648022
2,1.1515,1.280712,0.787351,0.660564,0.678956,0.6601
3,0.7075,0.971534,0.834097,0.710352,0.713184,0.708728
4,0.4546,0.819645,0.867094,0.877265,0.782517,0.806149
5,0.2499,0.723799,0.878093,0.887219,0.829656,0.850284
6,0.1483,0.702497,0.88176,0.895758,0.829283,0.853673
7,0.1036,0.699556,0.883593,0.895147,0.832575,0.855334
8,0.0857,0.646265,0.890926,0.897682,0.83849,0.859833
9,0.0728,0.64075,0.889093,0.898726,0.837198,0.859647
10,0.0639,0.648418,0.885426,0.895916,0.83361,0.856428


[I 2025-03-22 02:40:54,218] Trial 24 finished with value: 0.8572122845133769 and parameters: {'learning_rate': 0.004882319945002036, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.4, 'temperature': 2.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 25 with params: {'learning_rate': 0.0008631782416548917, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.1777,2.306175,0.619615,0.534599,0.52551,0.51762
2,1.8382,1.533519,0.744271,0.636376,0.635879,0.6341
3,1.2657,1.261262,0.7956,0.669716,0.680552,0.6741
4,1.0347,1.199646,0.817599,0.698662,0.69628,0.694342
5,0.8911,1.122011,0.823098,0.691333,0.702497,0.69606
6,0.7664,1.036978,0.830431,0.697559,0.709636,0.702483
7,0.6247,1.021208,0.827681,0.693452,0.7079,0.700064
8,0.5389,0.998046,0.836847,0.703898,0.714546,0.707777
9,0.4794,1.003076,0.837764,0.70657,0.714826,0.709123
10,0.3928,0.973739,0.839597,0.872469,0.725074,0.728753


[I 2025-03-22 02:41:44,879] Trial 25 pruned. 


Trial 26 with params: {'learning_rate': 0.00036673897334545683, 'weight_decay': 0.003, 'warmup_steps': 0, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.5412,2.995335,0.470211,0.384576,0.361553,0.326831
2,2.5756,2.194972,0.644363,0.569293,0.540254,0.546672
3,1.9011,1.712473,0.732356,0.619635,0.624367,0.620323
4,1.4954,1.585999,0.75802,0.661566,0.645622,0.647062
5,1.3266,1.411447,0.773602,0.645846,0.664136,0.654182


[I 2025-03-22 02:42:09,800] Trial 26 pruned. 


Trial 27 with params: {'learning_rate': 0.004456983089178604, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5312,1.601164,0.743355,0.685924,0.631077,0.64061
2,1.1441,1.315499,0.775435,0.649728,0.670533,0.651335
3,0.6965,0.911393,0.846013,0.711415,0.722413,0.715733
4,0.442,0.788104,0.869844,0.876857,0.79448,0.817335
5,0.2646,0.758419,0.875344,0.871206,0.826821,0.843779
6,0.1566,0.726332,0.878093,0.859756,0.819511,0.834762
7,0.1119,0.695097,0.883593,0.893529,0.832376,0.85462
8,0.0872,0.695189,0.882676,0.893277,0.841135,0.861321
9,0.0768,0.661908,0.890009,0.898982,0.847273,0.867304
10,0.0714,0.677794,0.879927,0.891059,0.839111,0.859126


[I 2025-03-22 02:43:37,098] Trial 27 finished with value: 0.8647906032427356 and parameters: {'learning_rate': 0.004456983089178604, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 28 with params: {'learning_rate': 0.004293263950214244, 'weight_decay': 0.008, 'warmup_steps': 1, 'lambda_param': 0.2, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4186,1.619355,0.752521,0.656949,0.63945,0.641428
2,1.1377,1.205074,0.796517,0.672954,0.684887,0.671705
3,0.7107,0.913532,0.855179,0.722353,0.72948,0.724249
4,0.4463,0.790451,0.872594,0.866443,0.805538,0.825293
5,0.2757,0.713584,0.887259,0.896941,0.845902,0.865328
6,0.1659,0.723773,0.87901,0.877026,0.828828,0.847523
7,0.1102,0.728992,0.882676,0.892784,0.832167,0.853847
8,0.0853,0.700664,0.87901,0.891359,0.838598,0.85896
9,0.0739,0.67846,0.887259,0.882166,0.835792,0.853544
10,0.0681,0.703646,0.88176,0.877448,0.832404,0.849497


[I 2025-03-22 02:44:57,000] Trial 28 finished with value: 0.8607751091259989 and parameters: {'learning_rate': 0.004293263950214244, 'weight_decay': 0.008, 'warmup_steps': 1, 'lambda_param': 0.2, 'temperature': 2.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 29 with params: {'learning_rate': 0.002845513569588676, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5405,1.789897,0.734189,0.657867,0.620875,0.62774
2,1.262,1.219271,0.799267,0.669637,0.685604,0.675566
3,0.8238,1.001541,0.834097,0.704656,0.712626,0.706153
4,0.5958,0.87306,0.850596,0.860549,0.770175,0.791801
5,0.4144,0.78291,0.873511,0.881195,0.80655,0.82956
6,0.2617,0.808565,0.878093,0.889592,0.828916,0.850533
7,0.1764,0.752783,0.874427,0.888596,0.825579,0.848536
8,0.1311,0.768268,0.873511,0.884247,0.82597,0.846897
9,0.0992,0.737946,0.87901,0.890875,0.838768,0.858826
10,0.081,0.732763,0.878093,0.888088,0.828645,0.850235


[I 2025-03-22 02:46:22,885] Trial 29 pruned. 


Trial 30 with params: {'learning_rate': 0.0007243732057988554, 'weight_decay': 0.0, 'warmup_steps': 4, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.3332,2.521359,0.571036,0.560728,0.464007,0.466405
2,2.0687,1.629497,0.744271,0.636459,0.63728,0.635071
3,1.3617,1.384171,0.772686,0.654648,0.660894,0.652912
4,1.1169,1.227666,0.813016,0.695391,0.692976,0.691663
5,0.9783,1.173649,0.813932,0.681258,0.697614,0.687623


[I 2025-03-22 02:46:59,738] Trial 30 pruned. 


Trial 31 with params: {'learning_rate': 0.002351164157089694, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6532,1.691029,0.747938,0.64992,0.63599,0.635946
2,1.2915,1.347022,0.777269,0.649591,0.670642,0.655031
3,0.8768,1.014749,0.832264,0.704813,0.710612,0.705507
4,0.6464,0.90383,0.849679,0.716202,0.724724,0.718458
5,0.4918,0.841127,0.862511,0.86743,0.788714,0.810061
6,0.3298,0.781699,0.872594,0.884868,0.824778,0.84626
7,0.214,0.79451,0.880843,0.894061,0.829708,0.852815
8,0.1565,0.755081,0.874427,0.885165,0.835406,0.854268
9,0.1159,0.763389,0.873511,0.887687,0.833387,0.854435
10,0.0945,0.737634,0.874427,0.875938,0.844287,0.857825


[I 2025-03-22 02:48:34,685] Trial 31 finished with value: 0.8643820544997064 and parameters: {'learning_rate': 0.002351164157089694, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 32 with params: {'learning_rate': 0.0019460937314393334, 'weight_decay': 0.001, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8132,1.800756,0.699358,0.598561,0.600765,0.590183
2,1.3979,1.470683,0.756187,0.634107,0.654116,0.636956
3,0.9892,1.057708,0.824015,0.690586,0.704543,0.696502
4,0.7352,0.978994,0.840513,0.714055,0.715534,0.711786
5,0.5727,0.890141,0.846013,0.879252,0.739629,0.750436
6,0.397,0.846539,0.855179,0.849141,0.79341,0.81136
7,0.2663,0.824833,0.873511,0.884754,0.826055,0.846659
8,0.192,0.777584,0.877177,0.874494,0.83914,0.853067
9,0.1477,0.800576,0.863428,0.877283,0.816354,0.838447
10,0.1119,0.784838,0.865261,0.863866,0.818266,0.835708


[I 2025-03-22 02:49:26,009] Trial 32 pruned. 


Trial 33 with params: {'learning_rate': 0.0027596411800196465, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6537,1.70077,0.72594,0.627817,0.621783,0.617751
2,1.3026,1.341424,0.781852,0.659343,0.672279,0.661631
3,0.8592,0.968454,0.839597,0.706433,0.717007,0.710213
4,0.6032,0.896982,0.857012,0.858961,0.766022,0.784384
5,0.4189,0.80707,0.868011,0.879224,0.820651,0.84145
6,0.28,0.760392,0.871677,0.853211,0.823339,0.835903
7,0.1802,0.776024,0.874427,0.888887,0.825287,0.848261
8,0.1327,0.736581,0.879927,0.893419,0.839712,0.860477
9,0.1001,0.728318,0.873511,0.885213,0.824089,0.846442
10,0.0864,0.723741,0.88176,0.893708,0.839686,0.860646


[I 2025-03-22 02:50:57,336] Trial 33 finished with value: 0.8588383461181608 and parameters: {'learning_rate': 0.0027596411800196465, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 34 with params: {'learning_rate': 0.004246937438067793, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4495,1.645581,0.747021,0.653226,0.634288,0.637125
2,1.1985,1.101838,0.813932,0.687506,0.695903,0.690761
3,0.7155,0.913988,0.84418,0.712741,0.720726,0.714968
4,0.4525,0.780106,0.865261,0.842412,0.791036,0.807686
5,0.2764,0.723298,0.878093,0.886549,0.819389,0.841943
6,0.1645,0.725716,0.88451,0.890596,0.834576,0.854308
7,0.1127,0.700059,0.889093,0.897799,0.836727,0.858972
8,0.0885,0.692798,0.886343,0.894224,0.835385,0.856643
9,0.0762,0.669169,0.886343,0.894555,0.835115,0.85664
10,0.0678,0.675234,0.885426,0.894628,0.834538,0.856358


[I 2025-03-22 02:52:55,768] Trial 34 finished with value: 0.8545702544846298 and parameters: {'learning_rate': 0.004246937438067793, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 15 with value: 0.8653670565585027.


Trial 35 with params: {'learning_rate': 5.817102176211476e-05, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.8, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.9294,3.804008,0.332722,0.206254,0.243565,0.173431
2,3.7397,3.644073,0.437214,0.213903,0.32957,0.25432
3,3.5861,3.425663,0.439963,0.225772,0.331082,0.26623
4,3.3136,3.142843,0.441797,0.382373,0.33578,0.279994
5,3.0801,2.944485,0.472044,0.490155,0.365928,0.327179
6,2.8825,2.794424,0.537122,0.515021,0.432731,0.425284
7,2.7578,2.678471,0.566453,0.519796,0.464324,0.461546
8,2.6564,2.597057,0.608616,0.517284,0.51358,0.507455
9,2.5775,2.524304,0.595784,0.519783,0.496065,0.493873
10,2.5078,2.47689,0.604033,0.506007,0.511384,0.502243


[I 2025-03-22 02:53:49,583] Trial 35 pruned. 


Trial 36 with params: {'learning_rate': 0.0006103845035566807, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.3914,2.688601,0.508708,0.562185,0.397693,0.375358
2,2.2128,1.731585,0.726856,0.617786,0.623179,0.619461
3,1.4419,1.386811,0.770852,0.650698,0.659985,0.652604
4,1.1722,1.297654,0.80385,0.688992,0.68543,0.683793
5,1.0471,1.221855,0.804766,0.677445,0.68963,0.680735


[I 2025-03-22 02:54:17,906] Trial 36 pruned. 


Trial 37 with params: {'learning_rate': 5.431299921217806e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 0, 'lambda_param': 0.4, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.9301,3.812021,0.330889,0.206118,0.242204,0.172058
2,3.7493,3.656371,0.465628,0.233312,0.348709,0.279079
3,3.6141,3.476128,0.450046,0.231234,0.338039,0.273508
4,3.3735,3.205936,0.428964,0.378579,0.324367,0.259433
5,3.1327,2.996451,0.463795,0.551409,0.356149,0.308378
6,2.9361,2.853268,0.514207,0.524035,0.407453,0.391575
7,2.8179,2.738404,0.553621,0.519745,0.449922,0.445816
8,2.7187,2.656349,0.601283,0.51231,0.506127,0.502251
9,2.6442,2.587507,0.577452,0.520445,0.476171,0.474958
10,2.5774,2.536228,0.5967,0.502005,0.503895,0.495964


[I 2025-03-22 02:55:26,437] Trial 37 pruned. 


Trial 38 with params: {'learning_rate': 0.00014198795619548116, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.30000000000000004, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.831,3.63292,0.341888,0.372123,0.250613,0.185341
2,3.3977,3.0504,0.450962,0.366563,0.346026,0.295869
3,2.847,2.5825,0.588451,0.522982,0.487803,0.489369
4,2.4468,2.290669,0.626031,0.522354,0.531346,0.523943
5,2.2027,2.099752,0.655362,0.55196,0.560675,0.553288
6,1.9925,1.953013,0.694775,0.582239,0.593435,0.586537
7,1.8417,1.854746,0.71494,0.613301,0.606868,0.608378
8,1.7091,1.781839,0.721357,0.606301,0.619307,0.611833
9,1.6264,1.715582,0.736939,0.619976,0.631049,0.622358
10,1.5313,1.696143,0.734189,0.610934,0.631148,0.619113


[I 2025-03-22 02:56:24,887] Trial 38 pruned. 


Trial 39 with params: {'learning_rate': 0.00023647740624003471, 'weight_decay': 0.01, 'warmup_steps': 2, 'lambda_param': 0.8, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.7381,3.361891,0.445463,0.249777,0.333358,0.278589
2,2.9981,2.606336,0.567369,0.50808,0.469317,0.465045
3,2.3743,2.185366,0.640697,0.528875,0.554706,0.533157
4,1.9329,1.813645,0.713107,0.601417,0.609171,0.6032
5,1.6664,1.656453,0.747021,0.629725,0.639019,0.631734


[I 2025-03-22 02:56:50,466] Trial 39 pruned. 


Trial 40 with params: {'learning_rate': 0.0027786624907667934, 'weight_decay': 0.0, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.659,1.686337,0.72319,0.625162,0.620402,0.614868
2,1.3121,1.346725,0.780018,0.656613,0.671336,0.659022
3,0.866,0.98799,0.829514,0.700335,0.708535,0.702754
4,0.6079,0.869261,0.859762,0.851983,0.759191,0.774585
5,0.4211,0.806395,0.869844,0.879065,0.822638,0.842456
6,0.2845,0.744503,0.87901,0.888736,0.829333,0.850789
7,0.1756,0.768209,0.879927,0.891725,0.829324,0.851883
8,0.1287,0.720305,0.87626,0.888985,0.826456,0.849417
9,0.0971,0.714718,0.875344,0.887156,0.825514,0.848123
10,0.082,0.696315,0.883593,0.892993,0.83251,0.854407


[I 2025-03-22 02:58:26,116] Trial 40 finished with value: 0.849846441803345 and parameters: {'learning_rate': 0.0027786624907667934, 'weight_decay': 0.0, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 41 with params: {'learning_rate': 0.0031805328653544385, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4969,1.611387,0.753437,0.660674,0.639593,0.644879
2,1.2109,1.497641,0.766269,0.644363,0.664093,0.642677
3,0.808,1.006397,0.830431,0.699358,0.710748,0.703106
4,0.5585,0.853292,0.868928,0.86426,0.802198,0.822198
5,0.3602,0.778925,0.868928,0.879264,0.821477,0.84213
6,0.2169,0.768782,0.873511,0.886391,0.824051,0.846758
7,0.1438,0.742653,0.878093,0.890166,0.827907,0.850521
8,0.1062,0.751763,0.871677,0.883163,0.823498,0.845145
9,0.0863,0.714964,0.878093,0.888376,0.828098,0.849948
10,0.0725,0.718297,0.87901,0.889697,0.829373,0.85097


[I 2025-03-22 02:59:54,702] Trial 41 finished with value: 0.8511263816536966 and parameters: {'learning_rate': 0.0031805328653544385, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 7.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 42 with params: {'learning_rate': 0.004420791233096938, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5304,1.6301,0.751604,0.683178,0.633091,0.645563
2,1.1972,1.319382,0.784601,0.660681,0.675578,0.661247
3,0.7215,0.914463,0.856095,0.890906,0.73834,0.743218
4,0.4396,0.79393,0.867094,0.874213,0.792694,0.814519
5,0.2615,0.767583,0.872594,0.870525,0.815566,0.83521
6,0.1621,0.710837,0.882676,0.867454,0.832332,0.846573
7,0.1132,0.691995,0.887259,0.897399,0.835745,0.858297
8,0.0898,0.691174,0.879927,0.891311,0.839774,0.859624
9,0.0771,0.662477,0.886343,0.895547,0.845342,0.864584
10,0.0703,0.66427,0.889093,0.898501,0.846627,0.866608


[I 2025-03-22 03:02:13,428] Trial 42 finished with value: 0.8653135901599289 and parameters: {'learning_rate': 0.004420791233096938, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 5.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 43 with params: {'learning_rate': 0.002073610294025963, 'weight_decay': 0.001, 'warmup_steps': 3, 'lambda_param': 0.4, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7501,1.78437,0.697525,0.602266,0.599753,0.58812
2,1.3635,1.444351,0.766269,0.641125,0.662152,0.645408
3,0.9366,1.027667,0.831347,0.701081,0.709645,0.703568
4,0.697,0.94795,0.842346,0.713454,0.717482,0.712918
5,0.5325,0.872884,0.857929,0.887781,0.749575,0.759932
6,0.3792,0.810618,0.866178,0.849292,0.811173,0.825087
7,0.2512,0.823405,0.868011,0.885551,0.820137,0.842953
8,0.1773,0.764807,0.870761,0.885186,0.82299,0.845487
9,0.132,0.770265,0.870761,0.885585,0.83184,0.852638
10,0.1023,0.760844,0.868011,0.881043,0.829597,0.849386


[I 2025-03-22 03:03:08,658] Trial 43 pruned. 


Trial 44 with params: {'learning_rate': 0.004823477148877309, 'weight_decay': 0.002, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4088,1.467805,0.769019,0.656699,0.657945,0.655464
2,1.1026,1.190453,0.805683,0.671911,0.69327,0.677667
3,0.6613,0.865865,0.864345,0.895025,0.772475,0.792952
4,0.4052,0.82063,0.865261,0.879068,0.808375,0.83123
5,0.2412,0.718026,0.882676,0.893501,0.832418,0.854446
6,0.1454,0.726128,0.879927,0.890077,0.830125,0.851667
7,0.1113,0.689796,0.890009,0.896975,0.838335,0.859465
8,0.0831,0.642919,0.890009,0.899507,0.845955,0.866736
9,0.0724,0.646584,0.886343,0.894497,0.834557,0.856321
10,0.0669,0.657188,0.88451,0.881405,0.842725,0.85822


[I 2025-03-22 03:04:41,841] Trial 44 finished with value: 0.8570853454826013 and parameters: {'learning_rate': 0.004823477148877309, 'weight_decay': 0.002, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 6.5}. Best is trial 15 with value: 0.8653670565585027.


Trial 45 with params: {'learning_rate': 0.0031170638231037117, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5821,1.690842,0.729606,0.652414,0.616054,0.626239
2,1.2868,1.348964,0.772686,0.655144,0.666136,0.653032
3,0.8505,0.997901,0.831347,0.701558,0.711206,0.704536
4,0.5878,0.860249,0.850596,0.8617,0.768914,0.792135
5,0.3888,0.803595,0.868928,0.876334,0.803308,0.825382
6,0.2655,0.812137,0.861595,0.850592,0.827366,0.836286
7,0.1683,0.753943,0.877177,0.890121,0.837005,0.857451
8,0.1234,0.75651,0.878093,0.888467,0.838762,0.857288
9,0.0907,0.7166,0.883593,0.894369,0.842235,0.862248
10,0.0764,0.721864,0.878093,0.889526,0.837764,0.857542


[I 2025-03-22 03:06:29,969] Trial 45 finished with value: 0.8606360524193984 and parameters: {'learning_rate': 0.0031170638231037117, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 5.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 46 with params: {'learning_rate': 0.0036674761347565868, 'weight_decay': 0.001, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4707,1.727836,0.741522,0.659375,0.627989,0.632534
2,1.1991,1.218462,0.791934,0.665107,0.681047,0.667606
3,0.7528,0.95759,0.832264,0.701618,0.711186,0.704883
4,0.4933,0.818069,0.867094,0.876667,0.792055,0.815288
5,0.3004,0.768774,0.868011,0.878264,0.820623,0.841315
6,0.183,0.7896,0.878093,0.885394,0.839808,0.856044
7,0.1385,0.69354,0.888176,0.898736,0.845458,0.865729
8,0.1009,0.729633,0.880843,0.891747,0.840518,0.860127
9,0.0863,0.706463,0.88176,0.892815,0.8409,0.860965
10,0.0771,0.707362,0.88176,0.88099,0.840616,0.857033


[I 2025-03-22 03:08:29,080] Trial 46 finished with value: 0.8608005064232649 and parameters: {'learning_rate': 0.0036674761347565868, 'weight_decay': 0.001, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 6.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 47 with params: {'learning_rate': 0.00017209337253776082, 'weight_decay': 0.007, 'warmup_steps': 3, 'lambda_param': 0.9, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.8045,3.568075,0.389551,0.313468,0.287853,0.242926
2,3.2465,2.872011,0.490376,0.489735,0.386315,0.354802
3,2.6613,2.404885,0.612282,0.508466,0.519815,0.509835
4,2.2446,2.114437,0.646196,0.548394,0.550507,0.545238
5,1.9995,1.944438,0.698442,0.586419,0.599193,0.589758


[I 2025-03-22 03:08:52,763] Trial 47 pruned. 


Trial 48 with params: {'learning_rate': 0.0031258669943988048, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6335,1.737315,0.728689,0.633419,0.621557,0.618603
2,1.2801,1.309528,0.771769,0.654417,0.665859,0.650534
3,0.841,0.950505,0.840513,0.708492,0.7166,0.712089
4,0.5632,0.819118,0.868928,0.90479,0.765674,0.784931
5,0.3751,0.793858,0.873511,0.87389,0.833546,0.84989
6,0.2467,0.753399,0.87626,0.886973,0.837867,0.856514
7,0.1549,0.71977,0.880843,0.893267,0.839941,0.860335
8,0.1139,0.744478,0.872594,0.887003,0.834271,0.854777
9,0.0901,0.734283,0.87626,0.889461,0.835785,0.856752
10,0.0785,0.707385,0.87901,0.890501,0.838895,0.858872


[I 2025-03-22 03:10:18,077] Trial 48 finished with value: 0.8632284202614405 and parameters: {'learning_rate': 0.0031258669943988048, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 2.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 49 with params: {'learning_rate': 0.0022836790925798655, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6936,1.762162,0.704858,0.607882,0.605621,0.596727
2,1.3523,1.286198,0.781852,0.662109,0.670745,0.662498
3,0.9177,0.995988,0.83593,0.703616,0.713264,0.70772
4,0.666,0.922627,0.839597,0.713466,0.715113,0.711763
5,0.481,0.878773,0.862511,0.86996,0.789179,0.811216
6,0.3254,0.798347,0.864345,0.877034,0.819414,0.839794
7,0.2194,0.826634,0.870761,0.889984,0.821717,0.844902
8,0.168,0.734766,0.88176,0.890653,0.83235,0.853336
9,0.1195,0.733355,0.870761,0.884084,0.821899,0.844649
10,0.0958,0.73493,0.871677,0.882342,0.824076,0.845066


[I 2025-03-22 03:11:20,212] Trial 49 pruned. 


Trial 50 with params: {'learning_rate': 0.0021133792752108674, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 1.0, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7019,1.714476,0.703025,0.602435,0.604393,0.596079
2,1.3403,1.435361,0.769936,0.642909,0.665231,0.64888
3,0.927,1.039844,0.836847,0.70476,0.713759,0.707477
4,0.7063,0.953073,0.84143,0.716444,0.715193,0.713012
5,0.5326,0.881793,0.858845,0.833157,0.750199,0.758982
6,0.3769,0.795087,0.864345,0.865161,0.817627,0.835792
7,0.2381,0.837041,0.864345,0.882204,0.817169,0.839667
8,0.1859,0.780547,0.878093,0.889316,0.837677,0.857314
9,0.1323,0.793039,0.868928,0.882914,0.820741,0.843381
10,0.1014,0.784812,0.868928,0.872732,0.839539,0.85368


[I 2025-03-22 03:12:40,433] Trial 50 finished with value: 0.8607360325973866 and parameters: {'learning_rate': 0.0021133792752108674, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 1.0, 'temperature': 6.5}. Best is trial 15 with value: 0.8653670565585027.


Trial 51 with params: {'learning_rate': 0.003962068467048509, 'weight_decay': 0.003, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5571,1.695554,0.725023,0.653566,0.615286,0.623206
2,1.2027,1.314392,0.784601,0.663793,0.676143,0.661225
3,0.783,0.947573,0.84143,0.710668,0.719991,0.713387
4,0.4989,0.793052,0.863428,0.870293,0.780298,0.802681
5,0.3014,0.804616,0.872594,0.882645,0.824383,0.844948
6,0.1954,0.737711,0.878093,0.889306,0.827794,0.850237
7,0.1258,0.71042,0.879927,0.891891,0.829973,0.852343
8,0.0933,0.706249,0.88451,0.892733,0.842994,0.861982
9,0.0764,0.691464,0.883593,0.893072,0.842302,0.861705
10,0.0696,0.692204,0.883593,0.893807,0.842442,0.862092


[I 2025-03-22 03:14:00,506] Trial 51 finished with value: 0.8583120299968598 and parameters: {'learning_rate': 0.003962068467048509, 'weight_decay': 0.003, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 3.0}. Best is trial 15 with value: 0.8653670565585027.


Trial 52 with params: {'learning_rate': 0.001807671879704312, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8438,1.824639,0.693859,0.593705,0.595919,0.584332
2,1.4166,1.519865,0.748854,0.628668,0.648475,0.630296
3,1.0205,1.054245,0.831347,0.696139,0.709812,0.70233
4,0.7746,0.991558,0.839597,0.71262,0.714583,0.711193
5,0.6091,0.935293,0.837764,0.707679,0.715019,0.709681
6,0.4482,0.847187,0.857929,0.858208,0.777453,0.796051
7,0.3034,0.853524,0.862511,0.876591,0.807464,0.829533
8,0.2173,0.79562,0.877177,0.88606,0.829418,0.849263
9,0.1612,0.794412,0.864345,0.878646,0.817241,0.839612
10,0.1182,0.80263,0.867094,0.866356,0.819843,0.83756


[I 2025-03-22 03:14:59,350] Trial 52 pruned. 


Trial 53 with params: {'learning_rate': 0.004269637593700472, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5184,1.539519,0.756187,0.660793,0.64431,0.645718
2,1.1735,1.3419,0.779102,0.657193,0.674661,0.653765
3,0.755,0.935578,0.850596,0.883554,0.735257,0.738036
4,0.4859,0.811945,0.863428,0.874104,0.77921,0.802699
5,0.2864,0.750509,0.878093,0.884849,0.830619,0.849456
6,0.1743,0.716376,0.889093,0.898613,0.836589,0.859306
7,0.118,0.770438,0.877177,0.895102,0.827551,0.851746
8,0.0937,0.708686,0.87901,0.890417,0.838895,0.8582
9,0.0759,0.678952,0.888176,0.89867,0.845556,0.866078
10,0.068,0.666143,0.888176,0.897559,0.845839,0.865698


[I 2025-03-22 03:16:24,806] Trial 53 finished with value: 0.8696635593759803 and parameters: {'learning_rate': 0.004269637593700472, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 3.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 54 with params: {'learning_rate': 0.004918579351042261, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5118,1.499107,0.762603,0.67454,0.646521,0.654797
2,1.1594,1.179821,0.794684,0.663669,0.683743,0.669711
3,0.6707,0.853099,0.856095,0.721234,0.729806,0.724735
4,0.4058,0.790062,0.872594,0.880996,0.795884,0.819341
5,0.2354,0.757247,0.870761,0.886948,0.821511,0.845474
6,0.154,0.697295,0.879927,0.875058,0.831413,0.847761
7,0.107,0.674951,0.887259,0.898773,0.835871,0.858749
8,0.0875,0.664287,0.888176,0.89829,0.846158,0.866406
9,0.0758,0.671833,0.887259,0.896876,0.836209,0.8583
10,0.0694,0.681525,0.888176,0.89652,0.83684,0.8585


[I 2025-03-22 03:17:42,467] Trial 54 finished with value: 0.8593819554733656 and parameters: {'learning_rate': 0.004918579351042261, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 55 with params: {'learning_rate': 7.242888062473813e-05, 'weight_decay': 0.001, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.9134,3.761281,0.333639,0.206322,0.244245,0.174113
2,3.697,3.575175,0.3978,0.190599,0.301971,0.202637
3,3.4454,3.236063,0.411549,0.369391,0.310985,0.24144
4,3.1074,2.940335,0.47846,0.520226,0.372256,0.339133
5,2.8821,2.737098,0.539872,0.517308,0.43552,0.4269


[I 2025-03-22 03:18:18,857] Trial 55 pruned. 


Trial 56 with params: {'learning_rate': 0.001185299123481456, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4, 'lambda_param': 0.6000000000000001, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.027,2.028092,0.657195,0.588626,0.556494,0.561049
2,1.61,1.385314,0.778185,0.665358,0.66295,0.662832
3,1.1216,1.203257,0.811182,0.686383,0.692186,0.686415
4,0.8985,1.095812,0.832264,0.710315,0.708353,0.706497
5,0.7562,1.051769,0.831347,0.703403,0.708775,0.704611
6,0.6375,0.942572,0.834097,0.701988,0.712428,0.706543
7,0.4833,0.944901,0.837764,0.868855,0.7341,0.74277
8,0.3857,0.926301,0.846013,0.851793,0.76732,0.786912
9,0.327,0.91157,0.850596,0.86476,0.797683,0.819098
10,0.2428,0.913547,0.857012,0.870119,0.812058,0.832315


[I 2025-03-22 03:19:08,499] Trial 56 pruned. 


Trial 57 with params: {'learning_rate': 0.004652661703208274, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5367,1.52635,0.754354,0.678114,0.640607,0.647627
2,1.1527,1.224537,0.79835,0.66748,0.687172,0.67196
3,0.6933,0.870982,0.850596,0.716412,0.725204,0.719892
4,0.4308,0.776335,0.871677,0.877981,0.795678,0.818333
5,0.2571,0.737263,0.877177,0.886713,0.818971,0.841421
6,0.1514,0.723114,0.877177,0.873622,0.828972,0.845599
7,0.1086,0.686889,0.88451,0.896816,0.833183,0.85642
8,0.0858,0.701077,0.878093,0.890097,0.836954,0.857626
9,0.0739,0.680867,0.880843,0.892449,0.839813,0.860181
10,0.0692,0.690585,0.877177,0.887757,0.837259,0.856501


[I 2025-03-22 03:20:36,265] Trial 57 finished with value: 0.8589461944305167 and parameters: {'learning_rate': 0.004652661703208274, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 2.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 58 with params: {'learning_rate': 0.0030419165866669372, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.2, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5366,1.75444,0.734189,0.66343,0.620421,0.628765
2,1.2478,1.239911,0.802933,0.671795,0.689485,0.678405
3,0.7958,0.981997,0.835014,0.711447,0.711796,0.708377
4,0.5715,0.848342,0.861595,0.83782,0.788456,0.803491
5,0.371,0.804127,0.868011,0.876451,0.803128,0.825306
6,0.2487,0.778057,0.87626,0.883924,0.829632,0.847973
7,0.1584,0.747505,0.874427,0.889072,0.824454,0.847652
8,0.1136,0.732364,0.879927,0.866452,0.829699,0.844748
9,0.0915,0.709654,0.87901,0.890493,0.828371,0.850971
10,0.0773,0.712467,0.880843,0.891467,0.830236,0.852573


[I 2025-03-22 03:21:29,528] Trial 58 pruned. 


Trial 59 with params: {'learning_rate': 0.0035970927117697864, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4546,1.651078,0.734189,0.653089,0.626653,0.626899
2,1.1673,1.328688,0.780018,0.655255,0.672948,0.656898
3,0.7537,0.938452,0.83593,0.704312,0.714764,0.708089
4,0.5043,0.770771,0.868928,0.879432,0.792874,0.817167
5,0.3351,0.742981,0.878093,0.886906,0.828742,0.849611
6,0.2003,0.761431,0.880843,0.891111,0.831287,0.8528
7,0.1328,0.734712,0.874427,0.886837,0.825461,0.847799
8,0.1045,0.702822,0.883593,0.884185,0.841813,0.859179
9,0.0834,0.691115,0.880843,0.891622,0.839858,0.859864
10,0.0741,0.665495,0.882676,0.892887,0.842216,0.861717


[I 2025-03-22 03:23:26,665] Trial 59 finished with value: 0.8640192431260177 and parameters: {'learning_rate': 0.0035970927117697864, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 60 with params: {'learning_rate': 0.00046762991988506683, 'weight_decay': 0.01, 'warmup_steps': 0, 'lambda_param': 0.30000000000000004, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4468,2.804359,0.509624,0.549811,0.399105,0.382004
2,2.3907,1.934027,0.693859,0.597548,0.592515,0.593434
3,1.6273,1.574817,0.741522,0.63327,0.63836,0.623505
4,1.3122,1.406429,0.776352,0.665422,0.662496,0.660078
5,1.1783,1.308108,0.780018,0.654621,0.669879,0.660183


[I 2025-03-22 03:23:52,763] Trial 60 pruned. 


Trial 61 with params: {'learning_rate': 0.0022911208666678705, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.681,1.733833,0.705775,0.611466,0.60543,0.595849
2,1.3165,1.321386,0.783685,0.659683,0.673764,0.663566
3,0.9029,1.004162,0.828598,0.701176,0.707114,0.701891
4,0.6625,0.928459,0.849679,0.722495,0.722655,0.720186
5,0.4926,0.841534,0.868928,0.874209,0.79361,0.815893
6,0.3394,0.814918,0.870761,0.88419,0.822264,0.844503
7,0.2228,0.825119,0.871677,0.887632,0.822228,0.845507
8,0.1655,0.75563,0.878093,0.885256,0.830028,0.849294
9,0.1274,0.768895,0.873511,0.88663,0.824665,0.847144
10,0.0963,0.76358,0.873511,0.883365,0.824836,0.845717


[I 2025-03-22 03:24:43,303] Trial 61 pruned. 


Trial 62 with params: {'learning_rate': 0.002107824841770204, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7312,1.747327,0.702108,0.602226,0.603085,0.593384
2,1.35,1.384852,0.771769,0.647231,0.664754,0.651547
3,0.923,1.010496,0.831347,0.702755,0.709305,0.703781
4,0.6902,0.946343,0.843263,0.714705,0.717327,0.713381
5,0.5259,0.856838,0.864345,0.851419,0.763706,0.7774
6,0.3645,0.830671,0.859762,0.854659,0.797113,0.815939
7,0.2401,0.810158,0.871677,0.885913,0.822622,0.845184
8,0.1706,0.760537,0.877177,0.889428,0.827628,0.850103
9,0.1266,0.756654,0.868011,0.883228,0.819166,0.84272
10,0.1007,0.755947,0.870761,0.881776,0.822579,0.84391


[I 2025-03-22 03:25:35,033] Trial 62 pruned. 


Trial 63 with params: {'learning_rate': 0.004976859785939093, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.2, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4276,1.58106,0.750687,0.653418,0.639675,0.639411
2,1.1248,1.180006,0.807516,0.677413,0.694124,0.68148
3,0.6644,0.86258,0.856095,0.889015,0.756774,0.773612
4,0.3974,0.834336,0.864345,0.880824,0.789546,0.813737
5,0.237,0.70475,0.877177,0.890362,0.83633,0.857415
6,0.1396,0.673285,0.890009,0.900625,0.837415,0.86059
7,0.104,0.686064,0.88176,0.894571,0.831387,0.854305
8,0.0863,0.644176,0.890009,0.897298,0.838327,0.859549
9,0.0735,0.635207,0.888176,0.898166,0.836395,0.85899
10,0.0638,0.646207,0.890009,0.899799,0.837824,0.860351


[I 2025-03-22 03:26:55,444] Trial 63 finished with value: 0.8611618845713339 and parameters: {'learning_rate': 0.004976859785939093, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.2, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 64 with params: {'learning_rate': 0.003794022270756288, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4926,1.738518,0.739688,0.666586,0.623373,0.633655
2,1.2252,1.433515,0.771769,0.65041,0.66669,0.648163
3,0.7721,0.964332,0.837764,0.705091,0.715917,0.708819
4,0.5136,0.818619,0.870761,0.87864,0.784654,0.80805
5,0.3213,0.765542,0.87626,0.878916,0.80987,0.830423
6,0.2033,0.706805,0.882676,0.891593,0.84268,0.861112
7,0.1265,0.736953,0.878093,0.895163,0.836728,0.858948
8,0.1009,0.725402,0.874427,0.8861,0.834919,0.854398
9,0.0792,0.685479,0.885426,0.896272,0.84359,0.863851
10,0.0714,0.689527,0.888176,0.897102,0.845748,0.865593


[I 2025-03-22 03:28:37,828] Trial 64 finished with value: 0.8667043263785673 and parameters: {'learning_rate': 0.003794022270756288, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 65 with params: {'learning_rate': 0.0027543605758172263, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 1.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6562,1.684668,0.725023,0.626918,0.620971,0.617327
2,1.3062,1.332181,0.786434,0.662808,0.675957,0.665174
3,0.8558,0.96544,0.843263,0.712047,0.718954,0.714064
4,0.6108,0.894258,0.857929,0.893592,0.757045,0.775213
5,0.4294,0.804084,0.865261,0.875915,0.818846,0.839137
6,0.2912,0.775939,0.868928,0.882229,0.821935,0.843594
7,0.1822,0.768166,0.877177,0.890028,0.827059,0.849836
8,0.1309,0.732194,0.87901,0.890061,0.838916,0.858584
9,0.1031,0.754208,0.870761,0.88613,0.830116,0.852071
10,0.0887,0.719589,0.878093,0.891171,0.837319,0.858277


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-22 03:30:08,524] Trial 65 finished with value: 0.856199981269698 and parameters: {'learning_rate': 0.0027543605758172263, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 1.0, 'temperature': 4.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 66 with params: {'learning_rate': 0.001311246174769201, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.9, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9569,1.90341,0.693859,0.596288,0.594111,0.590457
2,1.5046,1.423728,0.764436,0.645573,0.656004,0.646853
3,1.0958,1.204919,0.802016,0.677927,0.686203,0.677199
4,0.8847,1.087145,0.825848,0.703851,0.703904,0.701025
5,0.7502,1.019008,0.835014,0.703507,0.711073,0.706608


[I 2025-03-22 03:31:01,721] Trial 66 pruned. 


Trial 67 with params: {'learning_rate': 0.0038787664916892163, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.9, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4928,1.56468,0.756187,0.663623,0.641639,0.6457
2,1.1827,1.33431,0.781852,0.658086,0.674897,0.657761
3,0.7357,0.952517,0.84143,0.710896,0.718603,0.713775
4,0.5035,0.840716,0.865261,0.881155,0.789123,0.815218
5,0.3114,0.769972,0.875344,0.885129,0.827461,0.848149
6,0.1936,0.715885,0.880843,0.878444,0.831022,0.849033
7,0.1349,0.709357,0.878093,0.89202,0.828388,0.851392
8,0.0973,0.714555,0.879927,0.891804,0.839058,0.859178
9,0.08,0.688573,0.88451,0.885249,0.841747,0.859653
10,0.0707,0.702876,0.882676,0.893477,0.842146,0.861794


[I 2025-03-22 03:32:23,684] Trial 67 finished with value: 0.8601834967838865 and parameters: {'learning_rate': 0.0038787664916892163, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.9, 'temperature': 3.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 68 with params: {'learning_rate': 0.0027307382109337916, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.8, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5914,1.77311,0.716774,0.635082,0.608132,0.613996
2,1.2891,1.353299,0.770852,0.651773,0.664163,0.651132
3,0.8631,0.982964,0.839597,0.710362,0.716408,0.71253
4,0.6101,0.888758,0.846929,0.852217,0.75642,0.776022
5,0.4222,0.82428,0.864345,0.873933,0.799313,0.822244
6,0.2814,0.779022,0.868011,0.880396,0.820259,0.84221
7,0.1777,0.808725,0.870761,0.885829,0.832367,0.852378
8,0.1363,0.761712,0.871677,0.884515,0.843069,0.859678
9,0.1068,0.759783,0.867094,0.881988,0.828603,0.848989
10,0.0878,0.716189,0.873511,0.885755,0.834929,0.854505


[I 2025-03-22 03:33:53,034] Trial 68 finished with value: 0.8588622308201476 and parameters: {'learning_rate': 0.0027307382109337916, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.8, 'temperature': 2.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 69 with params: {'learning_rate': 0.000602120422227975, 'weight_decay': 0.005, 'warmup_steps': 0, 'lambda_param': 0.7000000000000001, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.3325,2.636766,0.527039,0.531497,0.416508,0.396
2,2.1913,1.726076,0.72594,0.620504,0.622519,0.61982
3,1.4473,1.464133,0.766269,0.652765,0.656595,0.646602
4,1.1888,1.28993,0.802016,0.687185,0.68362,0.682179
5,1.0579,1.215639,0.804766,0.678343,0.689366,0.681227


[I 2025-03-22 03:34:19,051] Trial 69 pruned. 


Trial 70 with params: {'learning_rate': 0.0022886785562179417, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6729,1.754422,0.714024,0.624109,0.6123,0.60841
2,1.3539,1.401689,0.770852,0.650929,0.664184,0.651461
3,0.9389,1.033024,0.836847,0.704686,0.714747,0.708524
4,0.6938,0.953915,0.839597,0.714369,0.713791,0.710903
5,0.5017,0.863389,0.859762,0.857323,0.768935,0.786783
6,0.3429,0.837317,0.864345,0.878441,0.809017,0.832303
7,0.2353,0.772135,0.873511,0.889427,0.814384,0.839603
8,0.168,0.741814,0.87901,0.888671,0.840176,0.858285
9,0.1298,0.758825,0.872594,0.8878,0.8333,0.854402
10,0.0966,0.739312,0.872594,0.884592,0.833486,0.853187


[I 2025-03-22 03:35:39,360] Trial 70 pruned. 


Trial 71 with params: {'learning_rate': 0.0029532398393299986, 'weight_decay': 0.008, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5645,1.733178,0.741522,0.663542,0.626829,0.635158
2,1.2529,1.358894,0.790101,0.661766,0.679979,0.665682
3,0.823,0.954724,0.84143,0.706294,0.718944,0.711123
4,0.5775,0.852818,0.858845,0.860344,0.794048,0.815672
5,0.3871,0.788294,0.874427,0.886479,0.825508,0.847642
6,0.2649,0.767657,0.873511,0.88487,0.825271,0.846837
7,0.1616,0.754911,0.87901,0.892079,0.828612,0.85177
8,0.117,0.740373,0.880843,0.891125,0.831031,0.852811
9,0.091,0.713948,0.87901,0.889589,0.83004,0.851624
10,0.0774,0.71898,0.879927,0.892805,0.829882,0.853002


[I 2025-03-22 03:36:28,713] Trial 71 pruned. 


Trial 72 with params: {'learning_rate': 0.002631688033385067, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.7000000000000001, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5977,1.679991,0.731439,0.639826,0.622402,0.624151
2,1.2857,1.398679,0.770852,0.646793,0.665408,0.64951
3,0.8663,0.998617,0.829514,0.700897,0.708846,0.703302
4,0.6272,0.910268,0.843263,0.885304,0.735913,0.749643
5,0.4392,0.855586,0.857012,0.86947,0.79371,0.817072
6,0.2965,0.788839,0.867094,0.877189,0.821178,0.840836
7,0.1935,0.770988,0.877177,0.893874,0.836394,0.857988
8,0.1382,0.770924,0.861595,0.878281,0.825538,0.845859
9,0.1015,0.74697,0.873511,0.8886,0.834343,0.855311
10,0.0843,0.727478,0.87626,0.889595,0.836899,0.857332


[I 2025-03-22 03:37:41,549] Trial 72 finished with value: 0.8588447910580982 and parameters: {'learning_rate': 0.002631688033385067, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.7000000000000001, 'temperature': 4.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 73 with params: {'learning_rate': 0.002756114783490923, 'weight_decay': 0.001, 'warmup_steps': 0, 'lambda_param': 1.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5495,1.628958,0.743355,0.653629,0.631519,0.637092
2,1.2431,1.444386,0.765353,0.643413,0.662498,0.643425
3,0.857,0.997091,0.835014,0.706057,0.712946,0.707853
4,0.6102,0.875976,0.851512,0.856226,0.761526,0.781486
5,0.44,0.850856,0.865261,0.869502,0.801137,0.820966
6,0.2979,0.807076,0.869844,0.881717,0.833277,0.851054
7,0.1867,0.781419,0.878093,0.88719,0.820234,0.841894
8,0.1441,0.76441,0.871677,0.881825,0.834248,0.851682
9,0.106,0.747038,0.87626,0.888472,0.835765,0.856144
10,0.0885,0.724743,0.87626,0.876273,0.836622,0.852713


[I 2025-03-22 03:38:35,260] Trial 73 pruned. 


Trial 74 with params: {'learning_rate': 0.000347802741623925, 'weight_decay': 0.009000000000000001, 'warmup_steps': 1, 'lambda_param': 0.2, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.591,3.098372,0.454629,0.387525,0.347589,0.308151
2,2.655,2.23561,0.629698,0.553293,0.529062,0.533867
3,1.9665,1.773381,0.722273,0.613773,0.614821,0.611459
4,1.5477,1.594633,0.762603,0.663052,0.64887,0.650051
5,1.3744,1.44521,0.769019,0.651608,0.659124,0.652886


[I 2025-03-22 03:39:00,753] Trial 74 pruned. 


Trial 75 with params: {'learning_rate': 0.004129616753006163, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5229,1.555585,0.750687,0.658415,0.63915,0.642139
2,1.1709,1.448188,0.769936,0.653164,0.66862,0.644277
3,0.7579,0.94524,0.842346,0.711406,0.719378,0.713877
4,0.4896,0.789713,0.871677,0.87922,0.786227,0.809042
5,0.2854,0.755497,0.88176,0.886378,0.83344,0.851643
6,0.1685,0.727715,0.879927,0.891514,0.829618,0.852259
7,0.1236,0.690753,0.878093,0.892318,0.828574,0.851764
8,0.0872,0.706825,0.879927,0.892061,0.83934,0.859533
9,0.0744,0.693722,0.880843,0.891989,0.839691,0.859961
10,0.067,0.687771,0.885426,0.896675,0.843889,0.864341


[I 2025-03-22 03:40:17,879] Trial 75 finished with value: 0.8517949040300503 and parameters: {'learning_rate': 0.004129616753006163, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 76 with params: {'learning_rate': 0.004137763398497745, 'weight_decay': 0.004, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4221,1.396784,0.784601,0.667234,0.671416,0.666626
2,1.1862,1.232335,0.797434,0.679109,0.68333,0.676716
3,0.7392,0.932485,0.84418,0.884055,0.72944,0.735423
4,0.467,0.811505,0.862511,0.874937,0.78853,0.812595
5,0.2757,0.740293,0.87626,0.888556,0.827504,0.84975
6,0.164,0.743624,0.873511,0.884451,0.824485,0.846078
7,0.1163,0.721648,0.88176,0.895865,0.831394,0.854599
8,0.0904,0.697782,0.885426,0.897175,0.84332,0.864204
9,0.0767,0.677259,0.883593,0.894858,0.842396,0.862708
10,0.0705,0.666875,0.888176,0.899036,0.845793,0.866412


[I 2025-03-22 03:42:17,120] Trial 76 finished with value: 0.8673350569957926 and parameters: {'learning_rate': 0.004137763398497745, 'weight_decay': 0.004, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 3.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 77 with params: {'learning_rate': 0.00311551382970065, 'weight_decay': 0.004, 'warmup_steps': 4, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6226,1.754547,0.738772,0.645173,0.627883,0.626946
2,1.2724,1.271614,0.781852,0.659197,0.67361,0.659359
3,0.8292,0.96703,0.839597,0.708229,0.716444,0.711226
4,0.5566,0.828256,0.863428,0.894361,0.771519,0.792052
5,0.3867,0.777919,0.870761,0.881508,0.822816,0.844069
6,0.2523,0.749063,0.875344,0.884621,0.837274,0.855021
7,0.1583,0.760173,0.87901,0.893347,0.838103,0.85912
8,0.1223,0.741499,0.88176,0.892644,0.840835,0.860833
9,0.0917,0.750027,0.878093,0.891949,0.836656,0.858284
10,0.0796,0.724726,0.883593,0.895353,0.841425,0.862457


[I 2025-03-22 03:43:48,738] Trial 77 finished with value: 0.862557359653608 and parameters: {'learning_rate': 0.00311551382970065, 'weight_decay': 0.004, 'warmup_steps': 4, 'lambda_param': 0.1, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 78 with params: {'learning_rate': 0.0042248547572275395, 'weight_decay': 0.002, 'warmup_steps': 1, 'lambda_param': 0.9, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4472,1.628242,0.750687,0.656583,0.637016,0.640129
2,1.1958,1.14482,0.811182,0.682045,0.69503,0.686345
3,0.7161,0.901426,0.849679,0.712812,0.725489,0.718259
4,0.4512,0.817131,0.868011,0.86227,0.792477,0.8137
5,0.2747,0.730943,0.879927,0.879434,0.829717,0.84877
6,0.1656,0.727771,0.885426,0.893188,0.83449,0.855306
7,0.1131,0.703579,0.883593,0.893927,0.831549,0.85442
8,0.0915,0.709177,0.880843,0.864876,0.831067,0.844565
9,0.0782,0.660135,0.888176,0.895535,0.836161,0.857717
10,0.0676,0.682463,0.88176,0.890544,0.8316,0.852725


[I 2025-03-22 03:44:40,237] Trial 78 pruned. 


Trial 79 with params: {'learning_rate': 0.004393013150424973, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5202,1.496609,0.747938,0.653725,0.637828,0.63961
2,1.1585,1.275858,0.786434,0.661916,0.679629,0.660478
3,0.7501,0.886492,0.849679,0.710614,0.726352,0.717374
4,0.4637,0.795036,0.868011,0.873894,0.783788,0.805943
5,0.2745,0.751038,0.88176,0.892857,0.831931,0.85356
6,0.1614,0.739859,0.874427,0.887591,0.824776,0.847704
7,0.1165,0.716973,0.88176,0.891528,0.831924,0.853455
8,0.0887,0.697987,0.88176,0.892725,0.84113,0.861091
9,0.0738,0.684736,0.882676,0.893714,0.840755,0.861313
10,0.0655,0.687594,0.883593,0.894515,0.843082,0.862908


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-22 03:46:35,229] Trial 79 finished with value: 0.8645484364402587 and parameters: {'learning_rate': 0.004393013150424973, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 80 with params: {'learning_rate': 0.004618353749318537, 'weight_decay': 0.006, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4913,1.490867,0.75527,0.653383,0.644026,0.643696
2,1.1575,1.0947,0.814849,0.685405,0.698481,0.689147
3,0.7031,0.943474,0.845096,0.713876,0.721852,0.715598
4,0.4551,0.815047,0.868011,0.875253,0.783626,0.806026
5,0.2674,0.744938,0.870761,0.879623,0.823771,0.843605
6,0.1712,0.720226,0.885426,0.898751,0.832156,0.856782
7,0.1097,0.686937,0.88176,0.891867,0.83121,0.853235
8,0.0828,0.664043,0.890009,0.89977,0.845998,0.866709
9,0.0751,0.65943,0.887259,0.897917,0.834474,0.857669
10,0.065,0.658328,0.882676,0.890589,0.832489,0.853381


[I 2025-03-22 03:47:29,847] Trial 80 pruned. 


Trial 81 with params: {'learning_rate': 0.004879264919431821, 'weight_decay': 0.003, 'warmup_steps': 4, 'lambda_param': 0.1, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4744,1.491247,0.744271,0.630661,0.639282,0.631612
2,1.1095,1.176902,0.807516,0.6779,0.693586,0.6805
3,0.6895,0.927503,0.843263,0.709002,0.720943,0.713617
4,0.4256,0.774651,0.873511,0.889711,0.815312,0.840212
5,0.2388,0.747817,0.880843,0.89104,0.820969,0.844557
6,0.1489,0.709898,0.88451,0.897357,0.84166,0.863353
7,0.1051,0.699178,0.88451,0.895196,0.833951,0.856063
8,0.0776,0.67854,0.882676,0.893516,0.841615,0.861659
9,0.0685,0.679072,0.88176,0.891679,0.831895,0.853672
10,0.0635,0.673135,0.890009,0.900321,0.84766,0.867999


[I 2025-03-22 03:48:55,878] Trial 81 finished with value: 0.8572972192745011 and parameters: {'learning_rate': 0.004879264919431821, 'weight_decay': 0.003, 'warmup_steps': 4, 'lambda_param': 0.1, 'temperature': 2.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 82 with params: {'learning_rate': 0.0027450028063711494, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5957,1.753255,0.713107,0.632371,0.605031,0.610443
2,1.2898,1.342288,0.773602,0.653114,0.666281,0.653365
3,0.8562,0.982224,0.83593,0.707665,0.713705,0.709435
4,0.6104,0.874578,0.852429,0.863078,0.769369,0.79228
5,0.4216,0.818795,0.862511,0.872166,0.798064,0.820789
6,0.2777,0.79214,0.870761,0.883256,0.823007,0.844879
7,0.1779,0.790845,0.872594,0.885554,0.824095,0.846046
8,0.1339,0.756429,0.873511,0.887697,0.835333,0.855477
9,0.103,0.747233,0.868928,0.883691,0.830061,0.850758
10,0.0849,0.724191,0.874427,0.888676,0.835183,0.855864


[I 2025-03-22 03:50:27,787] Trial 82 finished with value: 0.8564430879239352 and parameters: {'learning_rate': 0.0027450028063711494, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 5.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 83 with params: {'learning_rate': 0.004388495968130781, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.2, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5218,1.511442,0.749771,0.658403,0.639434,0.641565
2,1.1677,1.189034,0.802933,0.672807,0.690147,0.676044
3,0.7333,0.91798,0.854262,0.884826,0.738922,0.740744
4,0.4689,0.806145,0.860678,0.870102,0.777416,0.800152
5,0.2794,0.731044,0.880843,0.891034,0.83203,0.853111
6,0.16,0.725978,0.880843,0.890841,0.829999,0.852103
7,0.108,0.701583,0.886343,0.89612,0.835158,0.857233
8,0.0861,0.672448,0.885426,0.893618,0.845074,0.863344
9,0.073,0.648857,0.886343,0.896194,0.844611,0.86437
10,0.0657,0.664782,0.882676,0.89332,0.842079,0.861815


[I 2025-03-22 03:51:48,748] Trial 83 finished with value: 0.8617702139133154 and parameters: {'learning_rate': 0.004388495968130781, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.2, 'temperature': 2.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 84 with params: {'learning_rate': 0.003477564222262651, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4543,1.493923,0.76352,0.666411,0.648163,0.653073
2,1.1715,1.399697,0.76352,0.644554,0.661155,0.640679
3,0.7743,0.997646,0.828598,0.696195,0.709597,0.700243
4,0.54,0.853181,0.865261,0.880755,0.788542,0.814378
5,0.3481,0.762024,0.875344,0.884289,0.818055,0.84031
6,0.211,0.784164,0.872594,0.86208,0.807587,0.824636
7,0.1491,0.730027,0.88451,0.898302,0.841925,0.863537
8,0.1044,0.70991,0.87901,0.889962,0.837577,0.857805
9,0.0848,0.69394,0.879927,0.880423,0.838097,0.855517
10,0.0732,0.706369,0.879927,0.892596,0.837841,0.859178


[I 2025-03-22 03:53:06,108] Trial 84 finished with value: 0.8584305832938884 and parameters: {'learning_rate': 0.003477564222262651, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 85 with params: {'learning_rate': 0.00028731625417467325, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.6659,3.202211,0.43813,0.382813,0.331227,0.275504
2,2.8231,2.429939,0.583868,0.536034,0.479983,0.481946
3,2.1854,1.986556,0.666361,0.562446,0.573441,0.560749
4,1.7229,1.696184,0.743355,0.642521,0.63074,0.632118
5,1.4851,1.540065,0.758937,0.639236,0.651174,0.643042


[I 2025-03-22 03:53:33,538] Trial 85 pruned. 


Trial 86 with params: {'learning_rate': 0.003054562510458105, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5811,1.728783,0.735105,0.660835,0.619378,0.630747
2,1.28,1.291925,0.789184,0.663357,0.677507,0.667286
3,0.836,0.982805,0.825848,0.697902,0.706389,0.700073
4,0.5807,0.856482,0.858845,0.866263,0.777331,0.798936
5,0.3928,0.79399,0.868928,0.876781,0.80286,0.825441
6,0.2372,0.777276,0.868928,0.876465,0.804063,0.82564
7,0.17,0.800488,0.869844,0.887104,0.822193,0.845116
8,0.1247,0.812621,0.867094,0.882173,0.830188,0.849469
9,0.0971,0.722761,0.878093,0.888979,0.838381,0.857636
10,0.0806,0.703191,0.885426,0.896339,0.843674,0.864034


[I 2025-03-22 03:55:02,216] Trial 86 finished with value: 0.862629497941514 and parameters: {'learning_rate': 0.003054562510458105, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 87 with params: {'learning_rate': 0.0035449227176150935, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4592,1.618566,0.734189,0.651309,0.626489,0.627298
2,1.1658,1.321165,0.779102,0.653973,0.671607,0.655456
3,0.7482,0.94811,0.83868,0.709714,0.716632,0.711367
4,0.5045,0.798793,0.869844,0.879652,0.793549,0.817468
5,0.3195,0.76876,0.873511,0.879112,0.807113,0.828959
6,0.1957,0.748263,0.882676,0.891864,0.832529,0.853727
7,0.1309,0.694742,0.885426,0.899432,0.83314,0.857252
8,0.1027,0.724787,0.877177,0.889936,0.837628,0.857849
9,0.0802,0.690718,0.877177,0.889406,0.827485,0.850103
10,0.0715,0.695911,0.87901,0.890383,0.839271,0.858981


[I 2025-03-22 03:56:25,483] Trial 87 finished with value: 0.8613902725010606 and parameters: {'learning_rate': 0.0035449227176150935, 'weight_decay': 0.005, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 88 with params: {'learning_rate': 0.0018305530049133063, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.848,1.822404,0.698442,0.597001,0.60034,0.591076
2,1.4226,1.482069,0.75802,0.637725,0.65407,0.639559
3,1.0189,1.06043,0.825848,0.692101,0.706174,0.698278
4,0.7666,0.990418,0.83868,0.711355,0.713773,0.709968
5,0.6053,0.928686,0.842346,0.877647,0.727792,0.731665


[I 2025-03-22 03:56:54,758] Trial 88 pruned. 


Trial 89 with params: {'learning_rate': 0.0041503464447007095, 'weight_decay': 0.001, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4204,1.409456,0.773602,0.661319,0.662534,0.658716
2,1.1644,1.21894,0.8011,0.676807,0.686941,0.678464
3,0.731,0.968435,0.83868,0.878912,0.735261,0.74687
4,0.4715,0.863249,0.862511,0.88398,0.796033,0.82312
5,0.2892,0.740771,0.875344,0.882799,0.808746,0.831652
6,0.1633,0.732008,0.880843,0.881497,0.839168,0.856325
7,0.1181,0.682621,0.887259,0.901028,0.844683,0.866388
8,0.0867,0.657861,0.888176,0.886837,0.846028,0.862617
9,0.0763,0.675288,0.886343,0.89904,0.843915,0.865326
10,0.0706,0.680199,0.879927,0.881839,0.839708,0.856848


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-22 03:59:24,479] Trial 89 finished with value: 0.8626363439449395 and parameters: {'learning_rate': 0.0041503464447007095, 'weight_decay': 0.001, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 2.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 90 with params: {'learning_rate': 0.000928277511187833, 'weight_decay': 0.01, 'warmup_steps': 3, 'lambda_param': 0.4, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.1466,2.224167,0.621448,0.54033,0.528394,0.522316
2,1.7809,1.485382,0.759853,0.653338,0.646675,0.646791
3,1.2164,1.297044,0.792851,0.670786,0.677424,0.669675
4,1.0024,1.174238,0.821265,0.701117,0.700223,0.698141
5,0.8591,1.093639,0.822181,0.689154,0.702174,0.695358
6,0.7301,1.026537,0.830431,0.700166,0.709313,0.703204
7,0.5922,1.015583,0.833181,0.697819,0.712509,0.704395
8,0.5076,0.997189,0.842346,0.709269,0.719522,0.712461
9,0.4424,0.986427,0.840513,0.709488,0.716624,0.7114
10,0.3552,0.952765,0.846929,0.877954,0.731525,0.734499


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-22 04:00:43,954] Trial 90 pruned. 


Trial 91 with params: {'learning_rate': 0.0035630445368327814, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5144,1.547869,0.737855,0.653681,0.63172,0.632598
2,1.205,1.23969,0.791934,0.672534,0.679341,0.670532
3,0.8019,0.945072,0.843263,0.71178,0.720339,0.715027
4,0.5191,0.851049,0.858845,0.851123,0.785946,0.804752
5,0.3292,0.779289,0.875344,0.886088,0.826383,0.847777
6,0.2168,0.714122,0.877177,0.888022,0.837143,0.856667
7,0.1454,0.719546,0.87626,0.89047,0.83544,0.856632
8,0.1043,0.713167,0.879927,0.892816,0.838326,0.859383
9,0.088,0.693782,0.883593,0.893369,0.842088,0.861907
10,0.0726,0.687879,0.88176,0.892071,0.840508,0.860334


[I 2025-03-22 04:02:07,440] Trial 91 finished with value: 0.8633157081886079 and parameters: {'learning_rate': 0.0035630445368327814, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 92 with params: {'learning_rate': 0.0049330631717856436, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4076,1.429891,0.772686,0.655347,0.66233,0.657064
2,1.1006,1.125026,0.812099,0.681828,0.697189,0.685251
3,0.6409,0.872545,0.852429,0.884887,0.745715,0.756039
4,0.3974,0.82947,0.872594,0.880386,0.80567,0.827855
5,0.2392,0.734727,0.87901,0.890811,0.829113,0.851583
6,0.1504,0.711577,0.87901,0.889217,0.838498,0.85776
7,0.1049,0.677888,0.883593,0.891742,0.83282,0.853965
8,0.0818,0.657152,0.890009,0.898707,0.847647,0.867125
9,0.073,0.666887,0.891842,0.899317,0.839452,0.86111
10,0.0674,0.659019,0.890009,0.899303,0.84638,0.866815


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-22 04:03:55,502] Trial 92 finished with value: 0.858681816345998 and parameters: {'learning_rate': 0.0049330631717856436, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 4.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 93 with params: {'learning_rate': 0.0011907000057052623, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 1.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9732,1.987277,0.675527,0.586371,0.57671,0.572159
2,1.5745,1.475335,0.757104,0.645572,0.649872,0.640262
3,1.1317,1.225715,0.802016,0.679086,0.685855,0.677334
4,0.9129,1.106359,0.817599,0.699003,0.696775,0.694517
5,0.7723,1.06326,0.835014,0.708733,0.711616,0.708026


[I 2025-03-22 04:04:21,223] Trial 93 pruned. 


Trial 94 with params: {'learning_rate': 0.002523529242557662, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6719,1.644207,0.721357,0.619306,0.619226,0.614325
2,1.3223,1.373694,0.775435,0.654017,0.66861,0.655129
3,0.891,0.993874,0.833181,0.703793,0.710811,0.705395
4,0.6394,0.923335,0.842346,0.878414,0.7282,0.731621
5,0.4517,0.835024,0.856095,0.865756,0.794279,0.81572
6,0.3179,0.767136,0.87626,0.885934,0.828738,0.848923
7,0.1995,0.741973,0.87626,0.889126,0.836002,0.856363
8,0.1372,0.745801,0.883593,0.892989,0.843226,0.861808
9,0.1098,0.742003,0.870761,0.884966,0.831297,0.852142
10,0.0884,0.746105,0.88176,0.892925,0.840534,0.860865


[I 2025-03-22 04:05:41,439] Trial 94 finished with value: 0.8567436902334863 and parameters: {'learning_rate': 0.002523529242557662, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 2.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 95 with params: {'learning_rate': 0.00033622652480271855, 'weight_decay': 0.0, 'warmup_steps': 0, 'lambda_param': 0.4, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.5762,3.094284,0.451879,0.387475,0.345319,0.306791
2,2.6738,2.257186,0.627864,0.546045,0.52832,0.529895
3,2.0091,1.82782,0.71494,0.610649,0.609256,0.604535
4,1.5903,1.627193,0.751604,0.652292,0.638829,0.639528
5,1.4049,1.468189,0.762603,0.647853,0.65292,0.647593


[I 2025-03-22 04:06:23,750] Trial 95 pruned. 


Trial 96 with params: {'learning_rate': 5.399635979922363e-05, 'weight_decay': 0.0, 'warmup_steps': 3, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.9407,3.824696,0.331806,0.206186,0.242884,0.172746
2,3.7588,3.663832,0.456462,0.23083,0.340861,0.272744
3,3.6243,3.491061,0.452796,0.235683,0.339658,0.276907
4,3.3896,3.219161,0.421632,0.375879,0.318715,0.253498
5,3.1413,3.00173,0.464711,0.546382,0.357296,0.310886
6,2.9409,2.857039,0.51604,0.533923,0.409978,0.396068
7,2.8217,2.740646,0.555454,0.522392,0.452542,0.44983
8,2.7203,2.656196,0.600367,0.508191,0.505756,0.500654
9,2.6426,2.585411,0.578368,0.520651,0.477366,0.476286
10,2.5746,2.533714,0.598533,0.502796,0.506174,0.497403


[I 2025-03-22 04:07:13,885] Trial 96 pruned. 


Trial 97 with params: {'learning_rate': 0.0027204033866009628, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6517,1.70342,0.721357,0.62512,0.617374,0.613279
2,1.3078,1.33617,0.787351,0.66407,0.676341,0.666328
3,0.8596,0.991477,0.834097,0.704329,0.711718,0.706099
4,0.6071,0.899754,0.852429,0.852185,0.763419,0.780312
5,0.4317,0.811833,0.863428,0.851043,0.817575,0.830873
6,0.2936,0.7481,0.878093,0.8872,0.829597,0.850201
7,0.1867,0.7803,0.868928,0.887151,0.829637,0.851265
8,0.1398,0.750705,0.877177,0.887824,0.83858,0.85723
9,0.1069,0.729957,0.875344,0.889448,0.834761,0.856068
10,0.0861,0.711048,0.87901,0.890702,0.838027,0.858425


[I 2025-03-22 04:08:41,554] Trial 97 finished with value: 0.8599700529744464 and parameters: {'learning_rate': 0.0027204033866009628, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 98 with params: {'learning_rate': 0.00270435102690381, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5862,1.765136,0.71769,0.638253,0.610059,0.615755
2,1.2945,1.410689,0.771769,0.65014,0.665962,0.650668
3,0.859,1.010981,0.824931,0.700449,0.705692,0.700523
4,0.6191,0.871469,0.853346,0.849514,0.75336,0.769597
5,0.4413,0.841002,0.857929,0.866398,0.795127,0.816547
6,0.2937,0.776444,0.871677,0.882317,0.823813,0.84473
7,0.1814,0.796756,0.871677,0.885947,0.833131,0.852865
8,0.1381,0.797996,0.864345,0.879331,0.827307,0.847362
9,0.1091,0.771544,0.869844,0.884884,0.829907,0.851342
10,0.0873,0.751819,0.871677,0.884741,0.831774,0.852327


[I 2025-03-22 04:09:46,056] Trial 98 pruned. 


Trial 99 with params: {'learning_rate': 0.0013050271933577405, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9474,1.94344,0.67461,0.574388,0.579952,0.570344
2,1.5472,1.402789,0.759853,0.644595,0.652318,0.643834
3,1.1221,1.183699,0.807516,0.682925,0.690564,0.68262
4,0.8969,1.098663,0.826764,0.707441,0.70341,0.701922
5,0.7395,1.056188,0.831347,0.708465,0.7079,0.70531
6,0.6041,0.947434,0.84143,0.712493,0.717089,0.713356
7,0.4504,0.91595,0.842346,0.831379,0.746235,0.758666
8,0.3585,0.899488,0.857012,0.865414,0.795071,0.815388
9,0.2902,0.89031,0.849679,0.845113,0.814996,0.826932
10,0.2086,0.889698,0.854262,0.869039,0.819319,0.83782


[I 2025-03-22 04:10:38,459] Trial 99 pruned. 


Trial 100 with params: {'learning_rate': 0.0034031574712767424, 'weight_decay': 0.007, 'warmup_steps': 3, 'lambda_param': 0.7000000000000001, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5463,1.608743,0.748854,0.664848,0.636725,0.64137
2,1.2124,1.259926,0.797434,0.668462,0.68541,0.671641
3,0.7554,0.945393,0.84143,0.711888,0.718562,0.713014
4,0.5117,0.814356,0.868011,0.860751,0.793269,0.813503
5,0.3299,0.763636,0.871677,0.86995,0.823752,0.841444
6,0.2189,0.745248,0.87626,0.886024,0.827994,0.848325
7,0.1428,0.721079,0.87626,0.889951,0.826659,0.849681
8,0.1023,0.733808,0.868928,0.880417,0.831768,0.849719
9,0.0814,0.700092,0.875344,0.888876,0.835839,0.85636
10,0.0709,0.694246,0.875344,0.887599,0.835682,0.855636


[I 2025-03-22 04:12:01,874] Trial 100 finished with value: 0.8590142433810947 and parameters: {'learning_rate': 0.0034031574712767424, 'weight_decay': 0.007, 'warmup_steps': 3, 'lambda_param': 0.7000000000000001, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 101 with params: {'learning_rate': 0.0010002091975377788, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.128,2.191744,0.63703,0.562513,0.540886,0.541357
2,1.7375,1.47724,0.759853,0.651302,0.648121,0.646853
3,1.1958,1.278527,0.79835,0.676684,0.68128,0.674132
4,0.9656,1.143266,0.827681,0.705739,0.704401,0.702323
5,0.8256,1.06664,0.827681,0.695192,0.706217,0.700349


[I 2025-03-22 04:12:29,157] Trial 101 pruned. 


Trial 102 with params: {'learning_rate': 0.0023526348990344915, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.2, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7199,1.721353,0.713107,0.611505,0.610807,0.602947
2,1.3183,1.423359,0.777269,0.649986,0.670342,0.654108
3,0.9143,1.027433,0.835014,0.703374,0.713361,0.706597
4,0.6728,0.944438,0.84143,0.881503,0.734641,0.746649
5,0.4976,0.829389,0.865261,0.854967,0.800239,0.817659
6,0.3422,0.780053,0.874427,0.885318,0.82556,0.847092
7,0.2205,0.8009,0.870761,0.885008,0.822501,0.844339
8,0.1546,0.763267,0.87626,0.887736,0.826783,0.848845
9,0.1149,0.746609,0.872594,0.886121,0.833547,0.853881
10,0.0923,0.745207,0.871677,0.884005,0.823258,0.845386


[I 2025-03-22 04:13:29,034] Trial 102 pruned. 


Trial 103 with params: {'learning_rate': 0.00027009583847554473, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.9, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.6988,3.241829,0.441797,0.387874,0.33334,0.276307
2,2.8729,2.472888,0.588451,0.539004,0.483477,0.48236
3,2.2525,2.036215,0.672777,0.562725,0.579976,0.567336
4,1.7903,1.724732,0.737855,0.63395,0.62615,0.626305
5,1.5418,1.576396,0.762603,0.642306,0.65308,0.645586


[I 2025-03-22 04:13:59,372] Trial 103 pruned. 


Trial 104 with params: {'learning_rate': 0.002405485837885476, 'weight_decay': 0.006, 'warmup_steps': 4, 'lambda_param': 0.1, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7084,1.698741,0.729606,0.630046,0.622941,0.618507
2,1.3027,1.4141,0.778185,0.650976,0.670991,0.655342
3,0.9085,1.03189,0.828598,0.696266,0.708724,0.700543
4,0.6626,0.944727,0.842346,0.882961,0.73538,0.747916
5,0.4847,0.837694,0.864345,0.871208,0.799634,0.82097
6,0.3404,0.776343,0.872594,0.883075,0.82479,0.845728
7,0.2144,0.785717,0.869844,0.884559,0.822041,0.844358
8,0.1532,0.769811,0.871677,0.88166,0.824649,0.844838
9,0.1157,0.761877,0.869844,0.885461,0.829947,0.851505
10,0.094,0.740745,0.869844,0.880569,0.821802,0.843082


[I 2025-03-22 04:14:47,221] Trial 104 pruned. 


Trial 105 with params: {'learning_rate': 0.0001220905192290103, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.8408,3.659234,0.332722,0.206254,0.243565,0.173431
2,3.4898,3.183692,0.412466,0.364196,0.312382,0.234381
3,2.976,2.727649,0.539872,0.520776,0.432867,0.42284
4,2.5921,2.425446,0.615032,0.51857,0.51956,0.515502
5,2.3574,2.242087,0.628781,0.528104,0.535068,0.527107


[I 2025-03-22 04:15:17,333] Trial 105 pruned. 


Trial 106 with params: {'learning_rate': 0.00016644555832767357, 'weight_decay': 0.0, 'warmup_steps': 0, 'lambda_param': 0.30000000000000004, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.7849,3.555194,0.390467,0.321387,0.288393,0.242013
2,3.2441,2.888488,0.488543,0.493068,0.383673,0.35149
3,2.6888,2.437384,0.610449,0.50914,0.518393,0.508906
4,2.2816,2.147455,0.641613,0.542566,0.547076,0.540703
5,2.0381,1.978265,0.697525,0.585715,0.598993,0.589654
6,1.839,1.822021,0.716774,0.599232,0.614133,0.605811
7,1.6865,1.732646,0.731439,0.625688,0.620655,0.621471
8,1.5471,1.664226,0.746104,0.632682,0.640513,0.634515
9,1.4856,1.611813,0.750687,0.634947,0.643702,0.635356
10,1.3894,1.57665,0.756187,0.632684,0.648919,0.63983


[I 2025-03-22 04:16:04,481] Trial 106 pruned. 


Trial 107 with params: {'learning_rate': 0.00289927115065357, 'weight_decay': 0.008, 'warmup_steps': 0, 'lambda_param': 0.5, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5238,1.609532,0.742438,0.659985,0.632696,0.635053
2,1.2041,1.447418,0.766269,0.643831,0.663637,0.643973
3,0.8347,1.005257,0.834097,0.7028,0.712872,0.70592
4,0.5865,0.865743,0.855179,0.863442,0.773077,0.794929
5,0.4226,0.835577,0.863428,0.859428,0.818032,0.833306
6,0.2745,0.804446,0.870761,0.869293,0.823667,0.840443
7,0.1774,0.749333,0.879927,0.891891,0.829643,0.851958
8,0.1265,0.759305,0.874427,0.884661,0.835634,0.854157
9,0.0984,0.717686,0.879927,0.891264,0.838804,0.859029
10,0.0835,0.733779,0.87626,0.886827,0.827119,0.848698


[I 2025-03-22 04:16:51,509] Trial 107 pruned. 


Trial 108 with params: {'learning_rate': 0.0035089167007803135, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.507,1.514361,0.76077,0.661122,0.648113,0.650688
2,1.1801,1.52892,0.754354,0.63764,0.657401,0.628869
3,0.786,0.961356,0.846013,0.714146,0.723011,0.716746
4,0.5195,0.853738,0.868011,0.863619,0.792109,0.813803
5,0.3353,0.766592,0.870761,0.879129,0.823326,0.843085
6,0.2034,0.754866,0.869844,0.874089,0.796117,0.817178
7,0.1421,0.734852,0.877177,0.890357,0.836005,0.856962
8,0.1083,0.736272,0.875344,0.888829,0.835195,0.85598
9,0.0848,0.700306,0.878093,0.890134,0.827498,0.85047
10,0.074,0.698282,0.87901,0.889468,0.828876,0.850975


[I 2025-03-22 04:17:54,699] Trial 108 pruned. 


Trial 109 with params: {'learning_rate': 0.003427270677812233, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4805,1.496992,0.76352,0.6679,0.651722,0.652158
2,1.1745,1.465159,0.767186,0.645841,0.667112,0.641291
3,0.7764,0.960724,0.835014,0.705264,0.713774,0.707896
4,0.4991,0.848959,0.853346,0.844134,0.780992,0.799103
5,0.3224,0.772466,0.867094,0.877589,0.819928,0.840679
6,0.1885,0.759791,0.87626,0.889866,0.835457,0.856544
7,0.1277,0.711698,0.877177,0.892601,0.836201,0.85793
8,0.0979,0.732193,0.877177,0.891572,0.836838,0.85809
9,0.0796,0.699714,0.880843,0.894386,0.839324,0.86068
10,0.0678,0.702694,0.878093,0.891014,0.837073,0.857938


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-22 04:19:25,793] Trial 109 finished with value: 0.8600089244246871 and parameters: {'learning_rate': 0.003427270677812233, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 2.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 110 with params: {'learning_rate': 0.004810437624051852, 'weight_decay': 0.002, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4266,1.42088,0.761687,0.644199,0.655193,0.646765
2,1.0884,1.214411,0.790101,0.664338,0.680102,0.665871
3,0.6715,0.855763,0.862511,0.892786,0.763115,0.779047
4,0.4045,0.769112,0.870761,0.875183,0.795677,0.816786
5,0.2328,0.721334,0.882676,0.891297,0.82382,0.846662
6,0.14,0.706573,0.883593,0.89359,0.8328,0.854591
7,0.0986,0.693776,0.883593,0.893403,0.824373,0.847728
8,0.0797,0.65539,0.887259,0.896689,0.846415,0.865612
9,0.0717,0.653024,0.88451,0.893045,0.825178,0.848102
10,0.0654,0.646995,0.886343,0.880076,0.826657,0.84606


[I 2025-03-22 04:20:17,960] Trial 110 pruned. 


Trial 111 with params: {'learning_rate': 0.003825710787815062, 'weight_decay': 0.001, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4613,1.536539,0.75527,0.654475,0.644549,0.644523
2,1.1868,1.298085,0.790101,0.658968,0.68222,0.664035
3,0.7819,0.961601,0.847846,0.714141,0.72377,0.717028
4,0.5079,0.792565,0.866178,0.87101,0.782382,0.803799
5,0.3099,0.756292,0.87901,0.889658,0.820198,0.843225
6,0.1905,0.772588,0.879927,0.888821,0.830267,0.850995
7,0.1261,0.74123,0.885426,0.897143,0.834547,0.857266
8,0.0963,0.717472,0.87901,0.889314,0.8383,0.857477
9,0.0789,0.698864,0.887259,0.897612,0.835909,0.858539
10,0.0708,0.698408,0.882676,0.893673,0.841241,0.861361


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-22 04:22:25,108] Trial 111 finished with value: 0.8526990125864785 and parameters: {'learning_rate': 0.003825710787815062, 'weight_decay': 0.001, 'warmup_steps': 4, 'lambda_param': 0.30000000000000004, 'temperature': 2.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 112 with params: {'learning_rate': 0.0039449366221562055, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5021,1.591226,0.75802,0.668208,0.643299,0.648865
2,1.1804,1.234553,0.791934,0.663686,0.681383,0.666981
3,0.7222,0.907042,0.849679,0.718285,0.723741,0.720109
4,0.4585,0.785341,0.872594,0.877601,0.796394,0.818773
5,0.2827,0.741522,0.872594,0.885028,0.834476,0.853882
6,0.1842,0.71674,0.87901,0.886153,0.810879,0.833908
7,0.1279,0.708949,0.88176,0.896584,0.840343,0.861935
8,0.0959,0.699475,0.885426,0.896129,0.842735,0.863256
9,0.0816,0.678512,0.885426,0.896838,0.843289,0.864135
10,0.0722,0.673194,0.87901,0.892637,0.83785,0.859162


[I 2025-03-22 04:24:15,313] Trial 112 finished with value: 0.8638269260333337 and parameters: {'learning_rate': 0.0039449366221562055, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 113 with params: {'learning_rate': 0.003972199689997071, 'weight_decay': 0.006, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5416,1.586904,0.747021,0.656837,0.635816,0.638361
2,1.1746,1.390657,0.772686,0.649847,0.669687,0.647758
3,0.7624,0.946818,0.840513,0.710565,0.718004,0.71242
4,0.5028,0.81013,0.867094,0.875825,0.782894,0.805801
5,0.3004,0.747242,0.880843,0.887916,0.831846,0.851818
6,0.186,0.714803,0.882676,0.894197,0.831334,0.854302
7,0.1235,0.721515,0.877177,0.890871,0.827278,0.850511
8,0.0947,0.700841,0.882676,0.89379,0.840924,0.861243
9,0.0779,0.691106,0.883593,0.894266,0.842035,0.862287
10,0.0703,0.691411,0.882676,0.893715,0.841268,0.861608


[I 2025-03-22 04:25:41,589] Trial 113 finished with value: 0.8526645432862852 and parameters: {'learning_rate': 0.003972199689997071, 'weight_decay': 0.006, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 4.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 114 with params: {'learning_rate': 0.004742348929339175, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5251,1.529631,0.748854,0.660585,0.634391,0.64128
2,1.1778,1.305571,0.782768,0.657226,0.674497,0.657948
3,0.6984,0.860168,0.858845,0.724452,0.731385,0.726952
4,0.412,0.798819,0.865261,0.872799,0.790444,0.812394
5,0.2362,0.727581,0.875344,0.886656,0.826153,0.848235
6,0.1529,0.716481,0.879927,0.892688,0.840558,0.860497
7,0.1032,0.703845,0.882676,0.895294,0.831982,0.855137
8,0.0896,0.646703,0.890009,0.898732,0.847394,0.867214
9,0.0759,0.644998,0.886343,0.896214,0.83532,0.857467
10,0.0682,0.638559,0.889093,0.89759,0.83741,0.859265


[I 2025-03-22 04:27:00,463] Trial 114 finished with value: 0.8589239612290073 and parameters: {'learning_rate': 0.004742348929339175, 'weight_decay': 0.002, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 5.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 115 with params: {'learning_rate': 0.002498660315898887, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6869,1.662835,0.718607,0.613649,0.617408,0.610503
2,1.3325,1.375061,0.770852,0.648233,0.665381,0.651162
3,0.8951,0.997817,0.83593,0.703099,0.713279,0.706913
4,0.6505,0.937187,0.84143,0.712117,0.718417,0.713394
5,0.4662,0.838674,0.859762,0.868313,0.796754,0.818133
6,0.3184,0.751578,0.874427,0.884097,0.827426,0.847648
7,0.1976,0.76042,0.870761,0.887248,0.831369,0.852136
8,0.1456,0.759711,0.872594,0.884149,0.834283,0.853024
9,0.1059,0.739387,0.875344,0.888827,0.844593,0.862619
10,0.0903,0.734563,0.875344,0.88859,0.844359,0.862459


[I 2025-03-22 04:28:35,753] Trial 115 finished with value: 0.86617275992531 and parameters: {'learning_rate': 0.002498660315898887, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 116 with params: {'learning_rate': 0.0021306898137937614, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7022,1.71009,0.703941,0.602475,0.605798,0.595127
2,1.3419,1.448545,0.764436,0.63894,0.660549,0.643792
3,0.9264,1.041725,0.832264,0.702269,0.710067,0.704298
4,0.6989,0.944286,0.839597,0.713537,0.713803,0.710935
5,0.5193,0.885404,0.861595,0.864116,0.7796,0.799507
6,0.3726,0.794863,0.864345,0.865176,0.817428,0.835775
7,0.2425,0.837142,0.866178,0.886618,0.817985,0.842212
8,0.1883,0.764538,0.87901,0.889555,0.82956,0.851074
9,0.1342,0.785544,0.868928,0.884455,0.830206,0.851198
10,0.1034,0.770135,0.864345,0.877289,0.817267,0.839051


[I 2025-03-22 04:29:27,154] Trial 116 pruned. 


Trial 117 with params: {'learning_rate': 0.0011831169736366168, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.998,2.00908,0.665445,0.589389,0.565012,0.566896
2,1.5847,1.44764,0.762603,0.647719,0.653187,0.645896
3,1.1378,1.22942,0.802933,0.680609,0.68602,0.678058
4,0.913,1.092672,0.824931,0.7017,0.702872,0.699727
5,0.7658,1.070621,0.827681,0.705526,0.704734,0.702568


[I 2025-03-22 04:30:08,002] Trial 117 pruned. 


Trial 118 with params: {'learning_rate': 0.0033216368359463336, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4768,1.551105,0.764436,0.667658,0.651593,0.652795
2,1.1894,1.305058,0.792851,0.664938,0.681686,0.668675
3,0.7767,0.960994,0.840513,0.711058,0.717554,0.712467
4,0.5369,0.854021,0.859762,0.871704,0.785367,0.809132
5,0.3558,0.781744,0.865261,0.875405,0.819058,0.839106
6,0.2213,0.778687,0.872594,0.878405,0.806097,0.82767
7,0.1485,0.755867,0.872594,0.88841,0.832135,0.854019
8,0.112,0.744844,0.879927,0.889317,0.839207,0.85808
9,0.0866,0.726826,0.877177,0.890622,0.836026,0.857233
10,0.0756,0.719252,0.879927,0.892106,0.838499,0.859373


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-22 04:33:31,982] Trial 118 finished with value: 0.8648026875318905 and parameters: {'learning_rate': 0.0033216368359463336, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 119 with params: {'learning_rate': 0.0024894637437425024, 'weight_decay': 0.006, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6093,1.621301,0.734189,0.64099,0.626714,0.629238
2,1.2602,1.390598,0.780018,0.651158,0.673266,0.657223
3,0.8631,1.009718,0.835014,0.704272,0.713824,0.706952
4,0.6354,0.959632,0.832264,0.702933,0.710629,0.704204
5,0.4662,0.869287,0.857929,0.86887,0.785058,0.808556
6,0.3094,0.832682,0.862511,0.870546,0.800602,0.820784
7,0.208,0.811224,0.875344,0.893103,0.826477,0.850382
8,0.1522,0.753434,0.87626,0.876329,0.836768,0.852729
9,0.1186,0.763852,0.869844,0.883982,0.830705,0.851355
10,0.1006,0.771068,0.874427,0.886628,0.833705,0.853921


[I 2025-03-22 04:35:25,242] Trial 119 finished with value: 0.8592782541376075 and parameters: {'learning_rate': 0.0024894637437425024, 'weight_decay': 0.006, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 120 with params: {'learning_rate': 0.0012739120502149104, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.966,1.921431,0.68286,0.589588,0.585188,0.5821
2,1.5312,1.463634,0.751604,0.643391,0.644901,0.636845
3,1.1141,1.210297,0.804766,0.682094,0.688196,0.679843
4,0.8923,1.075332,0.827681,0.704724,0.704622,0.702183
5,0.7429,1.052137,0.834097,0.709596,0.710456,0.707688


[I 2025-03-22 04:35:52,654] Trial 120 pruned. 


Trial 121 with params: {'learning_rate': 8.532115701682182e-05, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.8945,3.724627,0.330889,0.206118,0.242204,0.172058
2,3.6557,3.486111,0.398717,0.193694,0.302465,0.207521
3,3.3067,3.081551,0.450962,0.385722,0.342828,0.289774
4,2.9578,2.792196,0.537122,0.513323,0.431632,0.419825
5,2.7208,2.58134,0.60495,0.509645,0.510605,0.505408
6,2.5202,2.445344,0.603116,0.512919,0.510683,0.499919
7,2.3926,2.329044,0.627864,0.536536,0.528992,0.527498
8,2.2813,2.253438,0.630614,0.530108,0.535643,0.530881
9,2.2058,2.18796,0.63428,0.540953,0.537374,0.534513
10,2.12,2.155975,0.648946,0.543062,0.553175,0.543989


[I 2025-03-22 04:36:50,015] Trial 121 pruned. 


Trial 122 with params: {'learning_rate': 0.0033445222381599912, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.522,1.848524,0.728689,0.663059,0.613625,0.625511
2,1.2663,1.258257,0.796517,0.670594,0.68266,0.674369
3,0.8085,1.008756,0.829514,0.696653,0.709133,0.700349
4,0.5561,0.849696,0.861595,0.872135,0.797716,0.820181
5,0.3655,0.769899,0.863428,0.872852,0.79906,0.821757
6,0.2096,0.755146,0.869844,0.877776,0.814385,0.834847
7,0.1518,0.725876,0.878093,0.890538,0.827747,0.850612
8,0.1101,0.737346,0.880843,0.892761,0.839718,0.860108
9,0.0893,0.689059,0.88176,0.894579,0.840396,0.861411
10,0.0804,0.682021,0.88451,0.895432,0.842716,0.863136


[I 2025-03-22 04:38:07,909] Trial 122 finished with value: 0.8622188985204421 and parameters: {'learning_rate': 0.0033445222381599912, 'weight_decay': 0.004, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 123 with params: {'learning_rate': 0.004247056020779973, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4951,1.665177,0.743355,0.670608,0.626223,0.637585
2,1.2247,1.215443,0.787351,0.666358,0.67751,0.663362
3,0.7428,0.971264,0.84418,0.712824,0.719936,0.71463
4,0.4786,0.817386,0.863428,0.873337,0.778904,0.803185
5,0.2852,0.782674,0.872594,0.881905,0.815015,0.837478
6,0.1729,0.733457,0.880843,0.886224,0.812226,0.835082
7,0.1298,0.727874,0.878093,0.889813,0.83867,0.858363
8,0.0933,0.721094,0.87901,0.877871,0.83924,0.854946
9,0.0792,0.711217,0.88176,0.892743,0.841619,0.861319
10,0.0726,0.697776,0.879927,0.89164,0.839994,0.859959


[I 2025-03-22 04:39:38,771] Trial 123 finished with value: 0.8590706492015574 and parameters: {'learning_rate': 0.004247056020779973, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 124 with params: {'learning_rate': 0.002740708589460153, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.663,1.701531,0.72319,0.62554,0.620004,0.615354
2,1.3041,1.337404,0.780935,0.658397,0.671657,0.660632
3,0.857,0.971554,0.842346,0.711074,0.718226,0.713189
4,0.6109,0.891348,0.857929,0.891059,0.73975,0.743955
5,0.4287,0.811008,0.865261,0.876066,0.818929,0.83919
6,0.298,0.763962,0.872594,0.885295,0.8242,0.846269
7,0.1844,0.749285,0.874427,0.889472,0.82522,0.848342
8,0.1357,0.735627,0.87626,0.889031,0.83641,0.856775
9,0.1024,0.725108,0.87626,0.889484,0.835177,0.856307
10,0.0845,0.714489,0.874427,0.88678,0.834794,0.854813


[I 2025-03-22 04:40:34,784] Trial 124 pruned. 


Trial 125 with params: {'learning_rate': 0.0028877270455147605, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5689,1.760402,0.737855,0.665062,0.622138,0.632981
2,1.2923,1.255092,0.79835,0.673874,0.683326,0.676046
3,0.8402,0.99091,0.834097,0.705006,0.711796,0.706212
4,0.5971,0.853948,0.860678,0.89567,0.750985,0.763736
5,0.4112,0.791561,0.871677,0.878936,0.80489,0.827521
6,0.2505,0.772461,0.872594,0.880553,0.816525,0.837165
7,0.1819,0.753566,0.873511,0.886849,0.824978,0.847448
8,0.126,0.768522,0.862511,0.874178,0.817856,0.837506
9,0.098,0.721563,0.868011,0.880595,0.821347,0.842829
10,0.0831,0.719888,0.88176,0.893262,0.831481,0.853775


[I 2025-03-22 04:42:36,273] Trial 125 finished with value: 0.8471582536088489 and parameters: {'learning_rate': 0.0028877270455147605, 'weight_decay': 0.006, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 126 with params: {'learning_rate': 0.0026968220964510123, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.1, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5845,1.748358,0.721357,0.641033,0.612254,0.618501
2,1.2926,1.369541,0.770852,0.650981,0.665114,0.651062
3,0.8626,0.989545,0.834097,0.706612,0.712422,0.707511
4,0.6165,0.877303,0.851512,0.890998,0.751355,0.770444
5,0.4385,0.835851,0.861595,0.871427,0.797031,0.819949
6,0.2865,0.795398,0.866178,0.880063,0.818497,0.840769
7,0.1809,0.806554,0.868011,0.884263,0.830331,0.850241
8,0.1408,0.768554,0.870761,0.884588,0.832297,0.852441
9,0.0999,0.765738,0.868928,0.883789,0.829999,0.850566
10,0.0849,0.754499,0.869844,0.882509,0.831754,0.851348


[I 2025-03-22 04:43:28,188] Trial 126 pruned. 


Trial 127 with params: {'learning_rate': 0.004705115514801234, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4874,1.517799,0.751604,0.65206,0.640156,0.64039
2,1.161,1.166553,0.812099,0.685453,0.696765,0.68484
3,0.7169,0.913107,0.858845,0.723842,0.733034,0.726976
4,0.4518,0.778838,0.870761,0.875891,0.785271,0.807562
5,0.2658,0.726597,0.88176,0.891696,0.832367,0.853781
6,0.159,0.680572,0.883593,0.882753,0.831503,0.851486
7,0.1102,0.681023,0.88451,0.895381,0.833078,0.855713
8,0.0837,0.660347,0.885426,0.89315,0.834401,0.855582
9,0.072,0.651649,0.890009,0.899944,0.837687,0.860304
10,0.0663,0.652873,0.883593,0.892446,0.832811,0.854374


[I 2025-03-22 04:44:47,458] Trial 127 finished with value: 0.8574437342244333 and parameters: {'learning_rate': 0.004705115514801234, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 2.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 128 with params: {'learning_rate': 5.887886492101192e-05, 'weight_decay': 0.01, 'warmup_steps': 3, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.9335,3.807396,0.332722,0.206254,0.243565,0.173431
2,3.7416,3.644996,0.436297,0.212835,0.328833,0.25313
3,3.5848,3.420943,0.439047,0.224037,0.330512,0.264794
4,3.306,3.13335,0.442713,0.383227,0.336447,0.280858
5,3.0701,2.934336,0.47571,0.489954,0.36874,0.329227
6,2.8711,2.781988,0.540788,0.519563,0.436571,0.430219
7,2.7456,2.665829,0.570119,0.523683,0.467201,0.465062
8,2.6418,2.583329,0.606783,0.517219,0.512481,0.506367
9,2.5628,2.510516,0.597617,0.52193,0.498679,0.49673
10,2.4941,2.463408,0.60495,0.507983,0.511839,0.50295


[I 2025-03-22 04:45:38,788] Trial 128 pruned. 


Trial 129 with params: {'learning_rate': 0.0006712937288776745, 'weight_decay': 0.005, 'warmup_steps': 4, 'lambda_param': 0.4, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.37,2.60701,0.532539,0.54675,0.423167,0.411222
2,2.1491,1.681207,0.737855,0.627835,0.633173,0.62842
3,1.399,1.352295,0.777269,0.654163,0.664801,0.657738
4,1.1392,1.267732,0.80385,0.689923,0.68543,0.684251
5,1.0116,1.201073,0.806599,0.677446,0.691524,0.682103


[I 2025-03-22 04:46:06,668] Trial 129 pruned. 


Trial 130 with params: {'learning_rate': 0.0033276593251564093, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4979,1.732918,0.744271,0.672583,0.627791,0.63766
2,1.2271,1.273343,0.784601,0.659851,0.675783,0.661491
3,0.784,0.969651,0.837764,0.705629,0.714766,0.709146
4,0.5232,0.825819,0.855179,0.862817,0.784088,0.804436
5,0.3415,0.780285,0.862511,0.870247,0.798264,0.820214
6,0.2111,0.779249,0.871677,0.884294,0.833683,0.852778
7,0.1538,0.737344,0.877177,0.891251,0.83709,0.857509
8,0.1177,0.720041,0.879927,0.89137,0.838006,0.858678
9,0.0881,0.724505,0.88176,0.894081,0.839181,0.860506
10,0.0769,0.70216,0.87901,0.890586,0.838098,0.858381


[I 2025-03-22 04:47:31,337] Trial 130 finished with value: 0.8609700252338403 and parameters: {'learning_rate': 0.0033276593251564093, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 5.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 131 with params: {'learning_rate': 0.0005612567161548509, 'weight_decay': 0.01, 'warmup_steps': 4, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.4526,2.77196,0.502291,0.578715,0.391136,0.369147
2,2.2953,1.802436,0.716774,0.609904,0.614728,0.61173
3,1.5037,1.446089,0.759853,0.644015,0.651686,0.643049
4,1.2149,1.319179,0.799267,0.685059,0.681907,0.680276
5,1.0854,1.250767,0.791934,0.665575,0.679225,0.669907
6,0.9683,1.204915,0.810266,0.673989,0.69504,0.68286
7,0.8409,1.125292,0.822181,0.687601,0.702642,0.694264
8,0.7531,1.077816,0.827681,0.696536,0.706676,0.700217
9,0.6879,1.075514,0.821265,0.692551,0.702296,0.695542
10,0.6105,1.071676,0.821265,0.688065,0.702355,0.694429


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-22 04:50:05,334] Trial 131 pruned. 


Trial 132 with params: {'learning_rate': 0.004940900404793687, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5057,1.488484,0.751604,0.662527,0.637992,0.644889
2,1.1559,1.298356,0.782768,0.655261,0.675444,0.656927
3,0.6898,0.865841,0.861595,0.727367,0.733087,0.729263
4,0.403,0.759885,0.872594,0.878976,0.796073,0.818801
5,0.2303,0.725852,0.877177,0.875952,0.827292,0.845982
6,0.1524,0.714103,0.87901,0.887443,0.830558,0.85072
7,0.1058,0.689492,0.879927,0.889025,0.830513,0.851684
8,0.0833,0.688838,0.88176,0.893216,0.840949,0.861213
9,0.0735,0.660621,0.88176,0.891583,0.840617,0.860214
10,0.067,0.667289,0.879927,0.892444,0.839505,0.860082


[I 2025-03-22 04:51:45,972] Trial 132 finished with value: 0.8613537518732941 and parameters: {'learning_rate': 0.004940900404793687, 'weight_decay': 0.005, 'warmup_steps': 2, 'lambda_param': 0.2, 'temperature': 4.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 133 with params: {'learning_rate': 0.003787376259501541, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4847,1.648347,0.749771,0.663816,0.634802,0.6405
2,1.2062,1.358451,0.777269,0.655452,0.671335,0.65395
3,0.7449,0.908786,0.843263,0.711063,0.719979,0.714511
4,0.4776,0.790604,0.870761,0.879299,0.794534,0.818082
5,0.3066,0.73895,0.877177,0.885226,0.828495,0.848824
6,0.1804,0.702418,0.887259,0.876294,0.844631,0.857793
7,0.1248,0.676801,0.882676,0.894093,0.831757,0.854345
8,0.0934,0.6691,0.887259,0.898638,0.844604,0.865688
9,0.0772,0.649565,0.888176,0.899439,0.845342,0.866316
10,0.0694,0.66596,0.88451,0.895513,0.842908,0.863358


[I 2025-03-22 04:53:15,555] Trial 133 finished with value: 0.8662760990242565 and parameters: {'learning_rate': 0.003787376259501541, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 134 with params: {'learning_rate': 0.002350473961566653, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7035,1.719955,0.703025,0.597955,0.606861,0.593714
2,1.3589,1.337938,0.777269,0.653487,0.669234,0.657637
3,0.9155,0.992861,0.833181,0.69908,0.711369,0.704663
4,0.6652,0.934207,0.842346,0.714863,0.71739,0.713308
5,0.4841,0.841266,0.861595,0.868775,0.788769,0.810656


[I 2025-03-22 04:53:41,410] Trial 134 pruned. 


Trial 135 with params: {'learning_rate': 0.003993808282969001, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4292,1.598549,0.751604,0.657164,0.639804,0.641555
2,1.1791,1.147827,0.806599,0.681302,0.691231,0.68172
3,0.708,0.912743,0.853346,0.72631,0.726952,0.72496
4,0.4616,0.841242,0.864345,0.882129,0.797782,0.82384
5,0.2797,0.729005,0.878093,0.890015,0.828755,0.850857
6,0.1794,0.737549,0.880843,0.890663,0.830992,0.85232
7,0.1269,0.734144,0.875344,0.888965,0.826396,0.849228
8,0.1018,0.726065,0.875344,0.886682,0.83643,0.855539
9,0.0798,0.689029,0.883593,0.896302,0.841576,0.862861
10,0.0713,0.684154,0.88451,0.894887,0.842951,0.862876


[I 2025-03-22 04:55:00,552] Trial 135 finished with value: 0.8659895941301233 and parameters: {'learning_rate': 0.003993808282969001, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 136 with params: {'learning_rate': 0.002434446892579552, 'weight_decay': 0.004, 'warmup_steps': 0, 'lambda_param': 0.1, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5809,1.636266,0.719523,0.619332,0.616359,0.613096
2,1.2822,1.425688,0.76077,0.639369,0.657762,0.640191
3,0.8686,1.04664,0.823098,0.695509,0.704438,0.697458
4,0.6326,0.946844,0.831347,0.702227,0.710241,0.703138
5,0.4636,0.850053,0.858845,0.862138,0.787705,0.806949


[I 2025-03-22 04:55:24,673] Trial 136 pruned. 


Trial 137 with params: {'learning_rate': 0.002472023290700323, 'weight_decay': 0.009000000000000001, 'warmup_steps': 2, 'lambda_param': 0.7000000000000001, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6923,1.662482,0.718607,0.612934,0.617708,0.610756
2,1.3351,1.417164,0.774519,0.648441,0.669048,0.653282
3,0.8994,1.003755,0.837764,0.70781,0.713857,0.709295
4,0.6601,0.925869,0.840513,0.877882,0.726184,0.730293
5,0.474,0.838077,0.861595,0.868444,0.798446,0.819148
6,0.3209,0.754593,0.875344,0.873734,0.837464,0.851819
7,0.2045,0.767736,0.872594,0.887329,0.833357,0.853233
8,0.1516,0.730562,0.875344,0.886824,0.836035,0.855428
9,0.1119,0.739424,0.870761,0.884279,0.83123,0.851768
10,0.0909,0.737107,0.869844,0.883055,0.830825,0.851054


[I 2025-03-22 04:56:44,935] Trial 137 pruned. 


Trial 138 with params: {'learning_rate': 0.004238660626995318, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4538,1.58429,0.746104,0.650279,0.63385,0.636056
2,1.1711,1.141992,0.811182,0.682364,0.695362,0.6856
3,0.7122,0.920147,0.843263,0.706776,0.720655,0.712589
4,0.4535,0.808505,0.872594,0.868147,0.805086,0.825787
5,0.273,0.749169,0.877177,0.873565,0.828582,0.845526
6,0.1652,0.739055,0.879927,0.888645,0.830696,0.851425
7,0.118,0.708399,0.888176,0.897048,0.836193,0.858356
8,0.0943,0.701322,0.882676,0.893607,0.8404,0.860999
9,0.0787,0.679668,0.891842,0.901312,0.847437,0.868418
10,0.0686,0.683981,0.885426,0.896351,0.842562,0.86346


[I 2025-03-22 04:58:11,259] Trial 138 finished with value: 0.862895762380992 and parameters: {'learning_rate': 0.004238660626995318, 'weight_decay': 0.004, 'warmup_steps': 1, 'lambda_param': 0.1, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 139 with params: {'learning_rate': 0.002565569827863233, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.632,1.68555,0.726856,0.630389,0.621726,0.619868
2,1.2882,1.429913,0.770852,0.650093,0.666748,0.649075
3,0.8951,0.995684,0.83593,0.707816,0.712567,0.708627
4,0.6491,0.909748,0.847846,0.887172,0.739678,0.75282
5,0.46,0.84832,0.856095,0.865393,0.793142,0.815164


[I 2025-03-22 04:58:36,445] Trial 139 pruned. 


Trial 140 with params: {'learning_rate': 0.0038892206776215974, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4426,1.943952,0.736022,0.663618,0.621697,0.628196
2,1.2316,1.369169,0.777269,0.652223,0.670822,0.652801
3,0.7467,0.931495,0.840513,0.713503,0.717172,0.713581
4,0.4991,0.831412,0.867094,0.880047,0.790479,0.815988
5,0.3028,0.78782,0.870761,0.878684,0.815054,0.835758
6,0.1895,0.701881,0.887259,0.894315,0.836529,0.857203
7,0.1247,0.720553,0.880843,0.891131,0.820487,0.844503
8,0.1043,0.713759,0.883593,0.891394,0.842581,0.860879
9,0.0855,0.691125,0.882676,0.891091,0.823133,0.846216
10,0.073,0.681558,0.886343,0.895232,0.845132,0.864376


[I 2025-03-22 05:01:29,683] Trial 140 finished with value: 0.8615313953963627 and parameters: {'learning_rate': 0.0038892206776215974, 'weight_decay': 0.003, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 141 with params: {'learning_rate': 0.0002613740473084591, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.5, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.7211,3.290161,0.442713,0.398387,0.3339,0.281928
2,2.9272,2.492321,0.595784,0.522222,0.497719,0.498033
3,2.2753,2.098577,0.659945,0.551583,0.569502,0.553004
4,1.8328,1.732634,0.735105,0.624798,0.625405,0.622754
5,1.5757,1.591757,0.76077,0.642227,0.650999,0.644009


[I 2025-03-22 05:01:58,377] Trial 141 pruned. 


Trial 142 with params: {'learning_rate': 0.0018805032133211788, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7639,1.828127,0.688359,0.59204,0.59257,0.580587
2,1.3913,1.491378,0.762603,0.640586,0.658187,0.643032
3,0.9909,1.113226,0.822181,0.693796,0.704441,0.694521
4,0.7517,0.995987,0.837764,0.711541,0.712515,0.709454
5,0.586,0.973982,0.842346,0.881666,0.727052,0.732327
6,0.4659,0.870271,0.857929,0.866183,0.795745,0.816152
7,0.3105,0.871755,0.860678,0.87896,0.796627,0.820317
8,0.233,0.846366,0.868011,0.880318,0.821928,0.842299
9,0.1694,0.798169,0.859762,0.874446,0.823285,0.842672
10,0.1204,0.783962,0.867094,0.881279,0.829275,0.849332


[I 2025-03-22 05:03:13,021] Trial 142 pruned. 


Trial 143 with params: {'learning_rate': 0.004776252743820627, 'weight_decay': 0.001, 'warmup_steps': 1, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4065,1.478644,0.761687,0.649309,0.652759,0.647614
2,1.0856,1.256742,0.793767,0.663783,0.684112,0.667711
3,0.6619,0.850718,0.862511,0.894065,0.771525,0.792408
4,0.4007,0.805555,0.870761,0.867059,0.812628,0.831467
5,0.2482,0.714546,0.880843,0.876873,0.831172,0.848583
6,0.1533,0.756591,0.874427,0.890902,0.833835,0.855749
7,0.1152,0.722122,0.878093,0.887349,0.82976,0.84996
8,0.086,0.668198,0.88176,0.891509,0.842145,0.860866
9,0.0749,0.672307,0.882676,0.892449,0.832643,0.85434
10,0.069,0.669472,0.877177,0.886991,0.827712,0.849013


[I 2025-03-22 05:04:05,514] Trial 143 pruned. 


Trial 144 with params: {'learning_rate': 5.8193477735771966e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 1, 'lambda_param': 0.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.9295,3.804182,0.334555,0.20639,0.244925,0.174792
2,3.7397,3.644184,0.437214,0.213919,0.329521,0.254473
3,3.5866,3.426149,0.439963,0.225733,0.331082,0.26629
4,3.3136,3.142463,0.442713,0.383007,0.336447,0.280711
5,3.0792,2.943096,0.473877,0.492797,0.367592,0.329761
6,2.8802,2.791546,0.538955,0.516554,0.434809,0.428392
7,2.7549,2.675486,0.567369,0.519163,0.465322,0.462368
8,2.6532,2.594342,0.606783,0.51603,0.512233,0.506168
9,2.5746,2.522128,0.595784,0.518686,0.49633,0.493888
10,2.5059,2.474713,0.604033,0.506109,0.511384,0.502205


[I 2025-03-22 05:04:58,650] Trial 144 pruned. 


Trial 145 with params: {'learning_rate': 0.0024525826022415956, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6451,1.623404,0.741522,0.64572,0.631938,0.633159
2,1.2741,1.30753,0.778185,0.653213,0.669948,0.657404
3,0.8867,1.008305,0.836847,0.706135,0.71496,0.70822
4,0.6453,0.899281,0.851512,0.722303,0.724646,0.721169
5,0.4596,0.867733,0.861595,0.871175,0.797356,0.819423
6,0.3224,0.789969,0.871677,0.85674,0.824967,0.83741
7,0.2014,0.794367,0.875344,0.8899,0.835498,0.855847
8,0.1476,0.797978,0.872594,0.88365,0.834668,0.852784
9,0.1125,0.763833,0.87626,0.890752,0.835869,0.857096
10,0.0895,0.735152,0.878093,0.877155,0.838808,0.854159


[I 2025-03-22 05:05:59,626] Trial 145 pruned. 


Trial 146 with params: {'learning_rate': 0.00318784927721531, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5869,1.671501,0.738772,0.655282,0.62459,0.632607
2,1.2869,1.353958,0.767186,0.652215,0.661735,0.648806
3,0.8562,0.983627,0.828598,0.697625,0.709199,0.701992
4,0.5807,0.879197,0.848763,0.861159,0.768002,0.791159
5,0.3906,0.820917,0.866178,0.872105,0.792011,0.813966
6,0.2676,0.81325,0.860678,0.869376,0.808562,0.827141
7,0.1673,0.750751,0.875344,0.887527,0.836375,0.855669
8,0.1277,0.748451,0.873511,0.885095,0.835317,0.854426
9,0.0965,0.713255,0.879927,0.893986,0.838506,0.860032
10,0.0782,0.716326,0.878093,0.889107,0.838424,0.857736


[I 2025-03-22 05:07:30,961] Trial 146 finished with value: 0.8599622617168156 and parameters: {'learning_rate': 0.00318784927721531, 'weight_decay': 0.003, 'warmup_steps': 2, 'lambda_param': 0.0, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 147 with params: {'learning_rate': 0.0025651152134400176, 'weight_decay': 0.007, 'warmup_steps': 0, 'lambda_param': 0.7000000000000001, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5156,1.644448,0.744271,0.666078,0.631819,0.636871
2,1.2433,1.307751,0.794684,0.663503,0.683226,0.670135
3,0.8367,1.013836,0.833181,0.707936,0.711263,0.706564
4,0.6128,0.874046,0.852429,0.887756,0.735494,0.740426
5,0.4348,0.82141,0.862511,0.871277,0.798185,0.820531
6,0.2897,0.828016,0.867094,0.868296,0.821465,0.839013
7,0.1965,0.764248,0.877177,0.892832,0.827023,0.850507
8,0.1451,0.761302,0.869844,0.867519,0.832816,0.846279
9,0.108,0.720395,0.874427,0.875717,0.834706,0.851398
10,0.0866,0.715326,0.88176,0.893351,0.840504,0.860941


[I 2025-03-22 05:08:53,795] Trial 147 finished with value: 0.8540271589078167 and parameters: {'learning_rate': 0.0025651152134400176, 'weight_decay': 0.007, 'warmup_steps': 0, 'lambda_param': 0.7000000000000001, 'temperature': 7.0}. Best is trial 53 with value: 0.8696635593759803.


Trial 148 with params: {'learning_rate': 0.004724787958470258, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.8, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4878,1.533138,0.753437,0.652568,0.641433,0.641152
2,1.1666,1.157194,0.804766,0.679581,0.691078,0.678857
3,0.7127,0.927107,0.852429,0.718994,0.72748,0.721614
4,0.4578,0.805847,0.864345,0.870836,0.779827,0.801898
5,0.266,0.701516,0.88176,0.89215,0.831941,0.853823
6,0.1593,0.692707,0.889093,0.898597,0.836612,0.859288
7,0.1097,0.687124,0.880843,0.893479,0.830185,0.853274
8,0.0875,0.679413,0.885426,0.89362,0.843168,0.862388
9,0.0745,0.653866,0.882676,0.891903,0.831411,0.853397
10,0.0686,0.650024,0.891842,0.900005,0.83894,0.861125


[I 2025-03-22 05:10:29,088] Trial 148 finished with value: 0.8685321407320162 and parameters: {'learning_rate': 0.004724787958470258, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.8, 'temperature': 3.5}. Best is trial 53 with value: 0.8696635593759803.


Trial 149 with params: {'learning_rate': 0.0016430785628756667, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8547,1.87183,0.696609,0.597755,0.597526,0.589403
2,1.45,1.534917,0.748854,0.626956,0.648099,0.629862
3,1.0506,1.123489,0.820348,0.690292,0.701492,0.692906
4,0.7974,1.018842,0.839597,0.712681,0.713959,0.710886
5,0.6354,0.963742,0.84418,0.717051,0.718968,0.715714
6,0.4875,0.893295,0.849679,0.883357,0.751384,0.768143
7,0.336,0.856675,0.861595,0.862307,0.780704,0.799613
8,0.2448,0.866357,0.861595,0.877316,0.816917,0.83679
9,0.1916,0.806715,0.863428,0.878302,0.825112,0.845453
10,0.1391,0.841721,0.857012,0.859997,0.821717,0.836636


[I 2025-03-22 05:11:37,282] Trial 149 pruned. 


In [32]:
# Show the best run of the hyperparameter search that just finished
# (run id, objective value and the winning hyperparameters).
print(best_trial2)

BestRun(run_id='53', objective=0.8696635593759803, hyperparameters={'learning_rate': 0.004269637593700472, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 3.0}, run_summary=None)


In [33]:
# Convert epoch-based budgets into step counts.
# min_r / max_r: steps in 5 epochs and in `num_epochs` epochs of the augmented
# training set; warm_up: one tenth of the batches in a single epoch (rounded up).
data_length = len(all_train_data)
min_r, max_r = (
    math.ceil(data_length / batch_size) * epoch_count
    for epoch_count in (5, num_epochs)
)
warm_up = math.ceil(data_length / batch_size / 10)

In [34]:
# Reset the seed before setting up the next search.
# NOTE(review): `reset_seed` is a project helper in `base`; presumably it
# re-seeds every RNG in use -- confirm in base.py.
base.reset_seed()

In [35]:
# Training arguments for the hyperparameter search on the augmented data.
# NOTE(review): the paths start with "~", which Python does not expand on its
# own -- confirm that base.get_training_args (or its callee) expands it.
training_args = base.get_training_args(
    output_dir=f"~/results/{DATASET}/bilstm-base_coarse_aug_hp-search",
    logging_dir=f"~/logs/{DATASET}/bilstm-base_coarse_aug_hp-search",
    epochs=num_epochs,
    batch_size=batch_size,
)

In [36]:
def hp_space(trial):
    """Sample one hyperparameter configuration from the search space.

    Args:
        trial: Trial object of the search backend; it must provide
            ``suggest_float``, ``suggest_int`` and a ``number`` attribute.

    Returns:
        dict with the sampled values:
          * ``learning_rate`` -- float in [5e-5, 5e-3], sampled on a log scale
          * ``weight_decay``  -- float in [0, 1e-2] in steps of 1e-3
          * ``warmup_steps``  -- int in [0, warm_up] (``warm_up`` is a
            notebook-level variable read at call time)

    The sampled configuration is also printed together with the trial number.
    """
    # The suggest calls stay in this order so the sampler sees the parameters
    # in the same sequence as before.
    sampled = {}
    sampled["learning_rate"] = trial.suggest_float("learning_rate", 5e-5, 5e-3, log=True)
    sampled["weight_decay"] = trial.suggest_float("weight_decay", 0, 1e-2, step=1e-3)
    sampled["warmup_steps"] = trial.suggest_int("warmup_steps", 0, warm_up)

    print(f"Trial {trial.number} with params: {sampled}")
    return sampled

In [37]:
# Hyperband pruning between the step budgets min_r and max_r computed above.
pruner = optuna.pruners.HyperbandPruner(
    min_resource=min_r,
    max_resource=max_r,
    reduction_factor=2,
    bootstrap_count=2,
)
# TPE sampler with a fixed seed and multivariate sampling enabled.
sampler = optuna.samplers.TPESampler(
    seed=42,
    multivariate=True,
)



In [38]:
# Trainer for the hyperparameter search: trains on the augmented training set
# and evaluates on eval_data.
trainer = Trainer(
    # Kept as a zero-argument lambda: Trainer inspects the arity of model_init,
    # and get_BiLSTM is looked up each time a model is initialised.
    model_init=lambda: get_BiLSTM(),
    args=training_args,
    compute_metrics=base.compute_metrics,
    train_dataset=all_train_data,
    eval_dataset=eval_data,
)
  

In [39]:
# Run 150 Optuna trials named "Base-aug", maximising the eval_f1 metric.
best_trial3 = trainer.hyperparameter_search(
    # search backend and study setup
    backend="optuna",
    study_name="Base-aug",
    sampler=sampler,
    pruner=pruner,
    n_trials=150,
    # what is sampled and what is optimised
    hp_space=hp_space,
    direction="maximize",
    compute_objective=lambda metrics: metrics["eval_f1"],
)

[I 2025-03-22 05:11:37,662] A new study created in memory with name: Base-aug


Trial 0 with params: {'learning_rate': 0.0002805758207667253, 'weight_decay': 0.01, 'warmup_steps': 23}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8259,0.584778,0.794684,0.668485,0.679396,0.673175
2,0.3922,0.513855,0.836847,0.872888,0.722258,0.727334
3,0.2755,0.5439,0.839597,0.843093,0.774421,0.790163
4,0.1949,0.565894,0.842346,0.861462,0.808615,0.828235
5,0.1417,0.583311,0.852429,0.868792,0.826644,0.843027


[I 2025-03-22 05:12:51,535] Trial 0 pruned. 


Trial 1 with params: {'learning_rate': 0.0007875660249889869, 'weight_decay': 0.001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6007,0.504159,0.840513,0.843357,0.788561,0.806823
2,0.2269,0.548546,0.853346,0.866181,0.8089,0.828889
3,0.1216,0.576841,0.863428,0.874003,0.836569,0.850912
4,0.0706,0.675583,0.863428,0.87539,0.826687,0.844957
5,0.0468,0.725902,0.857012,0.851349,0.818376,0.832266
6,0.0261,0.786191,0.859762,0.834904,0.822779,0.828252
7,0.018,0.902115,0.857929,0.867159,0.823607,0.838973
8,0.012,0.956189,0.860678,0.863197,0.823079,0.838717
9,0.007,0.964889,0.867094,0.865804,0.830558,0.844365
10,0.0035,1.013443,0.866178,0.863918,0.829044,0.84256


[I 2025-03-22 05:15:15,474] Trial 1 pruned. 


Trial 2 with params: {'learning_rate': 6.533369619026643e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 19}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.273,0.903403,0.668194,0.572331,0.566046,0.56648
2,0.7236,0.710772,0.75527,0.63252,0.647069,0.63904
3,0.5676,0.650728,0.766269,0.64211,0.657047,0.649028
4,0.4917,0.615396,0.785518,0.666712,0.669898,0.666841
5,0.4445,0.597669,0.789184,0.660215,0.676817,0.667627


[I 2025-03-22 05:16:30,868] Trial 2 pruned. 


Trial 3 with params: {'learning_rate': 0.0013035123791853842, 'weight_decay': 0.0, 'warmup_steps': 31}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5727,0.510748,0.849679,0.863088,0.788631,0.809036
2,0.1642,0.5186,0.874427,0.878602,0.843591,0.858198
3,0.0754,0.578803,0.874427,0.873293,0.845587,0.856236
4,0.0378,0.661752,0.878093,0.886785,0.838627,0.856117
5,0.0224,0.723559,0.875344,0.888676,0.83442,0.855321
6,0.0136,0.768296,0.878093,0.877187,0.848792,0.860184
7,0.0086,0.869805,0.879927,0.889443,0.84029,0.858474
8,0.0068,0.972727,0.880843,0.889336,0.841978,0.859149
9,0.0032,1.019954,0.880843,0.887162,0.842153,0.85823
10,0.0032,1.002367,0.88451,0.892299,0.843946,0.862015


[I 2025-03-22 05:18:39,970] Trial 3 pruned. 


Trial 4 with params: {'learning_rate': 0.002311294500510415, 'weight_decay': 0.002, 'warmup_steps': 5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.468,0.492101,0.868011,0.872817,0.794643,0.814921
2,0.116,0.499198,0.880843,0.892409,0.850703,0.867339
3,0.0492,0.611067,0.874427,0.876109,0.844803,0.857322
4,0.0275,0.672588,0.869844,0.883211,0.840991,0.857374
5,0.0178,0.747401,0.873511,0.884116,0.835964,0.853395
6,0.0104,0.762319,0.88176,0.89123,0.841666,0.859782
7,0.0059,0.945068,0.877177,0.886132,0.829575,0.848385
8,0.004,0.986719,0.875344,0.885484,0.836588,0.854811
9,0.0028,0.938505,0.882676,0.891614,0.851946,0.867458
10,0.0007,1.007379,0.883593,0.893989,0.852343,0.868745


[I 2025-03-22 05:20:48,394] Trial 4 pruned. 


Trial 5 with params: {'learning_rate': 0.00011635338541918901, 'weight_decay': 0.003, 'warmup_steps': 16}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0743,0.730135,0.743355,0.625432,0.633433,0.627024
2,0.5598,0.606924,0.789184,0.667359,0.67423,0.669291
3,0.4462,0.571675,0.807516,0.677257,0.690943,0.683502
4,0.3791,0.561568,0.813932,0.687814,0.694965,0.689345
5,0.3287,0.536615,0.827681,0.861269,0.734437,0.749552
6,0.2856,0.535766,0.843263,0.855301,0.782778,0.804732
7,0.2508,0.535368,0.83868,0.849556,0.779434,0.800261
8,0.2225,0.561191,0.830431,0.84671,0.781835,0.80204
9,0.2007,0.557262,0.83868,0.858734,0.796476,0.818752
10,0.1799,0.558107,0.836847,0.854006,0.795565,0.816704


[I 2025-03-22 05:22:58,750] Trial 5 pruned. 


Trial 6 with params: {'learning_rate': 0.0003654769917956456, 'weight_decay': 0.003, 'warmup_steps': 19}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7632,0.561246,0.804766,0.678369,0.687801,0.682247
2,0.3468,0.510016,0.849679,0.863401,0.787321,0.810784
3,0.2275,0.554434,0.842346,0.854645,0.812369,0.826388
4,0.1508,0.630176,0.836847,0.857631,0.804132,0.823516
5,0.1043,0.618006,0.861595,0.877206,0.832261,0.850656
6,0.0745,0.680602,0.856095,0.872697,0.818044,0.839211
7,0.058,0.715709,0.855179,0.866384,0.818983,0.836667
8,0.0401,0.830245,0.845096,0.86012,0.813635,0.829374
9,0.0311,0.8414,0.846929,0.864474,0.811942,0.83103
10,0.0238,0.869891,0.853346,0.86652,0.818767,0.835768


[I 2025-03-22 05:24:56,290] Trial 6 pruned. 


Trial 7 with params: {'learning_rate': 9.505122659935192e-05, 'weight_decay': 0.003, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1327,0.785682,0.72594,0.61661,0.616467,0.613893
2,0.6112,0.644709,0.764436,0.64565,0.65447,0.64838
3,0.4857,0.597644,0.792851,0.663561,0.67904,0.670857
4,0.4182,0.574323,0.7956,0.671436,0.679568,0.674013
5,0.3713,0.552238,0.816682,0.686067,0.698502,0.692116
6,0.333,0.544492,0.823098,0.861534,0.711053,0.716201
7,0.3005,0.54744,0.828598,0.862418,0.744335,0.763639
8,0.2719,0.560813,0.824931,0.860496,0.741095,0.760168
9,0.2489,0.562359,0.834097,0.848634,0.775733,0.797243
10,0.2282,0.549568,0.836847,0.84846,0.777494,0.798996


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-22 05:27:07,500] Trial 7 pruned. 


Trial 8 with params: {'learning_rate': 0.00040842279473800845, 'weight_decay': 0.008, 'warmup_steps': 6}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7202,0.553478,0.812099,0.691612,0.692911,0.689767
2,0.3254,0.528159,0.852429,0.867746,0.798631,0.821441
3,0.2061,0.555621,0.843263,0.842826,0.813238,0.823087
4,0.1341,0.654765,0.843263,0.852515,0.817629,0.832032
5,0.0925,0.641399,0.857012,0.872669,0.819706,0.840342
6,0.0661,0.693996,0.858845,0.851301,0.820895,0.833712
7,0.0481,0.777332,0.858845,0.869679,0.822427,0.840095
8,0.0333,0.824103,0.855179,0.855146,0.819941,0.833416
9,0.026,0.894318,0.852429,0.864551,0.809162,0.827903
10,0.0173,0.91799,0.857929,0.85712,0.822025,0.835574


[I 2025-03-22 05:29:03,517] Trial 8 pruned. 


Trial 9 with params: {'learning_rate': 0.0005338741354740678, 'weight_decay': 0.006, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.668,0.54036,0.818515,0.68845,0.699349,0.693211
2,0.2877,0.524557,0.856095,0.872153,0.81947,0.839671
3,0.1718,0.571264,0.853346,0.852726,0.820125,0.832469
4,0.1063,0.662025,0.848763,0.864788,0.813285,0.832722
5,0.069,0.688483,0.854262,0.868398,0.817092,0.836854


[I 2025-03-22 05:30:16,465] Trial 9 pruned. 


Trial 10 with params: {'learning_rate': 0.004518165681587256, 'weight_decay': 0.002, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3884,0.504426,0.872594,0.880581,0.826599,0.844509
2,0.0851,0.463949,0.900092,0.893874,0.85701,0.871766
3,0.0411,0.684239,0.872594,0.869745,0.826485,0.841081
4,0.0205,0.748351,0.890926,0.896351,0.840004,0.859712
5,0.015,0.751867,0.889093,0.873113,0.838835,0.852586
6,0.0103,0.740924,0.894592,0.863777,0.852224,0.857556
7,0.0059,0.869984,0.894592,0.857511,0.844067,0.849302
8,0.0077,0.921552,0.901008,0.893797,0.848229,0.865292
9,0.0041,0.919215,0.885426,0.869029,0.83666,0.848145
10,0.0018,0.8722,0.905591,0.876236,0.851775,0.862161


[I 2025-03-22 05:33:26,442] Trial 10 finished with value: 0.8609408656327728 and parameters: {'learning_rate': 0.004518165681587256, 'weight_decay': 0.002, 'warmup_steps': 0}. Best is trial 10 with value: 0.8609408656327728.


Trial 11 with params: {'learning_rate': 0.004258197772781102, 'weight_decay': 0.003, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4012,0.498686,0.87901,0.890589,0.839048,0.858525
2,0.0872,0.482154,0.88451,0.864443,0.854634,0.858518
3,0.0419,0.722298,0.867094,0.868399,0.841734,0.849569
4,0.0222,0.691154,0.890926,0.890773,0.847,0.864745
5,0.0148,0.660988,0.891842,0.899609,0.851101,0.868809
6,0.0085,0.808283,0.892759,0.899598,0.841204,0.862108
7,0.0061,0.795363,0.899175,0.886864,0.863841,0.874063
8,0.0024,0.963802,0.889093,0.899169,0.846982,0.866572
9,0.0034,0.866788,0.901008,0.909867,0.865349,0.883264
10,0.0031,1.018743,0.889093,0.882606,0.847301,0.861109


[I 2025-03-22 05:35:23,160] Trial 11 pruned. 


Trial 12 with params: {'learning_rate': 0.002877906649343771, 'weight_decay': 0.004, 'warmup_steps': 14}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4566,0.517226,0.868011,0.87819,0.812796,0.833438
2,0.1014,0.504528,0.883593,0.861837,0.841235,0.850319
3,0.0436,0.655288,0.873511,0.848864,0.845253,0.845424
4,0.0222,0.715676,0.877177,0.891502,0.836828,0.856927
5,0.0176,0.719745,0.890009,0.868741,0.847928,0.855628
6,0.0084,0.763372,0.896425,0.892817,0.861777,0.87481
7,0.007,0.846744,0.887259,0.883452,0.847038,0.86068
8,0.0024,0.839194,0.897342,0.891701,0.854172,0.86892
9,0.0008,0.87721,0.897342,0.883,0.863641,0.87157
10,0.0003,0.920289,0.895509,0.882333,0.862405,0.870397


[I 2025-03-22 05:39:07,213] Trial 12 finished with value: 0.8704299913397645 and parameters: {'learning_rate': 0.002877906649343771, 'weight_decay': 0.004, 'warmup_steps': 14}. Best is trial 12 with value: 0.8704299913397645.


Trial 13 with params: {'learning_rate': 0.004609089875166944, 'weight_decay': 0.005, 'warmup_steps': 18}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4224,0.466092,0.882676,0.873217,0.85123,0.8609
2,0.087,0.503015,0.879927,0.866614,0.841353,0.850635
3,0.0377,0.580914,0.874427,0.873367,0.836609,0.850425
4,0.0199,0.751589,0.886343,0.876056,0.863341,0.868903
5,0.0186,0.775744,0.882676,0.877131,0.833761,0.849871


[I 2025-03-22 05:40:11,509] Trial 13 pruned. 


Trial 14 with params: {'learning_rate': 0.003147266239250273, 'weight_decay': 0.0, 'warmup_steps': 19}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.451,0.465938,0.875344,0.888824,0.835885,0.855836
2,0.0937,0.530237,0.87626,0.8893,0.847745,0.863988
3,0.0391,0.625921,0.868928,0.8432,0.842078,0.841522
4,0.0249,0.785981,0.890009,0.881059,0.856351,0.866688
5,0.0152,0.681487,0.901925,0.898249,0.867547,0.880487
6,0.0074,0.820627,0.888176,0.895549,0.848515,0.865215
7,0.0053,0.867976,0.899175,0.901659,0.837885,0.858849
8,0.0051,0.952474,0.879927,0.890104,0.833004,0.851635
9,0.003,0.924152,0.899175,0.890214,0.846223,0.862608
10,0.0012,0.923323,0.895509,0.879485,0.852847,0.863827


[I 2025-03-22 05:43:19,765] Trial 14 finished with value: 0.8590244284075368 and parameters: {'learning_rate': 0.003147266239250273, 'weight_decay': 0.0, 'warmup_steps': 19}. Best is trial 12 with value: 0.8704299913397645.


Trial 15 with params: {'learning_rate': 0.002192808407888279, 'weight_decay': 0.008, 'warmup_steps': 16}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4811,0.511898,0.869844,0.871728,0.832622,0.846992
2,0.1167,0.495347,0.88451,0.893409,0.844426,0.862829
3,0.0488,0.601039,0.875344,0.878301,0.84553,0.858907
4,0.0269,0.703393,0.87901,0.890185,0.848959,0.864931
5,0.0161,0.873918,0.869844,0.881635,0.821522,0.843282


[I 2025-03-22 05:44:18,404] Trial 15 pruned. 


Trial 16 with params: {'learning_rate': 0.0021486865738397113, 'weight_decay': 0.006, 'warmup_steps': 8}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4793,0.510033,0.860678,0.866803,0.789423,0.808642
2,0.1191,0.469415,0.887259,0.895684,0.84651,0.865124
3,0.0518,0.584626,0.883593,0.882205,0.854135,0.865127
4,0.025,0.728999,0.885426,0.88363,0.84472,0.859849
5,0.0175,0.71303,0.893676,0.886651,0.842022,0.859113
6,0.011,0.827997,0.886343,0.873751,0.855781,0.862946
7,0.0066,0.894263,0.885426,0.883323,0.845578,0.859316
8,0.0044,0.927585,0.880843,0.870063,0.84098,0.852773
9,0.0044,0.955718,0.883593,0.892084,0.844863,0.862028
10,0.0019,0.934499,0.88451,0.870376,0.845633,0.855345


[I 2025-03-22 05:46:31,011] Trial 16 pruned. 


Trial 17 with params: {'learning_rate': 0.0008016009236402311, 'weight_decay': 0.004, 'warmup_steps': 9}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6138,0.527617,0.836847,0.838448,0.776466,0.795252
2,0.2218,0.53855,0.866178,0.878536,0.846937,0.860103
3,0.1157,0.584257,0.859762,0.871725,0.832892,0.848077
4,0.0686,0.672509,0.866178,0.866189,0.828634,0.843514
5,0.0412,0.705653,0.866178,0.864616,0.826791,0.842101


[I 2025-03-22 05:47:30,947] Trial 17 pruned. 


Trial 18 with params: {'learning_rate': 0.004435522686719786, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3925,0.470092,0.873511,0.882704,0.835496,0.852987
2,0.0863,0.474186,0.888176,0.883561,0.846768,0.861163
3,0.0383,0.731253,0.868928,0.8667,0.840954,0.848051
4,0.0235,0.70366,0.895509,0.902591,0.862674,0.878152
5,0.0168,0.752141,0.892759,0.898675,0.841224,0.861637
6,0.0119,0.813166,0.888176,0.8972,0.848226,0.865538
7,0.0052,1.002925,0.889093,0.894838,0.849708,0.865755
8,0.0038,0.972175,0.889093,0.894804,0.838516,0.858216
9,0.0042,1.037723,0.888176,0.896406,0.84846,0.864847
10,0.0017,1.064365,0.893676,0.900319,0.842983,0.862738


[I 2025-03-22 05:51:21,852] Trial 18 finished with value: 0.8620022053516813 and parameters: {'learning_rate': 0.004435522686719786, 'weight_decay': 0.009000000000000001, 'warmup_steps': 4}. Best is trial 12 with value: 0.8704299913397645.


Trial 19 with params: {'learning_rate': 0.0012239029628472958, 'weight_decay': 0.01, 'warmup_steps': 6}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5437,0.50362,0.853346,0.852005,0.80083,0.817323
2,0.1702,0.514152,0.87901,0.876891,0.828917,0.847453
3,0.0808,0.543144,0.864345,0.84932,0.837417,0.842629
4,0.0436,0.701151,0.868928,0.882619,0.830759,0.850278
5,0.0257,0.808343,0.870761,0.883681,0.842323,0.858722
6,0.0144,0.861895,0.872594,0.88222,0.844232,0.85883
7,0.0089,0.913103,0.874427,0.880065,0.83716,0.852118
8,0.0073,0.96104,0.871677,0.876332,0.841904,0.855631
9,0.0053,0.9478,0.871677,0.879221,0.834829,0.85026
10,0.0026,1.067664,0.861595,0.873178,0.8354,0.849337


[I 2025-03-22 05:53:26,351] Trial 19 pruned. 


Trial 20 with params: {'learning_rate': 0.003766117592486023, 'weight_decay': 0.009000000000000001, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4015,0.496892,0.868011,0.883013,0.831139,0.848726
2,0.0935,0.549731,0.866178,0.840461,0.839941,0.838353
3,0.0404,0.612458,0.886343,0.88045,0.846145,0.858913
4,0.0196,0.741002,0.88451,0.883924,0.85296,0.866016
5,0.0138,0.711474,0.885426,0.892703,0.844842,0.862853
6,0.0114,0.790854,0.888176,0.886774,0.855384,0.868233
7,0.0101,0.831864,0.886343,0.89434,0.845792,0.86372
8,0.0043,0.985269,0.885426,0.881388,0.844802,0.859083
9,0.0023,1.045318,0.887259,0.88103,0.846383,0.8597
10,0.0007,1.06323,0.88176,0.869578,0.842189,0.853071


[I 2025-03-22 05:55:21,278] Trial 20 pruned. 


Trial 21 with params: {'learning_rate': 0.0048059042384735375, 'weight_decay': 0.01, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3975,0.474775,0.878093,0.888261,0.83844,0.857382
2,0.0824,0.560812,0.874427,0.850203,0.83888,0.842585
3,0.0344,0.615355,0.882676,0.867485,0.842501,0.852531
4,0.0241,0.642458,0.891842,0.896776,0.849385,0.867045
5,0.0172,0.752714,0.888176,0.882138,0.836768,0.853798
6,0.0081,0.865979,0.901925,0.905504,0.847188,0.868202
7,0.0064,0.897994,0.887259,0.891785,0.827677,0.848739
8,0.0051,0.939687,0.887259,0.890486,0.837438,0.855732
9,0.0035,1.021396,0.883593,0.887335,0.82533,0.845091
10,0.0018,1.036585,0.887259,0.863296,0.845823,0.853268


[I 2025-03-22 05:57:48,090] Trial 21 pruned. 


Trial 22 with params: {'learning_rate': 0.0012626855053237266, 'weight_decay': 0.005, 'warmup_steps': 25}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5815,0.511523,0.850596,0.847852,0.78994,0.807198
2,0.1689,0.505664,0.872594,0.861222,0.823943,0.838905
3,0.0774,0.602011,0.854262,0.84079,0.829974,0.833627
4,0.0394,0.766918,0.863428,0.875933,0.827301,0.844315
5,0.0241,0.76192,0.87901,0.875641,0.840139,0.853994
6,0.0145,0.804781,0.868011,0.878179,0.831057,0.848218
7,0.0108,0.958616,0.868011,0.865825,0.83161,0.844193
8,0.0068,0.983609,0.878093,0.890223,0.839664,0.857698
9,0.0053,0.938489,0.871677,0.883229,0.834505,0.851771
10,0.0018,1.021918,0.879927,0.890714,0.839673,0.859047


[I 2025-03-22 05:59:44,413] Trial 22 pruned. 


Trial 23 with params: {'learning_rate': 0.0049165522352066825, 'weight_decay': 0.002, 'warmup_steps': 12}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4238,0.459954,0.88451,0.893768,0.842905,0.862541
2,0.0863,0.501912,0.883593,0.875151,0.852044,0.862068
3,0.0373,0.664498,0.877177,0.872263,0.831019,0.844606
4,0.0258,0.705264,0.879927,0.888616,0.822615,0.844521
5,0.0167,0.86017,0.879927,0.889979,0.831279,0.85151
6,0.0102,0.883593,0.88451,0.878712,0.835807,0.851026
7,0.0047,0.999635,0.880843,0.8891,0.83192,0.851693
8,0.0028,1.09404,0.879927,0.887305,0.831361,0.851025
9,0.0008,1.119404,0.888176,0.895126,0.837773,0.858212
10,0.0016,1.116176,0.886343,0.892759,0.836721,0.856468


[I 2025-03-22 06:02:51,163] Trial 23 finished with value: 0.848354511022864 and parameters: {'learning_rate': 0.0049165522352066825, 'weight_decay': 0.002, 'warmup_steps': 12}. Best is trial 12 with value: 0.8704299913397645.


Trial 24 with params: {'learning_rate': 0.0037418868054702884, 'weight_decay': 0.006, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4127,0.490013,0.877177,0.888683,0.846782,0.863035
2,0.0914,0.495385,0.885426,0.880252,0.845032,0.858982
3,0.039,0.674733,0.886343,0.892959,0.84716,0.863572
4,0.0204,0.704942,0.893676,0.887219,0.842863,0.859417
5,0.0167,0.719006,0.889093,0.895723,0.848276,0.865951
6,0.0098,0.804319,0.886343,0.878623,0.837937,0.852831
7,0.0051,0.987142,0.888176,0.895858,0.838386,0.858128
8,0.0039,0.879983,0.892759,0.896681,0.833337,0.854213
9,0.0015,0.989259,0.894592,0.900666,0.853627,0.870945
10,0.0008,1.029183,0.894592,0.901443,0.852503,0.870898


[I 2025-03-22 06:05:55,088] Trial 24 finished with value: 0.8707659295677583 and parameters: {'learning_rate': 0.0037418868054702884, 'weight_decay': 0.006, 'warmup_steps': 2}. Best is trial 24 with value: 0.8707659295677583.


Trial 25 with params: {'learning_rate': 0.0033676857424077824, 'weight_decay': 0.007, 'warmup_steps': 5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4266,0.494822,0.872594,0.862113,0.834943,0.845074
2,0.0946,0.498028,0.879927,0.878308,0.851898,0.861624
3,0.0424,0.576649,0.886343,0.880966,0.856535,0.865595
4,0.0226,0.747012,0.889093,0.898777,0.847675,0.8671
5,0.0141,0.739972,0.880843,0.859587,0.84143,0.848831
6,0.0075,0.775646,0.896425,0.882348,0.852731,0.865408
7,0.0077,0.882867,0.883593,0.878209,0.845642,0.85641
8,0.0044,0.98136,0.87626,0.883274,0.839303,0.854129
9,0.0033,0.889075,0.888176,0.883973,0.85726,0.867962
10,0.0007,1.020303,0.879927,0.878346,0.85082,0.86116


[I 2025-03-22 06:08:49,599] Trial 25 finished with value: 0.8615732418123456 and parameters: {'learning_rate': 0.0033676857424077824, 'weight_decay': 0.007, 'warmup_steps': 5}. Best is trial 24 with value: 0.8707659295677583.


Trial 26 with params: {'learning_rate': 0.0014273663265452093, 'weight_decay': 0.006, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.519,0.496614,0.857929,0.867171,0.786161,0.806956
2,0.1562,0.498097,0.873511,0.874262,0.825564,0.844218
3,0.0702,0.593083,0.866178,0.853537,0.837131,0.843465
4,0.0366,0.693659,0.873511,0.881559,0.836475,0.852555
5,0.0225,0.692533,0.883593,0.881462,0.8521,0.864216
6,0.0135,0.808983,0.878093,0.876719,0.84843,0.859808
7,0.0056,0.926627,0.880843,0.890092,0.840887,0.859521
8,0.0071,0.905411,0.878093,0.885875,0.839048,0.855981
9,0.0046,0.940763,0.882676,0.868146,0.843023,0.853378
10,0.0029,0.938805,0.883593,0.880827,0.843184,0.857733


[I 2025-03-22 06:11:24,039] Trial 26 pruned. 


Trial 27 with params: {'learning_rate': 0.00021059103361382344, 'weight_decay': 0.001, 'warmup_steps': 31}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9113,0.612708,0.780935,0.659597,0.666508,0.66287
2,0.4407,0.531635,0.827681,0.700209,0.705277,0.701549
3,0.3355,0.557267,0.823098,0.852978,0.715125,0.712922
4,0.2525,0.550432,0.835014,0.854426,0.793573,0.814736
5,0.1941,0.555372,0.845096,0.86516,0.81066,0.831026
6,0.1542,0.56151,0.852429,0.871703,0.82527,0.844153
7,0.1281,0.585678,0.854262,0.86925,0.8279,0.843999
8,0.1012,0.635607,0.83868,0.856567,0.806927,0.823916
9,0.0855,0.666854,0.847846,0.866223,0.811508,0.832683
10,0.0698,0.669311,0.850596,0.865472,0.814322,0.833848


[I 2025-03-22 06:13:30,340] Trial 27 pruned. 


Trial 28 with params: {'learning_rate': 0.003739834854873935, 'weight_decay': 0.005, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4294,0.461081,0.875344,0.88706,0.846419,0.862115
2,0.092,0.540212,0.891842,0.898729,0.849063,0.868043
3,0.0398,0.679982,0.88176,0.892725,0.852148,0.866962
4,0.0191,0.76848,0.88176,0.881935,0.850646,0.86221
5,0.0162,0.774488,0.893676,0.890624,0.860518,0.873058
6,0.0101,0.802554,0.888176,0.888977,0.855666,0.868949
7,0.0079,0.927886,0.890926,0.898125,0.849758,0.867527
8,0.0052,0.938083,0.880843,0.89182,0.851568,0.866584
9,0.0015,0.963506,0.890926,0.898133,0.859168,0.874296
10,0.0006,0.983223,0.895509,0.902003,0.862752,0.878296


[I 2025-03-22 06:16:31,363] Trial 28 finished with value: 0.8779144345340814 and parameters: {'learning_rate': 0.003739834854873935, 'weight_decay': 0.005, 'warmup_steps': 10}. Best is trial 28 with value: 0.8779144345340814.


Trial 29 with params: {'learning_rate': 0.0013582834036614884, 'weight_decay': 0.003, 'warmup_steps': 18}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5481,0.510622,0.853346,0.840599,0.800719,0.814175
2,0.1594,0.507533,0.875344,0.866071,0.836709,0.8489
3,0.0712,0.580657,0.877177,0.875789,0.848075,0.859247
4,0.0369,0.744735,0.868928,0.869192,0.832669,0.846679
5,0.0248,0.735895,0.870761,0.883851,0.832881,0.851573


[I 2025-03-22 06:17:36,700] Trial 29 pruned. 


Trial 30 with params: {'learning_rate': 0.002939009503742753, 'weight_decay': 0.005, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4429,0.488446,0.875344,0.886439,0.836995,0.855146
2,0.1,0.516501,0.888176,0.877123,0.856667,0.865529
3,0.0443,0.634212,0.879927,0.878736,0.851008,0.861927
4,0.0211,0.797284,0.88176,0.879905,0.852623,0.863217
5,0.0187,0.765539,0.885426,0.892705,0.846238,0.862563
6,0.009,0.78764,0.894592,0.887517,0.853164,0.866252
7,0.0072,0.877391,0.885426,0.885834,0.853792,0.866923
8,0.0032,0.916921,0.886343,0.873837,0.84503,0.857344
9,0.0011,1.053836,0.886343,0.882101,0.845732,0.859709
10,0.0003,1.133592,0.888176,0.886058,0.855933,0.868099


[I 2025-03-22 06:20:40,833] Trial 30 finished with value: 0.8608434759531648 and parameters: {'learning_rate': 0.002939009503742753, 'weight_decay': 0.005, 'warmup_steps': 10}. Best is trial 28 with value: 0.8779144345340814.


Trial 31 with params: {'learning_rate': 0.004655043086201124, 'weight_decay': 0.005, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4413,0.44304,0.889093,0.88282,0.847774,0.861492
2,0.0876,0.488653,0.890926,0.880961,0.841129,0.85563
3,0.0379,0.611487,0.890926,0.874525,0.850662,0.859567
4,0.0237,0.666925,0.879927,0.857227,0.83174,0.842233
5,0.0165,0.769943,0.891842,0.873169,0.841275,0.85377
6,0.0129,0.757466,0.889093,0.854957,0.846163,0.850267
7,0.0055,0.881807,0.885426,0.868716,0.835958,0.848053
8,0.0056,0.949102,0.87901,0.846771,0.831101,0.836158
9,0.0049,0.904184,0.88176,0.868341,0.842447,0.852617
10,0.0016,1.007658,0.882676,0.86906,0.842734,0.853179


[I 2025-03-22 06:22:46,131] Trial 31 pruned. 


Trial 32 with params: {'learning_rate': 0.003977628095374422, 'weight_decay': 0.006, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4059,0.486205,0.879927,0.890435,0.850292,0.865376
2,0.0891,0.487417,0.886343,0.891594,0.846637,0.863103
3,0.04,0.610472,0.875344,0.883792,0.838533,0.853957
4,0.0191,0.693448,0.893676,0.900612,0.851655,0.869803
5,0.0146,0.872289,0.880843,0.891628,0.831982,0.852733


[I 2025-03-22 06:23:50,389] Trial 32 pruned. 


Trial 33 with params: {'learning_rate': 0.002447028796085397, 'weight_decay': 0.004, 'warmup_steps': 13}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4631,0.494227,0.867094,0.87527,0.803214,0.823995
2,0.1059,0.49081,0.895509,0.902665,0.862318,0.878109
3,0.0445,0.662569,0.872594,0.870941,0.844807,0.853439
4,0.0256,0.720633,0.879927,0.890714,0.850207,0.86551
5,0.0172,0.715422,0.882676,0.89142,0.834178,0.854416
6,0.0105,0.842216,0.88176,0.888057,0.842494,0.858718
7,0.0045,0.851399,0.888176,0.894431,0.856878,0.870529
8,0.0045,0.948838,0.879927,0.887699,0.832356,0.851867
9,0.0033,0.910741,0.875344,0.885768,0.838079,0.854954
10,0.0019,0.922408,0.880843,0.889233,0.841448,0.85919


[I 2025-03-22 06:27:06,470] Trial 33 finished with value: 0.8571949338323139 and parameters: {'learning_rate': 0.002447028796085397, 'weight_decay': 0.004, 'warmup_steps': 13}. Best is trial 28 with value: 0.8779144345340814.


Trial 34 with params: {'learning_rate': 0.002346586909584358, 'weight_decay': 0.008, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4771,0.482116,0.872594,0.883946,0.825578,0.84545
2,0.1154,0.485969,0.883593,0.890132,0.844284,0.861197
3,0.0466,0.557752,0.87901,0.877638,0.850635,0.860093
4,0.027,0.658959,0.879927,0.879838,0.839977,0.855273
5,0.0181,0.67591,0.880843,0.886121,0.833053,0.851302


[I 2025-03-22 06:28:04,093] Trial 34 pruned. 


Trial 35 with params: {'learning_rate': 0.004174616019494053, 'weight_decay': 0.007, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3927,0.450161,0.889093,0.897149,0.84684,0.865955
2,0.0868,0.515491,0.883593,0.89389,0.85241,0.868805
3,0.0379,0.640563,0.879927,0.882421,0.861312,0.8674
4,0.0222,0.662145,0.886343,0.872162,0.86328,0.867494
5,0.0122,0.82267,0.891842,0.882642,0.858361,0.868111
6,0.0094,0.800639,0.897342,0.876115,0.864366,0.869185
7,0.0059,0.842707,0.900092,0.907981,0.866135,0.882589
8,0.0048,0.908861,0.890926,0.879782,0.859034,0.867393
9,0.0048,0.893545,0.887259,0.874986,0.856662,0.86413
10,0.0024,0.891975,0.898258,0.885882,0.874258,0.879339


[I 2025-03-22 06:31:05,875] Trial 35 finished with value: 0.8691347060258267 and parameters: {'learning_rate': 0.004174616019494053, 'weight_decay': 0.007, 'warmup_steps': 0}. Best is trial 28 with value: 0.8779144345340814.


Trial 36 with params: {'learning_rate': 5.370203809578854e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 9}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3246,0.974118,0.637947,0.545541,0.537734,0.53707
2,0.7984,0.765978,0.734189,0.615857,0.627806,0.620711
3,0.6223,0.689689,0.752521,0.63047,0.645142,0.636283
4,0.5387,0.647287,0.773602,0.656874,0.66039,0.657108
5,0.488,0.625223,0.779102,0.652718,0.668118,0.659132


[I 2025-03-22 06:32:08,755] Trial 36 pruned. 


Trial 37 with params: {'learning_rate': 0.0038186795687329767, 'weight_decay': 0.007, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4058,0.482647,0.880843,0.89352,0.840756,0.860529
2,0.0902,0.516992,0.893676,0.884506,0.860824,0.871234
3,0.0404,0.629,0.877177,0.889186,0.840192,0.856305
4,0.0246,0.705072,0.890009,0.869857,0.856648,0.861937
5,0.0154,0.767741,0.888176,0.875942,0.856594,0.864611
6,0.0065,0.817951,0.890009,0.868212,0.85753,0.862279
7,0.0047,1.066317,0.891842,0.896219,0.850662,0.8673
8,0.0074,0.98759,0.883593,0.883927,0.861995,0.870348
9,0.003,1.127022,0.880843,0.870103,0.851385,0.857607
10,0.0008,1.114813,0.883593,0.881786,0.853454,0.864835


[I 2025-03-22 06:35:17,127] Trial 37 finished with value: 0.8629561921874944 and parameters: {'learning_rate': 0.0038186795687329767, 'weight_decay': 0.007, 'warmup_steps': 0}. Best is trial 28 with value: 0.8779144345340814.


Trial 38 with params: {'learning_rate': 0.0025387025970941045, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4486,0.509157,0.873511,0.87983,0.808221,0.828547
2,0.1069,0.518676,0.878093,0.876591,0.849619,0.859958
3,0.0454,0.629719,0.878093,0.886328,0.848801,0.862736
4,0.0227,0.709572,0.877177,0.880957,0.847335,0.860488
5,0.0173,0.805604,0.880843,0.880575,0.850157,0.862686
6,0.0101,0.799161,0.883593,0.880731,0.853074,0.86408
7,0.0059,0.918894,0.882676,0.894203,0.85298,0.868412
8,0.0033,1.066026,0.877177,0.890341,0.848403,0.862615
9,0.0041,0.959805,0.88451,0.894479,0.853571,0.869332
10,0.0016,0.955816,0.888176,0.897145,0.856081,0.87234


[I 2025-03-22 06:38:15,015] Trial 38 finished with value: 0.8732440253127116 and parameters: {'learning_rate': 0.0025387025970941045, 'weight_decay': 0.005, 'warmup_steps': 2}. Best is trial 28 with value: 0.8779144345340814.


Trial 39 with params: {'learning_rate': 0.0010475348879951107, 'weight_decay': 0.009000000000000001, 'warmup_steps': 31}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6014,0.514346,0.842346,0.845402,0.800783,0.815177
2,0.188,0.507071,0.872594,0.884639,0.832961,0.852925
3,0.0907,0.574243,0.866178,0.876208,0.838697,0.853146
4,0.0491,0.729812,0.862511,0.877597,0.824266,0.843625
5,0.0301,0.765433,0.878093,0.891785,0.845713,0.864095
6,0.0172,0.812449,0.870761,0.882646,0.840843,0.8577
7,0.0115,0.842352,0.877177,0.888091,0.836696,0.856508
8,0.0091,0.945705,0.872594,0.883244,0.8335,0.852466
9,0.0045,0.95107,0.878093,0.885716,0.839699,0.856691
10,0.0026,1.045752,0.875344,0.886884,0.845099,0.861584


[I 2025-03-22 06:41:09,751] Trial 39 finished with value: 0.8651463277157018 and parameters: {'learning_rate': 0.0010475348879951107, 'weight_decay': 0.009000000000000001, 'warmup_steps': 31}. Best is trial 28 with value: 0.8779144345340814.


Trial 40 with params: {'learning_rate': 0.0002081476747934512, 'weight_decay': 0.006, 'warmup_steps': 22}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9008,0.614329,0.781852,0.656736,0.667777,0.662011
2,0.4392,0.531531,0.829514,0.700511,0.70695,0.702771
3,0.3357,0.557187,0.815765,0.846783,0.709142,0.706935
4,0.2552,0.548241,0.829514,0.851517,0.788559,0.810429
5,0.1984,0.543969,0.849679,0.867763,0.814721,0.834651
6,0.157,0.558818,0.849679,0.867124,0.823456,0.841137
7,0.1309,0.588353,0.853346,0.867386,0.828149,0.842916
8,0.1039,0.630482,0.837764,0.856324,0.806269,0.823538
9,0.0885,0.660448,0.848763,0.868942,0.821374,0.840626
10,0.0715,0.67051,0.847846,0.864126,0.812343,0.832147


[I 2025-03-22 06:43:32,917] Trial 40 pruned. 


Trial 41 with params: {'learning_rate': 6.459897452290429e-05, 'weight_decay': 0.0, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.26,0.904516,0.670027,0.572914,0.568124,0.567873
2,0.7273,0.714774,0.756187,0.634117,0.647679,0.640255
3,0.5722,0.652493,0.770852,0.645357,0.660725,0.652526
4,0.4966,0.619347,0.784601,0.665442,0.668538,0.665653
5,0.4493,0.601114,0.789184,0.660444,0.6762,0.66758


[I 2025-03-22 06:44:38,127] Trial 41 pruned. 


Trial 42 with params: {'learning_rate': 5.3550149515819593e-05, 'weight_decay': 0.005, 'warmup_steps': 29}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3433,0.977418,0.635197,0.541641,0.535818,0.534537
2,0.8017,0.766258,0.736022,0.617947,0.629505,0.62247
3,0.6231,0.688841,0.75527,0.632601,0.646962,0.638351
4,0.5388,0.64705,0.769936,0.652875,0.657925,0.653927
5,0.4876,0.625271,0.778185,0.651757,0.667384,0.658195
6,0.4504,0.599921,0.792851,0.672619,0.676982,0.67375
7,0.4196,0.594206,0.802016,0.675014,0.685976,0.680038
8,0.3981,0.580232,0.808433,0.679778,0.689942,0.684788
9,0.3771,0.581662,0.806599,0.679948,0.689263,0.684021
10,0.3596,0.5777,0.806599,0.679353,0.688939,0.683864


[I 2025-03-22 06:46:33,577] Trial 42 pruned. 


Trial 43 with params: {'learning_rate': 0.002092544727369896, 'weight_decay': 0.005, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4706,0.471385,0.861595,0.859354,0.817168,0.832079
2,0.1197,0.482327,0.892759,0.889942,0.859188,0.871982
3,0.0532,0.624887,0.87626,0.876426,0.847554,0.858109
4,0.0272,0.676879,0.88451,0.893498,0.843751,0.862391
5,0.0189,0.761908,0.88451,0.892913,0.833967,0.855123
6,0.0086,0.752633,0.894592,0.901279,0.842014,0.863455
7,0.0062,0.897012,0.888176,0.895958,0.837999,0.857708
8,0.0036,0.864087,0.897342,0.905595,0.844859,0.866259
9,0.0032,0.922211,0.88451,0.88191,0.844666,0.858638
10,0.0021,0.934605,0.890009,0.888851,0.857766,0.870694


[I 2025-03-22 06:49:29,202] Trial 43 finished with value: 0.863814585746975 and parameters: {'learning_rate': 0.002092544727369896, 'weight_decay': 0.005, 'warmup_steps': 0}. Best is trial 28 with value: 0.8779144345340814.


Trial 44 with params: {'learning_rate': 7.012112975444019e-05, 'weight_decay': 0.0, 'warmup_steps': 23}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2549,0.876969,0.68286,0.585693,0.579245,0.579844
2,0.6972,0.6945,0.756187,0.633585,0.648067,0.639946
3,0.548,0.639425,0.772686,0.647182,0.662358,0.654304
4,0.4752,0.607274,0.780018,0.661118,0.666813,0.662078
5,0.4297,0.588592,0.7956,0.666496,0.68181,0.673448
6,0.3948,0.572471,0.815765,0.69124,0.696368,0.692942
7,0.3651,0.56945,0.811182,0.683982,0.693582,0.688262
8,0.342,0.565631,0.811182,0.682761,0.693099,0.687643
9,0.3189,0.566979,0.816682,0.688713,0.697049,0.691917
10,0.2998,0.557804,0.813932,0.851969,0.703557,0.70789


[I 2025-03-22 06:51:18,888] Trial 44 pruned. 


Trial 45 with params: {'learning_rate': 0.004413233144853161, 'weight_decay': 0.005, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3986,0.499865,0.869844,0.86948,0.842836,0.852722
2,0.0849,0.484583,0.88451,0.88204,0.853793,0.865258
3,0.0386,0.616228,0.87901,0.869418,0.838725,0.851171
4,0.0248,0.669428,0.887259,0.899174,0.84461,0.865212
5,0.0157,0.743505,0.887259,0.882024,0.846745,0.860637
6,0.0071,0.956298,0.886343,0.87627,0.853225,0.863273
7,0.0061,0.940647,0.879927,0.875699,0.841252,0.854076
8,0.0044,0.88992,0.87901,0.869834,0.84918,0.857781
9,0.0046,0.965812,0.889093,0.88351,0.837277,0.855
10,0.0031,0.92515,0.889093,0.867132,0.847301,0.855917


[I 2025-03-22 06:53:19,651] Trial 45 pruned. 


Trial 46 with params: {'learning_rate': 0.0019085457592637925, 'weight_decay': 0.004, 'warmup_steps': 8}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4914,0.515901,0.854262,0.86305,0.784191,0.80411
2,0.1285,0.487831,0.877177,0.869639,0.848057,0.857316
3,0.0535,0.576303,0.869844,0.871271,0.843546,0.853432
4,0.0287,0.653894,0.88451,0.893372,0.853684,0.868988
5,0.0182,0.802436,0.870761,0.868454,0.833903,0.846187
6,0.0131,0.865736,0.87626,0.87477,0.836762,0.852085
7,0.0058,0.980484,0.869844,0.882875,0.842148,0.856614
8,0.0054,0.96181,0.87901,0.886535,0.841037,0.857148
9,0.0032,0.970073,0.87901,0.875919,0.840887,0.853618
10,0.0014,0.962461,0.87626,0.884061,0.838505,0.85513


[I 2025-03-22 06:55:22,624] Trial 46 pruned. 


Trial 47 with params: {'learning_rate': 0.0047352670213604425, 'weight_decay': 0.006, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.398,0.447815,0.872594,0.882342,0.825931,0.845857
2,0.0824,0.480694,0.887259,0.895343,0.847573,0.864668
3,0.0371,0.624581,0.873511,0.870232,0.837233,0.848577
4,0.0237,0.662813,0.885426,0.896017,0.844272,0.864029
5,0.0173,0.765057,0.890926,0.89881,0.838598,0.860399
6,0.0109,0.77117,0.879927,0.85443,0.831103,0.840242
7,0.0069,0.957584,0.893676,0.898666,0.852425,0.869379
8,0.0046,0.968942,0.890926,0.884337,0.839514,0.856426
9,0.0044,0.947429,0.891842,0.883382,0.841706,0.856981
10,0.0012,1.012075,0.892759,0.877565,0.850727,0.861967


[I 2025-03-22 06:58:16,695] Trial 47 finished with value: 0.8611546750509086 and parameters: {'learning_rate': 0.0047352670213604425, 'weight_decay': 0.006, 'warmup_steps': 2}. Best is trial 28 with value: 0.8779144345340814.


Trial 48 with params: {'learning_rate': 0.003679837247339427, 'weight_decay': 0.003, 'warmup_steps': 18}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4409,0.461875,0.87901,0.888695,0.839967,0.857756
2,0.0933,0.518167,0.88451,0.876343,0.846149,0.856761
3,0.0387,0.635533,0.875344,0.872411,0.837673,0.850403
4,0.0208,0.686663,0.887259,0.883621,0.845922,0.860529
5,0.0136,0.822909,0.88176,0.880205,0.84068,0.856406
6,0.0113,0.820862,0.891842,0.90174,0.857805,0.875505
7,0.0062,0.871835,0.882676,0.883382,0.852363,0.864594
8,0.0033,0.961479,0.878093,0.879346,0.848602,0.86118
9,0.0019,1.011723,0.88451,0.884069,0.853122,0.865793
10,0.0005,1.049937,0.88176,0.881352,0.851122,0.863343


[I 2025-03-22 07:01:28,598] Trial 48 finished with value: 0.8629495701298048 and parameters: {'learning_rate': 0.003679837247339427, 'weight_decay': 0.003, 'warmup_steps': 18}. Best is trial 28 with value: 0.8779144345340814.


Trial 49 with params: {'learning_rate': 0.0014127194909517404, 'weight_decay': 0.004, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.522,0.521969,0.847846,0.838993,0.778777,0.794585
2,0.1553,0.512529,0.88176,0.866144,0.850611,0.85738
3,0.0698,0.594509,0.856095,0.849651,0.831672,0.837501
4,0.0346,0.708703,0.875344,0.87394,0.837625,0.850811
5,0.0235,0.788745,0.871677,0.883383,0.832704,0.851571
6,0.0139,0.742019,0.87626,0.884988,0.847719,0.86216
7,0.0084,0.861552,0.870761,0.879656,0.843416,0.856472
8,0.006,0.944421,0.878093,0.885861,0.84893,0.862817
9,0.0044,0.939176,0.882676,0.890914,0.842235,0.860609
10,0.0009,0.996176,0.879927,0.890981,0.848993,0.865744


[I 2025-03-22 07:05:04,173] Trial 49 finished with value: 0.86178269732781 and parameters: {'learning_rate': 0.0014127194909517404, 'weight_decay': 0.004, 'warmup_steps': 3}. Best is trial 28 with value: 0.8779144345340814.


Trial 50 with params: {'learning_rate': 0.0011095198524473941, 'weight_decay': 0.008, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5437,0.515975,0.850596,0.846616,0.789201,0.807068
2,0.1818,0.531944,0.870761,0.871297,0.832396,0.847904
3,0.0868,0.629787,0.850596,0.840044,0.82579,0.830001
4,0.0494,0.71268,0.864345,0.864076,0.828565,0.842276
5,0.0312,0.745091,0.869844,0.885767,0.831803,0.851552
6,0.0159,0.785099,0.869844,0.867819,0.843181,0.852715
7,0.0094,0.93707,0.871677,0.88033,0.834272,0.850971
8,0.0076,0.909865,0.872594,0.883178,0.834096,0.852708
9,0.0055,0.972698,0.870761,0.880481,0.833774,0.850725
10,0.0023,1.050072,0.859762,0.857424,0.826401,0.83748


[I 2025-03-22 07:07:35,300] Trial 50 pruned. 


Trial 51 with params: {'learning_rate': 0.002525366324437439, 'weight_decay': 0.01, 'warmup_steps': 24}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4757,0.47826,0.870761,0.883487,0.832432,0.851275
2,0.108,0.490112,0.888176,0.89261,0.848695,0.864291
3,0.0444,0.68566,0.872594,0.884883,0.834665,0.852099
4,0.022,0.810904,0.882676,0.881473,0.842278,0.857936
5,0.0179,0.71653,0.887259,0.882362,0.83679,0.853658
6,0.0085,0.857211,0.88451,0.891681,0.844223,0.862108
7,0.0057,0.917877,0.882676,0.886634,0.825023,0.844182
8,0.0058,0.952135,0.883593,0.893422,0.832991,0.85484
9,0.0038,0.976816,0.883593,0.891111,0.84366,0.860866
10,0.0016,1.035304,0.889093,0.895729,0.848327,0.86584


[I 2025-03-22 07:11:05,630] Trial 51 finished with value: 0.8673725266757724 and parameters: {'learning_rate': 0.002525366324437439, 'weight_decay': 0.01, 'warmup_steps': 24}. Best is trial 28 with value: 0.8779144345340814.


Trial 52 with params: {'learning_rate': 0.0032524943784092365, 'weight_decay': 0.01, 'warmup_steps': 24}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4484,0.496542,0.877177,0.887235,0.829575,0.849407
2,0.0961,0.585705,0.875344,0.886505,0.837783,0.855934
3,0.0398,0.625159,0.87626,0.887445,0.846489,0.862622
4,0.023,0.717637,0.888176,0.884134,0.85645,0.867804
5,0.0178,0.81256,0.877177,0.890285,0.846782,0.864322
6,0.0085,0.809195,0.897342,0.905473,0.86302,0.880152
7,0.0039,1.020955,0.879927,0.885502,0.84138,0.857144
8,0.0032,0.986697,0.878093,0.888223,0.84781,0.86385
9,0.0034,0.962591,0.887259,0.884227,0.855331,0.867304
10,0.0022,1.099563,0.883593,0.873008,0.851995,0.86085


[I 2025-03-22 07:14:10,723] Trial 52 finished with value: 0.8663909668759723 and parameters: {'learning_rate': 0.0032524943784092365, 'weight_decay': 0.01, 'warmup_steps': 24}. Best is trial 28 with value: 0.8779144345340814.


Trial 53 with params: {'learning_rate': 0.0022756942174958247, 'weight_decay': 0.009000000000000001, 'warmup_steps': 21}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4873,0.511449,0.862511,0.880136,0.826508,0.845137
2,0.1143,0.481081,0.887259,0.897562,0.85533,0.872281
3,0.048,0.573879,0.877177,0.850936,0.848938,0.848978
4,0.0229,0.725796,0.88451,0.884812,0.853858,0.866142
5,0.0195,0.76782,0.88451,0.89379,0.854296,0.869786
6,0.0099,0.798518,0.88451,0.89562,0.854291,0.870617
7,0.0081,0.820647,0.886343,0.892953,0.837827,0.857042
8,0.004,0.854167,0.890926,0.898433,0.850033,0.868125
9,0.0028,0.890649,0.888176,0.876896,0.857103,0.865688
10,0.0011,0.994137,0.882676,0.872724,0.853212,0.861714


[I 2025-03-22 07:16:25,163] Trial 53 pruned. 


Trial 54 with params: {'learning_rate': 0.0011058478292104098, 'weight_decay': 0.006, 'warmup_steps': 15}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5729,0.507014,0.853346,0.849533,0.79122,0.808543
2,0.1804,0.514594,0.874427,0.876123,0.843896,0.857505
3,0.0867,0.583683,0.865261,0.848075,0.828946,0.836894
4,0.0468,0.757957,0.864345,0.880247,0.827283,0.846544
5,0.028,0.786027,0.866178,0.879151,0.828102,0.847586
6,0.0168,0.747165,0.870761,0.869676,0.832623,0.847587
7,0.0094,0.871137,0.877177,0.888921,0.836526,0.856499
8,0.0077,0.866607,0.87626,0.884947,0.837454,0.855113
9,0.0032,0.976544,0.873511,0.882746,0.835072,0.852818
10,0.0014,1.039545,0.869844,0.878673,0.833372,0.849219


[I 2025-03-22 07:18:29,401] Trial 54 pruned. 


Trial 55 with params: {'learning_rate': 0.0008953750478722926, 'weight_decay': 0.0, 'warmup_steps': 12}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5985,0.523014,0.84143,0.84135,0.780735,0.798934
2,0.2069,0.520522,0.869844,0.881559,0.830739,0.850196
3,0.1047,0.557814,0.862511,0.865178,0.834515,0.847031
4,0.0594,0.700304,0.867094,0.868077,0.828723,0.84422
5,0.0371,0.705849,0.871677,0.869527,0.833692,0.84793


[I 2025-03-22 07:19:37,080] Trial 55 pruned. 


Trial 56 with params: {'learning_rate': 0.0034158136459597936, 'weight_decay': 0.009000000000000001, 'warmup_steps': 31}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.461,0.527067,0.857929,0.873202,0.822141,0.840103
2,0.0948,0.538359,0.887259,0.89586,0.85721,0.871742
3,0.0394,0.638776,0.873511,0.863766,0.84467,0.851818
4,0.0211,0.722394,0.892759,0.900842,0.859174,0.875716
5,0.0142,0.786646,0.882676,0.893441,0.851517,0.867688
6,0.0065,0.768206,0.897342,0.895631,0.862908,0.876173
7,0.0059,0.861309,0.886343,0.877695,0.855161,0.863724
8,0.0051,1.016863,0.890009,0.899539,0.856938,0.873318
9,0.0045,0.914409,0.890926,0.876149,0.859078,0.865968
10,0.0027,0.934603,0.888176,0.883155,0.847944,0.861468


[I 2025-03-22 07:22:32,526] Trial 56 finished with value: 0.8637648602168211 and parameters: {'learning_rate': 0.0034158136459597936, 'weight_decay': 0.009000000000000001, 'warmup_steps': 31}. Best is trial 28 with value: 0.8779144345340814.


Trial 57 with params: {'learning_rate': 0.0027911621955266216, 'weight_decay': 0.01, 'warmup_steps': 20}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4575,0.484993,0.868011,0.881238,0.830263,0.849041
2,0.1018,0.518833,0.87901,0.856832,0.840636,0.847253
3,0.0411,0.55886,0.877177,0.866801,0.849218,0.856294
4,0.0214,0.710303,0.886343,0.887169,0.85348,0.867807
5,0.0142,0.836746,0.868011,0.881837,0.832441,0.84986


[I 2025-03-22 07:23:36,510] Trial 57 pruned. 


Trial 58 with params: {'learning_rate': 0.00021771047684957567, 'weight_decay': 0.01, 'warmup_steps': 22}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8866,0.608247,0.783685,0.659054,0.669327,0.66388
2,0.4317,0.529323,0.829514,0.700868,0.706982,0.703005
3,0.3276,0.561647,0.824015,0.853455,0.734799,0.744692
4,0.2456,0.547389,0.829514,0.849614,0.780437,0.803063
5,0.1896,0.545528,0.855179,0.872716,0.828004,0.845722


[I 2025-03-22 07:24:39,747] Trial 58 pruned. 


Trial 59 with params: {'learning_rate': 0.004300036960265831, 'weight_decay': 0.003, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4092,0.494736,0.872594,0.884022,0.844027,0.859218
2,0.0848,0.516191,0.883593,0.891499,0.844971,0.861381
3,0.0394,0.703886,0.873511,0.870658,0.837185,0.848463
4,0.0213,0.754054,0.883593,0.89331,0.843697,0.861803
5,0.0193,0.71206,0.88451,0.861074,0.844476,0.851571
6,0.0098,0.776464,0.889093,0.871941,0.838733,0.852068
7,0.006,0.925285,0.88451,0.890656,0.835989,0.854804
8,0.0046,0.956789,0.882676,0.89132,0.832841,0.853822
9,0.0034,1.025608,0.87901,0.865535,0.840873,0.850557
10,0.0011,1.108718,0.885426,0.879462,0.84549,0.858566


[I 2025-03-22 07:27:50,897] Trial 59 finished with value: 0.8570452436888535 and parameters: {'learning_rate': 0.004300036960265831, 'weight_decay': 0.003, 'warmup_steps': 7}. Best is trial 28 with value: 0.8779144345340814.


Trial 60 with params: {'learning_rate': 0.0016062919504297665, 'weight_decay': 0.006, 'warmup_steps': 5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5047,0.503674,0.856095,0.865109,0.794863,0.814787
2,0.1404,0.514112,0.870761,0.856968,0.841613,0.848331
3,0.0622,0.568778,0.875344,0.887437,0.845364,0.861839
4,0.0331,0.709428,0.871677,0.885331,0.841649,0.85906
5,0.0215,0.708111,0.872594,0.881656,0.834592,0.851457


[I 2025-03-22 07:28:58,176] Trial 60 pruned. 


Trial 61 with params: {'learning_rate': 0.004589457243558559, 'weight_decay': 0.01, 'warmup_steps': 22}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4266,0.489648,0.866178,0.876227,0.830441,0.846759
2,0.0864,0.492186,0.886343,0.884968,0.854978,0.867461
3,0.0375,0.651913,0.875344,0.888417,0.846911,0.861483
4,0.0228,0.675813,0.891842,0.897146,0.850718,0.868015
5,0.014,0.711983,0.889093,0.896604,0.848409,0.866254
6,0.0107,0.841739,0.886343,0.897632,0.844162,0.864798
7,0.0091,0.697734,0.901008,0.906203,0.858283,0.876103
8,0.0027,0.813117,0.904675,0.910462,0.860188,0.879397
9,0.002,0.970733,0.890926,0.889118,0.850001,0.864828
10,0.0014,0.906229,0.910174,0.90475,0.874426,0.887143


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-22 07:32:06,957] Trial 61 finished with value: 0.8887268625210981 and parameters: {'learning_rate': 0.004589457243558559, 'weight_decay': 0.01, 'warmup_steps': 22}. Best is trial 61 with value: 0.8887268625210981.


Trial 62 with params: {'learning_rate': 0.004779123754971437, 'weight_decay': 0.009000000000000001, 'warmup_steps': 22}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.418,0.491348,0.878093,0.887676,0.839301,0.857336
2,0.0864,0.470248,0.88451,0.878365,0.845145,0.85792
3,0.0388,0.684887,0.88451,0.861354,0.84565,0.851229
4,0.0209,0.718813,0.88451,0.878929,0.843829,0.857709
5,0.0161,0.781504,0.87901,0.883905,0.82261,0.842355


[I 2025-03-22 07:33:08,542] Trial 62 pruned. 


Trial 63 with params: {'learning_rate': 0.004557308256527191, 'weight_decay': 0.006, 'warmup_steps': 16}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4077,0.475876,0.891842,0.900947,0.858287,0.875257
2,0.0869,0.487481,0.88451,0.870629,0.855858,0.861193
3,0.0372,0.688084,0.873511,0.854066,0.846814,0.84737
4,0.021,0.722376,0.892759,0.887077,0.839178,0.857685
5,0.0165,0.691567,0.891842,0.874243,0.850366,0.859999
6,0.0077,0.864169,0.883593,0.879287,0.843792,0.857833
7,0.0079,0.88874,0.879927,0.877888,0.84023,0.855206
8,0.0051,0.961689,0.888176,0.875952,0.857786,0.864356
9,0.0039,0.996632,0.882676,0.868867,0.842948,0.853727
10,0.0016,1.079749,0.885426,0.870463,0.845608,0.855566


[I 2025-03-22 07:35:08,499] Trial 63 pruned. 


Trial 64 with params: {'learning_rate': 0.0015804893052149714, 'weight_decay': 0.01, 'warmup_steps': 26}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5451,0.511029,0.853346,0.834491,0.791869,0.805566
2,0.1441,0.514539,0.87901,0.859468,0.839167,0.848145
3,0.0635,0.588674,0.869844,0.881616,0.833583,0.85011
4,0.032,0.727846,0.877177,0.889043,0.846736,0.863252
5,0.0225,0.752098,0.874427,0.888013,0.835962,0.854655
6,0.0133,0.777133,0.880843,0.889836,0.84127,0.859631
7,0.0055,0.925625,0.878093,0.887032,0.839934,0.856806
8,0.0051,0.970075,0.872594,0.883217,0.844822,0.859274
9,0.0029,0.934951,0.886343,0.89398,0.854746,0.870388
10,0.0019,1.033226,0.879927,0.880324,0.849087,0.862221


[I 2025-03-22 07:38:36,811] Trial 64 finished with value: 0.8664127118105439 and parameters: {'learning_rate': 0.0015804893052149714, 'weight_decay': 0.01, 'warmup_steps': 26}. Best is trial 61 with value: 0.8887268625210981.


Trial 65 with params: {'learning_rate': 0.004021049004567207, 'weight_decay': 0.01, 'warmup_steps': 21}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4283,0.484168,0.87901,0.888391,0.85032,0.864737
2,0.0874,0.541106,0.887259,0.880386,0.84763,0.860052
3,0.0393,0.58839,0.882676,0.872558,0.85161,0.860225
4,0.0206,0.694258,0.880843,0.892325,0.840703,0.860679
5,0.0154,0.736037,0.889093,0.887293,0.846703,0.863016
6,0.0074,0.929762,0.892759,0.884959,0.842232,0.858301
7,0.0059,0.932927,0.887259,0.893592,0.838509,0.857416
8,0.0039,0.989331,0.887259,0.883377,0.848371,0.861378
9,0.0019,1.000045,0.888176,0.883443,0.847774,0.861817
10,0.0014,0.998905,0.891842,0.886599,0.850211,0.864712


[I 2025-03-22 07:41:38,574] Trial 65 finished with value: 0.8639690151383229 and parameters: {'learning_rate': 0.004021049004567207, 'weight_decay': 0.01, 'warmup_steps': 21}. Best is trial 61 with value: 0.8887268625210981.


Trial 66 with params: {'learning_rate': 0.004796279448787634, 'weight_decay': 0.006, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4307,0.463896,0.890009,0.895792,0.848758,0.866111
2,0.0883,0.505262,0.878093,0.871251,0.830399,0.845499
3,0.0355,0.614108,0.88451,0.891802,0.843986,0.861387
4,0.0238,0.690636,0.892759,0.900095,0.849847,0.869012
5,0.0148,0.770166,0.886343,0.88955,0.826799,0.847361


[I 2025-03-22 07:42:51,332] Trial 66 pruned. 


Trial 67 with params: {'learning_rate': 0.0039851398294530604, 'weight_decay': 0.007, 'warmup_steps': 25}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4366,0.460061,0.88176,0.881888,0.850101,0.863376
2,0.0881,0.472056,0.886343,0.862961,0.846792,0.853526
3,0.038,0.629494,0.885426,0.863782,0.856084,0.858795
4,0.0214,0.660726,0.887259,0.881829,0.837037,0.854168
5,0.0165,0.798409,0.880843,0.880138,0.850259,0.862266
6,0.0098,0.809433,0.892759,0.882237,0.858495,0.86913
7,0.0055,0.812293,0.896425,0.875761,0.862429,0.868407
8,0.0029,0.918761,0.892759,0.888387,0.850239,0.865694
9,0.0017,0.996354,0.891842,0.888437,0.858379,0.871145
10,0.0007,1.035045,0.890009,0.887061,0.857272,0.869854


[I 2025-03-22 07:46:04,786] Trial 67 finished with value: 0.8705569694886576 and parameters: {'learning_rate': 0.0039851398294530604, 'weight_decay': 0.007, 'warmup_steps': 25}. Best is trial 61 with value: 0.8887268625210981.


Trial 68 with params: {'learning_rate': 0.004317675572369586, 'weight_decay': 0.005, 'warmup_steps': 27}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4354,0.505204,0.872594,0.884613,0.824242,0.846196
2,0.0876,0.514701,0.890009,0.876012,0.849081,0.860124
3,0.0402,0.614131,0.88451,0.869516,0.844516,0.854582
4,0.0223,0.795647,0.877177,0.875592,0.838725,0.852846
5,0.0158,0.879175,0.878093,0.872299,0.830921,0.845864
6,0.0105,0.890138,0.886343,0.894509,0.836128,0.857132
7,0.0039,0.936064,0.887259,0.89159,0.83838,0.856572
8,0.0014,0.975982,0.893676,0.888432,0.851229,0.866253
9,0.0012,0.987198,0.891842,0.886293,0.850594,0.86483
10,0.0009,1.039133,0.891842,0.88746,0.849743,0.864988


[I 2025-03-22 07:49:16,562] Trial 68 finished with value: 0.8558689243295304 and parameters: {'learning_rate': 0.004317675572369586, 'weight_decay': 0.005, 'warmup_steps': 27}. Best is trial 61 with value: 0.8887268625210981.


Trial 69 with params: {'learning_rate': 0.004618563219406311, 'weight_decay': 0.007, 'warmup_steps': 25}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.427,0.438717,0.880843,0.890645,0.850328,0.866399
2,0.0855,0.519445,0.889093,0.874128,0.847725,0.858291
3,0.0383,0.687425,0.88176,0.876897,0.833828,0.849338
4,0.0193,0.690632,0.890009,0.864368,0.838713,0.849669
5,0.0157,0.872625,0.88176,0.888672,0.831506,0.851577


[I 2025-03-22 07:50:53,261] Trial 69 pruned. 


Trial 70 with params: {'learning_rate': 0.0030311673860280424, 'weight_decay': 0.007, 'warmup_steps': 28}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.466,0.510399,0.867094,0.878105,0.821415,0.83955
2,0.0998,0.525438,0.885426,0.894743,0.854729,0.870492
3,0.0399,0.639513,0.868928,0.868821,0.840815,0.851192
4,0.0214,0.789558,0.882676,0.892807,0.842246,0.861506
5,0.0153,0.837058,0.880843,0.892821,0.840677,0.860234
6,0.0105,0.853486,0.887259,0.895875,0.846155,0.864799
7,0.0073,0.910104,0.883593,0.879369,0.834584,0.850673
8,0.0052,0.911112,0.887259,0.894004,0.836392,0.85703
9,0.0023,0.970627,0.88176,0.88952,0.832913,0.852342
10,0.0006,1.060229,0.88176,0.88821,0.832864,0.851841


[I 2025-03-22 07:53:19,277] Trial 70 pruned. 


Trial 71 with params: {'learning_rate': 0.004948960023586303, 'weight_decay': 0.009000000000000001, 'warmup_steps': 27}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4259,0.455198,0.874427,0.884714,0.837022,0.854616
2,0.083,0.528887,0.88176,0.867682,0.834189,0.846598
3,0.0367,0.695502,0.872594,0.87486,0.844119,0.855485
4,0.0242,0.72321,0.882676,0.881982,0.862428,0.870573
5,0.0183,0.714718,0.88176,0.881459,0.841536,0.857146
6,0.0092,0.819595,0.888176,0.877935,0.855914,0.865158
7,0.0063,0.920019,0.888176,0.893349,0.839183,0.857965
8,0.0047,1.003103,0.880843,0.870336,0.842167,0.85318
9,0.0032,1.043784,0.874427,0.86918,0.829323,0.84257
10,0.0012,0.99626,0.893676,0.900827,0.842591,0.863281


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-22 07:57:09,771] Trial 71 finished with value: 0.8621086762709198 and parameters: {'learning_rate': 0.004948960023586303, 'weight_decay': 0.009000000000000001, 'warmup_steps': 27}. Best is trial 61 with value: 0.8887268625210981.


Trial 72 with params: {'learning_rate': 0.00010295616529943657, 'weight_decay': 0.005, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1015,0.764892,0.736022,0.626031,0.624396,0.622791
2,0.5931,0.632051,0.775435,0.654951,0.66288,0.6572
3,0.4717,0.587363,0.7956,0.665573,0.681071,0.672817
4,0.4049,0.570281,0.800183,0.677047,0.683389,0.678288
5,0.3567,0.546443,0.816682,0.686184,0.698753,0.69229


[I 2025-03-22 07:58:11,903] Trial 72 pruned. 


Trial 73 with params: {'learning_rate': 5.953168512495511e-05, 'weight_decay': 0.01, 'warmup_steps': 27}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3065,0.939177,0.648029,0.554025,0.547569,0.547112
2,0.7598,0.737176,0.749771,0.628454,0.641332,0.634023
3,0.5936,0.668773,0.76077,0.637141,0.652527,0.644009
4,0.5144,0.632759,0.778185,0.659881,0.664632,0.660754
5,0.4654,0.612115,0.782768,0.654981,0.671511,0.662238
6,0.4291,0.58919,0.80385,0.681989,0.68615,0.682931
7,0.3987,0.584376,0.808433,0.680745,0.691018,0.685346
8,0.3775,0.571261,0.813932,0.684583,0.694846,0.689652
9,0.3557,0.576057,0.813016,0.685159,0.694326,0.689218
10,0.3372,0.569454,0.805683,0.678289,0.687394,0.682519


[I 2025-03-22 08:00:50,821] Trial 73 pruned. 


Trial 74 with params: {'learning_rate': 0.0007962649513967249, 'weight_decay': 0.01, 'warmup_steps': 20}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6227,0.522399,0.835014,0.820462,0.784194,0.796534
2,0.2194,0.536683,0.861595,0.877283,0.833352,0.85092
3,0.1164,0.561529,0.868011,0.879119,0.839173,0.855077
4,0.0672,0.689428,0.865261,0.877717,0.835703,0.852069
5,0.0409,0.69912,0.874427,0.864431,0.834992,0.847563


[I 2025-03-22 08:02:25,497] Trial 74 pruned. 


Trial 75 with params: {'learning_rate': 0.0024115406353382176, 'weight_decay': 0.007, 'warmup_steps': 23}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4829,0.495081,0.864345,0.877607,0.819107,0.839222
2,0.1107,0.482556,0.883593,0.891328,0.844345,0.861929
3,0.047,0.607764,0.870761,0.852944,0.843804,0.847514
4,0.0216,0.711366,0.88451,0.890747,0.844717,0.861561
5,0.0158,0.75367,0.887259,0.881941,0.846698,0.860079
6,0.0091,0.852264,0.890926,0.898162,0.849541,0.867414
7,0.0082,0.912805,0.880843,0.892383,0.841336,0.85934
8,0.0047,0.919526,0.890009,0.897869,0.848429,0.866562
9,0.0021,0.947692,0.890009,0.896947,0.848722,0.86676
10,0.0009,1.00724,0.887259,0.883241,0.846024,0.861043


[I 2025-03-22 08:06:12,423] Trial 75 finished with value: 0.8673835266290993 and parameters: {'learning_rate': 0.0024115406353382176, 'weight_decay': 0.007, 'warmup_steps': 23}. Best is trial 61 with value: 0.8887268625210981.


Trial 76 with params: {'learning_rate': 0.0011539708355938335, 'weight_decay': 0.007, 'warmup_steps': 22}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5826,0.513294,0.849679,0.840459,0.779842,0.796258
2,0.1777,0.500421,0.87626,0.850277,0.836334,0.842468
3,0.0825,0.570037,0.873511,0.875168,0.844581,0.856615
4,0.0457,0.698631,0.869844,0.882245,0.832367,0.850274
5,0.0263,0.739493,0.874427,0.884608,0.835124,0.853449
6,0.0156,0.720042,0.87901,0.877168,0.84857,0.860384
7,0.0094,0.815718,0.879927,0.890436,0.839254,0.858353
8,0.007,0.919454,0.87626,0.887522,0.837792,0.856109
9,0.0047,0.916406,0.885426,0.895199,0.84389,0.863322
10,0.0017,0.954447,0.879927,0.889153,0.840757,0.858957


[I 2025-03-22 08:08:37,660] Trial 76 pruned. 


Trial 77 with params: {'learning_rate': 0.0025269336442951132, 'weight_decay': 0.005, 'warmup_steps': 24}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4794,0.505472,0.866178,0.879822,0.829622,0.84802
2,0.107,0.504499,0.886343,0.894311,0.845913,0.864097
3,0.0449,0.615017,0.879927,0.865019,0.841266,0.850599
4,0.0234,0.670848,0.882676,0.893781,0.852572,0.868131
5,0.0141,0.798419,0.875344,0.873121,0.837099,0.850572
6,0.0112,0.683724,0.888176,0.89823,0.855693,0.872804
7,0.0051,0.852184,0.879927,0.889671,0.84105,0.858852
8,0.0049,0.840134,0.88176,0.888843,0.843061,0.859989
9,0.0037,0.894948,0.888176,0.897594,0.847276,0.866178
10,0.0017,0.934596,0.886343,0.882771,0.846811,0.860743


[I 2025-03-22 08:12:00,043] Trial 77 finished with value: 0.8675899019953933 and parameters: {'learning_rate': 0.0025269336442951132, 'weight_decay': 0.005, 'warmup_steps': 24}. Best is trial 61 with value: 0.8887268625210981.


Trial 78 with params: {'learning_rate': 0.0017630567372033316, 'weight_decay': 0.002, 'warmup_steps': 27}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5329,0.517197,0.857012,0.850946,0.795642,0.811733
2,0.1348,0.513413,0.882676,0.880999,0.842589,0.858084
3,0.0579,0.58018,0.873511,0.852151,0.837491,0.841643
4,0.0293,0.734624,0.864345,0.875587,0.828912,0.845291
5,0.0187,0.798704,0.867094,0.854789,0.831812,0.839825
6,0.0115,0.84966,0.871677,0.852165,0.843944,0.846842
7,0.0058,0.943978,0.879927,0.875927,0.841247,0.853661
8,0.0036,0.998697,0.87901,0.875631,0.840446,0.853887
9,0.0022,1.019561,0.87626,0.872838,0.838142,0.851023
10,0.0013,1.032946,0.879927,0.876707,0.840316,0.854719


[I 2025-03-22 08:14:37,064] Trial 78 pruned. 


Trial 79 with params: {'learning_rate': 0.002224859693044019, 'weight_decay': 0.007, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4729,0.475598,0.863428,0.877676,0.817167,0.838758
2,0.117,0.5214,0.879927,0.888228,0.841704,0.858451
3,0.0496,0.614307,0.88176,0.895647,0.850656,0.868687
4,0.0272,0.660528,0.886343,0.87646,0.855367,0.863905
5,0.0182,0.771172,0.883593,0.886115,0.851778,0.866131
6,0.0102,0.751211,0.885426,0.897007,0.852925,0.870966
7,0.0064,0.924795,0.883593,0.89493,0.843759,0.862607
8,0.0036,0.936513,0.88451,0.892674,0.845113,0.862535
9,0.0038,0.953564,0.879927,0.888165,0.841474,0.858282
10,0.001,0.993696,0.885426,0.896723,0.854382,0.871226


[I 2025-03-22 08:17:53,460] Trial 79 finished with value: 0.8630321653823992 and parameters: {'learning_rate': 0.002224859693044019, 'weight_decay': 0.007, 'warmup_steps': 2}. Best is trial 61 with value: 0.8887268625210981.


Trial 80 with params: {'learning_rate': 0.002855880857068783, 'weight_decay': 0.005, 'warmup_steps': 18}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4582,0.485946,0.871677,0.884282,0.833737,0.852353
2,0.0988,0.50512,0.88176,0.866647,0.842736,0.852172
3,0.0432,0.623244,0.87626,0.858886,0.8475,0.851247
4,0.0231,0.770327,0.882676,0.873572,0.852056,0.860551
5,0.0147,0.756426,0.879927,0.870094,0.84975,0.85807
6,0.0079,0.795286,0.890926,0.897981,0.858079,0.873712
7,0.0055,0.926516,0.892759,0.88112,0.858756,0.868714
8,0.0022,0.948363,0.888176,0.867343,0.856494,0.861136
9,0.0012,0.953316,0.895509,0.88863,0.853759,0.867493
10,0.0008,1.02329,0.887259,0.860659,0.85573,0.857705


[I 2025-03-22 08:20:06,198] Trial 80 pruned. 


Trial 81 with params: {'learning_rate': 0.0010763313734414267, 'weight_decay': 0.005, 'warmup_steps': 22}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5959,0.504526,0.849679,0.848964,0.797232,0.814072
2,0.1872,0.506273,0.873511,0.873441,0.824598,0.843623
3,0.089,0.576449,0.863428,0.854214,0.837088,0.843497
4,0.0489,0.732148,0.867094,0.877465,0.829513,0.847014
5,0.0305,0.707518,0.873511,0.873384,0.833979,0.849817


[I 2025-03-22 08:21:12,991] Trial 81 pruned. 


Trial 82 with params: {'learning_rate': 0.0012315261197628753, 'weight_decay': 0.005, 'warmup_steps': 29}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5766,0.512365,0.848763,0.861514,0.788521,0.808509
2,0.1707,0.505901,0.87901,0.889724,0.847651,0.86452
3,0.0784,0.625147,0.853346,0.846903,0.829255,0.835242
4,0.0428,0.683887,0.877177,0.886075,0.837655,0.855478
5,0.0224,0.775648,0.873511,0.886632,0.834519,0.853667
6,0.0176,0.821893,0.865261,0.866271,0.838275,0.849624
7,0.0098,0.894852,0.868011,0.867259,0.831058,0.844654
8,0.006,0.857093,0.873511,0.871588,0.835807,0.849737
9,0.0035,0.933603,0.877177,0.874838,0.838034,0.852662
10,0.0022,1.020609,0.878093,0.87782,0.848297,0.860243


[I 2025-03-22 08:24:22,468] Trial 82 finished with value: 0.8543615265214384 and parameters: {'learning_rate': 0.0012315261197628753, 'weight_decay': 0.005, 'warmup_steps': 29}. Best is trial 61 with value: 0.8887268625210981.


Trial 83 with params: {'learning_rate': 0.003467234407212441, 'weight_decay': 0.005, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4146,0.495705,0.869844,0.879072,0.83322,0.849443
2,0.0966,0.528162,0.880843,0.89018,0.843197,0.860152
3,0.0408,0.635363,0.870761,0.882493,0.842975,0.857532
4,0.0205,0.820769,0.880843,0.892917,0.839966,0.860124
5,0.0134,0.777455,0.88176,0.890425,0.842154,0.860139
6,0.0131,0.836393,0.88451,0.884371,0.853079,0.865749
7,0.0061,1.022461,0.877177,0.885871,0.83044,0.848295
8,0.0024,1.100482,0.880843,0.880597,0.850608,0.862476
9,0.0017,1.058772,0.889093,0.895132,0.848118,0.865593
10,0.0006,1.110435,0.889093,0.887143,0.857168,0.869283


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-22 08:27:43,236] Trial 83 finished with value: 0.8643997350253924 and parameters: {'learning_rate': 0.003467234407212441, 'weight_decay': 0.005, 'warmup_steps': 0}. Best is trial 61 with value: 0.8887268625210981.


Trial 84 with params: {'learning_rate': 0.002831178019482135, 'weight_decay': 0.004, 'warmup_steps': 24}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.47,0.499612,0.866178,0.878126,0.830632,0.847509
2,0.1012,0.5055,0.879927,0.889511,0.840672,0.859265
3,0.0436,0.599197,0.879927,0.86777,0.849347,0.856507
4,0.022,0.709844,0.879927,0.891003,0.850744,0.866091
5,0.0165,0.774169,0.880843,0.889638,0.850232,0.865445
6,0.0083,0.779202,0.897342,0.882787,0.862504,0.871378
7,0.0041,0.888628,0.896425,0.903489,0.86266,0.878748
8,0.0023,0.978636,0.890009,0.895897,0.848572,0.866133
9,0.0062,0.884815,0.886343,0.893175,0.846782,0.863341
10,0.002,0.962882,0.890009,0.897887,0.858286,0.873349


[I 2025-03-22 08:30:53,370] Trial 84 finished with value: 0.8752742544518969 and parameters: {'learning_rate': 0.002831178019482135, 'weight_decay': 0.004, 'warmup_steps': 24}. Best is trial 61 with value: 0.8887268625210981.


Trial 85 with params: {'learning_rate': 0.004482795850699945, 'weight_decay': 0.008, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3886,0.454017,0.873511,0.884116,0.845459,0.86022
2,0.0832,0.526847,0.878093,0.859726,0.84957,0.853416
3,0.0341,0.654505,0.882676,0.891804,0.851543,0.867182
4,0.0233,0.59121,0.890009,0.881536,0.85677,0.867058
5,0.0165,0.776143,0.885426,0.885502,0.844271,0.859977
6,0.0109,0.750112,0.892759,0.889801,0.848848,0.865537
7,0.0048,0.845569,0.890009,0.885642,0.857241,0.869004
8,0.0045,0.94394,0.885426,0.899097,0.85345,0.871253
9,0.0049,0.812094,0.899175,0.906789,0.854866,0.874898
10,0.0009,0.951149,0.893676,0.902381,0.850737,0.870596


[I 2025-03-22 08:34:16,239] Trial 85 finished with value: 0.8710922679147194 and parameters: {'learning_rate': 0.004482795850699945, 'weight_decay': 0.008, 'warmup_steps': 1}. Best is trial 61 with value: 0.8887268625210981.


Trial 86 with params: {'learning_rate': 0.002888926579450355, 'weight_decay': 0.008, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4453,0.461072,0.871677,0.882518,0.835299,0.852175
2,0.1015,0.51214,0.880843,0.88834,0.851764,0.865564
3,0.0436,0.677078,0.874427,0.873608,0.845509,0.855594
4,0.0244,0.704883,0.889093,0.89795,0.856476,0.872414
5,0.0133,0.712702,0.88176,0.882249,0.8498,0.863119
6,0.0074,0.856486,0.889093,0.896645,0.85639,0.87249
7,0.0078,0.814864,0.889093,0.898036,0.85723,0.872858
8,0.0056,0.96367,0.882676,0.893245,0.852572,0.868043
9,0.004,0.963399,0.886343,0.89438,0.855398,0.869718
10,0.0023,1.013506,0.885426,0.894026,0.853816,0.869773


[I 2025-03-22 08:37:43,479] Trial 86 finished with value: 0.8668341124345668 and parameters: {'learning_rate': 0.002888926579450355, 'weight_decay': 0.008, 'warmup_steps': 3}. Best is trial 61 with value: 0.8887268625210981.


Trial 87 with params: {'learning_rate': 0.003532688151296137, 'weight_decay': 0.008, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4121,0.488915,0.878093,0.87677,0.839291,0.853354
2,0.0918,0.516889,0.891842,0.876645,0.849235,0.860674
3,0.0378,0.677543,0.87901,0.876209,0.841365,0.852949
4,0.0213,0.703303,0.889093,0.866486,0.857084,0.860827
5,0.0156,0.75386,0.898258,0.890514,0.854388,0.868363
6,0.0114,0.786785,0.899175,0.882782,0.854955,0.8665
7,0.005,0.903117,0.894592,0.900518,0.862004,0.877127
8,0.003,0.973762,0.894592,0.878572,0.853081,0.863039
9,0.0028,1.061751,0.889093,0.882106,0.849612,0.860729
10,0.0006,1.051514,0.898258,0.890947,0.855924,0.8696


[I 2025-03-22 08:41:16,254] Trial 87 finished with value: 0.8702489611412325 and parameters: {'learning_rate': 0.003532688151296137, 'weight_decay': 0.008, 'warmup_steps': 1}. Best is trial 61 with value: 0.8887268625210981.


Trial 88 with params: {'learning_rate': 0.0038355506910187857, 'weight_decay': 0.01, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.407,0.473677,0.877177,0.885241,0.838438,0.855268
2,0.0911,0.536826,0.88451,0.881529,0.845041,0.859145
3,0.0394,0.687548,0.883593,0.87756,0.844859,0.856195
4,0.022,0.665048,0.885426,0.877943,0.83619,0.851518
5,0.0149,0.75723,0.885426,0.854323,0.834548,0.842576


[I 2025-03-22 08:42:17,346] Trial 88 pruned. 


Trial 89 with params: {'learning_rate': 0.0025430560360055854, 'weight_decay': 0.008, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4516,0.472347,0.868011,0.866713,0.822841,0.838919
2,0.105,0.544859,0.877177,0.845791,0.840307,0.841617
3,0.0441,0.657638,0.868928,0.841683,0.831834,0.835084
4,0.0222,0.763764,0.885426,0.872036,0.845231,0.855794
5,0.0154,0.755253,0.889093,0.883755,0.837529,0.855266
6,0.009,0.887675,0.878093,0.886574,0.83965,0.856376
7,0.0067,0.82945,0.890009,0.892073,0.829981,0.850048
8,0.0042,0.962637,0.87901,0.88299,0.822358,0.841593
9,0.003,0.909991,0.890926,0.883549,0.849433,0.8626
10,0.0012,0.968912,0.885426,0.880399,0.844339,0.858268


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-22 08:44:30,248] Trial 89 pruned. 


Trial 90 with params: {'learning_rate': 0.0047074462685288716, 'weight_decay': 0.009000000000000001, 'warmup_steps': 0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.3893,0.478628,0.87626,0.889301,0.836421,0.856639
2,0.0831,0.469969,0.891842,0.886047,0.850126,0.86419
3,0.039,0.584323,0.888176,0.858697,0.847545,0.852057
4,0.024,0.722413,0.885426,0.871113,0.83417,0.849035
5,0.015,0.725621,0.888176,0.873274,0.837453,0.851671
6,0.0094,0.828548,0.883593,0.858057,0.833875,0.8441
7,0.0076,0.862467,0.880843,0.863571,0.834011,0.84385
8,0.0061,0.815374,0.888176,0.871278,0.838412,0.850464
9,0.0023,0.942246,0.889093,0.881653,0.83899,0.854547
10,0.0013,0.954606,0.897342,0.878075,0.845189,0.85822


[I 2025-03-22 08:46:33,188] Trial 90 pruned. 


Trial 91 with params: {'learning_rate': 0.0035535160759692197, 'weight_decay': 0.008, 'warmup_steps': 1}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4091,0.508157,0.871677,0.882546,0.825014,0.844402
2,0.0904,0.4896,0.872594,0.854598,0.826968,0.83717
3,0.0402,0.623073,0.87901,0.888658,0.840433,0.857699
4,0.0211,0.731119,0.88176,0.891267,0.831942,0.852807
5,0.0146,0.838687,0.882676,0.888828,0.834624,0.85266


[I 2025-03-22 08:48:44,066] Trial 91 pruned. 


Trial 92 with params: {'learning_rate': 0.0038858862457713216, 'weight_decay': 0.008, 'warmup_steps': 6}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4222,0.496841,0.878093,0.889691,0.838047,0.85738
2,0.0903,0.512523,0.88176,0.888862,0.842186,0.858627
3,0.0387,0.607956,0.88451,0.881154,0.845602,0.858445
4,0.0225,0.679614,0.889093,0.885246,0.847691,0.862556
5,0.0151,0.8279,0.879927,0.865524,0.840975,0.850863
6,0.0095,0.872443,0.88176,0.879079,0.831737,0.849506
7,0.0061,0.989728,0.88451,0.862369,0.826352,0.83897
8,0.0072,0.943827,0.882676,0.86819,0.833888,0.845952
9,0.0022,1.005441,0.890926,0.874874,0.848922,0.859664
10,0.0009,1.042463,0.894592,0.885977,0.842306,0.858841


[I 2025-03-22 08:51:16,751] Trial 92 pruned. 


Trial 93 with params: {'learning_rate': 0.002584644802981531, 'weight_decay': 0.006, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4631,0.499782,0.868011,0.880696,0.821213,0.841244
2,0.1104,0.491963,0.879927,0.886882,0.842055,0.858316
3,0.0472,0.608792,0.873511,0.852406,0.847443,0.848018
4,0.026,0.669185,0.893676,0.902247,0.851082,0.870367
5,0.015,0.83616,0.873511,0.886384,0.835671,0.853135
6,0.008,0.825559,0.883593,0.871232,0.843931,0.855328
7,0.0102,0.827926,0.890009,0.898234,0.848863,0.86636
8,0.0067,0.889045,0.888176,0.896282,0.84706,0.864938
9,0.0015,0.917309,0.891842,0.887746,0.859349,0.870846
10,0.0004,0.980835,0.889093,0.886039,0.856869,0.868527


[I 2025-03-22 08:54:52,590] Trial 93 finished with value: 0.873151430513544 and parameters: {'learning_rate': 0.002584644802981531, 'weight_decay': 0.006, 'warmup_steps': 7}. Best is trial 61 with value: 0.8887268625210981.


Trial 94 with params: {'learning_rate': 0.002800878103361948, 'weight_decay': 0.005, 'warmup_steps': 5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4486,0.493914,0.864345,0.871297,0.801147,0.820747
2,0.106,0.484037,0.88451,0.892844,0.845195,0.862617
3,0.0459,0.643713,0.874427,0.875436,0.847729,0.855887
4,0.0242,0.626561,0.885426,0.895671,0.835146,0.856712
5,0.0139,0.739412,0.891842,0.898322,0.84126,0.861519
6,0.0109,0.769719,0.887259,0.870431,0.837941,0.849777
7,0.0076,0.861221,0.885426,0.890875,0.836595,0.855029
8,0.0058,0.944104,0.890009,0.899578,0.857642,0.873396
9,0.002,0.976818,0.88451,0.88414,0.852878,0.865408
10,0.0028,1.015597,0.890926,0.900596,0.858382,0.874657


[I 2025-03-22 08:58:07,185] Trial 94 finished with value: 0.8728038697952908 and parameters: {'learning_rate': 0.002800878103361948, 'weight_decay': 0.005, 'warmup_steps': 5}. Best is trial 61 with value: 0.8887268625210981.


Trial 95 with params: {'learning_rate': 0.0017083360256675874, 'weight_decay': 0.006, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5036,0.491314,0.854262,0.859984,0.784202,0.803066
2,0.1356,0.496565,0.883593,0.849156,0.851613,0.850011
3,0.0574,0.595396,0.869844,0.869724,0.833014,0.84672
4,0.0296,0.73343,0.874427,0.863932,0.845967,0.852689
5,0.0211,0.733974,0.873511,0.861236,0.836174,0.84507


[I 2025-03-22 08:59:05,386] Trial 95 pruned. 


Trial 96 with params: {'learning_rate': 0.0032957393457909955, 'weight_decay': 0.004, 'warmup_steps': 8}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4375,0.476052,0.871677,0.883056,0.833964,0.851755
2,0.0958,0.495231,0.87901,0.874625,0.840276,0.853256
3,0.0406,0.630071,0.872594,0.881801,0.836828,0.851791
4,0.0219,0.669627,0.892759,0.890685,0.850283,0.865964
5,0.0165,0.759599,0.887259,0.883519,0.838568,0.85352
6,0.0088,0.869147,0.882676,0.88828,0.825663,0.845403
7,0.0042,0.86193,0.885426,0.891496,0.835155,0.854461
8,0.003,0.862824,0.896425,0.902532,0.85278,0.871775
9,0.0014,0.978524,0.898258,0.90407,0.855093,0.873487
10,0.0005,1.020513,0.897342,0.903106,0.854148,0.872565


[I 2025-03-22 09:02:07,711] Trial 96 finished with value: 0.8713243356353936 and parameters: {'learning_rate': 0.0032957393457909955, 'weight_decay': 0.004, 'warmup_steps': 8}. Best is trial 61 with value: 0.8887268625210981.


Trial 97 with params: {'learning_rate': 0.004208193546270649, 'weight_decay': 0.005, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4048,0.499379,0.870761,0.885616,0.841969,0.858749
2,0.0863,0.508039,0.88451,0.880304,0.854623,0.864777
3,0.0402,0.678995,0.868928,0.869556,0.842467,0.851423
4,0.0197,0.620687,0.890009,0.889646,0.856846,0.870666
5,0.0124,0.740528,0.894592,0.888858,0.861393,0.872458
6,0.0098,0.809756,0.891842,0.888327,0.858853,0.871111
7,0.0063,0.925007,0.889093,0.898109,0.84661,0.865881
8,0.0027,0.968533,0.889093,0.876711,0.856375,0.864314
9,0.002,1.093363,0.890926,0.878912,0.857398,0.866298
10,0.0013,0.997958,0.895509,0.875008,0.861566,0.867494


[I 2025-03-22 09:05:23,145] Trial 97 finished with value: 0.8665572771125656 and parameters: {'learning_rate': 0.004208193546270649, 'weight_decay': 0.005, 'warmup_steps': 7}. Best is trial 61 with value: 0.8887268625210981.


Trial 98 with params: {'learning_rate': 0.0024323541639513814, 'weight_decay': 0.004, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4638,0.494231,0.864345,0.878842,0.827881,0.846222
2,0.1111,0.501946,0.885426,0.883335,0.855182,0.866768
3,0.0493,0.592469,0.88176,0.847637,0.852876,0.849175
4,0.0257,0.700093,0.887259,0.885534,0.845251,0.861646
5,0.0148,0.847349,0.868928,0.869596,0.82348,0.838879


[I 2025-03-22 09:06:21,879] Trial 98 pruned. 


Trial 99 with params: {'learning_rate': 0.0017181547089033429, 'weight_decay': 0.003, 'warmup_steps': 5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5022,0.489571,0.858845,0.868043,0.796687,0.816962
2,0.1355,0.537603,0.874427,0.866574,0.834306,0.847244
3,0.0589,0.633552,0.872594,0.861045,0.844085,0.85049
4,0.0331,0.690819,0.874427,0.885852,0.835089,0.853419
5,0.0208,0.721826,0.872594,0.882626,0.835466,0.852007
6,0.0128,0.768026,0.88176,0.860975,0.84167,0.84954
7,0.0066,0.906204,0.875344,0.881757,0.828866,0.846283
8,0.0054,0.896695,0.88176,0.889297,0.842762,0.859676
9,0.0024,0.985662,0.883593,0.892192,0.84392,0.861907
10,0.0005,1.039442,0.879927,0.889744,0.841016,0.858837


[I 2025-03-22 09:08:22,029] Trial 99 pruned. 


Trial 100 with params: {'learning_rate': 0.002198493824069799, 'weight_decay': 0.006, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4723,0.494941,0.866178,0.875696,0.811067,0.831003
2,0.1167,0.50293,0.877177,0.889603,0.848354,0.864525
3,0.0511,0.600709,0.87626,0.886213,0.84666,0.862161
4,0.0249,0.685492,0.892759,0.900491,0.850436,0.869457
5,0.0144,0.763558,0.88176,0.891563,0.851718,0.867417
6,0.0114,0.767274,0.88451,0.88236,0.853776,0.865483
7,0.0059,0.941636,0.886343,0.894582,0.855232,0.870837
8,0.0032,1.038832,0.88176,0.879677,0.851052,0.862882
9,0.0034,0.957541,0.887259,0.884377,0.855592,0.867249
10,0.0016,1.122736,0.880843,0.881366,0.849302,0.862281


[I 2025-03-22 09:10:20,164] Trial 100 pruned. 


Trial 101 with params: {'learning_rate': 0.003061540186942208, 'weight_decay': 0.002, 'warmup_steps': 14}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4549,0.497333,0.868928,0.876799,0.814461,0.833361
2,0.0994,0.4649,0.87901,0.885436,0.831394,0.850224
3,0.0405,0.625703,0.872594,0.835225,0.835031,0.833405
4,0.0216,0.750305,0.874427,0.859042,0.843314,0.850272
5,0.0153,0.788543,0.887259,0.894779,0.855827,0.87052
6,0.0092,0.764592,0.892759,0.901564,0.858186,0.875952
7,0.0065,1.006337,0.873511,0.869994,0.827061,0.841772
8,0.0032,1.092995,0.880843,0.879819,0.850573,0.862565
9,0.0023,1.017435,0.879927,0.87668,0.840491,0.853801
10,0.0008,1.094505,0.883593,0.882397,0.852231,0.864532


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-22 09:13:56,933] Trial 101 finished with value: 0.8682594781571767 and parameters: {'learning_rate': 0.003061540186942208, 'weight_decay': 0.002, 'warmup_steps': 14}. Best is trial 61 with value: 0.8887268625210981.


Trial 102 with params: {'learning_rate': 0.0028757205718790676, 'weight_decay': 0.004, 'warmup_steps': 12}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4582,0.475751,0.870761,0.883496,0.83416,0.851568
2,0.102,0.476461,0.886343,0.882809,0.845834,0.860683
3,0.0438,0.63988,0.868928,0.866062,0.833418,0.844177
4,0.0231,0.638952,0.879927,0.880191,0.849394,0.862235
5,0.0137,0.704364,0.899175,0.878756,0.864204,0.870848
6,0.0105,0.79299,0.885426,0.895246,0.844516,0.863539
7,0.005,0.965589,0.882676,0.891414,0.843227,0.860688
8,0.003,0.935259,0.883593,0.893906,0.851651,0.867885
9,0.0032,1.007065,0.883593,0.889953,0.844733,0.860263
10,0.002,0.992226,0.890926,0.896104,0.849674,0.866653


[I 2025-03-22 09:17:02,954] Trial 102 finished with value: 0.8774936733499432 and parameters: {'learning_rate': 0.0028757205718790676, 'weight_decay': 0.004, 'warmup_steps': 12}. Best is trial 61 with value: 0.8887268625210981.


Trial 103 with params: {'learning_rate': 0.0038852119907255467, 'weight_decay': 0.004, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4267,0.497001,0.873511,0.8828,0.835754,0.85297
2,0.0912,0.508028,0.880843,0.859795,0.841779,0.848942
3,0.0405,0.675573,0.879927,0.892749,0.842054,0.858267
4,0.0218,0.669063,0.890009,0.891352,0.856781,0.871081
5,0.0142,0.706626,0.891842,0.895201,0.855823,0.87257
6,0.0086,0.887544,0.883593,0.883387,0.84181,0.858823
7,0.0076,0.986216,0.880843,0.884992,0.823649,0.843145
8,0.0041,0.952107,0.885426,0.880924,0.834733,0.852077
9,0.0024,1.011149,0.880843,0.879892,0.840877,0.855341
10,0.0008,0.999177,0.882676,0.879177,0.842316,0.856537


[I 2025-03-22 09:19:07,995] Trial 103 pruned. 


Trial 104 with params: {'learning_rate': 0.0019172768767450417, 'weight_decay': 0.005, 'warmup_steps': 12}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4949,0.51865,0.858845,0.86938,0.796782,0.81678
2,0.1265,0.507534,0.87626,0.886496,0.837924,0.856209
3,0.0532,0.572029,0.877177,0.887778,0.847006,0.862895
4,0.0277,0.661924,0.872594,0.886137,0.83379,0.85329
5,0.0178,0.75429,0.874427,0.880912,0.828906,0.845687


[I 2025-03-22 09:20:43,560] Trial 104 pruned. 


Trial 105 with params: {'learning_rate': 0.0035597754275176288, 'weight_decay': 0.005, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4575,0.494419,0.865261,0.866757,0.830243,0.841759
2,0.0954,0.494441,0.888176,0.896489,0.846808,0.865647
3,0.0414,0.636694,0.882676,0.862242,0.851658,0.855775
4,0.0225,0.670497,0.889093,0.881482,0.854631,0.866538
5,0.014,0.751347,0.878093,0.88844,0.828132,0.849374
6,0.0097,0.743328,0.887259,0.894608,0.846102,0.864167
7,0.0032,0.870638,0.890009,0.897331,0.848199,0.866536
8,0.0021,0.915879,0.893676,0.899182,0.851602,0.869462
9,0.0011,0.975972,0.886343,0.892745,0.846299,0.862987
10,0.001,0.964803,0.890926,0.896619,0.840072,0.859728


[I 2025-03-22 09:24:04,839] Trial 105 finished with value: 0.8725227269639085 and parameters: {'learning_rate': 0.0035597754275176288, 'weight_decay': 0.005, 'warmup_steps': 7}. Best is trial 61 with value: 0.8887268625210981.


Trial 106 with params: {'learning_rate': 0.00021261909483259904, 'weight_decay': 0.008, 'warmup_steps': 12}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8849,0.605696,0.785518,0.661899,0.670553,0.666031
2,0.434,0.527351,0.830431,0.700786,0.707498,0.703306
3,0.3287,0.540285,0.827681,0.85707,0.737296,0.748253
4,0.2472,0.549411,0.830431,0.852027,0.788992,0.81096
5,0.1929,0.547488,0.851512,0.871306,0.825076,0.843228


[I 2025-03-22 09:25:02,120] Trial 106 pruned. 


Trial 107 with params: {'learning_rate': 0.0039163960810031226, 'weight_decay': 0.003, 'warmup_steps': 25}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4381,0.476948,0.869844,0.878959,0.823399,0.842342
2,0.0905,0.493844,0.893676,0.899637,0.842932,0.863142
3,0.0405,0.660955,0.872594,0.884376,0.835706,0.852834
4,0.021,0.727059,0.882676,0.879585,0.832407,0.850676
5,0.0141,0.782935,0.875344,0.873582,0.835776,0.850642
6,0.011,0.821444,0.878093,0.875982,0.839226,0.85348
7,0.0058,0.961541,0.879927,0.89216,0.840507,0.859971
8,0.0051,0.857736,0.890926,0.89742,0.840368,0.860314
9,0.003,1.002097,0.877177,0.88542,0.82975,0.848789
10,0.0013,1.036317,0.883593,0.89276,0.853535,0.868531


[I 2025-03-22 09:28:43,423] Trial 107 finished with value: 0.8696554548301538 and parameters: {'learning_rate': 0.0039163960810031226, 'weight_decay': 0.003, 'warmup_steps': 25}. Best is trial 61 with value: 0.8887268625210981.


Trial 108 with params: {'learning_rate': 0.0019835449739856277, 'weight_decay': 0.005, 'warmup_steps': 6}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4785,0.524566,0.857012,0.869311,0.796059,0.815289
2,0.1241,0.530189,0.878093,0.886871,0.840603,0.857108
3,0.0546,0.631978,0.875344,0.889541,0.845423,0.862078
4,0.0265,0.696076,0.882676,0.89146,0.852932,0.867365
5,0.0158,0.796229,0.889093,0.897887,0.847106,0.866636
6,0.011,0.809403,0.889093,0.898861,0.847032,0.866747
7,0.0071,0.929878,0.88176,0.890102,0.833949,0.852365
8,0.0066,0.970027,0.87626,0.885708,0.848178,0.861276
9,0.002,0.994383,0.882676,0.89236,0.853301,0.86727
10,0.0012,0.992196,0.883593,0.894667,0.852628,0.869179


[I 2025-03-22 09:33:10,349] Trial 108 finished with value: 0.8682736455648538 and parameters: {'learning_rate': 0.0019835449739856277, 'weight_decay': 0.005, 'warmup_steps': 6}. Best is trial 61 with value: 0.8887268625210981.


Trial 109 with params: {'learning_rate': 0.004427955877979533, 'weight_decay': 0.004, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4361,0.503925,0.886343,0.895555,0.853602,0.869656
2,0.0867,0.498833,0.889093,0.887765,0.846902,0.863647
3,0.0383,0.593771,0.887259,0.894541,0.855873,0.870897
4,0.0193,0.646038,0.890926,0.899695,0.85825,0.874821
5,0.0151,0.791138,0.878093,0.889676,0.840183,0.858361
6,0.0126,0.90399,0.87901,0.877188,0.850176,0.860988
7,0.0082,0.905923,0.886343,0.894419,0.856518,0.869907
8,0.0035,0.920944,0.889093,0.895998,0.849298,0.866673
9,0.0021,0.971461,0.890009,0.897354,0.858773,0.873892
10,0.0004,1.015958,0.892759,0.899697,0.8608,0.876056


[I 2025-03-22 09:36:26,639] Trial 109 finished with value: 0.8776779536981844 and parameters: {'learning_rate': 0.004427955877979533, 'weight_decay': 0.004, 'warmup_steps': 10}. Best is trial 61 with value: 0.8887268625210981.


Trial 110 with params: {'learning_rate': 0.004706671657325586, 'weight_decay': 0.004, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4297,0.478419,0.87901,0.871904,0.840645,0.851358
2,0.0856,0.464104,0.887259,0.865139,0.847471,0.854179
3,0.0415,0.639981,0.878093,0.866592,0.839158,0.848905
4,0.0181,0.644324,0.887259,0.872431,0.836106,0.850745
5,0.017,0.730691,0.889093,0.8809,0.829073,0.84752


[I 2025-03-22 09:37:23,699] Trial 110 pruned. 


Trial 111 with params: {'learning_rate': 0.00463280529905223, 'weight_decay': 0.005, 'warmup_steps': 9}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4486,0.473956,0.88176,0.889046,0.843473,0.859545
2,0.0875,0.448789,0.899175,0.904677,0.856683,0.874544
3,0.0394,0.593856,0.891842,0.898002,0.851103,0.868465
4,0.0218,0.653466,0.891842,0.899592,0.849592,0.868636
5,0.0173,0.691548,0.898258,0.901008,0.846278,0.865484
6,0.0087,0.789719,0.895509,0.900754,0.843049,0.863591
7,0.0056,0.843294,0.895509,0.897902,0.844814,0.862607
8,0.0029,0.993951,0.889093,0.894664,0.839174,0.857846
9,0.0054,0.814048,0.897342,0.901763,0.844257,0.864748
10,0.0034,0.883157,0.896425,0.90185,0.852909,0.87145


[I 2025-03-22 09:40:29,491] Trial 111 finished with value: 0.8701625831639203 and parameters: {'learning_rate': 0.00463280529905223, 'weight_decay': 0.005, 'warmup_steps': 9}. Best is trial 61 with value: 0.8887268625210981.


Trial 112 with params: {'learning_rate': 0.0034049162054150977, 'weight_decay': 0.004, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4255,0.493277,0.871677,0.863864,0.843895,0.851229
2,0.0961,0.508892,0.887259,0.882218,0.855931,0.866422
3,0.0418,0.585959,0.885426,0.89347,0.855617,0.869966
4,0.0232,0.673805,0.887259,0.882815,0.84538,0.860285
5,0.0152,0.715231,0.886343,0.894305,0.846284,0.864393
6,0.0126,0.687128,0.896425,0.888292,0.853738,0.867265
7,0.0059,0.841172,0.891842,0.897692,0.840489,0.860688
8,0.0023,0.875843,0.893676,0.899947,0.851868,0.869855
9,0.0011,0.933283,0.895509,0.901123,0.853166,0.871077
10,0.0004,0.981894,0.892759,0.899004,0.85118,0.868908


[I 2025-03-22 09:43:38,732] Trial 112 finished with value: 0.8694910017478806 and parameters: {'learning_rate': 0.0034049162054150977, 'weight_decay': 0.004, 'warmup_steps': 4}. Best is trial 61 with value: 0.8887268625210981.


Trial 113 with params: {'learning_rate': 0.002627213784357724, 'weight_decay': 0.003, 'warmup_steps': 12}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.467,0.510189,0.864345,0.879111,0.827837,0.846799
2,0.1069,0.516432,0.885426,0.86338,0.844818,0.853011
3,0.0446,0.585363,0.87626,0.877294,0.838284,0.851891
4,0.0248,0.701072,0.885426,0.894184,0.844865,0.863512
5,0.0139,0.677646,0.895509,0.888618,0.842688,0.860166
6,0.0089,0.813176,0.882676,0.893625,0.852108,0.868028
7,0.0079,0.84078,0.88176,0.88993,0.842924,0.859375
8,0.0047,0.966683,0.888176,0.877182,0.856686,0.865167
9,0.0033,1.0253,0.880843,0.878196,0.852236,0.860908
10,0.0025,0.945836,0.898258,0.891433,0.855607,0.869697


[I 2025-03-22 09:46:39,971] Trial 113 finished with value: 0.8732166456781538 and parameters: {'learning_rate': 0.002627213784357724, 'weight_decay': 0.003, 'warmup_steps': 12}. Best is trial 61 with value: 0.8887268625210981.


Trial 114 with params: {'learning_rate': 0.0013407825053128061, 'weight_decay': 0.003, 'warmup_steps': 15}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5508,0.523458,0.854262,0.842588,0.801283,0.81415
2,0.1597,0.529841,0.873511,0.855211,0.82375,0.837105
3,0.0728,0.589377,0.873511,0.872983,0.835164,0.84972
4,0.0379,0.737407,0.871677,0.869577,0.833173,0.846777
5,0.0235,0.871942,0.865261,0.877989,0.819997,0.839244


[I 2025-03-22 09:47:35,331] Trial 114 pruned. 


Trial 115 with params: {'learning_rate': 0.0017563000350765948, 'weight_decay': 0.002, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4993,0.498239,0.860678,0.854674,0.798517,0.814837
2,0.1349,0.485395,0.877177,0.86004,0.837663,0.847545
3,0.056,0.589509,0.871677,0.871048,0.834416,0.84767
4,0.0297,0.69436,0.88176,0.890081,0.852791,0.866278
5,0.0178,0.793426,0.871677,0.881681,0.843975,0.857892
6,0.0132,0.832063,0.871677,0.88389,0.842621,0.85874
7,0.006,1.004331,0.864345,0.881286,0.827991,0.846309
8,0.0054,0.903928,0.885426,0.896334,0.853239,0.870586
9,0.004,0.944965,0.880843,0.889674,0.850667,0.865041
10,0.0018,0.968606,0.890009,0.899503,0.857273,0.874449


[I 2025-03-22 09:51:11,738] Trial 115 finished with value: 0.8722717993389812 and parameters: {'learning_rate': 0.0017563000350765948, 'weight_decay': 0.002, 'warmup_steps': 10}. Best is trial 61 with value: 0.8887268625210981.


Trial 116 with params: {'learning_rate': 0.0012482194446075622, 'weight_decay': 0.0, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5422,0.511572,0.854262,0.845405,0.783473,0.80067
2,0.1664,0.510843,0.87626,0.8767,0.836573,0.852642
3,0.0759,0.595783,0.857012,0.859411,0.822856,0.83601
4,0.0409,0.765891,0.864345,0.880745,0.826528,0.84693
5,0.0233,0.81603,0.870761,0.883696,0.833211,0.851453


[I 2025-03-22 09:52:13,318] Trial 116 pruned. 


Trial 117 with params: {'learning_rate': 0.0015188853461631305, 'weight_decay': 0.001, 'warmup_steps': 13}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5262,0.527939,0.853346,0.847106,0.792001,0.808321
2,0.1484,0.53646,0.874427,0.861241,0.843536,0.851375
3,0.0628,0.665671,0.862511,0.874787,0.83688,0.850246
4,0.0333,0.734929,0.871677,0.87221,0.832539,0.847958
5,0.0199,0.870661,0.870761,0.883536,0.834065,0.851529
6,0.0121,0.897562,0.872594,0.883164,0.843933,0.859134
7,0.0093,0.993175,0.868011,0.879193,0.832093,0.848406
8,0.0058,1.023895,0.872594,0.883136,0.835504,0.853107
9,0.0049,1.094015,0.870761,0.878602,0.835678,0.849433
10,0.0022,1.085409,0.874427,0.884299,0.845828,0.860862


[I 2025-03-22 09:55:18,603] Trial 117 finished with value: 0.8558874008170757 and parameters: {'learning_rate': 0.0015188853461631305, 'weight_decay': 0.001, 'warmup_steps': 13}. Best is trial 61 with value: 0.8887268625210981.


Trial 118 with params: {'learning_rate': 0.003415776125918378, 'weight_decay': 0.002, 'warmup_steps': 9}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.438,0.486274,0.887259,0.896876,0.85579,0.871299
2,0.0953,0.586555,0.879927,0.860227,0.849667,0.853018
3,0.0406,0.577653,0.885426,0.895023,0.853128,0.869486
4,0.0222,0.621611,0.896425,0.891322,0.852532,0.868065
5,0.0144,0.831794,0.880843,0.878852,0.851282,0.86216
6,0.0108,0.842616,0.890009,0.898224,0.857487,0.873865
7,0.0047,0.926218,0.890009,0.895472,0.849412,0.865974
8,0.0033,0.923631,0.889093,0.89441,0.847981,0.865034
9,0.0038,0.87703,0.892759,0.89983,0.860445,0.876017
10,0.0009,0.958975,0.888176,0.895703,0.857739,0.872208


[I 2025-03-22 09:58:25,733] Trial 118 finished with value: 0.8752116223652969 and parameters: {'learning_rate': 0.003415776125918378, 'weight_decay': 0.002, 'warmup_steps': 9}. Best is trial 61 with value: 0.8887268625210981.


Trial 119 with params: {'learning_rate': 0.0016009452644257989, 'weight_decay': 0.003, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5154,0.545721,0.846013,0.829716,0.786338,0.798006
2,0.1425,0.512451,0.878093,0.878054,0.837898,0.854007
3,0.0614,0.65171,0.860678,0.86213,0.826866,0.839054
4,0.0332,0.722641,0.874427,0.874939,0.845411,0.857304
5,0.0195,0.855166,0.868011,0.864108,0.832631,0.84312


[I 2025-03-22 09:59:23,044] Trial 119 pruned. 


Trial 120 with params: {'learning_rate': 0.0017516355420573075, 'weight_decay': 0.003, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5037,0.501061,0.857012,0.86743,0.795143,0.815517
2,0.1358,0.509081,0.874427,0.865174,0.836055,0.847991
3,0.056,0.631771,0.869844,0.870877,0.842975,0.853282
4,0.0316,0.702082,0.87901,0.887899,0.839375,0.857605
5,0.0182,0.808941,0.87901,0.891402,0.847709,0.864779
6,0.0106,0.856104,0.877177,0.889409,0.836485,0.85625
7,0.0062,0.929709,0.882676,0.892373,0.842418,0.860485
8,0.0045,1.075841,0.868011,0.871991,0.838292,0.852175
9,0.0054,1.000044,0.870761,0.87343,0.841668,0.854693
10,0.0021,1.029997,0.872594,0.8754,0.843685,0.856112


[I 2025-03-22 10:01:21,680] Trial 120 pruned. 


Trial 121 with params: {'learning_rate': 0.002581119347662408, 'weight_decay': 0.002, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.463,0.4914,0.87626,0.887208,0.837724,0.854969
2,0.1061,0.53589,0.882676,0.878269,0.844012,0.857195
3,0.0468,0.597294,0.883593,0.892697,0.853481,0.868381
4,0.0219,0.702325,0.894592,0.900086,0.843268,0.863396
5,0.0158,0.829182,0.885426,0.8937,0.836112,0.856296
6,0.0118,0.850311,0.885426,0.88351,0.85542,0.865287
7,0.0073,0.819234,0.883593,0.878906,0.843604,0.857639
8,0.0034,0.924248,0.891842,0.876142,0.851022,0.860802
9,0.0013,0.998597,0.890926,0.87506,0.849892,0.859799
10,0.0008,1.034777,0.892759,0.879223,0.861144,0.868276


[I 2025-03-22 10:04:48,514] Trial 121 finished with value: 0.869859763123996 and parameters: {'learning_rate': 0.002581119347662408, 'weight_decay': 0.002, 'warmup_steps': 10}. Best is trial 61 with value: 0.8887268625210981.


Trial 122 with params: {'learning_rate': 0.003808592539378974, 'weight_decay': 0.002, 'warmup_steps': 9}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4389,0.524147,0.883593,0.887216,0.825279,0.844389
2,0.0895,0.501863,0.890926,0.887781,0.857788,0.870219
3,0.0398,0.566918,0.87901,0.890011,0.830614,0.850937
4,0.0246,0.619761,0.890009,0.887946,0.858326,0.870714
5,0.0117,0.776664,0.889093,0.887821,0.85893,0.869804
6,0.0103,0.828339,0.885426,0.884194,0.853346,0.86628
7,0.006,0.872435,0.899175,0.904723,0.846765,0.867243
8,0.0056,0.806259,0.890926,0.900745,0.858184,0.875504
9,0.0052,0.895453,0.87901,0.890011,0.850531,0.864562
10,0.0014,0.952574,0.890926,0.901572,0.857586,0.875171


[I 2025-03-22 10:08:33,859] Trial 122 finished with value: 0.8735046977562148 and parameters: {'learning_rate': 0.003808592539378974, 'weight_decay': 0.002, 'warmup_steps': 9}. Best is trial 61 with value: 0.8887268625210981.


Trial 123 with params: {'learning_rate': 0.0028771505909817047, 'weight_decay': 0.001, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4457,0.485737,0.880843,0.890604,0.831194,0.851673
2,0.0994,0.496401,0.889093,0.883038,0.849729,0.862231
3,0.0424,0.653831,0.866178,0.870695,0.840288,0.850209
4,0.0257,0.69719,0.87901,0.881277,0.848198,0.861576
5,0.016,0.722339,0.886343,0.894219,0.846323,0.863615
6,0.0097,0.730041,0.890926,0.872865,0.840276,0.853088
7,0.005,1.032766,0.87626,0.886583,0.839353,0.854679
8,0.0052,0.967229,0.873511,0.883399,0.835478,0.852615
9,0.0039,1.030312,0.878093,0.887637,0.839227,0.856897
10,0.0019,1.053953,0.875344,0.885927,0.847314,0.860845


[I 2025-03-22 10:10:41,622] Trial 123 pruned. 


Trial 124 with params: {'learning_rate': 0.0031265666636470213, 'weight_decay': 0.002, 'warmup_steps': 7}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4428,0.48316,0.875344,0.888203,0.845118,0.861709
2,0.0968,0.498085,0.88451,0.891033,0.844883,0.862035
3,0.0411,0.598646,0.87901,0.875689,0.840979,0.852574
4,0.0278,0.656241,0.889093,0.8886,0.856731,0.869567
5,0.0107,0.789799,0.887259,0.893788,0.846288,0.863823
6,0.0096,0.821074,0.894592,0.901109,0.861369,0.877154
7,0.0088,0.869573,0.890926,0.88267,0.850465,0.862116
8,0.0049,0.932099,0.887259,0.885638,0.845909,0.86079
9,0.0046,0.906316,0.88451,0.87128,0.844537,0.855141
10,0.0021,0.952437,0.891842,0.898099,0.850144,0.868161


[I 2025-03-22 10:14:11,985] Trial 124 finished with value: 0.8696212647048859 and parameters: {'learning_rate': 0.0031265666636470213, 'weight_decay': 0.002, 'warmup_steps': 7}. Best is trial 61 with value: 0.8887268625210981.


Trial 125 with params: {'learning_rate': 0.004490823109274964, 'weight_decay': 0.002, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4277,0.481227,0.888176,0.883226,0.8455,0.860161
2,0.0854,0.510762,0.890926,0.884115,0.839205,0.856447
3,0.0391,0.645059,0.879927,0.889773,0.84252,0.858244
4,0.0215,0.737635,0.885426,0.893974,0.835721,0.856233
5,0.0171,0.754673,0.879927,0.885782,0.841431,0.857464
6,0.0095,0.884403,0.87901,0.885802,0.830728,0.849038
7,0.0059,0.927115,0.890009,0.898079,0.837688,0.859732
8,0.0047,0.977259,0.890009,0.896489,0.839637,0.859219
9,0.0032,1.009069,0.879927,0.873163,0.832632,0.846915
10,0.0013,1.080661,0.886343,0.879452,0.83692,0.852634


[I 2025-03-22 10:16:01,157] Trial 125 pruned. 


Trial 126 with params: {'learning_rate': 0.003698768288892298, 'weight_decay': 0.001, 'warmup_steps': 6}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4272,0.508578,0.873511,0.881199,0.816092,0.836611
2,0.0923,0.516904,0.885426,0.857239,0.855732,0.855851
3,0.0397,0.72783,0.873511,0.885778,0.836822,0.853273
4,0.0207,0.78735,0.887259,0.89599,0.845719,0.864172
5,0.018,0.819098,0.883593,0.891169,0.83464,0.854747
6,0.0077,0.935553,0.882676,0.891104,0.852046,0.866753
7,0.0062,0.905517,0.892759,0.880802,0.859547,0.868883
8,0.0034,1.05867,0.890009,0.885147,0.84886,0.862817
9,0.0019,1.103397,0.886343,0.86964,0.836619,0.849528
10,0.0024,1.113836,0.887259,0.893385,0.837353,0.857078


[I 2025-03-22 10:18:04,250] Trial 126 pruned. 


Trial 127 with params: {'learning_rate': 0.000695543731723662, 'weight_decay': 0.002, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6166,0.515237,0.834097,0.824253,0.75707,0.77409
2,0.2475,0.544663,0.857929,0.872495,0.831026,0.847375
3,0.1373,0.57015,0.861595,0.872278,0.82622,0.843158
4,0.0824,0.677915,0.857929,0.871581,0.820488,0.84011
5,0.0505,0.711199,0.857012,0.850342,0.820232,0.833084


[I 2025-03-22 10:19:03,854] Trial 127 pruned. 


Trial 128 with params: {'learning_rate': 0.0026954450632950037, 'weight_decay': 0.005, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4586,0.489938,0.868011,0.876222,0.803468,0.823676
2,0.1041,0.454201,0.888176,0.898856,0.847034,0.867001
3,0.045,0.544966,0.879927,0.889467,0.841073,0.858863
4,0.0213,0.681417,0.888176,0.899378,0.855617,0.873327
5,0.0147,0.799727,0.874427,0.871952,0.836226,0.849974


[I 2025-03-22 10:19:56,686] Trial 128 pruned. 


Trial 129 with params: {'learning_rate': 0.0008218255942917282, 'weight_decay': 0.0, 'warmup_steps': 22}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.6272,0.532821,0.835014,0.825128,0.776027,0.791089
2,0.2173,0.55244,0.867094,0.879204,0.839276,0.854657
3,0.1127,0.594637,0.856095,0.859824,0.828873,0.841293
4,0.0656,0.69932,0.857929,0.871741,0.821667,0.839186
5,0.0387,0.728428,0.872594,0.882052,0.832722,0.851603
6,0.0251,0.742625,0.872594,0.863368,0.842378,0.8516
7,0.0135,0.889799,0.863428,0.874277,0.826371,0.844403
8,0.0115,0.87596,0.873511,0.873456,0.843712,0.855322
9,0.0065,0.918778,0.877177,0.865765,0.846224,0.854634
10,0.0027,1.04123,0.872594,0.870458,0.842959,0.854115


[I 2025-03-22 10:21:52,712] Trial 129 pruned. 


Trial 130 with params: {'learning_rate': 0.002608540345806254, 'weight_decay': 0.004, 'warmup_steps': 13}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4677,0.476553,0.865261,0.87596,0.811443,0.831712
2,0.1089,0.479481,0.886343,0.874804,0.846055,0.857875
3,0.0468,0.636534,0.877177,0.890559,0.848407,0.863525
4,0.0221,0.666294,0.88176,0.882248,0.850507,0.864036
5,0.0169,0.725337,0.885426,0.882742,0.854639,0.866284
6,0.0097,0.845072,0.875344,0.88449,0.827777,0.847266
7,0.0065,0.862321,0.880843,0.878031,0.841967,0.854986
8,0.0045,0.883913,0.88451,0.878298,0.845245,0.857486
9,0.0024,0.968586,0.877177,0.874589,0.848557,0.858636
10,0.0012,0.981883,0.887259,0.895549,0.856113,0.871216


[I 2025-03-22 10:24:53,126] Trial 130 finished with value: 0.8719649716734978 and parameters: {'learning_rate': 0.002608540345806254, 'weight_decay': 0.004, 'warmup_steps': 13}. Best is trial 61 with value: 0.8887268625210981.


Trial 131 with params: {'learning_rate': 0.004984440708317279, 'weight_decay': 0.003, 'warmup_steps': 12}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4245,0.510238,0.875344,0.884536,0.836497,0.853671
2,0.0824,0.495761,0.87901,0.862684,0.831439,0.843569
3,0.0397,0.718933,0.865261,0.85657,0.83167,0.836451
4,0.0227,0.70482,0.880843,0.87717,0.822617,0.841359
5,0.0167,0.771073,0.885426,0.879301,0.836132,0.85175


[I 2025-03-22 10:26:23,208] Trial 131 pruned. 


Trial 132 with params: {'learning_rate': 0.002037656887781006, 'weight_decay': 0.003, 'warmup_steps': 12}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4893,0.484686,0.864345,0.878476,0.81964,0.838791
2,0.1226,0.488475,0.87901,0.855629,0.85811,0.856311
3,0.0521,0.63092,0.875344,0.886973,0.84584,0.861757
4,0.0262,0.691082,0.882676,0.880828,0.843125,0.857707
5,0.015,0.777534,0.878093,0.888352,0.839572,0.857343
6,0.0081,0.863627,0.885426,0.893834,0.845229,0.863057
7,0.006,0.880635,0.883593,0.894039,0.842843,0.862073
8,0.0075,0.915329,0.879927,0.887452,0.842064,0.857889
9,0.0034,1.002371,0.877177,0.885605,0.839967,0.855792
10,0.002,0.922057,0.878093,0.889189,0.839199,0.858187


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-22 10:29:07,645] Trial 132 pruned. 


Trial 133 with params: {'learning_rate': 0.002762200223227277, 'weight_decay': 0.004, 'warmup_steps': 19}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4646,0.496958,0.873511,0.887016,0.834373,0.853838
2,0.1015,0.506655,0.889093,0.869164,0.858396,0.863089
3,0.0437,0.617643,0.886343,0.881868,0.854529,0.865177
4,0.0213,0.661658,0.88451,0.881551,0.853592,0.864703
5,0.0176,0.770715,0.88451,0.872326,0.853909,0.86082
6,0.01,0.809876,0.895509,0.881177,0.852792,0.864818
7,0.0062,0.883265,0.866178,0.855624,0.832012,0.839229
8,0.0034,1.029401,0.877177,0.868238,0.847415,0.855642
9,0.0039,0.913848,0.87901,0.869565,0.848756,0.85764
10,0.0015,0.98531,0.879927,0.870195,0.849689,0.858384


[I 2025-03-22 10:31:16,496] Trial 133 pruned. 


Trial 134 with params: {'learning_rate': 0.00018138557105707126, 'weight_decay': 0.0, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9294,0.627886,0.776352,0.654599,0.662497,0.658422
2,0.4678,0.544106,0.814849,0.689319,0.694571,0.690984
3,0.3652,0.543188,0.819432,0.851939,0.711669,0.711339
4,0.2875,0.547415,0.831347,0.848984,0.771874,0.795046
5,0.2295,0.549246,0.845096,0.864596,0.802439,0.823908
6,0.1907,0.551022,0.851512,0.870514,0.824515,0.842611
7,0.1588,0.561835,0.855179,0.868043,0.830189,0.844557
8,0.1312,0.613374,0.84143,0.860143,0.808976,0.826604
9,0.1125,0.636883,0.840513,0.861797,0.80625,0.827108
10,0.0947,0.631675,0.849679,0.865507,0.814471,0.833871


[I 2025-03-22 10:33:29,758] Trial 134 pruned. 


Trial 135 with params: {'learning_rate': 0.0034459933532438296, 'weight_decay': 0.004, 'warmup_steps': 16}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4384,0.455686,0.879927,0.891611,0.850499,0.866493
2,0.0954,0.489253,0.883593,0.877891,0.844884,0.857551
3,0.0388,0.746175,0.865261,0.880098,0.830046,0.846227
4,0.0218,0.663782,0.893676,0.886243,0.84172,0.858702
5,0.0119,0.766016,0.892759,0.876375,0.840552,0.855146
6,0.0117,0.811781,0.888176,0.87314,0.836434,0.851416
7,0.0065,0.857473,0.892759,0.899385,0.850792,0.869266
8,0.0029,0.945109,0.887259,0.895921,0.846555,0.864993
9,0.0037,0.96942,0.87901,0.874989,0.831412,0.847338
10,0.0014,1.01749,0.890009,0.898164,0.849027,0.867286


[I 2025-03-22 10:36:42,233] Trial 135 finished with value: 0.8683517647962087 and parameters: {'learning_rate': 0.0034459933532438296, 'weight_decay': 0.004, 'warmup_steps': 16}. Best is trial 61 with value: 0.8887268625210981.


Trial 136 with params: {'learning_rate': 0.001121194903617636, 'weight_decay': 0.005, 'warmup_steps': 3}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5458,0.52261,0.845096,0.840119,0.785248,0.801376
2,0.1791,0.517738,0.871677,0.885838,0.832376,0.852927
3,0.0868,0.594085,0.861595,0.840152,0.834646,0.836219
4,0.0483,0.72691,0.858845,0.873255,0.823994,0.840089
5,0.0283,0.768335,0.872594,0.884147,0.834282,0.852868
6,0.0168,0.813789,0.864345,0.873414,0.829169,0.844985
7,0.0118,0.906335,0.868011,0.877371,0.83189,0.847833
8,0.0062,0.863245,0.87901,0.889065,0.840325,0.858227
9,0.0048,0.963774,0.87901,0.888548,0.850157,0.864421
10,0.0015,1.06856,0.870761,0.881576,0.843992,0.857944


[I 2025-03-22 10:38:45,795] Trial 136 pruned. 


Trial 137 with params: {'learning_rate': 0.004926451998423531, 'weight_decay': 0.003, 'warmup_steps': 6}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4065,0.491202,0.88451,0.891703,0.853825,0.868388
2,0.0854,0.503618,0.892759,0.878839,0.861066,0.868411
3,0.0385,0.644376,0.878093,0.890775,0.847552,0.863827
4,0.0235,0.709673,0.880843,0.891633,0.84907,0.865807
5,0.0149,0.72555,0.88176,0.871082,0.851814,0.859875
6,0.0114,0.786447,0.88451,0.860351,0.853308,0.856573
7,0.0081,0.90426,0.882676,0.891458,0.842203,0.860617
8,0.0071,0.94508,0.879927,0.88896,0.84047,0.85876
9,0.0043,0.971194,0.879927,0.886893,0.849578,0.864033
10,0.0026,1.0824,0.879927,0.887453,0.83125,0.851051


[I 2025-03-22 10:40:51,579] Trial 137 pruned. 


Trial 138 with params: {'learning_rate': 0.0022594588197059526, 'weight_decay': 0.004, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4789,0.494458,0.862511,0.876688,0.818246,0.83803
2,0.1167,0.486711,0.880843,0.865315,0.842669,0.85143
3,0.0484,0.573452,0.872594,0.885066,0.844923,0.858699
4,0.0269,0.672074,0.87626,0.8885,0.845773,0.861847
5,0.016,0.692959,0.890926,0.897815,0.848542,0.8671
6,0.01,0.812674,0.88451,0.894951,0.844095,0.863311
7,0.0078,0.824106,0.882676,0.886927,0.834203,0.851849
8,0.0054,0.947158,0.886343,0.89492,0.845056,0.863651
9,0.0028,1.058707,0.878093,0.888144,0.839982,0.85722
10,0.0025,0.923608,0.889093,0.895587,0.8378,0.858272


[I 2025-03-22 10:43:10,707] Trial 138 pruned. 


Trial 139 with params: {'learning_rate': 0.002607979551978919, 'weight_decay': 0.005, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4687,0.482986,0.863428,0.876819,0.82802,0.84509
2,0.1059,0.479261,0.887259,0.875285,0.855985,0.864423
3,0.0476,0.521175,0.892759,0.879372,0.860167,0.867778
4,0.0232,0.6281,0.891842,0.899608,0.849284,0.868429
5,0.0138,0.71604,0.897342,0.902515,0.853878,0.872415
6,0.0102,0.754582,0.893676,0.8799,0.860076,0.868134
7,0.0058,0.85112,0.890009,0.895781,0.858337,0.872703
8,0.0022,0.908868,0.891842,0.899551,0.849846,0.868547
9,0.0046,0.855595,0.899175,0.906457,0.864675,0.881305
10,0.002,0.908722,0.890926,0.888285,0.858628,0.870937


[I 2025-03-22 10:46:17,066] Trial 139 finished with value: 0.8762917685112087 and parameters: {'learning_rate': 0.002607979551978919, 'weight_decay': 0.005, 'warmup_steps': 11}. Best is trial 61 with value: 0.8887268625210981.


Trial 140 with params: {'learning_rate': 0.0020415668867981565, 'weight_decay': 0.006, 'warmup_steps': 10}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4837,0.496792,0.863428,0.877801,0.818603,0.838371
2,0.1221,0.522329,0.878093,0.8876,0.849811,0.863534
3,0.0532,0.62038,0.872594,0.873039,0.845115,0.854999
4,0.0255,0.711723,0.882676,0.89123,0.84326,0.860411
5,0.0175,0.782692,0.880843,0.891952,0.840812,0.8597
6,0.0129,0.841632,0.877177,0.867805,0.858343,0.861733
7,0.0072,0.966272,0.88176,0.877569,0.85301,0.861387
8,0.0062,0.942745,0.88176,0.880176,0.852045,0.863579
9,0.003,0.973214,0.87626,0.887615,0.847627,0.862644
10,0.0008,1.021844,0.882676,0.89148,0.853011,0.867941


[I 2025-03-22 10:50:39,043] Trial 140 finished with value: 0.8687220471890873 and parameters: {'learning_rate': 0.0020415668867981565, 'weight_decay': 0.006, 'warmup_steps': 10}. Best is trial 61 with value: 0.8887268625210981.


Trial 141 with params: {'learning_rate': 0.0021682051769781403, 'weight_decay': 0.005, 'warmup_steps': 14}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4815,0.518581,0.862511,0.870944,0.799726,0.819897
2,0.1174,0.526985,0.882676,0.892229,0.842209,0.86133
3,0.0508,0.586519,0.875344,0.853835,0.837537,0.843817
4,0.0275,0.672237,0.88451,0.880394,0.85356,0.863999
5,0.0158,0.805898,0.878093,0.888658,0.838369,0.857213
6,0.0095,0.795097,0.893676,0.880501,0.8517,0.862981
7,0.0068,0.823105,0.890009,0.89697,0.84919,0.867122
8,0.003,1.00691,0.888176,0.87321,0.846529,0.857622
9,0.0028,1.046469,0.888176,0.881369,0.847857,0.860515
10,0.0021,1.10368,0.887259,0.874697,0.846613,0.857642


[I 2025-03-22 10:52:45,550] Trial 141 pruned. 


Trial 142 with params: {'learning_rate': 0.0026448767056926567, 'weight_decay': 0.004, 'warmup_steps': 13}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4609,0.530985,0.861595,0.871274,0.798236,0.818914
2,0.1052,0.465491,0.892759,0.901738,0.849756,0.86976
3,0.0449,0.597401,0.87626,0.85548,0.838813,0.84471
4,0.0222,0.654391,0.888176,0.896976,0.846718,0.86555
5,0.0172,0.750678,0.870761,0.862872,0.834541,0.843751


[I 2025-03-22 10:53:48,853] Trial 142 pruned. 


Trial 143 with params: {'learning_rate': 0.004288099316900291, 'weight_decay': 0.009000000000000001, 'warmup_steps': 17}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4328,0.455817,0.88451,0.892638,0.844464,0.862547
2,0.0896,0.541306,0.87901,0.879262,0.849763,0.861263
3,0.0399,0.674049,0.882676,0.868222,0.844606,0.853027
4,0.0225,0.722988,0.87901,0.867821,0.840151,0.851822
5,0.0199,0.760485,0.888176,0.886571,0.846678,0.862456
6,0.0074,0.789264,0.890009,0.89542,0.830319,0.851709
7,0.0056,0.843898,0.889093,0.893485,0.840817,0.858266
8,0.0022,1.007207,0.882676,0.887715,0.844485,0.859777
9,0.0016,1.016092,0.888176,0.893656,0.83908,0.858162
10,0.0052,0.956267,0.890009,0.895082,0.850227,0.866238


[I 2025-03-22 10:57:01,995] Trial 143 finished with value: 0.8593459321322281 and parameters: {'learning_rate': 0.004288099316900291, 'weight_decay': 0.009000000000000001, 'warmup_steps': 17}. Best is trial 61 with value: 0.8887268625210981.


Trial 144 with params: {'learning_rate': 0.004003291987602186, 'weight_decay': 0.006, 'warmup_steps': 11}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4216,0.469546,0.882676,0.892365,0.851641,0.866632
2,0.0899,0.4719,0.889093,0.899353,0.856535,0.873553
3,0.0385,0.632229,0.883593,0.893486,0.853358,0.868426
4,0.0196,0.644177,0.893676,0.887144,0.851831,0.865554
5,0.0163,0.714416,0.885426,0.880601,0.835157,0.852439


[I 2025-03-22 10:58:08,271] Trial 144 pruned. 


Trial 145 with params: {'learning_rate': 0.002372549022958543, 'weight_decay': 0.005, 'warmup_steps': 6}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4628,0.496578,0.867094,0.882582,0.829784,0.848914
2,0.1148,0.507334,0.882676,0.897184,0.851874,0.870162
3,0.0504,0.587864,0.877177,0.866725,0.848044,0.855244
4,0.0238,0.691922,0.889093,0.887572,0.857281,0.87001
5,0.0165,0.743623,0.879927,0.888807,0.841071,0.858391
6,0.0098,0.920887,0.879927,0.889618,0.850171,0.865848
7,0.0082,0.88449,0.890009,0.898557,0.857163,0.873848
8,0.0045,0.957369,0.885426,0.896649,0.854114,0.870469
9,0.0029,0.998725,0.88451,0.895717,0.852747,0.869117
10,0.001,1.050548,0.893676,0.90145,0.860679,0.876687


[I 2025-03-22 11:01:10,071] Trial 145 finished with value: 0.8771297169776893 and parameters: {'learning_rate': 0.002372549022958543, 'weight_decay': 0.005, 'warmup_steps': 6}. Best is trial 61 with value: 0.8887268625210981.


Trial 146 with params: {'learning_rate': 0.00023476631919622148, 'weight_decay': 0.01, 'warmup_steps': 2}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.848,0.600467,0.786434,0.663397,0.670391,0.666608
2,0.4219,0.525172,0.835014,0.705359,0.711257,0.707317
3,0.313,0.534749,0.834097,0.829815,0.751516,0.763961
4,0.2318,0.551368,0.837764,0.860784,0.803519,0.825451
5,0.176,0.574988,0.847846,0.87082,0.821549,0.840524


[I 2025-03-22 11:02:10,345] Trial 146 pruned. 


Trial 147 with params: {'learning_rate': 0.00313751472902782, 'weight_decay': 0.005, 'warmup_steps': 6}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4458,0.49384,0.874427,0.889272,0.843359,0.861118
2,0.1005,0.504706,0.886343,0.89128,0.855431,0.868888
3,0.0441,0.668889,0.873511,0.871751,0.845009,0.855359
4,0.021,0.716134,0.888176,0.897274,0.855848,0.872199
5,0.0149,0.903863,0.870761,0.883881,0.831627,0.850478
6,0.0102,0.835917,0.888176,0.886406,0.85569,0.868107
7,0.0053,0.921669,0.882676,0.882278,0.852076,0.863284
8,0.0042,1.075963,0.87626,0.878045,0.845879,0.857305
9,0.0047,0.968473,0.886343,0.88749,0.854263,0.867901
10,0.0011,1.040479,0.892759,0.891199,0.859536,0.872585


[I 2025-03-22 11:05:27,250] Trial 147 finished with value: 0.8772908597923829 and parameters: {'learning_rate': 0.00313751472902782, 'weight_decay': 0.005, 'warmup_steps': 6}. Best is trial 61 with value: 0.8887268625210981.


Trial 148 with params: {'learning_rate': 0.0025644695728777034, 'weight_decay': 0.006, 'warmup_steps': 6}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4558,0.468828,0.87626,0.882508,0.810374,0.831575
2,0.1053,0.525199,0.870761,0.866727,0.83607,0.846454
3,0.0489,0.640151,0.878093,0.857704,0.850068,0.851577
4,0.0238,0.650993,0.878093,0.875697,0.83761,0.852683
5,0.0154,0.791981,0.870761,0.881685,0.83369,0.850197
6,0.0095,0.813271,0.890009,0.899437,0.847083,0.867033
7,0.0072,0.888969,0.880843,0.8774,0.840018,0.854662
8,0.0028,0.965374,0.889093,0.884895,0.846375,0.861564
9,0.003,1.025497,0.88451,0.894583,0.84324,0.862242
10,0.0023,0.905758,0.894592,0.890246,0.850861,0.866767


[I 2025-03-22 11:09:02,990] Trial 148 finished with value: 0.8660006581303742 and parameters: {'learning_rate': 0.0025644695728777034, 'weight_decay': 0.006, 'warmup_steps': 6}. Best is trial 61 with value: 0.8887268625210981.


Trial 149 with params: {'learning_rate': 0.0021384459068938236, 'weight_decay': 0.005, 'warmup_steps': 4}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4793,0.496594,0.861595,0.866208,0.790251,0.809739
2,0.1237,0.480494,0.875344,0.876147,0.846684,0.858662
3,0.0541,0.559513,0.88176,0.866647,0.861056,0.86262
4,0.0245,0.688874,0.87901,0.891614,0.849169,0.864993
5,0.017,0.824152,0.873511,0.875067,0.836693,0.849233


[I 2025-03-22 11:10:06,390] Trial 149 pruned. 


In [40]:
# Show the best run of the search that just finished (stored in `best_trial3`):
# its run id, objective value and winning hyperparameters.
print(best_trial3)

BestRun(run_id='61', objective=0.8887268625210981, hyperparameters={'learning_rate': 0.004589457243558559, 'weight_decay': 0.01, 'warmup_steps': 22}, run_summary=None)


In [41]:
# Reset seeds before setting up the next hyperparameter search.
# NOTE(review): presumably re-seeds the RNGs so each search starts from the same
# random state — confirm against `base.reset_seed`.
base.reset_seed()

In [42]:
# Training arguments shared by every trial of the distillation search on the
# augmented training set; results and logs go to the same run-named sub-folder.
# NOTE(review): `remove_unused_columns=False` presumably keeps extra dataset
# columns (e.g. teacher logits) available to the trainer — confirm in `base`.
training_args = base.get_training_args(
    output_dir=f"~/results/{DATASET}/bilstm-distill_coarse_aug_hp-search",
    logging_dir=f"~/logs/{DATASET}/bilstm-distill_coarse_aug_hp-search",
    epochs=num_epochs,
    batch_size=batch_size,
    remove_unused_columns=False,
)

In [43]:
def hp_space(trial):
    """Sample one hyperparameter configuration for a search trial.

    Five values are requested from ``trial``: learning rate, weight decay,
    warm-up steps, ``lambda_param`` and ``temperature``. The sampled
    configuration is printed together with the trial number and returned.

    Args:
        trial: Trial object exposing ``suggest_float``, ``suggest_int`` and
            ``number`` (an Optuna trial when used with the optuna backend).

    Returns:
        dict: Sampled values keyed by hyperparameter name, in the order
        learning_rate, weight_decay, warmup_steps, lambda_param, temperature.
    """
    # The suggestions are requested in this fixed order on purpose: a seeded
    # sampler hands out values in request order.
    learning_rate = trial.suggest_float("learning_rate", 5e-5, 5e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0, 1e-2, step=1e-3)
    # Upper bound comes from the notebook-level `warm_up` variable.
    warmup_steps = trial.suggest_int("warmup_steps", 0, warm_up)
    # NOTE(review): lambda_param / temperature are presumably consumed by
    # base.DistilTrainer as the distillation loss settings -- confirm there.
    lambda_param = trial.suggest_float("lambda_param", 0, 1, step=.1)
    temperature = trial.suggest_float("temperature", 2, 7, step=.5)

    params = dict(
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        warmup_steps=warmup_steps,
        lambda_param=lambda_param,
        temperature=temperature,
    )
    print(f"Trial {trial.number} with params: {params}")
    return params

In [44]:
# Hyperband pruning over the resource range [min_r, max_r] (notebook-level
# values); reduction_factor and bootstrap_count are passed through to Optuna.
pruner = optuna.pruners.HyperbandPruner(
    min_resource=min_r,
    max_resource=max_r,
    reduction_factor=2,
    bootstrap_count=2,
)

# TPE sampler with a fixed seed and the multivariate variant enabled.
sampler = optuna.samplers.TPESampler(
    seed=42,
    multivariate=True,
)



In [45]:
# Trainer used for the hyperparameter search: trains on the augmented training
# set and evaluates on the held-out eval split.
trainer = base.DistilTrainer(
    # Zero-argument factory; get_BiLSTM() is invoked whenever a model is requested.
    model_init=lambda: get_BiLSTM(),
    args=training_args,
    train_dataset=all_train_data,
    eval_dataset=eval_data,
    compute_metrics=base.compute_metrics,
)
  

In [46]:
# Launch the hyperparameter search for the distillation setup and keep the
# best run in best_trial4. This cell is long-running: it executes up to
# n_trials training runs.
best_trial4 = trainer.hyperparameter_search(
    direction="maximize",  # a higher objective value is better
    backend="optuna",
    hp_space=hp_space,  # search space sampled once per trial
    compute_objective=lambda metrics: metrics["eval_f1"],  # objective is the "eval_f1" metric
    pruner=pruner,
    sampler=sampler,
    study_name="Distill-aug",  # shows up as the study name in the Optuna log
    n_trials=150
)

[I 2025-03-22 11:10:06,668] A new study created in memory with name: Distill-aug


Trial 0 with params: {'learning_rate': 0.0002805758207667253, 'weight_decay': 0.01, 'warmup_steps': 23, 'lambda_param': 0.6000000000000001, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8594,1.294197,0.783685,0.657775,0.671114,0.664079
2,0.8485,1.055298,0.829514,0.702343,0.708354,0.704396
3,0.6308,0.929389,0.851512,0.715117,0.726351,0.719966
4,0.4822,0.879125,0.849679,0.852295,0.771285,0.78964
5,0.3683,0.837346,0.860678,0.872959,0.824425,0.842884


[I 2025-03-22 11:11:14,448] Trial 0 pruned. 


Trial 1 with params: {'learning_rate': 0.00010255552094216992, 'weight_decay': 0.0, 'warmup_steps': 27, 'lambda_param': 0.6000000000000001, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.5907,1.887397,0.699358,0.58961,0.596113,0.59198
2,1.3788,1.454081,0.768103,0.645128,0.658157,0.650689
3,1.0622,1.311375,0.794684,0.674787,0.678648,0.675384
4,0.9146,1.215401,0.8011,0.67259,0.685224,0.678201
5,0.8115,1.143249,0.821265,0.687872,0.702987,0.695031
6,0.7332,1.094557,0.826764,0.693421,0.706738,0.699749
7,0.6761,1.071764,0.823098,0.690293,0.703479,0.696681
8,0.6254,1.054829,0.832264,0.699413,0.710193,0.704563
9,0.581,1.035473,0.83868,0.707109,0.714157,0.710293
10,0.5464,1.003362,0.84418,0.709047,0.719239,0.713799


[I 2025-03-22 11:13:32,590] Trial 1 pruned. 


Trial 2 with params: {'learning_rate': 5.497167787383099e-05, 'weight_decay': 0.01, 'warmup_steps': 26, 'lambda_param': 0.2, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0092,2.424104,0.60495,0.536725,0.505209,0.505162
2,1.9193,1.862516,0.698442,0.591514,0.595287,0.59158
3,1.4585,1.60716,0.744271,0.625347,0.636558,0.63005
4,1.2301,1.475219,0.770852,0.651268,0.660146,0.654252
5,1.0988,1.396039,0.780018,0.654586,0.669014,0.660762


[I 2025-03-22 11:14:32,849] Trial 2 pruned. 


Trial 3 with params: {'learning_rate': 0.00011635338541918901, 'weight_decay': 0.003, 'warmup_steps': 16, 'lambda_param': 0.4, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.4736,1.798275,0.71494,0.60794,0.60691,0.604377
2,1.2929,1.38802,0.780935,0.658568,0.667883,0.662373
3,0.9997,1.256248,0.8011,0.681008,0.683374,0.680736
4,0.8562,1.167253,0.813016,0.683101,0.694592,0.688406
5,0.7541,1.09286,0.822181,0.689323,0.703138,0.695929
6,0.6744,1.047997,0.832264,0.697623,0.711127,0.704076
7,0.6154,1.024659,0.836847,0.701251,0.713927,0.707256
8,0.5639,1.014803,0.83593,0.702965,0.713377,0.707686
9,0.5211,0.998167,0.843263,0.71094,0.717995,0.713997
10,0.4865,0.960111,0.843263,0.8752,0.727613,0.731364


[I 2025-03-22 11:16:42,836] Trial 3 pruned. 


Trial 4 with params: {'learning_rate': 0.0008369042894376068, 'weight_decay': 0.001, 'warmup_steps': 9, 'lambda_param': 0.4, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2897,0.959425,0.83868,0.705498,0.717183,0.709733
2,0.4602,0.79662,0.863428,0.875933,0.81858,0.838591
3,0.2516,0.702227,0.883593,0.892228,0.854,0.868132
4,0.1711,0.669437,0.885426,0.896771,0.853005,0.870915
5,0.1333,0.674781,0.878093,0.888483,0.848516,0.864316
6,0.1099,0.636402,0.887259,0.897952,0.85481,0.872432
7,0.0949,0.624769,0.892759,0.90183,0.859381,0.876662
8,0.0865,0.613664,0.890009,0.898808,0.856959,0.873853
9,0.0791,0.61023,0.890009,0.898634,0.856966,0.873885
10,0.0736,0.610272,0.893676,0.902517,0.860237,0.877387


[I 2025-03-22 11:18:56,748] Trial 4 pruned. 


Trial 5 with params: {'learning_rate': 0.0018591820902866042, 'weight_decay': 0.002, 'warmup_steps': 16, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.049,0.765221,0.869844,0.859108,0.8066,0.822687
2,0.2788,0.63918,0.892759,0.902341,0.859272,0.876862
3,0.1523,0.606935,0.899175,0.906553,0.864981,0.881751
4,0.1134,0.604135,0.896425,0.903985,0.862817,0.879423
5,0.0942,0.591581,0.893676,0.904105,0.859809,0.877999
6,0.0817,0.586394,0.898258,0.90645,0.86418,0.881346
7,0.0728,0.571642,0.899175,0.907989,0.864583,0.882331
8,0.0663,0.571063,0.900092,0.907924,0.865465,0.882772
9,0.0624,0.562069,0.901925,0.909541,0.867227,0.88445
10,0.0591,0.563795,0.902841,0.91045,0.867549,0.885069


[I 2025-03-22 11:22:41,685] Trial 5 finished with value: 0.8865633505836604 and parameters: {'learning_rate': 0.0018591820902866042, 'weight_decay': 0.002, 'warmup_steps': 16, 'lambda_param': 0.6000000000000001, 'temperature': 2.0}. Best is trial 5 with value: 0.8865633505836604.


Trial 6 with params: {'learning_rate': 0.0008204643365323959, 'weight_decay': 0.001, 'warmup_steps': 2, 'lambda_param': 1.0, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2704,0.954075,0.84418,0.705859,0.72136,0.713081
2,0.4744,0.81174,0.857929,0.869218,0.814227,0.833142
3,0.2667,0.690859,0.88451,0.892932,0.85364,0.869107
4,0.18,0.698726,0.87626,0.88921,0.844867,0.862906
5,0.137,0.677599,0.879927,0.891545,0.849204,0.866457
6,0.1148,0.647881,0.88176,0.893859,0.85003,0.867973
7,0.1004,0.635937,0.890926,0.898961,0.857876,0.87451
8,0.089,0.635772,0.885426,0.895198,0.853288,0.870308
9,0.0825,0.617487,0.887259,0.897409,0.85467,0.872135
10,0.0766,0.620185,0.892759,0.90254,0.858985,0.876794


[I 2025-03-22 11:25:03,625] Trial 6 pruned. 


Trial 7 with params: {'learning_rate': 0.0020690200562805084, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9872,0.798077,0.866178,0.853057,0.794418,0.810473
2,0.2649,0.599085,0.901925,0.910488,0.865994,0.884122
3,0.147,0.576183,0.896425,0.894328,0.862171,0.875706
4,0.1127,0.547808,0.907424,0.913403,0.870734,0.888122
5,0.0933,0.551095,0.906508,0.914003,0.869151,0.88753
6,0.08,0.553359,0.901925,0.910421,0.865481,0.88392
7,0.0728,0.530232,0.907424,0.914293,0.869751,0.888017
8,0.0667,0.528496,0.907424,0.914519,0.870076,0.888323
9,0.064,0.520887,0.908341,0.91459,0.870846,0.888719
10,0.0598,0.520619,0.905591,0.912594,0.868427,0.886465


[I 2025-03-22 11:28:27,707] Trial 7 finished with value: 0.8910440570581759 and parameters: {'learning_rate': 0.0020690200562805084, 'weight_decay': 0.003, 'warmup_steps': 3, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}. Best is trial 7 with value: 0.8910440570581759.


Trial 8 with params: {'learning_rate': 8.770946743725407e-05, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.6553,2.02724,0.661778,0.564936,0.561427,0.561628
2,1.5015,1.541002,0.758937,0.635838,0.649968,0.641827
3,1.1498,1.37353,0.784601,0.664913,0.670195,0.666463
4,0.986,1.275974,0.794684,0.667603,0.679575,0.673014
5,0.8829,1.20183,0.813016,0.680826,0.696224,0.688157
6,0.8044,1.148785,0.820348,0.691208,0.700044,0.695209
7,0.7454,1.127245,0.824015,0.69354,0.704005,0.698347
8,0.6968,1.114375,0.820348,0.690649,0.701897,0.695374
9,0.6501,1.085547,0.830431,0.702296,0.708382,0.704775
10,0.6185,1.054551,0.83593,0.703175,0.713265,0.707805


[I 2025-03-22 11:30:44,381] Trial 8 pruned. 


Trial 9 with params: {'learning_rate': 0.0010568529720322872, 'weight_decay': 0.003, 'warmup_steps': 16, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.243,0.922062,0.846929,0.715061,0.723805,0.717173
2,0.3875,0.738021,0.869844,0.882931,0.823927,0.844887
3,0.2118,0.668128,0.887259,0.895417,0.856538,0.871905
4,0.1464,0.658925,0.885426,0.895884,0.853627,0.870627
5,0.1133,0.644835,0.88451,0.89489,0.852264,0.869529
6,0.0967,0.634357,0.885426,0.896449,0.853963,0.871292
7,0.0863,0.621605,0.889093,0.898781,0.856493,0.873662
8,0.0779,0.611271,0.893676,0.902189,0.860639,0.877389
9,0.0733,0.603218,0.893676,0.901279,0.860887,0.877157
10,0.0683,0.586773,0.901008,0.909548,0.866118,0.88389


[I 2025-03-22 11:34:06,569] Trial 9 finished with value: 0.8815957693992643 and parameters: {'learning_rate': 0.0010568529720322872, 'weight_decay': 0.003, 'warmup_steps': 16, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}. Best is trial 7 with value: 0.8910440570581759.


Trial 10 with params: {'learning_rate': 0.004794768110099147, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.8, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7993,0.631173,0.892759,0.891209,0.816289,0.835679
2,0.1887,0.547504,0.901008,0.896291,0.857414,0.873287
3,0.1189,0.503389,0.903758,0.892218,0.852182,0.866314
4,0.0953,0.478834,0.912007,0.900573,0.857777,0.873845
5,0.0832,0.474554,0.914757,0.905924,0.868645,0.883654
6,0.0738,0.475313,0.911091,0.901417,0.865651,0.879814
7,0.0678,0.471954,0.910174,0.903035,0.874672,0.88649
8,0.0626,0.45915,0.912007,0.90302,0.86666,0.881225
9,0.0592,0.466396,0.913841,0.904328,0.867972,0.882561
10,0.055,0.44808,0.920257,0.909121,0.873359,0.887598


[I 2025-03-22 11:37:18,313] Trial 10 finished with value: 0.8864672179891886 and parameters: {'learning_rate': 0.004794768110099147, 'weight_decay': 0.002, 'warmup_steps': 3, 'lambda_param': 0.8, 'temperature': 4.5}. Best is trial 7 with value: 0.8910440570581759.


Trial 11 with params: {'learning_rate': 0.0036642776254065634, 'weight_decay': 0.001, 'warmup_steps': 26, 'lambda_param': 0.4, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9184,0.694404,0.883593,0.882765,0.808253,0.827112
2,0.1997,0.575038,0.905591,0.911234,0.868948,0.886194
3,0.1251,0.519737,0.913841,0.918826,0.875796,0.893223
4,0.0948,0.52882,0.906508,0.913199,0.869426,0.886985
5,0.0828,0.502865,0.912007,0.917814,0.873814,0.891791
6,0.0745,0.498062,0.910174,0.91525,0.862627,0.883091
7,0.0673,0.49068,0.913841,0.918398,0.874794,0.892621
8,0.0627,0.49367,0.911091,0.915705,0.873412,0.890593
9,0.0586,0.492266,0.913841,0.917538,0.876635,0.893077
10,0.0549,0.485286,0.912924,0.917739,0.874944,0.892358


[I 2025-03-22 11:41:10,244] Trial 11 finished with value: 0.8820714044548331 and parameters: {'learning_rate': 0.0036642776254065634, 'weight_decay': 0.001, 'warmup_steps': 26, 'lambda_param': 0.4, 'temperature': 3.0}. Best is trial 7 with value: 0.8910440570581759.


Trial 12 with params: {'learning_rate': 0.001023470136982372, 'weight_decay': 0.005, 'warmup_steps': 24, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2657,0.908061,0.846013,0.712855,0.722858,0.716319
2,0.3969,0.722009,0.87626,0.88844,0.827881,0.849743
3,0.2168,0.647499,0.890926,0.900748,0.859168,0.875272
4,0.1498,0.65124,0.885426,0.89608,0.85368,0.870858
5,0.1166,0.638215,0.886343,0.897429,0.854494,0.871985
6,0.0977,0.599166,0.890926,0.900789,0.858162,0.875544
7,0.0864,0.595095,0.898258,0.905506,0.855153,0.874536
8,0.0787,0.58534,0.898258,0.906927,0.86422,0.881663
9,0.074,0.58803,0.901008,0.908881,0.865907,0.883457
10,0.0684,0.593717,0.896425,0.90537,0.862242,0.879894


[I 2025-03-22 11:43:10,292] Trial 12 pruned. 


Trial 13 with params: {'learning_rate': 0.0019692037522530117, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.9, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9729,0.783268,0.860678,0.868818,0.799106,0.818304
2,0.2653,0.622797,0.890926,0.899222,0.850364,0.86889
3,0.1488,0.600025,0.888176,0.897218,0.846825,0.865889
4,0.1121,0.579954,0.900092,0.909245,0.865422,0.883293
5,0.0949,0.586718,0.888176,0.899422,0.855269,0.873223
6,0.0817,0.575031,0.893676,0.903804,0.859895,0.877727
7,0.0746,0.565199,0.895509,0.904521,0.862873,0.879728
8,0.0679,0.56076,0.893676,0.903211,0.85055,0.870975
9,0.0647,0.560341,0.893676,0.902856,0.861324,0.878031
10,0.0611,0.554865,0.901008,0.909985,0.866101,0.884051


[I 2025-03-22 11:45:38,857] Trial 13 pruned. 


Trial 14 with params: {'learning_rate': 0.0009700813739546189, 'weight_decay': 0.007, 'warmup_steps': 5, 'lambda_param': 0.8, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2229,0.90134,0.847846,0.710621,0.724486,0.717133
2,0.4152,0.786307,0.867094,0.876815,0.822356,0.840961
3,0.2298,0.680716,0.88176,0.89096,0.853073,0.86774
4,0.1574,0.671412,0.88176,0.894973,0.850874,0.868821
5,0.1245,0.657518,0.882676,0.895335,0.85144,0.868978
6,0.1037,0.626172,0.885426,0.896585,0.853721,0.871222
7,0.09,0.615454,0.890926,0.899301,0.859341,0.875368
8,0.0819,0.605747,0.887259,0.896021,0.85584,0.871885
9,0.0759,0.599134,0.890009,0.898782,0.857875,0.874374
10,0.0708,0.596877,0.887259,0.89777,0.855416,0.872671


[I 2025-03-22 11:47:58,260] Trial 14 pruned. 


Trial 15 with params: {'learning_rate': 0.0031938729076120406, 'weight_decay': 0.005, 'warmup_steps': 5, 'lambda_param': 0.30000000000000004, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8935,0.724952,0.87901,0.884202,0.81342,0.834132
2,0.2146,0.594675,0.896425,0.902567,0.86307,0.878741
3,0.1304,0.569485,0.905591,0.89895,0.870355,0.882255
4,0.1016,0.509781,0.910174,0.904355,0.873839,0.886726
5,0.0849,0.526196,0.904675,0.899981,0.869297,0.882231
6,0.0757,0.518627,0.905591,0.900262,0.86953,0.882546
7,0.0673,0.499621,0.907424,0.899546,0.863095,0.877536
8,0.0632,0.513523,0.906508,0.899209,0.861847,0.876716
9,0.0597,0.508471,0.908341,0.902064,0.873261,0.88502
10,0.0567,0.500059,0.912007,0.903259,0.866693,0.881313


[I 2025-03-22 11:49:54,190] Trial 15 pruned. 


Trial 16 with params: {'learning_rate': 0.0038754935359631665, 'weight_decay': 0.008, 'warmup_steps': 11, 'lambda_param': 0.4, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8514,0.64702,0.894592,0.892356,0.816692,0.836585
2,0.2001,0.535078,0.910174,0.916984,0.87357,0.89112
3,0.1234,0.511687,0.911091,0.904057,0.875044,0.886896
4,0.0969,0.494444,0.910174,0.91413,0.874501,0.890141
5,0.0827,0.500361,0.909258,0.915521,0.872359,0.889985
6,0.0726,0.501715,0.904675,0.911184,0.868725,0.885801
7,0.0661,0.47801,0.911091,0.916186,0.874033,0.891056
8,0.0609,0.471489,0.915674,0.917819,0.869289,0.887595
9,0.0573,0.471847,0.912007,0.915685,0.866277,0.88503
10,0.0546,0.469073,0.913841,0.918005,0.875919,0.892993


[I 2025-03-22 11:53:02,702] Trial 16 finished with value: 0.8855665495311182 and parameters: {'learning_rate': 0.0038754935359631665, 'weight_decay': 0.008, 'warmup_steps': 11, 'lambda_param': 0.4, 'temperature': 5.0}. Best is trial 7 with value: 0.8910440570581759.


Trial 17 with params: {'learning_rate': 0.0008797446343538097, 'weight_decay': 0.005, 'warmup_steps': 24, 'lambda_param': 0.7000000000000001, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3257,0.960459,0.840513,0.710273,0.717579,0.711663
2,0.437,0.756377,0.868928,0.879814,0.822295,0.842471
3,0.2385,0.676142,0.890009,0.896704,0.858015,0.873202
4,0.1629,0.686432,0.87901,0.892472,0.847767,0.865903
5,0.1277,0.654602,0.882676,0.893437,0.85142,0.86847
6,0.1063,0.62667,0.888176,0.89853,0.855778,0.873209
7,0.092,0.628833,0.887259,0.897445,0.854793,0.872204
8,0.0831,0.605472,0.891842,0.900875,0.858962,0.875952
9,0.0773,0.615503,0.894592,0.902366,0.861178,0.877851
10,0.0721,0.607122,0.896425,0.905803,0.862512,0.880184


[I 2025-03-22 11:54:46,284] Trial 17 pruned. 


Trial 18 with params: {'learning_rate': 0.0042550693908098985, 'weight_decay': 0.001, 'warmup_steps': 14, 'lambda_param': 0.4, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8485,0.711965,0.88176,0.880838,0.806871,0.825624
2,0.1941,0.55263,0.904675,0.900581,0.870006,0.882856
3,0.1216,0.538609,0.903758,0.910464,0.869331,0.885879
4,0.0971,0.52653,0.904675,0.912042,0.860164,0.880139
5,0.0818,0.536804,0.901925,0.908371,0.857881,0.877288
6,0.0723,0.517471,0.902841,0.90846,0.85865,0.87765
7,0.0656,0.513571,0.906508,0.898893,0.862663,0.877053
8,0.0606,0.512894,0.901008,0.894594,0.857846,0.872495
9,0.0567,0.498661,0.906508,0.911975,0.86211,0.881219
10,0.0543,0.498893,0.906508,0.91241,0.862101,0.881401


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-22 11:58:05,163] Trial 18 finished with value: 0.8830783856223201 and parameters: {'learning_rate': 0.0042550693908098985, 'weight_decay': 0.001, 'warmup_steps': 14, 'lambda_param': 0.4, 'temperature': 2.0}. Best is trial 7 with value: 0.8910440570581759.


Trial 19 with params: {'learning_rate': 0.002321065622125405, 'weight_decay': 0.001, 'warmup_steps': 17, 'lambda_param': 0.9, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9862,0.736967,0.874427,0.869642,0.827334,0.842981
2,0.2413,0.615321,0.887259,0.896612,0.846077,0.865545
3,0.1378,0.589301,0.897342,0.905426,0.863677,0.880209
4,0.1067,0.56908,0.895509,0.904762,0.861636,0.879073
5,0.0889,0.53862,0.901925,0.909862,0.866564,0.884105
6,0.0771,0.538461,0.901925,0.909794,0.866589,0.884221
7,0.0697,0.527739,0.900092,0.907029,0.86612,0.882594
8,0.0643,0.520773,0.906508,0.912726,0.870536,0.887702
9,0.0608,0.52191,0.905591,0.911569,0.869587,0.88664
10,0.0573,0.508559,0.908341,0.914929,0.871681,0.889301


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--accuracy/f887c0aab52c2d38e1f8a215681126379eca617f96c447638f751434e8e65b14 (last modified on Sat Oct 12 13:56:14 2024) since it couldn't be found locally at evaluate-metric--accuracy, or remotely on the Hugging Face Hub.
[I 2025-03-22 12:01:12,936] Trial 19 finished with value: 0.8891526063416993 and parameters: {'learning_rate': 0.002321065622125405, 'weight_decay': 0.001, 'warmup_steps': 17, 'lambda_param': 0.9, 'temperature': 2.0}. Best is trial 7 with value: 0.8910440570581759.


Trial 20 with params: {'learning_rate': 0.0035655335623074897, 'weight_decay': 0.0, 'warmup_steps': 18, 'lambda_param': 0.9, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8905,0.703272,0.883593,0.882868,0.80809,0.827071
2,0.202,0.575457,0.902841,0.909644,0.867605,0.884616
3,0.1239,0.530027,0.911091,0.916002,0.874988,0.891398
4,0.0999,0.524824,0.908341,0.904281,0.872056,0.885689
5,0.0842,0.524324,0.908341,0.914926,0.871717,0.889177
6,0.0741,0.501442,0.908341,0.904031,0.871121,0.885122
7,0.0684,0.500949,0.910174,0.904805,0.873525,0.886871
8,0.0625,0.491901,0.914757,0.908472,0.876865,0.890237
9,0.0581,0.472084,0.912007,0.917911,0.874133,0.891892
10,0.055,0.471019,0.917507,0.922488,0.878499,0.89646


[I 2025-03-22 12:04:13,799] Trial 20 finished with value: 0.8939883366509976 and parameters: {'learning_rate': 0.0035655335623074897, 'weight_decay': 0.0, 'warmup_steps': 18, 'lambda_param': 0.9, 'temperature': 2.0}. Best is trial 20 with value: 0.8939883366509976.


Trial 21 with params: {'learning_rate': 0.004464075840803069, 'weight_decay': 0.002, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8601,0.658341,0.888176,0.886101,0.811875,0.830873
2,0.1967,0.558633,0.905591,0.911178,0.870831,0.887044
3,0.1213,0.512145,0.915674,0.91946,0.87799,0.894756
4,0.0942,0.515122,0.902841,0.911422,0.867947,0.885721
5,0.0811,0.512007,0.907424,0.914818,0.871005,0.888838
6,0.0718,0.507945,0.910174,0.916415,0.873335,0.890878
7,0.0665,0.498021,0.911091,0.91664,0.874276,0.891433
8,0.0608,0.486433,0.915674,0.920312,0.878042,0.895217
9,0.0573,0.494745,0.91659,0.921687,0.878101,0.895825
10,0.0551,0.481874,0.912924,0.917181,0.866222,0.885808


[I 2025-03-22 12:07:28,274] Trial 21 finished with value: 0.8921660987694899 and parameters: {'learning_rate': 0.004464075840803069, 'weight_decay': 0.002, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 2.5}. Best is trial 20 with value: 0.8939883366509976.


Trial 22 with params: {'learning_rate': 0.0013100144716848485, 'weight_decay': 0.001, 'warmup_steps': 30, 'lambda_param': 0.9, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2067,0.858721,0.857929,0.891055,0.759397,0.774967
2,0.3432,0.679101,0.88176,0.893539,0.841971,0.861794
3,0.1832,0.63986,0.88451,0.892998,0.854317,0.86941
4,0.1308,0.625351,0.890926,0.900685,0.858343,0.875529
5,0.1023,0.603722,0.893676,0.904028,0.859478,0.877798
6,0.0892,0.580994,0.891842,0.902354,0.859152,0.876744
7,0.0796,0.586345,0.893676,0.902486,0.860755,0.877704
8,0.073,0.577853,0.902841,0.910216,0.867684,0.884851
9,0.0675,0.5699,0.894592,0.90274,0.861532,0.878172
10,0.0636,0.57075,0.897342,0.90494,0.854395,0.87381


[I 2025-03-22 12:09:28,924] Trial 22 pruned. 


Trial 23 with params: {'learning_rate': 0.0033846730046551373, 'weight_decay': 0.003, 'warmup_steps': 17, 'lambda_param': 0.9, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8997,0.674974,0.879927,0.888902,0.832017,0.852036
2,0.2058,0.546835,0.904675,0.909911,0.869529,0.885689
3,0.1284,0.542426,0.908341,0.914942,0.872565,0.889613
4,0.098,0.530417,0.907424,0.914956,0.871702,0.889075
5,0.081,0.540625,0.909258,0.904647,0.872894,0.886359
6,0.0718,0.517044,0.910174,0.905322,0.873441,0.887071
7,0.0659,0.529816,0.906508,0.912546,0.871359,0.887578
8,0.062,0.507205,0.911091,0.905159,0.87338,0.886974
9,0.0578,0.50118,0.912007,0.905493,0.874583,0.887584
10,0.054,0.49586,0.912924,0.917945,0.874195,0.891915


[I 2025-03-22 12:12:35,578] Trial 23 finished with value: 0.8940111986312399 and parameters: {'learning_rate': 0.0033846730046551373, 'weight_decay': 0.003, 'warmup_steps': 17, 'lambda_param': 0.9, 'temperature': 3.5}. Best is trial 23 with value: 0.8940111986312399.


Trial 24 with params: {'learning_rate': 0.002318890210419788, 'weight_decay': 0.001, 'warmup_steps': 18, 'lambda_param': 0.8, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0004,0.740613,0.878093,0.869516,0.822268,0.838208
2,0.2413,0.607772,0.896425,0.905253,0.86193,0.879649
3,0.1382,0.564711,0.901925,0.90906,0.867216,0.88415
4,0.1055,0.547031,0.902841,0.90967,0.867158,0.884514
5,0.0886,0.542916,0.901925,0.909078,0.866567,0.883839
6,0.0778,0.544588,0.899175,0.907754,0.864222,0.882001
7,0.0694,0.534346,0.908341,0.913808,0.872021,0.888817
8,0.0638,0.533158,0.906508,0.912599,0.870337,0.887527
9,0.0612,0.531049,0.907424,0.914499,0.871009,0.888764
10,0.057,0.51297,0.907424,0.913735,0.871341,0.888533


[I 2025-03-22 12:16:26,850] Trial 24 finished with value: 0.8844732903587519 and parameters: {'learning_rate': 0.002318890210419788, 'weight_decay': 0.001, 'warmup_steps': 18, 'lambda_param': 0.8, 'temperature': 4.5}. Best is trial 23 with value: 0.8940111986312399.


Trial 25 with params: {'learning_rate': 0.0036537249729028474, 'weight_decay': 0.002, 'warmup_steps': 17, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8876,0.713739,0.883593,0.885812,0.81689,0.836443
2,0.2039,0.582056,0.897342,0.904491,0.863105,0.879655
3,0.1241,0.516462,0.902841,0.910157,0.867667,0.884956
4,0.0969,0.509959,0.905591,0.911254,0.869848,0.886515
5,0.0814,0.493515,0.905591,0.91354,0.868904,0.887082
6,0.0723,0.489909,0.901925,0.912113,0.865472,0.88465
7,0.0657,0.504212,0.901925,0.909618,0.866788,0.884138
8,0.0608,0.471318,0.915674,0.920995,0.877675,0.89535
9,0.057,0.479196,0.907424,0.914219,0.871599,0.888943
10,0.0532,0.470645,0.909258,0.915985,0.871624,0.889705


[I 2025-03-22 12:19:28,902] Trial 25 finished with value: 0.8940128612225117 and parameters: {'learning_rate': 0.0036537249729028474, 'weight_decay': 0.002, 'warmup_steps': 17, 'lambda_param': 1.0, 'temperature': 3.0}. Best is trial 25 with value: 0.8940128612225117.


Trial 26 with params: {'learning_rate': 0.0039037226917133664, 'weight_decay': 0.005, 'warmup_steps': 15, 'lambda_param': 0.8, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8555,0.680338,0.88451,0.887266,0.819089,0.838026
2,0.1964,0.554404,0.901008,0.909475,0.865923,0.883684
3,0.1194,0.534896,0.904675,0.910151,0.868815,0.885467
4,0.0934,0.504868,0.906508,0.913383,0.870911,0.888112
5,0.0808,0.521214,0.907424,0.913156,0.871516,0.888357
6,0.0713,0.506557,0.908341,0.912598,0.862781,0.881872
7,0.0656,0.500036,0.913841,0.917443,0.876474,0.892956
8,0.0616,0.497694,0.908341,0.912608,0.872199,0.888467
9,0.0576,0.502272,0.914757,0.918162,0.877264,0.893733
10,0.0545,0.48594,0.912007,0.916394,0.875326,0.891937


[I 2025-03-22 12:22:38,378] Trial 26 finished with value: 0.8933817149804897 and parameters: {'learning_rate': 0.0039037226917133664, 'weight_decay': 0.005, 'warmup_steps': 15, 'lambda_param': 0.8, 'temperature': 3.0}. Best is trial 25 with value: 0.8940128612225117.


Trial 27 with params: {'learning_rate': 0.00043189124749833823, 'weight_decay': 0.002, 'warmup_steps': 15, 'lambda_param': 0.9, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6072,1.137389,0.815765,0.688757,0.697127,0.691746
2,0.6894,0.935475,0.851512,0.72231,0.725369,0.722601
3,0.4546,0.824133,0.860678,0.859301,0.826336,0.838411
4,0.3125,0.77589,0.867094,0.879347,0.828998,0.84811
5,0.2337,0.759061,0.872594,0.883281,0.832868,0.852324


[I 2025-03-22 12:23:46,313] Trial 27 pruned. 


Trial 28 with params: {'learning_rate': 0.0046557244440487856, 'weight_decay': 0.005, 'warmup_steps': 28, 'lambda_param': 0.8, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8573,0.694076,0.882676,0.879278,0.808421,0.825599
2,0.1889,0.598731,0.895509,0.89134,0.862411,0.874609
3,0.1179,0.557963,0.901008,0.897987,0.866207,0.879503
4,0.0954,0.55379,0.900092,0.895279,0.866409,0.878291
5,0.0828,0.53959,0.905591,0.899863,0.870382,0.88273
6,0.0727,0.527496,0.903758,0.900125,0.867688,0.881628
7,0.065,0.515098,0.909258,0.914087,0.873287,0.88969
8,0.061,0.516097,0.908341,0.902758,0.872328,0.885229
9,0.0568,0.512678,0.908341,0.903033,0.872625,0.88549
10,0.0536,0.50113,0.912007,0.906083,0.87542,0.888378


[I 2025-03-22 12:26:54,886] Trial 28 finished with value: 0.8891187843965694 and parameters: {'learning_rate': 0.0046557244440487856, 'weight_decay': 0.005, 'warmup_steps': 28, 'lambda_param': 0.8, 'temperature': 4.0}. Best is trial 25 with value: 0.8940128612225117.


Trial 29 with params: {'learning_rate': 0.0034855540353454775, 'weight_decay': 0.003, 'warmup_steps': 9, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8761,0.67674,0.887259,0.89221,0.829212,0.849698
2,0.2057,0.586097,0.900092,0.897415,0.865158,0.878877
3,0.125,0.547844,0.900092,0.895437,0.866059,0.878124
4,0.0971,0.523281,0.900092,0.895327,0.866202,0.878454
5,0.0823,0.522283,0.909258,0.904705,0.872831,0.886472
6,0.073,0.515464,0.906508,0.912455,0.870388,0.887484
7,0.0672,0.51013,0.906508,0.900613,0.871011,0.883484
8,0.0612,0.51068,0.909258,0.913499,0.873338,0.889385
9,0.058,0.499622,0.909258,0.902635,0.873422,0.88565
10,0.0546,0.492942,0.911091,0.903311,0.865259,0.880751


[I 2025-03-22 12:28:48,021] Trial 29 pruned. 


Trial 30 with params: {'learning_rate': 0.000311584806759745, 'weight_decay': 0.008, 'warmup_steps': 0, 'lambda_param': 0.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7321,1.257696,0.791017,0.664546,0.67676,0.670049
2,0.8201,1.024189,0.833181,0.700133,0.711542,0.705238
3,0.6014,0.916249,0.855179,0.716652,0.729204,0.722368
4,0.4463,0.877519,0.853346,0.857063,0.8184,0.83374
5,0.3369,0.829415,0.868011,0.88037,0.829869,0.849301
6,0.2644,0.82784,0.861595,0.876253,0.82388,0.843403
7,0.2198,0.78161,0.870761,0.883415,0.830957,0.851315
8,0.1919,0.832982,0.855179,0.871186,0.820937,0.838624
9,0.1663,0.798716,0.859762,0.873822,0.822788,0.842013
10,0.1499,0.764288,0.871677,0.882397,0.831424,0.851143


[I 2025-03-22 12:30:40,220] Trial 30 pruned. 


Trial 31 with params: {'learning_rate': 0.004222635128040945, 'weight_decay': 0.006, 'warmup_steps': 14, 'lambda_param': 0.8, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8464,0.678516,0.887259,0.884256,0.812506,0.829765
2,0.1947,0.58844,0.899175,0.90671,0.864443,0.881627
3,0.1229,0.567611,0.897342,0.904501,0.865322,0.880527
4,0.0968,0.550675,0.899175,0.906397,0.865191,0.881577
5,0.0814,0.529984,0.909258,0.915414,0.873256,0.89033
6,0.072,0.51603,0.906508,0.899492,0.861518,0.876862
7,0.0649,0.524865,0.904675,0.911049,0.869247,0.886153
8,0.0608,0.512292,0.904675,0.911753,0.869975,0.886742
9,0.0571,0.517631,0.901925,0.909624,0.868036,0.88491
10,0.0534,0.512197,0.902841,0.907069,0.859579,0.877501


[I 2025-03-22 12:32:40,492] Trial 31 pruned. 


Trial 32 with params: {'learning_rate': 0.003417826505094754, 'weight_decay': 0.005, 'warmup_steps': 18, 'lambda_param': 0.9, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9034,0.696232,0.878093,0.885362,0.822134,0.842494
2,0.2073,0.575978,0.896425,0.905135,0.861648,0.879453
3,0.1262,0.542138,0.906508,0.911353,0.871165,0.887081
4,0.0961,0.509878,0.902841,0.898499,0.868091,0.880968
5,0.0842,0.521296,0.910174,0.904805,0.873672,0.886887
6,0.0737,0.50182,0.905591,0.90223,0.868972,0.883303
7,0.0679,0.510352,0.902841,0.898843,0.868471,0.881163
8,0.0641,0.480474,0.909258,0.903063,0.873645,0.885914
9,0.0593,0.481233,0.913841,0.907952,0.87677,0.890106
10,0.056,0.466435,0.915674,0.909709,0.877664,0.891341


[I 2025-03-22 12:36:35,654] Trial 32 finished with value: 0.8916735542648385 and parameters: {'learning_rate': 0.003417826505094754, 'weight_decay': 0.005, 'warmup_steps': 18, 'lambda_param': 0.9, 'temperature': 4.0}. Best is trial 25 with value: 0.8940128612225117.


Trial 33 with params: {'learning_rate': 5.8367877335939255e-05, 'weight_decay': 0.01, 'warmup_steps': 18, 'lambda_param': 0.8, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9627,2.383112,0.616865,0.540732,0.517111,0.516295
2,1.8679,1.818516,0.707608,0.599648,0.604084,0.599921
3,1.4136,1.576004,0.752521,0.631839,0.644265,0.637363
4,1.1956,1.447686,0.774519,0.654214,0.663037,0.657235
5,1.0693,1.371797,0.787351,0.660019,0.675189,0.666775


[I 2025-03-22 12:37:35,992] Trial 33 pruned. 


Trial 34 with params: {'learning_rate': 0.002128341841130116, 'weight_decay': 0.0, 'warmup_steps': 21, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0254,0.764328,0.870761,0.874286,0.797372,0.817641
2,0.2568,0.629232,0.890926,0.899454,0.848912,0.868274
3,0.1438,0.601104,0.895509,0.902274,0.863016,0.878505
4,0.1081,0.583714,0.897342,0.90538,0.863789,0.88052
5,0.091,0.579381,0.896425,0.90468,0.863108,0.879895
6,0.0795,0.556994,0.903758,0.911622,0.868875,0.886156
7,0.0715,0.560072,0.895509,0.901647,0.853574,0.871754
8,0.0661,0.544931,0.900092,0.907069,0.865872,0.882527
9,0.0612,0.543656,0.903758,0.910434,0.868553,0.88556
10,0.0584,0.536223,0.900092,0.908039,0.865734,0.882958


[I 2025-03-22 12:40:28,501] Trial 34 finished with value: 0.8857815965002014 and parameters: {'learning_rate': 0.002128341841130116, 'weight_decay': 0.0, 'warmup_steps': 21, 'lambda_param': 1.0, 'temperature': 3.0}. Best is trial 25 with value: 0.8940128612225117.


Trial 35 with params: {'learning_rate': 5.817102176211476e-05, 'weight_decay': 0.0, 'warmup_steps': 10, 'lambda_param': 0.8, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9519,2.384057,0.617782,0.544256,0.51753,0.517373
2,1.867,1.820034,0.707608,0.600173,0.603501,0.599687
3,1.4155,1.576602,0.75527,0.63459,0.646084,0.639557
4,1.197,1.449517,0.772686,0.65202,0.661324,0.655451
5,1.0712,1.371927,0.782768,0.656431,0.671508,0.663034


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--f1/34c46321f42186df33a6260966e34a368f14868d9cc2ba47d142112e2800d233 (last modified on Fri Jan 10 23:14:01 2025) since it couldn't be found locally at evaluate-metric--f1, or remotely on the Hugging Face Hub.
[I 2025-03-22 12:41:48,082] Trial 35 pruned. 


Trial 36 with params: {'learning_rate': 0.0028589145173823927, 'weight_decay': 0.003, 'warmup_steps': 13, 'lambda_param': 0.7000000000000001, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9136,0.697593,0.883593,0.886065,0.81753,0.837096
2,0.2166,0.567382,0.901008,0.909795,0.865598,0.883744
3,0.1286,0.541635,0.901925,0.898454,0.867163,0.880422
4,0.0991,0.523014,0.907424,0.903001,0.871519,0.885008
5,0.0837,0.535486,0.901925,0.898463,0.867131,0.88044
6,0.0744,0.515579,0.905591,0.901467,0.87079,0.883705
7,0.0685,0.521833,0.901925,0.898914,0.867171,0.880746
8,0.0635,0.525826,0.905591,0.902277,0.870467,0.884052
9,0.059,0.521885,0.900092,0.89763,0.865754,0.879282
10,0.0554,0.505381,0.905591,0.901754,0.869921,0.883584


[I 2025-03-22 12:44:38,124] Trial 36 finished with value: 0.8823632458021388 and parameters: {'learning_rate': 0.0028589145173823927, 'weight_decay': 0.003, 'warmup_steps': 13, 'lambda_param': 0.7000000000000001, 'temperature': 3.5}. Best is trial 25 with value: 0.8940128612225117.


Trial 37 with params: {'learning_rate': 5.431299921217806e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 1, 'lambda_param': 0.4, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9813,2.439786,0.6022,0.536706,0.500901,0.50022
2,1.9352,1.880741,0.696609,0.59097,0.593243,0.590263
3,1.481,1.625703,0.745188,0.625487,0.637252,0.630394
4,1.2509,1.493573,0.76352,0.644463,0.654352,0.64793
5,1.1167,1.408415,0.778185,0.651589,0.667001,0.658258


[I 2025-03-22 12:45:46,812] Trial 37 pruned. 


Trial 38 with params: {'learning_rate': 0.00014198795619548116, 'weight_decay': 0.005, 'warmup_steps': 20, 'lambda_param': 0.30000000000000004, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3227,1.628225,0.749771,0.630974,0.639599,0.634472
2,1.172,1.300399,0.797434,0.671044,0.681838,0.675634
3,0.9139,1.177393,0.814849,0.6911,0.695501,0.69217
4,0.7749,1.098169,0.824931,0.695377,0.703531,0.699135
5,0.673,1.02697,0.840513,0.703829,0.717166,0.710104


[I 2025-03-22 12:46:52,704] Trial 38 pruned. 


Trial 39 with params: {'learning_rate': 0.001395039612162253, 'weight_decay': 0.001, 'warmup_steps': 15, 'lambda_param': 0.2, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1311,0.826504,0.857012,0.854971,0.768691,0.785148
2,0.3336,0.688581,0.887259,0.898572,0.845616,0.865886
3,0.1792,0.656618,0.885426,0.894598,0.855175,0.870417
4,0.1284,0.63584,0.889093,0.89783,0.857219,0.873406
5,0.1043,0.623985,0.891842,0.90047,0.859522,0.875992
6,0.0881,0.610605,0.890926,0.901571,0.858502,0.87594
7,0.0789,0.600636,0.894592,0.903351,0.861522,0.878542
8,0.071,0.590947,0.895509,0.90361,0.862105,0.878943
9,0.0664,0.589111,0.896425,0.903931,0.863147,0.879538
10,0.0627,0.599946,0.897342,0.906293,0.862804,0.880549


[I 2025-03-22 12:48:57,138] Trial 39 pruned. 


Trial 40 with params: {'learning_rate': 0.002783057490109808, 'weight_decay': 0.0, 'warmup_steps': 19, 'lambda_param': 0.7000000000000001, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9474,0.732509,0.87626,0.880878,0.811068,0.831583
2,0.226,0.616672,0.891842,0.881101,0.858215,0.868459
3,0.1313,0.596871,0.896425,0.904889,0.853203,0.872412
4,0.1026,0.565799,0.899175,0.907559,0.864882,0.881957
5,0.0875,0.565395,0.901008,0.909924,0.865644,0.88379
6,0.0757,0.557255,0.895509,0.904025,0.861688,0.878724
7,0.0698,0.547238,0.901008,0.907956,0.866863,0.883346
8,0.0639,0.55657,0.900092,0.907875,0.865172,0.882399
9,0.06,0.557305,0.901925,0.908814,0.866624,0.883769
10,0.0561,0.539599,0.902841,0.910212,0.86729,0.884824


[I 2025-03-22 12:51:04,688] Trial 40 pruned. 


Trial 41 with params: {'learning_rate': 0.004305318553399093, 'weight_decay': 0.002, 'warmup_steps': 14, 'lambda_param': 0.9, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8565,0.640979,0.893676,0.891154,0.815685,0.835506
2,0.1958,0.580629,0.903758,0.909126,0.868703,0.884943
3,0.123,0.554796,0.902841,0.909934,0.867857,0.884803
4,0.0951,0.553144,0.904675,0.909018,0.870443,0.885686
5,0.0825,0.539852,0.906508,0.901749,0.871094,0.884012
6,0.0732,0.536212,0.904675,0.912513,0.868333,0.886407
7,0.0664,0.52434,0.911091,0.916176,0.874854,0.891506
8,0.0612,0.524421,0.906508,0.91349,0.870664,0.88805
9,0.0572,0.518037,0.910174,0.915792,0.873511,0.890731
10,0.0535,0.517297,0.902841,0.90938,0.858705,0.878243


[I 2025-03-22 12:52:57,392] Trial 41 pruned. 


Trial 42 with params: {'learning_rate': 0.0029609704610900605, 'weight_decay': 0.003, 'warmup_steps': 20, 'lambda_param': 0.9, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9382,0.746915,0.87901,0.881004,0.81368,0.832866
2,0.2174,0.585434,0.900092,0.90712,0.865286,0.882223
3,0.1313,0.547813,0.906508,0.912572,0.870017,0.886997
4,0.0999,0.538808,0.904675,0.912897,0.868046,0.886463
5,0.0845,0.533919,0.907424,0.914359,0.871806,0.889148
6,0.0752,0.541979,0.904675,0.91265,0.868389,0.886527
7,0.0688,0.523297,0.902841,0.90987,0.866959,0.88446
8,0.0633,0.524719,0.903758,0.910511,0.868068,0.885255
9,0.0591,0.517988,0.901925,0.909143,0.866651,0.883842
10,0.0572,0.509708,0.909258,0.915768,0.872159,0.890007


[I 2025-03-22 12:55:56,844] Trial 42 finished with value: 0.8857923096228171 and parameters: {'learning_rate': 0.0029609704610900605, 'weight_decay': 0.003, 'warmup_steps': 20, 'lambda_param': 0.9, 'temperature': 2.5}. Best is trial 25 with value: 0.8940128612225117.


Trial 43 with params: {'learning_rate': 0.0042695698452040335, 'weight_decay': 0.001, 'warmup_steps': 26, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8693,0.722215,0.87901,0.876429,0.8058,0.822099
2,0.2003,0.60101,0.894592,0.900582,0.86064,0.876676
3,0.1205,0.517871,0.906508,0.90134,0.870092,0.883329
4,0.0958,0.509057,0.907424,0.913335,0.87152,0.888494
5,0.0812,0.515145,0.902841,0.910104,0.867672,0.88481
6,0.0712,0.501606,0.904675,0.898146,0.869031,0.881248
7,0.0659,0.501364,0.909258,0.902588,0.873332,0.885432
8,0.0623,0.492489,0.910174,0.91463,0.874276,0.890509
9,0.0583,0.485051,0.913841,0.91777,0.876409,0.89315
10,0.0547,0.477993,0.912007,0.915953,0.875566,0.891779


[I 2025-03-22 12:59:05,410] Trial 43 finished with value: 0.8906988808957053 and parameters: {'learning_rate': 0.0042695698452040335, 'weight_decay': 0.001, 'warmup_steps': 26, 'lambda_param': 1.0, 'temperature': 2.0}. Best is trial 25 with value: 0.8940128612225117.


Trial 44 with params: {'learning_rate': 0.0037906172026621545, 'weight_decay': 0.0, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8856,0.668561,0.890009,0.889959,0.813123,0.833501
2,0.1989,0.576388,0.900092,0.906477,0.856503,0.875695
3,0.1233,0.549348,0.899175,0.906338,0.865042,0.881751
4,0.0933,0.527677,0.906508,0.913449,0.870579,0.888037
5,0.0808,0.531354,0.904675,0.910873,0.869826,0.88624
6,0.0726,0.519229,0.902841,0.909631,0.867842,0.884744
7,0.0676,0.527916,0.904675,0.91125,0.868942,0.885987
8,0.0611,0.492089,0.914757,0.918451,0.878394,0.894395
9,0.0573,0.496352,0.908341,0.913244,0.872387,0.888688
10,0.0539,0.475504,0.91934,0.923405,0.880948,0.898128


[I 2025-03-22 13:02:23,256] Trial 44 finished with value: 0.8969803270673352 and parameters: {'learning_rate': 0.0037906172026621545, 'weight_decay': 0.0, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 45 with params: {'learning_rate': 0.004229168606699789, 'weight_decay': 0.009000000000000001, 'warmup_steps': 23, 'lambda_param': 0.5, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.87,0.649472,0.895509,0.895013,0.827724,0.846782
2,0.1958,0.596332,0.894592,0.89931,0.852937,0.870083
3,0.1227,0.578108,0.899175,0.895735,0.86704,0.878001
4,0.0935,0.519673,0.911091,0.904873,0.875018,0.8875
5,0.0808,0.54192,0.908341,0.902703,0.87266,0.885324
6,0.0716,0.519099,0.909258,0.904807,0.872671,0.886466
7,0.066,0.51638,0.906508,0.901296,0.870876,0.883724
8,0.0617,0.491643,0.912007,0.90634,0.87518,0.888505
9,0.0575,0.485951,0.911091,0.903934,0.86539,0.881083
10,0.0541,0.479587,0.914757,0.908614,0.877431,0.890756


[I 2025-03-22 13:05:23,377] Trial 45 finished with value: 0.8900797915320103 and parameters: {'learning_rate': 0.004229168606699789, 'weight_decay': 0.009000000000000001, 'warmup_steps': 23, 'lambda_param': 0.5, 'temperature': 2.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 46 with params: {'learning_rate': 0.004197332413507969, 'weight_decay': 0.0, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8649,0.697151,0.882676,0.885837,0.826503,0.844242
2,0.1973,0.558521,0.901008,0.907576,0.867142,0.883235
3,0.1212,0.555012,0.899175,0.907071,0.864629,0.881681
4,0.0952,0.553555,0.903758,0.911538,0.86877,0.886082
5,0.0812,0.560559,0.898258,0.904163,0.855716,0.874023
6,0.0723,0.561351,0.899175,0.906216,0.855866,0.875124
7,0.0666,0.544644,0.901008,0.906754,0.857496,0.876189
8,0.0617,0.528536,0.905591,0.911723,0.861478,0.880742
9,0.0583,0.527883,0.904675,0.910897,0.860569,0.879892
10,0.0543,0.524259,0.909258,0.913794,0.864221,0.883129


[I 2025-03-22 13:07:53,501] Trial 46 pruned. 


Trial 47 with params: {'learning_rate': 0.002493208994095196, 'weight_decay': 0.001, 'warmup_steps': 11, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9404,0.743517,0.878093,0.884898,0.822424,0.841786
2,0.2343,0.57319,0.901925,0.910369,0.866454,0.884474
3,0.133,0.558343,0.898258,0.906363,0.863808,0.880843
4,0.1044,0.530027,0.905591,0.912132,0.869565,0.886858
5,0.088,0.525824,0.904675,0.91216,0.868942,0.886356
6,0.0765,0.512209,0.905591,0.912589,0.870027,0.887271
7,0.0692,0.507865,0.904675,0.912079,0.869184,0.886648
8,0.0641,0.500708,0.913841,0.919123,0.876143,0.893703
9,0.0601,0.496276,0.906508,0.912779,0.870809,0.887733
10,0.0575,0.488207,0.914757,0.920595,0.877141,0.894888


[I 2025-03-22 13:10:43,093] Trial 47 finished with value: 0.8927117349269172 and parameters: {'learning_rate': 0.002493208994095196, 'weight_decay': 0.001, 'warmup_steps': 11, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 48 with params: {'learning_rate': 0.0027511979602444763, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.7000000000000001, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9023,0.736156,0.874427,0.879724,0.810157,0.829586
2,0.2278,0.598518,0.896425,0.894045,0.862461,0.875776
3,0.1328,0.575586,0.901008,0.896409,0.865895,0.878903
4,0.1026,0.570101,0.898258,0.90477,0.865238,0.880642
5,0.0882,0.5482,0.901925,0.898705,0.866553,0.880319
6,0.0768,0.538091,0.905591,0.902465,0.869684,0.883773
7,0.0707,0.517167,0.901925,0.898354,0.867467,0.880567
8,0.0649,0.522191,0.906508,0.901671,0.871092,0.884017
9,0.06,0.515107,0.901008,0.897636,0.866633,0.87978
10,0.0576,0.518941,0.902841,0.909563,0.867716,0.884604


[I 2025-03-22 13:14:17,658] Trial 48 finished with value: 0.8816424064673823 and parameters: {'learning_rate': 0.0027511979602444763, 'weight_decay': 0.005, 'warmup_steps': 1, 'lambda_param': 0.7000000000000001, 'temperature': 7.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 49 with params: {'learning_rate': 0.004426648522842388, 'weight_decay': 0.0, 'warmup_steps': 20, 'lambda_param': 0.9, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8455,0.664374,0.890009,0.897505,0.84883,0.866861
2,0.1891,0.551118,0.902841,0.907335,0.867556,0.88342
3,0.1242,0.585421,0.899175,0.906768,0.86577,0.881111
4,0.0937,0.506411,0.906508,0.913671,0.870078,0.887768
5,0.0794,0.511626,0.912007,0.917455,0.874625,0.89192
6,0.0703,0.502099,0.912924,0.917824,0.875412,0.892531
7,0.0649,0.501798,0.910174,0.915306,0.873549,0.890235
8,0.0596,0.488396,0.915674,0.920368,0.877142,0.894782
9,0.0565,0.485467,0.912924,0.917758,0.875354,0.892497
10,0.0538,0.479506,0.914757,0.919021,0.876596,0.893668


[I 2025-03-22 13:18:40,807] Trial 49 finished with value: 0.8948312404667892 and parameters: {'learning_rate': 0.004426648522842388, 'weight_decay': 0.0, 'warmup_steps': 20, 'lambda_param': 0.9, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 50 with params: {'learning_rate': 0.004975192451771645, 'weight_decay': 0.0, 'warmup_steps': 15, 'lambda_param': 0.9, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8244,0.648683,0.880843,0.886919,0.833543,0.85143
2,0.1874,0.57492,0.897342,0.905122,0.86371,0.880484
3,0.1233,0.525785,0.905591,0.911487,0.870524,0.886698
4,0.0975,0.493208,0.910174,0.915901,0.873833,0.890932
5,0.0818,0.501417,0.912924,0.916889,0.876756,0.892879
6,0.0729,0.483052,0.907424,0.912735,0.863252,0.882168
7,0.0676,0.497502,0.910174,0.915032,0.874389,0.890767
8,0.0629,0.490078,0.911091,0.915236,0.866021,0.884825
9,0.0589,0.499396,0.912924,0.9177,0.876098,0.893005
10,0.0566,0.480793,0.912007,0.913635,0.866537,0.884271


[I 2025-03-22 13:21:21,502] Trial 50 pruned. 


Trial 51 with params: {'learning_rate': 0.004258646108272671, 'weight_decay': 0.002, 'warmup_steps': 25, 'lambda_param': 1.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8737,0.716329,0.87901,0.883409,0.813144,0.833905
2,0.1917,0.601869,0.894592,0.903968,0.861475,0.878656
3,0.1194,0.566891,0.903758,0.909979,0.868476,0.885009
4,0.0956,0.549114,0.906508,0.913785,0.869949,0.887705
5,0.0797,0.531361,0.906508,0.912608,0.87056,0.887613
6,0.0707,0.530829,0.907424,0.913936,0.870933,0.888483
7,0.0658,0.53555,0.905591,0.912046,0.87064,0.887371
8,0.0605,0.510672,0.912007,0.917894,0.874518,0.892215
9,0.056,0.505081,0.908341,0.914788,0.872103,0.889438
10,0.0542,0.517613,0.910174,0.915582,0.873327,0.890513


[I 2025-03-22 13:25:24,230] Trial 51 finished with value: 0.8901103085972646 and parameters: {'learning_rate': 0.004258646108272671, 'weight_decay': 0.002, 'warmup_steps': 25, 'lambda_param': 1.0, 'temperature': 4.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 52 with params: {'learning_rate': 0.00017540349959161965, 'weight_decay': 0.007, 'warmup_steps': 13, 'lambda_param': 0.2, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.143,1.509267,0.75802,0.633126,0.650732,0.640753
2,1.0638,1.21396,0.811182,0.686726,0.692551,0.688647
3,0.8264,1.094453,0.825848,0.696931,0.705326,0.700101
4,0.6872,1.009957,0.834097,0.701332,0.711365,0.705888
5,0.5809,0.960139,0.848763,0.711955,0.723489,0.717042


[I 2025-03-22 13:26:36,026] Trial 52 pruned. 


Trial 53 with params: {'learning_rate': 0.0038215348751600986, 'weight_decay': 0.0, 'warmup_steps': 24, 'lambda_param': 0.8, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8827,0.715656,0.878093,0.876349,0.804497,0.821622
2,0.2043,0.584006,0.897342,0.905217,0.862392,0.879878
3,0.1242,0.546387,0.902841,0.91049,0.867495,0.884874
4,0.0972,0.520942,0.906508,0.913563,0.870109,0.887715
5,0.0812,0.5159,0.904675,0.913022,0.869017,0.886994
6,0.0707,0.508781,0.909258,0.91547,0.872602,0.889999
7,0.0652,0.509189,0.907424,0.914096,0.872032,0.888924
8,0.0603,0.490953,0.907424,0.914119,0.870991,0.888526
9,0.0562,0.481401,0.914757,0.919608,0.877569,0.894644
10,0.0531,0.482372,0.910174,0.916708,0.873063,0.890858


[I 2025-03-22 13:30:25,350] Trial 53 finished with value: 0.8954074995715332 and parameters: {'learning_rate': 0.0038215348751600986, 'weight_decay': 0.0, 'warmup_steps': 24, 'lambda_param': 0.8, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 54 with params: {'learning_rate': 0.0023755529872613655, 'weight_decay': 0.0, 'warmup_steps': 24, 'lambda_param': 0.6000000000000001, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.02,0.762162,0.873511,0.881295,0.818009,0.838003
2,0.2423,0.624408,0.896425,0.905016,0.863787,0.880443
3,0.1393,0.571125,0.899175,0.905751,0.865868,0.881801
4,0.1049,0.567518,0.900092,0.905701,0.866657,0.882231
5,0.0894,0.565751,0.904675,0.910523,0.86971,0.885987
6,0.0772,0.544039,0.911091,0.917577,0.87417,0.891754
7,0.0715,0.548946,0.904675,0.911578,0.869926,0.886634
8,0.0654,0.541526,0.903758,0.910703,0.869086,0.885954
9,0.0604,0.538886,0.907424,0.912329,0.871383,0.887928
10,0.058,0.529016,0.906508,0.913167,0.87062,0.887931


[I 2025-03-22 13:34:16,781] Trial 54 finished with value: 0.8887302804650649 and parameters: {'learning_rate': 0.0023755529872613655, 'weight_decay': 0.0, 'warmup_steps': 24, 'lambda_param': 0.6000000000000001, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 55 with params: {'learning_rate': 7.242888062473813e-05, 'weight_decay': 0.001, 'warmup_steps': 23, 'lambda_param': 0.0, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.8379,2.202664,0.633364,0.545103,0.533877,0.533118
2,1.6655,1.649918,0.742438,0.621896,0.635849,0.627799
3,1.2544,1.457114,0.768103,0.648157,0.657684,0.652382
4,1.0697,1.350793,0.783685,0.657873,0.671302,0.663817
5,0.9594,1.275163,0.799267,0.669725,0.684503,0.676672
6,0.8814,1.210708,0.812099,0.686008,0.692379,0.688755
7,0.8215,1.186801,0.826764,0.696349,0.706272,0.700755
8,0.7737,1.170521,0.815765,0.686602,0.698143,0.691663
9,0.73,1.144145,0.824931,0.694186,0.704744,0.699114
10,0.6986,1.109285,0.830431,0.697659,0.708853,0.703105


[I 2025-03-22 13:36:44,193] Trial 55 pruned. 


Trial 56 with params: {'learning_rate': 0.003443988689899885, 'weight_decay': 0.0, 'warmup_steps': 29, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9313,0.717248,0.874427,0.878991,0.810508,0.829744
2,0.2101,0.572964,0.898258,0.894019,0.864556,0.876917
3,0.1246,0.566722,0.899175,0.904963,0.865553,0.880822
4,0.0952,0.507732,0.908341,0.914303,0.872296,0.889364
5,0.0825,0.512082,0.909258,0.915886,0.872811,0.89033
6,0.0743,0.5252,0.912924,0.917853,0.875525,0.892778
7,0.0666,0.502365,0.911091,0.916094,0.874514,0.891355
8,0.0623,0.508323,0.904675,0.91052,0.869765,0.885645
9,0.059,0.497663,0.912007,0.916149,0.875645,0.891831
10,0.056,0.491039,0.914757,0.919587,0.877287,0.894483


[I 2025-03-22 13:40:27,352] Trial 56 finished with value: 0.8941896569147673 and parameters: {'learning_rate': 0.003443988689899885, 'weight_decay': 0.0, 'warmup_steps': 29, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 57 with params: {'learning_rate': 0.002588574368085673, 'weight_decay': 0.0, 'warmup_steps': 29, 'lambda_param': 0.6000000000000001, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0003,0.763641,0.872594,0.880667,0.817333,0.837683
2,0.2333,0.592582,0.898258,0.90608,0.864042,0.881141
3,0.1341,0.574481,0.898258,0.904072,0.865624,0.880861
4,0.103,0.558683,0.902841,0.910941,0.867356,0.885161
5,0.0862,0.555038,0.901925,0.909195,0.867276,0.884273
6,0.0755,0.563567,0.900092,0.909277,0.865717,0.883249
7,0.0695,0.557369,0.892759,0.901206,0.860755,0.876822
8,0.0636,0.537487,0.908341,0.914162,0.872123,0.889247
9,0.0597,0.52446,0.909258,0.915431,0.872909,0.89024
10,0.057,0.512804,0.908341,0.914852,0.872283,0.889635


[I 2025-03-22 13:44:05,833] Trial 57 finished with value: 0.8913369164982283 and parameters: {'learning_rate': 0.002588574368085673, 'weight_decay': 0.0, 'warmup_steps': 29, 'lambda_param': 0.6000000000000001, 'temperature': 4.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 58 with params: {'learning_rate': 0.002454186751122107, 'weight_decay': 0.0, 'warmup_steps': 27, 'lambda_param': 0.8, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0015,0.777143,0.863428,0.87143,0.801322,0.820821
2,0.2379,0.647334,0.887259,0.895654,0.855606,0.87165
3,0.1361,0.608337,0.888176,0.89641,0.857434,0.872965
4,0.1035,0.614738,0.891842,0.899204,0.859506,0.875185
5,0.0876,0.584882,0.901008,0.897824,0.865679,0.879437
6,0.0769,0.586486,0.898258,0.896391,0.863387,0.877419
7,0.0694,0.563167,0.896425,0.904137,0.862651,0.879222
8,0.0635,0.554654,0.901008,0.908087,0.866664,0.88328
9,0.0608,0.561138,0.897342,0.905348,0.86424,0.880429
10,0.0576,0.540179,0.904675,0.91147,0.869247,0.886401


[I 2025-03-22 13:48:23,743] Trial 58 finished with value: 0.8859090085173635 and parameters: {'learning_rate': 0.002454186751122107, 'weight_decay': 0.0, 'warmup_steps': 27, 'lambda_param': 0.8, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 59 with params: {'learning_rate': 0.0035451263419651905, 'weight_decay': 0.002, 'warmup_steps': 21, 'lambda_param': 0.8, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8967,0.686051,0.878093,0.886686,0.821978,0.843311
2,0.2016,0.57556,0.902841,0.909343,0.866888,0.884159
3,0.125,0.565841,0.898258,0.904917,0.855434,0.874252
4,0.0974,0.522541,0.902841,0.908715,0.858694,0.877686
5,0.0836,0.538046,0.906508,0.913553,0.870876,0.888171
6,0.073,0.528141,0.901008,0.9084,0.86589,0.883166
7,0.0661,0.511289,0.906508,0.910603,0.862391,0.880657
8,0.0613,0.508719,0.906508,0.912983,0.870752,0.887836
9,0.0575,0.504328,0.908341,0.914126,0.872579,0.88942
10,0.054,0.499008,0.910174,0.916298,0.87278,0.890573


[I 2025-03-22 13:53:00,823] Trial 59 finished with value: 0.8910156402346502 and parameters: {'learning_rate': 0.0035451263419651905, 'weight_decay': 0.002, 'warmup_steps': 21, 'lambda_param': 0.8, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 60 with params: {'learning_rate': 0.0003020939879565185, 'weight_decay': 0.005, 'warmup_steps': 31, 'lambda_param': 0.5, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8322,1.262728,0.788268,0.660871,0.674938,0.667561
2,0.8127,1.02318,0.836847,0.707002,0.714258,0.709735
3,0.5933,0.905827,0.847846,0.878096,0.733399,0.735286
4,0.4415,0.87953,0.843263,0.859118,0.812265,0.828313
5,0.3357,0.824524,0.863428,0.875214,0.826831,0.84519
6,0.2647,0.829042,0.852429,0.855712,0.818168,0.832481
7,0.2199,0.766241,0.875344,0.885529,0.836015,0.854935
8,0.1915,0.786523,0.864345,0.876627,0.827676,0.845799
9,0.1671,0.798949,0.857012,0.872154,0.820109,0.839733
10,0.1501,0.768237,0.870761,0.882761,0.830776,0.850878


[I 2025-03-22 13:55:27,108] Trial 60 pruned. 


Trial 61 with params: {'learning_rate': 0.0040930965105191175, 'weight_decay': 0.001, 'warmup_steps': 22, 'lambda_param': 0.7000000000000001, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8667,0.68774,0.886343,0.88951,0.829156,0.847724
2,0.1964,0.57671,0.902841,0.910122,0.868348,0.885258
3,0.1205,0.567188,0.902841,0.909567,0.869123,0.885256
4,0.096,0.529567,0.912924,0.91847,0.875882,0.893197
5,0.0833,0.528587,0.910174,0.916435,0.873472,0.890835
6,0.0723,0.517707,0.907424,0.913111,0.871224,0.888083
7,0.0671,0.514643,0.906508,0.911947,0.87186,0.887796
8,0.0618,0.517446,0.906508,0.912884,0.871326,0.887981
9,0.0577,0.520313,0.907424,0.914341,0.872084,0.889219
10,0.054,0.512742,0.910174,0.916563,0.873281,0.89081


[I 2025-03-22 13:59:08,969] Trial 61 finished with value: 0.889218132067072 and parameters: {'learning_rate': 0.0040930965105191175, 'weight_decay': 0.001, 'warmup_steps': 22, 'lambda_param': 0.7000000000000001, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 62 with params: {'learning_rate': 0.0005177627782238657, 'weight_decay': 0.008, 'warmup_steps': 21, 'lambda_param': 0.5, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5337,1.073216,0.826764,0.69539,0.706859,0.700351
2,0.6188,0.876033,0.857929,0.892259,0.767249,0.789518
3,0.3829,0.782907,0.866178,0.874825,0.831871,0.846659
4,0.2615,0.761109,0.870761,0.88587,0.840156,0.858855
5,0.1969,0.738238,0.869844,0.881557,0.831177,0.850504
6,0.1561,0.701099,0.878093,0.88901,0.837133,0.857149
7,0.1329,0.68542,0.87901,0.889697,0.847957,0.86485
8,0.1145,0.691725,0.872594,0.885786,0.843348,0.860071
9,0.1023,0.680703,0.874427,0.888736,0.843519,0.86212
10,0.0928,0.676435,0.87901,0.891281,0.847224,0.865273


[I 2025-03-22 14:01:34,975] Trial 62 pruned. 


Trial 63 with params: {'learning_rate': 0.004672345134616893, 'weight_decay': 0.0, 'warmup_steps': 23, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.85,0.702373,0.882676,0.885144,0.818731,0.835538
2,0.1898,0.578171,0.897342,0.902517,0.864625,0.879135
3,0.1182,0.524996,0.905591,0.912642,0.87086,0.887231
4,0.0944,0.482597,0.908341,0.915484,0.872038,0.88967
5,0.0802,0.4992,0.905591,0.912674,0.870518,0.887601
6,0.0718,0.496379,0.911091,0.917582,0.873603,0.891498
7,0.0661,0.49347,0.907424,0.913827,0.871774,0.888698
8,0.0614,0.490683,0.905591,0.911898,0.870141,0.886868
9,0.0581,0.482895,0.905591,0.911644,0.860984,0.880395
10,0.0563,0.474463,0.912924,0.9182,0.87585,0.892944


[I 2025-03-22 14:05:57,040] Trial 63 finished with value: 0.8947305846522507 and parameters: {'learning_rate': 0.004672345134616893, 'weight_decay': 0.0, 'warmup_steps': 23, 'lambda_param': 1.0, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 64 with params: {'learning_rate': 0.00013405290551132384, 'weight_decay': 0.0, 'warmup_steps': 1, 'lambda_param': 0.2, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.3272,1.696734,0.736022,0.618253,0.629927,0.620643
2,1.2157,1.328603,0.797434,0.670243,0.681782,0.675243
3,0.9429,1.202292,0.811182,0.691169,0.691723,0.689746
4,0.8033,1.120084,0.819432,0.68871,0.699773,0.693858
5,0.7004,1.049691,0.831347,0.697656,0.709616,0.703205
6,0.6216,1.008644,0.835014,0.699507,0.713656,0.706263
7,0.5572,0.988302,0.84143,0.706446,0.718072,0.711723
8,0.5045,0.988126,0.83868,0.87051,0.725933,0.727476
9,0.4602,0.965492,0.845096,0.878473,0.755333,0.777173
10,0.4227,0.924263,0.850596,0.85788,0.779246,0.800519


[I 2025-03-22 14:08:53,001] Trial 64 pruned. 


Trial 65 with params: {'learning_rate': 0.002487399866956142, 'weight_decay': 0.0, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9873,0.734012,0.879927,0.886738,0.823387,0.843643
2,0.2395,0.621204,0.895509,0.903255,0.862182,0.878768
3,0.1379,0.567094,0.899175,0.906743,0.865283,0.881827
4,0.1038,0.535135,0.897342,0.906993,0.863417,0.88129
5,0.0877,0.557329,0.899175,0.908482,0.864284,0.882234


[I 2025-03-22 14:10:03,118] Trial 65 pruned. 


Trial 66 with params: {'learning_rate': 0.004387816666803014, 'weight_decay': 0.003, 'warmup_steps': 30, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8933,0.676409,0.882676,0.886271,0.815589,0.836137
2,0.1962,0.585211,0.901008,0.907914,0.8665,0.883115
3,0.1236,0.58132,0.896425,0.904481,0.862397,0.879098
4,0.0931,0.54151,0.900092,0.907007,0.865954,0.882225
5,0.0811,0.521178,0.904675,0.911371,0.86888,0.886171
6,0.071,0.500416,0.909258,0.915032,0.862678,0.883036
7,0.0655,0.51662,0.903758,0.910244,0.868841,0.885525
8,0.0616,0.492376,0.909258,0.914957,0.863076,0.883183
9,0.0566,0.497404,0.906508,0.91199,0.86121,0.880765
10,0.0531,0.485916,0.912007,0.916768,0.865205,0.885172


[I 2025-03-22 14:13:52,221] Trial 66 finished with value: 0.894525195706445 and parameters: {'learning_rate': 0.004387816666803014, 'weight_decay': 0.003, 'warmup_steps': 30, 'lambda_param': 0.0, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 67 with params: {'learning_rate': 0.004013793535360168, 'weight_decay': 0.004, 'warmup_steps': 26, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9121,0.741394,0.880843,0.878809,0.806545,0.824038
2,0.1967,0.572958,0.895509,0.901523,0.852713,0.871266
3,0.125,0.52116,0.905591,0.909503,0.851053,0.87208
4,0.0978,0.539877,0.907424,0.914059,0.880767,0.894936
5,0.0827,0.502843,0.910174,0.915241,0.873044,0.890134
6,0.073,0.513403,0.908341,0.914739,0.871194,0.88895
7,0.067,0.507163,0.914757,0.918172,0.868295,0.887434
8,0.0621,0.492404,0.912924,0.916836,0.866916,0.886087
9,0.0593,0.502268,0.910174,0.914205,0.864724,0.883517
10,0.0562,0.489465,0.911091,0.916576,0.864859,0.884873


[I 2025-03-22 14:16:10,471] Trial 67 pruned. 


Trial 68 with params: {'learning_rate': 0.002913259215588334, 'weight_decay': 0.0, 'warmup_steps': 29, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9738,0.726555,0.88176,0.884487,0.815993,0.8354
2,0.2236,0.591102,0.899175,0.905756,0.865201,0.881527
3,0.1294,0.537724,0.904675,0.911314,0.869745,0.886532
4,0.1006,0.510642,0.909258,0.915601,0.873645,0.890544
5,0.0854,0.528965,0.905591,0.912861,0.869732,0.887264
6,0.0759,0.524339,0.908341,0.915006,0.872652,0.889771
7,0.0692,0.506088,0.910174,0.916713,0.873579,0.891044
8,0.0629,0.504505,0.910174,0.917085,0.874064,0.89144
9,0.0586,0.497576,0.911091,0.917115,0.874704,0.891892
10,0.0551,0.496896,0.909258,0.915494,0.873036,0.89018


[I 2025-03-22 14:19:45,809] Trial 68 finished with value: 0.8932610884744054 and parameters: {'learning_rate': 0.002913259215588334, 'weight_decay': 0.0, 'warmup_steps': 29, 'lambda_param': 0.0, 'temperature': 2.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 69 with params: {'learning_rate': 7.808255793137976e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 20, 'lambda_param': 0.8, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7873,2.148008,0.641613,0.552502,0.540818,0.540291
2,1.6006,1.603419,0.753437,0.629909,0.645387,0.636188
3,1.2055,1.419747,0.774519,0.654272,0.6632,0.657905
4,1.0295,1.319776,0.790101,0.664315,0.676093,0.669452
5,0.9238,1.240927,0.805683,0.675088,0.689766,0.682042
6,0.8461,1.178979,0.815765,0.688348,0.695787,0.691693
7,0.7862,1.158819,0.822181,0.692526,0.702925,0.697231
8,0.7383,1.139478,0.824931,0.693354,0.70553,0.698765
9,0.694,1.114722,0.824931,0.69465,0.704563,0.699185
10,0.662,1.080728,0.832264,0.69884,0.710289,0.704351


[I 2025-03-22 14:22:38,642] Trial 69 pruned. 


Trial 70 with params: {'learning_rate': 0.001882648269128631, 'weight_decay': 0.0, 'warmup_steps': 26, 'lambda_param': 0.1, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0849,0.770542,0.870761,0.87505,0.807105,0.826296
2,0.2744,0.637254,0.893676,0.904729,0.860436,0.878529
3,0.1518,0.584187,0.898258,0.906446,0.864335,0.881188
4,0.1151,0.606353,0.892759,0.90054,0.860525,0.876401
5,0.0935,0.574369,0.902841,0.910569,0.867789,0.885204
6,0.0807,0.562561,0.901925,0.910026,0.867619,0.884791
7,0.0728,0.544462,0.901008,0.906596,0.86687,0.882745
8,0.0664,0.546064,0.903758,0.911186,0.869003,0.886155
9,0.0625,0.541939,0.900092,0.907803,0.865935,0.882823
10,0.0591,0.528559,0.905591,0.913575,0.869679,0.887588


[I 2025-03-22 14:26:59,878] Trial 70 finished with value: 0.8864619295645637 and parameters: {'learning_rate': 0.001882648269128631, 'weight_decay': 0.0, 'warmup_steps': 26, 'lambda_param': 0.1, 'temperature': 4.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 71 with params: {'learning_rate': 0.003603502778565345, 'weight_decay': 0.003, 'warmup_steps': 31, 'lambda_param': 0.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9365,0.736658,0.882676,0.88634,0.827779,0.844825
2,0.2081,0.596248,0.896425,0.903944,0.864302,0.879972
3,0.1237,0.553884,0.897342,0.905834,0.864514,0.881066
4,0.0957,0.532299,0.901925,0.910443,0.867318,0.884872
5,0.0831,0.553435,0.900092,0.908077,0.86646,0.883129


[I 2025-03-22 14:28:13,357] Trial 71 pruned. 


Trial 72 with params: {'learning_rate': 0.002816294156664504, 'weight_decay': 0.003, 'warmup_steps': 26, 'lambda_param': 0.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9648,0.749104,0.871677,0.877193,0.807878,0.827113
2,0.2264,0.61139,0.893676,0.902657,0.86127,0.877972
3,0.1318,0.584738,0.898258,0.904766,0.865048,0.880845
4,0.0986,0.567114,0.898258,0.905871,0.864041,0.880956
5,0.0873,0.561171,0.902841,0.90984,0.867962,0.884768
6,0.0764,0.538036,0.903758,0.911459,0.868782,0.886138
7,0.0689,0.53675,0.900092,0.907432,0.865886,0.882624
8,0.0637,0.536032,0.904675,0.911758,0.869408,0.886685
9,0.0594,0.526659,0.906508,0.912501,0.871633,0.888125
10,0.0562,0.510958,0.905591,0.911576,0.870032,0.886815


[I 2025-03-22 14:32:35,580] Trial 72 finished with value: 0.8911288797551569 and parameters: {'learning_rate': 0.002816294156664504, 'weight_decay': 0.003, 'warmup_steps': 26, 'lambda_param': 0.0, 'temperature': 5.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 73 with params: {'learning_rate': 0.004766406349360372, 'weight_decay': 0.0, 'warmup_steps': 29, 'lambda_param': 1.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8629,0.66708,0.895509,0.894789,0.826312,0.845984
2,0.1917,0.596349,0.891842,0.886884,0.859346,0.870777
3,0.1204,0.535652,0.903758,0.898938,0.869337,0.881601
4,0.0918,0.516119,0.905591,0.901275,0.869711,0.883147
5,0.0795,0.519777,0.904675,0.901592,0.869105,0.882991


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-22 14:34:05,824] Trial 73 pruned. 


Trial 74 with params: {'learning_rate': 0.0002952710041203322, 'weight_decay': 0.01, 'warmup_steps': 28, 'lambda_param': 0.30000000000000004, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8354,1.270173,0.787351,0.658952,0.67463,0.666405
2,0.8222,1.030491,0.833181,0.703874,0.711194,0.706756
3,0.6068,0.912355,0.846929,0.710674,0.723066,0.716102
4,0.4547,0.867118,0.850596,0.865071,0.817307,0.834707
5,0.3442,0.823029,0.864345,0.875572,0.8274,0.845712
6,0.2715,0.828566,0.851512,0.855389,0.816896,0.83197
7,0.2252,0.76914,0.87626,0.886151,0.836682,0.855589
8,0.1943,0.795112,0.863428,0.86418,0.827645,0.841768
9,0.1709,0.805484,0.865261,0.878979,0.827872,0.846768
10,0.1538,0.772352,0.869844,0.88315,0.829169,0.850192


[I 2025-03-22 14:36:21,894] Trial 74 pruned. 


Trial 75 with params: {'learning_rate': 0.0041300373603497, 'weight_decay': 0.0, 'warmup_steps': 21, 'lambda_param': 0.8, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8767,0.679559,0.88176,0.880828,0.807488,0.82566
2,0.1991,0.570116,0.905591,0.912074,0.870594,0.887307
3,0.1203,0.570612,0.900092,0.908165,0.865778,0.882655
4,0.0971,0.506734,0.909258,0.903959,0.87297,0.886074
5,0.0804,0.537677,0.901925,0.909915,0.867499,0.884668
6,0.0721,0.524451,0.901925,0.909764,0.867119,0.884405
7,0.0662,0.517416,0.911091,0.916098,0.875074,0.891541
8,0.0608,0.503474,0.906508,0.912835,0.871132,0.887991
9,0.0569,0.506634,0.903758,0.910839,0.868881,0.885831
10,0.0539,0.501353,0.908341,0.915154,0.872147,0.889647


[I 2025-03-22 14:40:15,513] Trial 75 finished with value: 0.8903124539562451 and parameters: {'learning_rate': 0.0041300373603497, 'weight_decay': 0.0, 'warmup_steps': 21, 'lambda_param': 0.8, 'temperature': 4.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 76 with params: {'learning_rate': 0.00032423698784873585, 'weight_decay': 0.003, 'warmup_steps': 22, 'lambda_param': 0.1, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.7639,1.229787,0.793767,0.665761,0.680392,0.672565
2,0.7919,0.998014,0.839597,0.708119,0.716423,0.71162
3,0.5786,0.883113,0.851512,0.880235,0.735829,0.73769
4,0.4264,0.850421,0.853346,0.868378,0.819003,0.837243
5,0.3188,0.804092,0.867094,0.87841,0.82934,0.848098
6,0.2494,0.812312,0.862511,0.864587,0.825604,0.840704
7,0.2079,0.760883,0.868011,0.879317,0.829601,0.848639
8,0.1787,0.77,0.866178,0.878604,0.828833,0.847402
9,0.1564,0.781899,0.862511,0.877381,0.824931,0.84455
10,0.14,0.760606,0.868011,0.881384,0.828245,0.848945


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--precision/155d3220d6cd4a6553f12da68eeb3d1f97cf431206304a4bc6e2d564c29502e9 (last modified on Fri Jan 10 23:13:59 2025) since it couldn't be found locally at evaluate-metric--precision, or remotely on the Hugging Face Hub.
[I 2025-03-22 14:43:29,545] Trial 76 pruned. 


Trial 77 with params: {'learning_rate': 0.002661110646392324, 'weight_decay': 0.001, 'warmup_steps': 25, 'lambda_param': 0.9, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9775,0.769031,0.872594,0.872715,0.800467,0.817596
2,0.2309,0.601878,0.897342,0.894243,0.863015,0.876335
3,0.1325,0.575621,0.899175,0.905959,0.865243,0.881448
4,0.1025,0.573064,0.901925,0.906844,0.86756,0.883088
5,0.0854,0.552249,0.901925,0.909243,0.867146,0.884041
6,0.0761,0.545883,0.899175,0.907067,0.864858,0.882021
7,0.0691,0.558176,0.898258,0.903208,0.86522,0.880065
8,0.0631,0.524588,0.903758,0.910464,0.867832,0.885209
9,0.0589,0.522954,0.905591,0.910938,0.870596,0.88678
10,0.0558,0.511666,0.902841,0.909892,0.867891,0.884969


[I 2025-03-22 14:46:09,059] Trial 77 pruned. 


Trial 78 with params: {'learning_rate': 0.004267984222474292, 'weight_decay': 0.0, 'warmup_steps': 23, 'lambda_param': 0.9, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8612,0.700902,0.874427,0.877508,0.810532,0.82849
2,0.1914,0.586295,0.906508,0.91229,0.871357,0.887724
3,0.1194,0.564393,0.904675,0.897831,0.870125,0.881514
4,0.0956,0.538006,0.907424,0.911665,0.863078,0.881498
5,0.0805,0.521367,0.910174,0.91453,0.873547,0.890077
6,0.0727,0.517353,0.909258,0.903123,0.873022,0.88563
7,0.0662,0.522703,0.908341,0.912878,0.872533,0.888537
8,0.062,0.495729,0.910174,0.903379,0.874338,0.886498
9,0.0576,0.480046,0.913841,0.916982,0.876541,0.892821
10,0.0534,0.481726,0.914757,0.906831,0.877539,0.889884


[I 2025-03-22 14:49:41,884] Trial 78 finished with value: 0.8929951027265184 and parameters: {'learning_rate': 0.004267984222474292, 'weight_decay': 0.0, 'warmup_steps': 23, 'lambda_param': 0.9, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 79 with params: {'learning_rate': 0.0041970193386191195, 'weight_decay': 0.002, 'warmup_steps': 17, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8551,0.663603,0.887259,0.89063,0.819892,0.840368
2,0.1921,0.579984,0.896425,0.900909,0.863615,0.878148
3,0.1238,0.518117,0.907424,0.902158,0.872465,0.884896
4,0.094,0.510419,0.908341,0.900784,0.873026,0.884506
5,0.0815,0.50676,0.911091,0.903559,0.874158,0.886535
6,0.0729,0.500315,0.903758,0.910405,0.859415,0.879139
7,0.0666,0.496675,0.912007,0.914216,0.8663,0.884452
8,0.0611,0.490133,0.909258,0.913301,0.873493,0.889459
9,0.0578,0.478815,0.912924,0.916673,0.866503,0.885805
10,0.0549,0.477376,0.912007,0.916275,0.866075,0.885295


[I 2025-03-22 14:54:17,709] Trial 79 finished with value: 0.896515848210675 and parameters: {'learning_rate': 0.0041970193386191195, 'weight_decay': 0.002, 'warmup_steps': 17, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 80 with params: {'learning_rate': 0.003992310414837302, 'weight_decay': 0.001, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8484,0.672141,0.885426,0.890124,0.828289,0.847283
2,0.194,0.57526,0.903758,0.911052,0.868971,0.885892
3,0.1204,0.530499,0.910174,0.903698,0.874661,0.88661
4,0.0968,0.516899,0.902841,0.909196,0.868588,0.884909
5,0.0835,0.535571,0.905591,0.911839,0.870535,0.887243
6,0.0731,0.513939,0.904675,0.912623,0.868479,0.886407
7,0.067,0.509265,0.904675,0.910562,0.870259,0.886174
8,0.0616,0.492816,0.912924,0.919338,0.876283,0.893856
9,0.0579,0.501918,0.912007,0.917245,0.875515,0.892475
10,0.0542,0.475186,0.914757,0.919483,0.87735,0.894417


[I 2025-03-22 14:58:19,784] Trial 80 finished with value: 0.8954730062719632 and parameters: {'learning_rate': 0.003992310414837302, 'weight_decay': 0.001, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 81 with params: {'learning_rate': 0.002545115384295365, 'weight_decay': 0.0, 'warmup_steps': 11, 'lambda_param': 1.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9454,0.720142,0.88176,0.885291,0.815803,0.835661
2,0.2279,0.598211,0.894592,0.904252,0.861125,0.878747
3,0.1335,0.529375,0.907424,0.913343,0.871155,0.88823
4,0.1045,0.544566,0.901925,0.909388,0.86713,0.884357
5,0.0885,0.531828,0.907424,0.913782,0.870921,0.8883
6,0.078,0.512778,0.905591,0.911532,0.860704,0.88021
7,0.07,0.499271,0.908341,0.913581,0.862458,0.882183
8,0.0648,0.480853,0.914757,0.917866,0.867734,0.887015
9,0.0608,0.489267,0.912007,0.915695,0.865961,0.884923
10,0.0579,0.477924,0.912924,0.91725,0.86562,0.885604


[I 2025-03-22 15:02:22,016] Trial 81 finished with value: 0.8880396959004204 and parameters: {'learning_rate': 0.002545115384295365, 'weight_decay': 0.0, 'warmup_steps': 11, 'lambda_param': 1.0, 'temperature': 5.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 82 with params: {'learning_rate': 0.0012321411790773863, 'weight_decay': 0.001, 'warmup_steps': 14, 'lambda_param': 0.9, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1634,0.848811,0.856095,0.853603,0.767132,0.783669
2,0.3506,0.716826,0.877177,0.89089,0.838299,0.858524
3,0.1911,0.652237,0.889093,0.896904,0.857854,0.873231
4,0.1337,0.662472,0.882676,0.893629,0.852241,0.86865
5,0.1078,0.647137,0.889093,0.898858,0.856395,0.873219


[I 2025-03-22 15:03:30,797] Trial 82 pruned. 


Trial 83 with params: {'learning_rate': 0.0042478369692948185, 'weight_decay': 0.003, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.851,0.667126,0.893676,0.892937,0.815429,0.836201
2,0.1891,0.588371,0.897342,0.893286,0.863996,0.876151
3,0.1181,0.539491,0.901008,0.906711,0.867864,0.883038
4,0.0936,0.511059,0.909258,0.913474,0.873846,0.889636
5,0.0811,0.525571,0.911091,0.905251,0.873885,0.887041
6,0.0716,0.508049,0.911091,0.904945,0.873597,0.886913
7,0.0658,0.507931,0.915674,0.918634,0.877993,0.894327
8,0.0615,0.51353,0.911091,0.904334,0.874651,0.887092
9,0.0591,0.508317,0.909258,0.91404,0.872452,0.889252
10,0.0541,0.492169,0.909258,0.903346,0.871889,0.885177


[I 2025-03-22 15:07:43,379] Trial 83 finished with value: 0.8937522567220609 and parameters: {'learning_rate': 0.0042478369692948185, 'weight_decay': 0.003, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 84 with params: {'learning_rate': 0.004717087808646469, 'weight_decay': 0.002, 'warmup_steps': 15, 'lambda_param': 1.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8505,0.714533,0.88451,0.881878,0.809493,0.827246
2,0.1928,0.552231,0.909258,0.914322,0.873334,0.889933
3,0.1218,0.543728,0.900092,0.897108,0.866214,0.879261
4,0.0944,0.521301,0.907424,0.912759,0.871605,0.888167
5,0.0816,0.524538,0.905591,0.91237,0.869861,0.887105
6,0.0723,0.52515,0.905591,0.912842,0.869453,0.887194
7,0.0666,0.521525,0.906508,0.912747,0.871065,0.887985
8,0.0645,0.51439,0.907424,0.901905,0.87187,0.884456
9,0.0592,0.509202,0.908341,0.902953,0.872611,0.885492
10,0.057,0.505028,0.909258,0.903238,0.873534,0.886011


[I 2025-03-22 15:10:42,900] Trial 84 pruned. 


Trial 85 with params: {'learning_rate': 0.0031813196525500953, 'weight_decay': 0.0, 'warmup_steps': 13, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9104,0.716326,0.879927,0.886346,0.813675,0.834962
2,0.2091,0.586792,0.900092,0.903037,0.848025,0.867327
3,0.1279,0.575739,0.897342,0.904062,0.864387,0.879637
4,0.098,0.520315,0.904675,0.910523,0.869459,0.885866
5,0.0838,0.543117,0.901925,0.910271,0.866163,0.883766
6,0.0748,0.518483,0.905591,0.913508,0.868889,0.887145
7,0.0685,0.522427,0.903758,0.909903,0.868372,0.885169
8,0.0628,0.517554,0.905591,0.912421,0.86952,0.886925
9,0.0593,0.518203,0.907424,0.913738,0.871247,0.8884
10,0.0551,0.504698,0.906508,0.911701,0.870294,0.886888


[I 2025-03-22 15:14:27,126] Trial 85 finished with value: 0.8915236365737319 and parameters: {'learning_rate': 0.0031813196525500953, 'weight_decay': 0.0, 'warmup_steps': 13, 'lambda_param': 1.0, 'temperature': 3.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 86 with params: {'learning_rate': 0.0002681159956916346, 'weight_decay': 0.003, 'warmup_steps': 22, 'lambda_param': 1.0, 'temperature': 7.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8844,1.321804,0.783685,0.661189,0.670466,0.664759
2,0.8672,1.065787,0.829514,0.703288,0.70757,0.704359
3,0.6496,0.937557,0.850596,0.714868,0.725803,0.719578
4,0.4984,0.87739,0.846013,0.835587,0.749337,0.762508
5,0.3816,0.847426,0.861595,0.872969,0.825078,0.843179


[I 2025-03-22 15:15:39,046] Trial 86 pruned. 


Trial 87 with params: {'learning_rate': 0.00025335316923329827, 'weight_decay': 0.004, 'warmup_steps': 8, 'lambda_param': 0.6000000000000001, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8876,1.342448,0.777269,0.652871,0.665505,0.658796
2,0.8943,1.078389,0.829514,0.702247,0.707556,0.70378
3,0.6776,0.96971,0.843263,0.710286,0.719556,0.714015
4,0.5325,0.90883,0.845096,0.874155,0.740508,0.748573
5,0.4166,0.859174,0.862511,0.872949,0.817156,0.836712
6,0.334,0.863412,0.850596,0.867885,0.815475,0.835152
7,0.2788,0.811319,0.869844,0.882014,0.831021,0.850602
8,0.2408,0.823585,0.862511,0.875814,0.826529,0.844718
9,0.2105,0.840198,0.851512,0.869582,0.816709,0.836423
10,0.1884,0.788808,0.868928,0.882453,0.828704,0.849675


[I 2025-03-22 15:18:14,416] Trial 87 pruned. 


Trial 88 with params: {'learning_rate': 0.002924918889386329, 'weight_decay': 0.0, 'warmup_steps': 15, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9281,0.738484,0.877177,0.879223,0.802386,0.822434
2,0.2192,0.566829,0.903758,0.909371,0.869256,0.885222
3,0.1289,0.560269,0.896425,0.903479,0.863028,0.87892
4,0.099,0.557607,0.898258,0.905702,0.864759,0.88129
5,0.0847,0.529517,0.901925,0.910267,0.866889,0.884607
6,0.075,0.540113,0.901008,0.909186,0.86534,0.883277
7,0.0682,0.516519,0.905591,0.912466,0.869565,0.887038
8,0.0641,0.504982,0.911091,0.916551,0.873822,0.891129
9,0.0592,0.513325,0.909258,0.915652,0.872183,0.889919
10,0.0558,0.495215,0.904675,0.912169,0.868127,0.886126


[I 2025-03-22 15:22:03,667] Trial 88 finished with value: 0.890696072659698 and parameters: {'learning_rate': 0.002924918889386329, 'weight_decay': 0.0, 'warmup_steps': 15, 'lambda_param': 1.0, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 89 with params: {'learning_rate': 0.0022005012908076337, 'weight_decay': 0.005, 'warmup_steps': 30, 'lambda_param': 0.30000000000000004, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0544,0.766523,0.868011,0.875527,0.804775,0.824668
2,0.2538,0.607621,0.892759,0.902782,0.859015,0.876962
3,0.1454,0.585674,0.901008,0.908593,0.865808,0.883092
4,0.1077,0.574017,0.900092,0.908394,0.865536,0.882901
5,0.0889,0.576548,0.901008,0.909181,0.866539,0.883863


[I 2025-03-22 15:23:21,438] Trial 89 pruned. 


Trial 90 with params: {'learning_rate': 0.004398042250760573, 'weight_decay': 0.003, 'warmup_steps': 30, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8942,0.720801,0.87901,0.880504,0.814572,0.832446
2,0.1964,0.606274,0.894592,0.90252,0.861284,0.878012
3,0.1203,0.585506,0.892759,0.889959,0.85935,0.871969
4,0.0936,0.549432,0.903758,0.910866,0.86752,0.885209
5,0.0809,0.546968,0.907424,0.912799,0.871096,0.888018
6,0.0731,0.542385,0.899175,0.906101,0.865221,0.88156
7,0.0658,0.529014,0.904675,0.910494,0.868818,0.885741
8,0.0603,0.520234,0.906508,0.913387,0.869817,0.887537
9,0.057,0.505543,0.908341,0.914448,0.871573,0.889074
10,0.0541,0.501926,0.910174,0.915702,0.873882,0.890817


[I 2025-03-22 15:27:18,807] Trial 90 finished with value: 0.8921074052048171 and parameters: {'learning_rate': 0.004398042250760573, 'weight_decay': 0.003, 'warmup_steps': 30, 'lambda_param': 0.6000000000000001, 'temperature': 3.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 91 with params: {'learning_rate': 0.00021177702946688744, 'weight_decay': 0.01, 'warmup_steps': 9, 'lambda_param': 0.4, 'temperature': 6.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.0046,1.423671,0.768103,0.647502,0.658453,0.651898
2,0.9764,1.148068,0.817599,0.692041,0.698215,0.694021
3,0.7521,1.035931,0.830431,0.699124,0.709563,0.703459
4,0.6091,0.973705,0.830431,0.697169,0.710858,0.702523
5,0.4992,0.914553,0.857012,0.829319,0.74908,0.756749
6,0.4111,0.884246,0.856095,0.870595,0.819504,0.839137
7,0.3478,0.849078,0.866178,0.878717,0.827886,0.847403
8,0.3012,0.867261,0.855179,0.86808,0.821484,0.837853
9,0.2661,0.881356,0.846929,0.866295,0.812881,0.833133
10,0.2373,0.821355,0.864345,0.877664,0.825927,0.845883


[I 2025-03-22 15:29:49,774] Trial 91 pruned. 


Trial 92 with params: {'learning_rate': 0.002704623581380898, 'weight_decay': 0.003, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9711,0.758314,0.871677,0.878886,0.816702,0.835773
2,0.2232,0.612514,0.899175,0.904702,0.864893,0.880856
3,0.1283,0.597003,0.896425,0.904289,0.862426,0.879231
4,0.1014,0.552196,0.903758,0.910226,0.868305,0.885325
5,0.0856,0.546568,0.904675,0.91306,0.868733,0.88687
6,0.0754,0.564255,0.900092,0.907174,0.864924,0.882094
7,0.069,0.546272,0.901925,0.908213,0.867108,0.883716
8,0.0632,0.526737,0.904675,0.912374,0.868645,0.886543
9,0.0601,0.520911,0.908341,0.914743,0.871231,0.889044
10,0.0565,0.515469,0.912007,0.91777,0.874566,0.892202


[I 2025-03-22 15:33:41,093] Trial 92 finished with value: 0.8915485537950375 and parameters: {'learning_rate': 0.002704623581380898, 'weight_decay': 0.003, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 93 with params: {'learning_rate': 0.00265768294671018, 'weight_decay': 0.008, 'warmup_steps': 16, 'lambda_param': 0.1, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9518,0.730107,0.882676,0.891163,0.833904,0.854009
2,0.2316,0.612379,0.891842,0.900288,0.858313,0.875391
3,0.1321,0.576093,0.897342,0.905256,0.864058,0.880658
4,0.1021,0.56625,0.897342,0.895369,0.863524,0.87707
5,0.0862,0.558535,0.901008,0.898462,0.866428,0.880192
6,0.0753,0.559126,0.900092,0.908586,0.864865,0.882792
7,0.0688,0.542879,0.901925,0.909669,0.867359,0.884573
8,0.0634,0.532648,0.909258,0.904463,0.873202,0.886563
9,0.0599,0.520215,0.904675,0.911405,0.869966,0.886659
10,0.057,0.532737,0.907424,0.903495,0.871537,0.885254


[I 2025-03-22 15:37:26,954] Trial 93 finished with value: 0.8890082281512104 and parameters: {'learning_rate': 0.00265768294671018, 'weight_decay': 0.008, 'warmup_steps': 16, 'lambda_param': 0.1, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 94 with params: {'learning_rate': 0.004302603997747711, 'weight_decay': 0.001, 'warmup_steps': 19, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8864,0.668093,0.890009,0.890676,0.823103,0.8425
2,0.1935,0.551467,0.911091,0.903908,0.874649,0.886755
3,0.1191,0.548167,0.902841,0.896173,0.867632,0.879439
4,0.0944,0.520209,0.912007,0.906475,0.874467,0.888045
5,0.0813,0.511411,0.915674,0.90835,0.877682,0.890533
6,0.0722,0.494227,0.913841,0.9078,0.876153,0.889556
7,0.0661,0.491115,0.912924,0.906305,0.875443,0.888479
8,0.0605,0.497131,0.915674,0.909321,0.876791,0.890602
9,0.0566,0.468166,0.92484,0.917035,0.884851,0.898613
10,0.0542,0.471522,0.92209,0.914539,0.882776,0.89627


[I 2025-03-22 15:41:04,362] Trial 94 finished with value: 0.8959132797293364 and parameters: {'learning_rate': 0.004302603997747711, 'weight_decay': 0.001, 'warmup_steps': 19, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 95 with params: {'learning_rate': 0.002913999902924364, 'weight_decay': 0.0, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9433,0.732571,0.87626,0.878091,0.801534,0.821663
2,0.2216,0.593568,0.901925,0.908355,0.867373,0.883983
3,0.1286,0.575542,0.895509,0.90308,0.861517,0.878136
4,0.1009,0.558583,0.903758,0.91079,0.868253,0.885524
5,0.0863,0.544977,0.905591,0.91334,0.868899,0.886982
6,0.0748,0.531199,0.904675,0.912862,0.868298,0.886498
7,0.0681,0.536041,0.901925,0.909524,0.866209,0.88391
8,0.0622,0.530235,0.903758,0.912099,0.867253,0.885459
9,0.059,0.523971,0.907424,0.913167,0.87074,0.887911
10,0.0562,0.507305,0.911091,0.916708,0.873331,0.891052


[I 2025-03-22 15:44:49,539] Trial 95 finished with value: 0.8844356840337312 and parameters: {'learning_rate': 0.002913999902924364, 'weight_decay': 0.0, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 96 with params: {'learning_rate': 5.399635979922363e-05, 'weight_decay': 0.0, 'warmup_steps': 25, 'lambda_param': 0.30000000000000004, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,3.0179,2.436758,0.600367,0.536836,0.500751,0.500757
2,1.9358,1.879136,0.691109,0.582719,0.590197,0.585149
3,1.4731,1.615455,0.743355,0.62453,0.63556,0.629216
4,1.2411,1.482977,0.767186,0.64856,0.656803,0.651189
5,1.1085,1.403259,0.779102,0.653596,0.668626,0.659889


[I 2025-03-22 15:46:20,179] Trial 96 pruned. 


Trial 97 with params: {'learning_rate': 0.0039058677192692977, 'weight_decay': 0.001, 'warmup_steps': 18, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.875,0.67599,0.888176,0.886318,0.81277,0.831359
2,0.1999,0.553337,0.908341,0.913718,0.872338,0.889031
3,0.1215,0.553586,0.898258,0.891852,0.855125,0.869759
4,0.0948,0.522265,0.907424,0.910192,0.85264,0.873124
5,0.08,0.530042,0.902841,0.908509,0.858805,0.877481
6,0.0727,0.525855,0.904675,0.911303,0.858854,0.878862
7,0.0668,0.516185,0.908341,0.912208,0.862888,0.881583
8,0.0618,0.497611,0.912007,0.914667,0.85633,0.877282
9,0.058,0.484141,0.912924,0.915748,0.85685,0.878151
10,0.0551,0.474192,0.915674,0.917228,0.859204,0.88005


[I 2025-03-22 15:48:55,460] Trial 97 pruned. 


Trial 98 with params: {'learning_rate': 0.004618707757140584, 'weight_decay': 0.002, 'warmup_steps': 20, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8883,0.707019,0.886343,0.88857,0.819432,0.838706
2,0.1915,0.590438,0.904675,0.911407,0.868685,0.886111
3,0.1196,0.57717,0.895509,0.901864,0.853087,0.871074
4,0.0943,0.524454,0.907424,0.912429,0.862175,0.88151
5,0.0797,0.502262,0.911091,0.916172,0.87488,0.891536
6,0.0707,0.508515,0.912007,0.916953,0.875472,0.892236
7,0.0663,0.499519,0.912007,0.915088,0.866402,0.884938
8,0.0607,0.497439,0.911091,0.915029,0.86562,0.88451
9,0.0571,0.485058,0.909258,0.912552,0.864194,0.882559
10,0.0537,0.485363,0.910174,0.913401,0.865103,0.88344


[I 2025-03-22 15:51:20,784] Trial 98 pruned. 


Trial 99 with params: {'learning_rate': 0.0038505722669524662, 'weight_decay': 0.0, 'warmup_steps': 30, 'lambda_param': 0.7000000000000001, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9233,0.715407,0.880843,0.880575,0.806584,0.824521
2,0.2004,0.652145,0.890009,0.895132,0.857819,0.87239
3,0.1218,0.591172,0.893676,0.900446,0.860788,0.876458
4,0.0967,0.560389,0.906508,0.912997,0.869827,0.887263
5,0.0826,0.55561,0.895509,0.903699,0.860894,0.878292


[I 2025-03-22 15:52:32,313] Trial 99 pruned. 


Trial 100 with params: {'learning_rate': 0.00026885910198952694, 'weight_decay': 0.008, 'warmup_steps': 30, 'lambda_param': 1.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.8914,1.302066,0.787351,0.65901,0.674745,0.666509
2,0.8553,1.054876,0.834097,0.703653,0.712084,0.707005
3,0.6409,0.9308,0.850596,0.714933,0.72538,0.719514
4,0.4948,0.882448,0.846013,0.819863,0.740993,0.747862
5,0.3801,0.836785,0.861595,0.873471,0.824822,0.843274
6,0.3001,0.847642,0.851512,0.857277,0.816977,0.832741
7,0.2506,0.792004,0.872594,0.88282,0.833745,0.852313
8,0.2167,0.810058,0.864345,0.864536,0.828082,0.842276
9,0.1897,0.828002,0.858845,0.874451,0.822582,0.84172
10,0.1719,0.784145,0.869844,0.883247,0.828758,0.849969


[I 2025-03-22 15:54:52,315] Trial 100 pruned. 


Trial 101 with params: {'learning_rate': 0.004508501969182648, 'weight_decay': 0.0, 'warmup_steps': 18, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8414,0.679524,0.882676,0.887787,0.82661,0.845317
2,0.191,0.557102,0.901925,0.909267,0.866326,0.883785
3,0.1208,0.517049,0.911091,0.906667,0.883806,0.893832
4,0.0936,0.501287,0.912007,0.916948,0.875066,0.892022
5,0.0801,0.492756,0.913841,0.918377,0.876734,0.893578
6,0.0716,0.493944,0.918423,0.921972,0.879786,0.89695
7,0.0651,0.489186,0.91934,0.923072,0.881168,0.898131
8,0.0606,0.474172,0.91934,0.923791,0.880506,0.898176
9,0.0571,0.484736,0.913841,0.916648,0.857856,0.87913
10,0.054,0.475393,0.91934,0.919742,0.862757,0.883169


[I 2025-03-22 15:57:13,180] Trial 101 pruned. 


Trial 102 with params: {'learning_rate': 0.004575615814244494, 'weight_decay': 0.002, 'warmup_steps': 19, 'lambda_param': 0.8, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.841,0.650962,0.888176,0.895014,0.829037,0.850502
2,0.1884,0.52831,0.906508,0.912465,0.870901,0.88769
3,0.12,0.524664,0.907424,0.913247,0.871201,0.887985
4,0.0921,0.490795,0.913841,0.920625,0.875083,0.893825
5,0.0805,0.520689,0.906508,0.912721,0.870767,0.887655
6,0.0707,0.514092,0.906508,0.912486,0.8713,0.887654
7,0.0655,0.498603,0.912007,0.916918,0.875697,0.892211
8,0.0617,0.499298,0.911091,0.916305,0.874119,0.89131
9,0.058,0.494442,0.910174,0.916082,0.87394,0.891049
10,0.0555,0.488411,0.915674,0.920116,0.878468,0.895387


[I 2025-03-22 16:01:38,289] Trial 102 finished with value: 0.8926170941247246 and parameters: {'learning_rate': 0.004575615814244494, 'weight_decay': 0.002, 'warmup_steps': 19, 'lambda_param': 0.8, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 103 with params: {'learning_rate': 0.0038464522695252088, 'weight_decay': 0.002, 'warmup_steps': 18, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8724,0.679055,0.883593,0.889672,0.827285,0.84555
2,0.2003,0.528746,0.907424,0.913702,0.870798,0.888208
3,0.1217,0.50019,0.912007,0.915876,0.875835,0.891594
4,0.0966,0.522752,0.908341,0.914042,0.871659,0.888746
5,0.084,0.512759,0.906508,0.901735,0.871299,0.884215
6,0.0729,0.486511,0.911091,0.916203,0.874073,0.891136
7,0.0671,0.504793,0.906508,0.900573,0.87096,0.883385
8,0.0621,0.482666,0.909258,0.90366,0.872326,0.885516
9,0.0583,0.483867,0.905591,0.911917,0.86967,0.886859
10,0.0551,0.48037,0.912924,0.91816,0.87516,0.892657


[I 2025-03-22 16:05:35,334] Trial 103 finished with value: 0.8957478957614152 and parameters: {'learning_rate': 0.0038464522695252088, 'weight_decay': 0.002, 'warmup_steps': 18, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 104 with params: {'learning_rate': 0.0046126729873686555, 'weight_decay': 0.001, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9005,0.659119,0.886343,0.889262,0.81846,0.839452
2,0.1926,0.587628,0.894592,0.900476,0.861973,0.876989
3,0.121,0.524273,0.904675,0.911044,0.869102,0.885981
4,0.0956,0.51227,0.907424,0.912875,0.871564,0.888277
5,0.0818,0.540402,0.902841,0.910103,0.867144,0.88468
6,0.0717,0.52823,0.901925,0.909097,0.866982,0.88399
7,0.0661,0.524925,0.903758,0.909526,0.869372,0.885246
8,0.0606,0.508491,0.908341,0.913487,0.872691,0.88912
9,0.0571,0.496665,0.910174,0.914696,0.873826,0.890246
10,0.0541,0.497434,0.909258,0.914265,0.873124,0.889684


[I 2025-03-22 16:09:27,415] Trial 104 finished with value: 0.889724547173446 and parameters: {'learning_rate': 0.0046126729873686555, 'weight_decay': 0.001, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 3.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 105 with params: {'learning_rate': 0.0012442980391826782, 'weight_decay': 0.004, 'warmup_steps': 23, 'lambda_param': 1.0, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2236,0.873237,0.854262,0.887487,0.766394,0.785282
2,0.352,0.697686,0.883593,0.895932,0.842859,0.863261
3,0.1918,0.640332,0.891842,0.900527,0.858994,0.875525
4,0.1341,0.660688,0.880843,0.889479,0.850863,0.866019
5,0.1059,0.642114,0.885426,0.895433,0.853782,0.870638


[I 2025-03-22 16:10:44,594] Trial 105 pruned. 


Trial 106 with params: {'learning_rate': 0.00016644555832767357, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.30000000000000004, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.157,1.513529,0.75802,0.638342,0.649132,0.643154
2,1.0876,1.238906,0.810266,0.684736,0.691857,0.687264
3,0.8511,1.114371,0.821265,0.693984,0.701471,0.696548
4,0.7084,1.053307,0.830431,0.700522,0.708758,0.704067
5,0.6072,0.986349,0.84418,0.70886,0.720045,0.713738
6,0.5251,0.941121,0.854262,0.882798,0.746759,0.75645
7,0.4606,0.918481,0.849679,0.837593,0.75257,0.764989
8,0.4024,0.944876,0.848763,0.862256,0.807498,0.825005
9,0.3646,0.916695,0.852429,0.868282,0.808035,0.829594
10,0.3291,0.873647,0.858845,0.87203,0.813209,0.83424


[I 2025-03-22 16:13:14,211] Trial 106 pruned. 


Trial 107 with params: {'learning_rate': 0.0013161869593851033, 'weight_decay': 0.003, 'warmup_steps': 12, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1358,0.836613,0.857012,0.852208,0.768027,0.783642
2,0.3371,0.690505,0.879927,0.89372,0.848879,0.867096
3,0.1872,0.639426,0.887259,0.895674,0.856571,0.871521
4,0.1304,0.654023,0.888176,0.896552,0.856992,0.872749
5,0.1054,0.630082,0.886343,0.896141,0.854815,0.871534


[I 2025-03-22 16:14:28,993] Trial 107 pruned. 


Trial 108 with params: {'learning_rate': 0.003930314284829318, 'weight_decay': 0.004, 'warmup_steps': 14, 'lambda_param': 1.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8565,0.687736,0.88451,0.884096,0.809532,0.827687
2,0.1942,0.552914,0.900092,0.89269,0.85715,0.871261
3,0.1224,0.546213,0.903758,0.908685,0.859736,0.878426
4,0.095,0.529021,0.908341,0.912438,0.863841,0.881862
5,0.0849,0.5227,0.910174,0.915178,0.874174,0.890623
6,0.0736,0.536191,0.906508,0.91288,0.870301,0.887225
7,0.0677,0.497885,0.915674,0.920175,0.877856,0.895085
8,0.0625,0.503024,0.911091,0.91717,0.873984,0.891469
9,0.0583,0.49578,0.91659,0.92119,0.878741,0.89584
10,0.0555,0.500739,0.913841,0.918948,0.876073,0.893521


Using the latest cached version of the module from /home/jovyan/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--recall/11f90e583db35601050aed380d48e83202a896976b9608432fba9244fb447f24 (last modified on Fri Jan 10 23:14:00 2025) since it couldn't be found locally at evaluate-metric--recall, or remotely on the Hugging Face Hub.
[I 2025-03-22 16:18:51,500] Trial 108 finished with value: 0.8969089099280163 and parameters: {'learning_rate': 0.003930314284829318, 'weight_decay': 0.004, 'warmup_steps': 14, 'lambda_param': 1.0, 'temperature': 2.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 109 with params: {'learning_rate': 0.0025542621571883646, 'weight_decay': 0.002, 'warmup_steps': 27, 'lambda_param': 0.7000000000000001, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.002,0.753674,0.874427,0.879717,0.810869,0.829491
2,0.2388,0.601879,0.896425,0.904748,0.861652,0.879285
3,0.1352,0.588336,0.896425,0.90463,0.863475,0.880091
4,0.1042,0.586321,0.898258,0.907086,0.864773,0.88198
5,0.0867,0.573583,0.898258,0.906026,0.864585,0.881192
6,0.076,0.561922,0.898258,0.906446,0.864712,0.881307
7,0.0695,0.544691,0.897342,0.905485,0.863492,0.880522
8,0.0637,0.545377,0.900092,0.90747,0.865692,0.882607
9,0.0594,0.544537,0.902841,0.908856,0.867921,0.884451
10,0.056,0.539974,0.899175,0.906626,0.864659,0.881646


[I 2025-03-22 16:21:17,130] Trial 109 pruned. 


Trial 110 with params: {'learning_rate': 0.004966261087130382, 'weight_decay': 0.0, 'warmup_steps': 19, 'lambda_param': 0.9, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8328,0.663286,0.88176,0.88641,0.815136,0.836062
2,0.1879,0.566349,0.902841,0.907838,0.869555,0.884376
3,0.1217,0.506944,0.906508,0.898794,0.862439,0.87653
4,0.0936,0.481369,0.914757,0.906179,0.86782,0.883356
5,0.0812,0.507957,0.911091,0.905224,0.874176,0.887276
6,0.072,0.501332,0.907424,0.900244,0.871693,0.883559
7,0.0663,0.503991,0.911091,0.901466,0.866199,0.880131
8,0.0611,0.499162,0.911091,0.904494,0.874479,0.887192
9,0.057,0.498529,0.911091,0.903512,0.874969,0.88686
10,0.0551,0.485938,0.914757,0.904608,0.868604,0.883004


[I 2025-03-22 16:23:56,381] Trial 110 pruned. 


Trial 111 with params: {'learning_rate': 0.004382360980255659, 'weight_decay': 0.005, 'warmup_steps': 14, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8561,0.666425,0.890926,0.893536,0.831657,0.851533
2,0.1951,0.552354,0.904675,0.909356,0.870036,0.88573
3,0.1215,0.568679,0.892759,0.900393,0.861281,0.876817
4,0.0965,0.534572,0.905591,0.912869,0.870548,0.887719
5,0.0848,0.530851,0.906508,0.91347,0.871352,0.88841
6,0.0744,0.557777,0.898258,0.906025,0.864588,0.881233
7,0.0678,0.528918,0.899175,0.907103,0.864589,0.88186
8,0.0624,0.509651,0.907424,0.913551,0.872033,0.888825
9,0.0586,0.51626,0.901008,0.907214,0.86687,0.883116
10,0.0546,0.510519,0.911091,0.916493,0.874351,0.891479


[I 2025-03-22 16:27:28,574] Trial 111 finished with value: 0.8856181822913937 and parameters: {'learning_rate': 0.004382360980255659, 'weight_decay': 0.005, 'warmup_steps': 14, 'lambda_param': 1.0, 'temperature': 3.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 112 with params: {'learning_rate': 0.002914635322545712, 'weight_decay': 0.004, 'warmup_steps': 23, 'lambda_param': 1.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9512,0.742126,0.871677,0.874187,0.799036,0.818244
2,0.2237,0.608895,0.896425,0.904698,0.862386,0.87942
3,0.1285,0.568147,0.898258,0.905396,0.864796,0.88086
4,0.104,0.563575,0.901925,0.910108,0.867317,0.884608
5,0.0852,0.541638,0.901008,0.909213,0.865864,0.883549
6,0.0756,0.536022,0.905591,0.912017,0.870447,0.887292
7,0.0675,0.531408,0.901008,0.908068,0.866598,0.883237
8,0.0622,0.521687,0.903758,0.910492,0.868151,0.885339
9,0.0585,0.513015,0.905591,0.912009,0.870444,0.887239
10,0.0553,0.499473,0.913841,0.918961,0.876272,0.893651


[I 2025-03-22 16:31:05,800] Trial 112 finished with value: 0.8908890106335007 and parameters: {'learning_rate': 0.002914635322545712, 'weight_decay': 0.004, 'warmup_steps': 23, 'lambda_param': 1.0, 'temperature': 2.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 113 with params: {'learning_rate': 0.0006356619019956763, 'weight_decay': 0.008, 'warmup_steps': 10, 'lambda_param': 1.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.4114,1.024165,0.828598,0.697345,0.707545,0.701195
2,0.5488,0.827749,0.857929,0.866395,0.78575,0.807866
3,0.3199,0.733721,0.873511,0.881194,0.83599,0.852364
4,0.2157,0.723516,0.879927,0.88971,0.849268,0.865408
5,0.1644,0.701517,0.877177,0.888812,0.846641,0.863724


[I 2025-03-22 16:32:29,466] Trial 113 pruned. 


Trial 114 with params: {'learning_rate': 0.002610707640083097, 'weight_decay': 0.003, 'warmup_steps': 12, 'lambda_param': 1.0, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9375,0.717427,0.87626,0.883909,0.819588,0.840645
2,0.2276,0.596436,0.896425,0.905794,0.862999,0.880315
3,0.1316,0.554391,0.901008,0.89907,0.865571,0.879865
4,0.1025,0.532966,0.901925,0.909487,0.867198,0.884285
5,0.085,0.530454,0.902841,0.899676,0.867317,0.881166
6,0.0751,0.518751,0.909258,0.905045,0.872634,0.886354
7,0.0682,0.503384,0.910174,0.916909,0.873001,0.890848
8,0.0624,0.501225,0.911091,0.905943,0.874253,0.88777
9,0.0584,0.499936,0.910174,0.916775,0.874239,0.891507
10,0.0551,0.495184,0.911091,0.906029,0.874536,0.88798


[I 2025-03-22 16:36:10,695] Trial 114 finished with value: 0.8899861927357607 and parameters: {'learning_rate': 0.002610707640083097, 'weight_decay': 0.003, 'warmup_steps': 12, 'lambda_param': 1.0, 'temperature': 3.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 115 with params: {'learning_rate': 7.374452554554239e-05, 'weight_decay': 0.005, 'warmup_steps': 11, 'lambda_param': 0.7000000000000001, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.806,2.191977,0.635197,0.54607,0.534444,0.533336
2,1.6568,1.647126,0.747021,0.625112,0.639827,0.631388
3,1.2493,1.453382,0.766269,0.646457,0.656574,0.65088
4,1.0652,1.349299,0.787351,0.661038,0.674031,0.666727
5,0.9566,1.2697,0.799267,0.669429,0.684325,0.676502


[I 2025-03-22 16:37:31,082] Trial 115 pruned. 


Trial 116 with params: {'learning_rate': 0.0032124543995351615, 'weight_decay': 0.004, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9074,0.713272,0.882676,0.884471,0.807149,0.827475
2,0.2109,0.574396,0.897342,0.903132,0.864082,0.879631
3,0.1255,0.555224,0.905591,0.912852,0.869861,0.887229
4,0.097,0.530014,0.910174,0.915828,0.873953,0.890797
5,0.0844,0.531182,0.912924,0.918606,0.875689,0.893123
6,0.0745,0.5087,0.903758,0.909886,0.858856,0.878442
7,0.0685,0.503937,0.909258,0.915519,0.873216,0.890249
8,0.0637,0.502217,0.909258,0.915467,0.872995,0.890236
9,0.0596,0.50358,0.912007,0.915996,0.865898,0.885066
10,0.0548,0.494478,0.912007,0.916667,0.865748,0.885338


[I 2025-03-22 16:41:25,148] Trial 116 finished with value: 0.8844914922053911 and parameters: {'learning_rate': 0.0032124543995351615, 'weight_decay': 0.004, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 2.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 117 with params: {'learning_rate': 0.0008273468343264632, 'weight_decay': 0.008, 'warmup_steps': 0, 'lambda_param': 0.4, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2663,0.978516,0.83868,0.704075,0.717314,0.709697
2,0.4771,0.830811,0.856095,0.866348,0.81274,0.830988
3,0.2659,0.732829,0.873511,0.882678,0.836419,0.853146
4,0.1803,0.714865,0.874427,0.885042,0.835497,0.85448
5,0.1375,0.729901,0.874427,0.88777,0.833688,0.854458


[I 2025-03-22 16:42:33,036] Trial 117 pruned. 


Trial 118 with params: {'learning_rate': 0.004391133895559514, 'weight_decay': 0.005, 'warmup_steps': 28, 'lambda_param': 0.1, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8562,0.749628,0.868011,0.874156,0.805107,0.823715
2,0.1899,0.645149,0.890926,0.898072,0.857855,0.87404
3,0.1185,0.594289,0.892759,0.899594,0.850775,0.868948
4,0.0942,0.565401,0.904675,0.899124,0.869196,0.881861
5,0.0804,0.52275,0.905591,0.892318,0.8696,0.87974


[I 2025-03-22 16:43:46,769] Trial 118 pruned. 


Trial 119 with params: {'learning_rate': 0.004031609400582658, 'weight_decay': 0.002, 'warmup_steps': 11, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.849,0.667283,0.883593,0.888768,0.826266,0.846094
2,0.1941,0.55952,0.897342,0.904201,0.864936,0.880337
3,0.1214,0.530061,0.903758,0.898491,0.869359,0.880901
4,0.0979,0.503985,0.904675,0.909751,0.870298,0.885667
5,0.0831,0.497527,0.905591,0.911831,0.870597,0.887228
6,0.0735,0.502638,0.898258,0.907506,0.863615,0.881118
7,0.0667,0.505921,0.902841,0.908109,0.869109,0.884538
8,0.0614,0.489385,0.906508,0.912315,0.87123,0.887395
9,0.0577,0.486148,0.906508,0.912545,0.87065,0.887657
10,0.0553,0.482794,0.904675,0.91058,0.870034,0.886057


[I 2025-03-22 16:47:38,977] Trial 119 finished with value: 0.8917514187472704 and parameters: {'learning_rate': 0.004031609400582658, 'weight_decay': 0.002, 'warmup_steps': 11, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 120 with params: {'learning_rate': 0.0008884868830894429, 'weight_decay': 0.001, 'warmup_steps': 24, 'lambda_param': 0.8, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3212,0.953575,0.83868,0.707361,0.716639,0.709995
2,0.4335,0.75122,0.871677,0.882659,0.82539,0.84557
3,0.236,0.67544,0.883593,0.892855,0.853114,0.86849
4,0.1618,0.685656,0.886343,0.896665,0.854362,0.871372
5,0.1258,0.655658,0.885426,0.896157,0.853062,0.870695


[I 2025-03-22 16:48:56,663] Trial 120 pruned. 


Trial 121 with params: {'learning_rate': 8.532115701682182e-05, 'weight_decay': 0.003, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.7193,2.060581,0.659028,0.568947,0.557137,0.558944
2,1.5218,1.549746,0.758937,0.635217,0.650305,0.641302
3,1.1535,1.381728,0.780935,0.661199,0.668096,0.66351
4,0.9902,1.284362,0.791934,0.665599,0.677109,0.670625
5,0.8877,1.207632,0.812099,0.680742,0.695606,0.687813
6,0.8106,1.150335,0.821265,0.690451,0.700679,0.695314
7,0.7526,1.130951,0.823098,0.691671,0.703522,0.697313
8,0.7035,1.112365,0.824931,0.69271,0.705578,0.698429
9,0.6582,1.084435,0.829514,0.699497,0.707848,0.70338
10,0.6266,1.055051,0.83593,0.702381,0.713349,0.707534


[I 2025-03-22 16:51:21,102] Trial 121 pruned. 


Trial 122 with params: {'learning_rate': 0.004969207176450133, 'weight_decay': 0.004, 'warmup_steps': 16, 'lambda_param': 0.7000000000000001, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8253,0.659138,0.890009,0.888347,0.813826,0.832826
2,0.1885,0.537737,0.901008,0.906847,0.867717,0.883221
3,0.1207,0.555276,0.898258,0.903966,0.864996,0.880232
4,0.095,0.496086,0.908341,0.902187,0.872497,0.884984
5,0.0809,0.499668,0.907424,0.91296,0.871277,0.888141
6,0.0728,0.49554,0.902841,0.908801,0.858165,0.877566
7,0.067,0.505235,0.907424,0.911097,0.87161,0.887361
8,0.0629,0.504065,0.906508,0.900394,0.870856,0.883187
9,0.0583,0.491862,0.907424,0.912064,0.870467,0.887356
10,0.0552,0.487729,0.909258,0.912818,0.873171,0.889061


[I 2025-03-22 16:54:00,509] Trial 122 pruned. 


Trial 123 with params: {'learning_rate': 0.0029456116905626126, 'weight_decay': 0.0, 'warmup_steps': 15, 'lambda_param': 0.6000000000000001, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9216,0.697961,0.87626,0.881442,0.810221,0.831537
2,0.2207,0.578777,0.901925,0.909244,0.86638,0.883874
3,0.1271,0.560118,0.904675,0.900849,0.869593,0.882818
4,0.0996,0.547882,0.904675,0.899744,0.86926,0.882113
5,0.0847,0.544826,0.902841,0.910279,0.867071,0.884669
6,0.0755,0.540051,0.894592,0.903454,0.860249,0.87788
7,0.0678,0.516693,0.903758,0.910669,0.86794,0.885372
8,0.0625,0.525377,0.904675,0.909305,0.859664,0.878665
9,0.0586,0.512069,0.908341,0.914692,0.871653,0.889178
10,0.0556,0.501743,0.912007,0.917942,0.874002,0.891975


[I 2025-03-22 16:57:43,607] Trial 123 finished with value: 0.8891937265055065 and parameters: {'learning_rate': 0.0029456116905626126, 'weight_decay': 0.0, 'warmup_steps': 15, 'lambda_param': 0.6000000000000001, 'temperature': 4.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 124 with params: {'learning_rate': 0.0012588364494897986, 'weight_decay': 0.002, 'warmup_steps': 17, 'lambda_param': 0.8, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1859,0.875838,0.854262,0.887633,0.74806,0.757879
2,0.3516,0.699206,0.877177,0.889652,0.838025,0.857839
3,0.1892,0.634425,0.88451,0.893698,0.854371,0.869771
4,0.1346,0.632831,0.890926,0.900296,0.85847,0.87539
5,0.1053,0.625879,0.888176,0.897964,0.856382,0.873287


[I 2025-03-22 16:59:35,232] Trial 124 pruned. 


Trial 125 with params: {'learning_rate': 0.0031841096654598748, 'weight_decay': 0.002, 'warmup_steps': 17, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9059,0.710123,0.879927,0.888438,0.823639,0.844
2,0.2112,0.58391,0.897342,0.905972,0.863356,0.8807
3,0.1265,0.557646,0.902841,0.910018,0.867975,0.884878
4,0.0996,0.532931,0.911091,0.917697,0.874398,0.892051
5,0.0831,0.541189,0.912924,0.919264,0.875742,0.893492
6,0.0739,0.523756,0.909258,0.916072,0.872135,0.890075
7,0.068,0.524094,0.909258,0.914853,0.873278,0.890094
8,0.0628,0.526572,0.911091,0.915531,0.874875,0.891268
9,0.0587,0.521957,0.910174,0.915672,0.873423,0.890571
10,0.0558,0.496556,0.913841,0.91955,0.876307,0.893952


[I 2025-03-22 17:04:01,663] Trial 125 finished with value: 0.8940213470199764 and parameters: {'learning_rate': 0.0031841096654598748, 'weight_decay': 0.002, 'warmup_steps': 17, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 126 with params: {'learning_rate': 0.004518825992014643, 'weight_decay': 0.003, 'warmup_steps': 31, 'lambda_param': 0.1, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8922,0.652948,0.890926,0.893802,0.831145,0.85116
2,0.1948,0.588473,0.899175,0.904532,0.866305,0.881392
3,0.1221,0.565683,0.902841,0.908737,0.868344,0.883996
4,0.0947,0.5176,0.904675,0.910777,0.868964,0.885905
5,0.0805,0.522294,0.908341,0.914915,0.870928,0.8889
6,0.0712,0.511537,0.904675,0.911753,0.868616,0.88617
7,0.066,0.508638,0.914757,0.918794,0.877499,0.894121
8,0.0603,0.493721,0.910174,0.903661,0.873829,0.886474
9,0.0563,0.515724,0.904675,0.910651,0.869382,0.886035
10,0.0533,0.495782,0.911091,0.916179,0.874134,0.891212


[I 2025-03-22 17:07:45,787] Trial 126 finished with value: 0.8920443296730444 and parameters: {'learning_rate': 0.004518825992014643, 'weight_decay': 0.003, 'warmup_steps': 31, 'lambda_param': 0.1, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 127 with params: {'learning_rate': 0.004216869644596193, 'weight_decay': 0.003, 'warmup_steps': 24, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8784,0.702823,0.883593,0.88295,0.808609,0.827277
2,0.1945,0.584573,0.894592,0.902361,0.862362,0.878394
3,0.1211,0.557252,0.897342,0.894906,0.864077,0.87705
4,0.0928,0.549145,0.896425,0.894202,0.863033,0.876355
5,0.0793,0.546298,0.901925,0.896625,0.858003,0.873683
6,0.0714,0.516806,0.908341,0.913959,0.872466,0.889095
7,0.0656,0.519616,0.904675,0.909354,0.860863,0.879301
8,0.0609,0.507351,0.912007,0.917684,0.875771,0.89273
9,0.0567,0.486456,0.912007,0.916811,0.865976,0.885595
10,0.0536,0.495653,0.912924,0.918101,0.876152,0.8931


[I 2025-03-22 17:11:42,186] Trial 127 finished with value: 0.8925769914334968 and parameters: {'learning_rate': 0.004216869644596193, 'weight_decay': 0.003, 'warmup_steps': 24, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 128 with params: {'learning_rate': 0.001251334828119151, 'weight_decay': 0.003, 'warmup_steps': 21, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.2073,0.85721,0.856095,0.887391,0.767304,0.786312
2,0.3475,0.690661,0.878093,0.889072,0.83091,0.851734
3,0.1905,0.632084,0.887259,0.8964,0.856247,0.872031
4,0.1333,0.630516,0.891842,0.898916,0.859786,0.87526
5,0.1071,0.644371,0.88176,0.886983,0.82431,0.844685
6,0.0911,0.60992,0.886343,0.894812,0.845589,0.86432
7,0.0808,0.598882,0.895509,0.903539,0.862374,0.879059
8,0.0735,0.590737,0.890926,0.898531,0.849362,0.868049
9,0.0685,0.582078,0.896425,0.904884,0.863291,0.880203
10,0.0643,0.573418,0.896425,0.904982,0.863075,0.880124


[I 2025-03-22 17:14:21,866] Trial 128 pruned. 


Trial 129 with params: {'learning_rate': 0.0023245399147167165, 'weight_decay': 0.006, 'warmup_steps': 7, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9595,0.746068,0.878093,0.856488,0.812513,0.827089
2,0.2485,0.571476,0.904675,0.911137,0.86869,0.88598
3,0.1413,0.572938,0.902841,0.909479,0.867551,0.884465
4,0.1084,0.532404,0.904675,0.910303,0.868549,0.885439
5,0.0915,0.543286,0.902841,0.91038,0.867407,0.884765
6,0.0795,0.517675,0.906508,0.913143,0.870495,0.88783
7,0.0733,0.533399,0.910174,0.915198,0.873085,0.890108
8,0.0671,0.52002,0.907424,0.911995,0.861638,0.88098
9,0.0633,0.522376,0.909258,0.913983,0.863567,0.882873
10,0.0585,0.517378,0.910174,0.914878,0.863307,0.883273


[I 2025-03-22 17:17:16,358] Trial 129 pruned. 


Trial 130 with params: {'learning_rate': 0.002643805205669524, 'weight_decay': 0.002, 'warmup_steps': 15, 'lambda_param': 0.9, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9514,0.734497,0.878093,0.883171,0.813295,0.833605
2,0.2302,0.590286,0.900092,0.896003,0.865401,0.878378
3,0.1359,0.566364,0.899175,0.895378,0.865812,0.878156
4,0.1028,0.538393,0.901925,0.899291,0.867136,0.880902
5,0.086,0.528804,0.907424,0.914606,0.871551,0.889146
6,0.0764,0.53342,0.899175,0.90837,0.864371,0.882344
7,0.0691,0.535597,0.903758,0.896806,0.861111,0.875074
8,0.0636,0.527616,0.906508,0.910232,0.862669,0.880558
9,0.0595,0.513819,0.907424,0.91301,0.872058,0.888541
10,0.0569,0.506098,0.911091,0.917198,0.87498,0.892165


[I 2025-03-22 17:20:54,279] Trial 130 finished with value: 0.887644010306229 and parameters: {'learning_rate': 0.002643805205669524, 'weight_decay': 0.002, 'warmup_steps': 15, 'lambda_param': 0.9, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 131 with params: {'learning_rate': 0.0005612567161548509, 'weight_decay': 0.01, 'warmup_steps': 28, 'lambda_param': 0.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5109,1.038172,0.830431,0.699822,0.709386,0.703884
2,0.5855,0.842728,0.861595,0.869533,0.788182,0.81076
3,0.3501,0.755742,0.874427,0.881073,0.837769,0.852975
4,0.2345,0.736937,0.869844,0.884825,0.840818,0.858682
5,0.1805,0.722577,0.873511,0.886691,0.842848,0.860782
6,0.1444,0.696303,0.87626,0.890201,0.844892,0.8635
7,0.1218,0.684945,0.874427,0.887565,0.843815,0.861702
8,0.1082,0.685091,0.873511,0.886205,0.843042,0.860346
9,0.0971,0.662875,0.877177,0.890282,0.845738,0.864054
10,0.088,0.664563,0.87626,0.888747,0.845302,0.863054


[I 2025-03-22 17:23:36,402] Trial 131 pruned. 


Trial 132 with params: {'learning_rate': 0.004938847324534137, 'weight_decay': 0.003, 'warmup_steps': 21, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8261,0.666991,0.889093,0.88697,0.813276,0.8315
2,0.1866,0.56698,0.901925,0.906425,0.859044,0.876818
3,0.1209,0.530189,0.911091,0.915286,0.875255,0.891294
4,0.096,0.507824,0.91659,0.920321,0.878784,0.895623
5,0.081,0.502584,0.911091,0.91536,0.874975,0.891064
6,0.0724,0.494547,0.914757,0.919178,0.877128,0.894113
7,0.0662,0.494397,0.91659,0.919329,0.879191,0.895215
8,0.0607,0.48147,0.918423,0.921881,0.880312,0.897172
9,0.0568,0.479278,0.91934,0.921802,0.881032,0.89749
10,0.0553,0.486008,0.917507,0.919956,0.879928,0.895964


[I 2025-03-22 17:27:41,933] Trial 132 finished with value: 0.8932700903578331 and parameters: {'learning_rate': 0.004938847324534137, 'weight_decay': 0.003, 'warmup_steps': 21, 'lambda_param': 1.0, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 133 with params: {'learning_rate': 0.004123895021596128, 'weight_decay': 0.003, 'warmup_steps': 10, 'lambda_param': 0.8, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8415,0.644528,0.885426,0.887522,0.819406,0.839096
2,0.1958,0.564837,0.896425,0.901623,0.863128,0.878324
3,0.1219,0.538185,0.899175,0.903675,0.856842,0.874349
4,0.0956,0.499429,0.906508,0.911099,0.870721,0.886971
5,0.0832,0.500223,0.907424,0.913848,0.870041,0.888007
6,0.0736,0.498233,0.904675,0.911811,0.868388,0.885988
7,0.0666,0.495806,0.904675,0.910331,0.868569,0.885411
8,0.0614,0.472666,0.912924,0.916867,0.865815,0.885504
9,0.0574,0.481276,0.914757,0.917398,0.876736,0.893108
10,0.0555,0.479543,0.914757,0.917689,0.877084,0.893409


[I 2025-03-22 17:31:57,878] Trial 133 finished with value: 0.8933011002398681 and parameters: {'learning_rate': 0.004123895021596128, 'weight_decay': 0.003, 'warmup_steps': 10, 'lambda_param': 0.8, 'temperature': 2.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 134 with params: {'learning_rate': 0.0007078235644524678, 'weight_decay': 0.005, 'warmup_steps': 10, 'lambda_param': 0.2, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3641,1.023011,0.828598,0.697141,0.708464,0.701333
2,0.5197,0.815341,0.856095,0.869018,0.812409,0.83222
3,0.2969,0.741222,0.869844,0.878166,0.834021,0.849493
4,0.1996,0.704966,0.879927,0.891957,0.849204,0.866592
5,0.1513,0.677914,0.883593,0.894692,0.852543,0.869544


[I 2025-03-22 17:33:18,806] Trial 134 pruned. 


Trial 135 with params: {'learning_rate': 0.004450546605378194, 'weight_decay': 0.0, 'warmup_steps': 29, 'lambda_param': 0.9, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8818,0.752989,0.872594,0.877736,0.80984,0.826671
2,0.1965,0.60015,0.896425,0.902604,0.863469,0.879087
3,0.1217,0.53733,0.900092,0.907334,0.864531,0.881841
4,0.0943,0.52709,0.904675,0.912601,0.868487,0.886534
5,0.0813,0.492224,0.901925,0.910822,0.866227,0.884542
6,0.0743,0.516262,0.902841,0.908109,0.858625,0.87755
7,0.0668,0.514652,0.898258,0.90616,0.863779,0.881067
8,0.0612,0.488279,0.901925,0.906948,0.857447,0.876397
9,0.0569,0.495457,0.901925,0.907733,0.85802,0.877127
10,0.0536,0.489339,0.902841,0.909426,0.868099,0.8848


[I 2025-03-22 17:35:51,808] Trial 135 pruned. 


Trial 136 with params: {'learning_rate': 0.001156179201901999, 'weight_decay': 0.0, 'warmup_steps': 2, 'lambda_param': 0.4, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1548,0.890658,0.849679,0.716981,0.726229,0.719831
2,0.3759,0.703497,0.875344,0.88764,0.837337,0.856502
3,0.206,0.644977,0.88451,0.891993,0.844431,0.862159
4,0.1422,0.665877,0.887259,0.89625,0.856422,0.872295
5,0.1124,0.640775,0.888176,0.898304,0.855698,0.873109


[I 2025-03-22 17:37:05,305] Trial 136 pruned. 


Trial 137 with params: {'learning_rate': 0.002472023290700323, 'weight_decay': 0.009000000000000001, 'warmup_steps': 14, 'lambda_param': 0.7000000000000001, 'temperature': 6.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9612,0.730671,0.883593,0.8753,0.826965,0.843425
2,0.2342,0.59381,0.899175,0.885974,0.865409,0.874498
3,0.137,0.598069,0.896425,0.892564,0.863707,0.875294
4,0.1061,0.555463,0.904675,0.900296,0.869643,0.882527
5,0.0903,0.567356,0.902841,0.899977,0.8679,0.88159
6,0.0784,0.548307,0.901008,0.898869,0.866705,0.880463
7,0.0701,0.544354,0.901925,0.88818,0.867798,0.876713
8,0.0639,0.549107,0.903758,0.910858,0.869121,0.886033
9,0.0604,0.538311,0.901925,0.908362,0.867497,0.883878
10,0.058,0.535315,0.902841,0.911005,0.867532,0.885291


[I 2025-03-22 17:40:59,212] Trial 137 finished with value: 0.8838555416371948 and parameters: {'learning_rate': 0.002472023290700323, 'weight_decay': 0.009000000000000001, 'warmup_steps': 14, 'lambda_param': 0.7000000000000001, 'temperature': 6.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 138 with params: {'learning_rate': 0.0022852589590548296, 'weight_decay': 0.003, 'warmup_steps': 17, 'lambda_param': 0.8, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9892,0.754171,0.877177,0.880726,0.811597,0.831859
2,0.2467,0.63926,0.891842,0.901752,0.859316,0.876401
3,0.1377,0.599167,0.896425,0.90379,0.862523,0.879069
4,0.1056,0.544525,0.904675,0.91191,0.868769,0.88632
5,0.09,0.532394,0.900092,0.908501,0.865096,0.882805
6,0.078,0.531271,0.902841,0.911336,0.866694,0.884989
7,0.0707,0.515507,0.906508,0.911826,0.861332,0.880777
8,0.0647,0.51874,0.908341,0.915479,0.871058,0.889244
9,0.0611,0.516692,0.903758,0.910211,0.867957,0.885137
10,0.0569,0.510748,0.909258,0.915746,0.87206,0.889825


[I 2025-03-22 17:44:55,753] Trial 138 finished with value: 0.8931218008084194 and parameters: {'learning_rate': 0.0022852589590548296, 'weight_decay': 0.003, 'warmup_steps': 17, 'lambda_param': 0.8, 'temperature': 4.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 139 with params: {'learning_rate': 0.004796321655262078, 'weight_decay': 0.003, 'warmup_steps': 18, 'lambda_param': 0.9, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8349,0.656245,0.890009,0.889926,0.812229,0.832867
2,0.186,0.557986,0.902841,0.907043,0.859015,0.877098
3,0.1186,0.53566,0.905591,0.897865,0.861642,0.875682
4,0.0931,0.517455,0.910174,0.902645,0.873964,0.886049
5,0.0817,0.530797,0.903758,0.898455,0.868069,0.880785


[I 2025-03-22 17:46:08,882] Trial 139 pruned. 


Trial 140 with params: {'learning_rate': 0.0007607957450972455, 'weight_decay': 0.009000000000000001, 'warmup_steps': 7, 'lambda_param': 0.0, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.3187,0.993062,0.832264,0.701229,0.71178,0.704687
2,0.4898,0.804748,0.860678,0.872691,0.815761,0.835588
3,0.2762,0.737216,0.878093,0.886079,0.840571,0.856317
4,0.1881,0.673652,0.88176,0.894406,0.850403,0.868435
5,0.1417,0.689904,0.875344,0.886328,0.845853,0.861899
6,0.1171,0.657118,0.88451,0.895453,0.852489,0.870024
7,0.1006,0.632455,0.891842,0.900539,0.858335,0.875463
8,0.0905,0.629053,0.882676,0.895346,0.850957,0.869054
9,0.0831,0.619849,0.88451,0.895054,0.852647,0.869892
10,0.0762,0.619745,0.883593,0.894858,0.852412,0.869668


[I 2025-03-22 17:48:39,215] Trial 140 pruned. 


Trial 141 with params: {'learning_rate': 0.003920491429055261, 'weight_decay': 0.0, 'warmup_steps': 23, 'lambda_param': 0.7000000000000001, 'temperature': 3.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8861,0.683447,0.891842,0.897762,0.841103,0.860752
2,0.1979,0.593412,0.897342,0.902598,0.863575,0.879085
3,0.1201,0.56733,0.903758,0.911084,0.869189,0.885437
4,0.0934,0.526719,0.900092,0.908886,0.864807,0.882764
5,0.0805,0.521097,0.908341,0.914914,0.872252,0.889655
6,0.0723,0.510027,0.912924,0.918144,0.875918,0.893054
7,0.0661,0.514599,0.905591,0.911389,0.870322,0.886947
8,0.0615,0.505494,0.911091,0.915991,0.874363,0.891275
9,0.0584,0.504646,0.910174,0.915717,0.873,0.890428
10,0.0539,0.500791,0.909258,0.915899,0.872375,0.890093


[I 2025-03-22 17:52:19,707] Trial 141 finished with value: 0.8916246808050287 and parameters: {'learning_rate': 0.003920491429055261, 'weight_decay': 0.0, 'warmup_steps': 23, 'lambda_param': 0.7000000000000001, 'temperature': 3.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 142 with params: {'learning_rate': 0.004389860921912742, 'weight_decay': 0.0, 'warmup_steps': 18, 'lambda_param': 1.0, 'temperature': 2.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8473,0.743703,0.874427,0.881205,0.811064,0.828861
2,0.1908,0.569446,0.901008,0.907396,0.866846,0.883137
3,0.1171,0.554963,0.905591,0.899964,0.871802,0.883325
4,0.0936,0.548296,0.906508,0.91205,0.871209,0.887636
5,0.0804,0.529811,0.906508,0.911672,0.870981,0.887293
6,0.0721,0.523461,0.905591,0.912095,0.869884,0.887037
7,0.067,0.511757,0.907424,0.912836,0.872163,0.888565
8,0.0616,0.521302,0.911091,0.914101,0.875582,0.890682
9,0.0584,0.514923,0.910174,0.915458,0.873943,0.890741
10,0.055,0.515544,0.910174,0.915564,0.873525,0.890515


[I 2025-03-22 17:56:01,812] Trial 142 finished with value: 0.8937063129804989 and parameters: {'learning_rate': 0.004389860921912742, 'weight_decay': 0.0, 'warmup_steps': 18, 'lambda_param': 1.0, 'temperature': 2.0}. Best is trial 44 with value: 0.8969803270673352.


Trial 143 with params: {'learning_rate': 0.004912590001916162, 'weight_decay': 0.001, 'warmup_steps': 14, 'lambda_param': 1.0, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8294,0.634479,0.890009,0.892264,0.832073,0.851047
2,0.1879,0.546026,0.910174,0.915216,0.873211,0.890273
3,0.12,0.531737,0.904675,0.899235,0.868802,0.881564
4,0.0959,0.540159,0.903758,0.908311,0.868858,0.884495
5,0.0822,0.517069,0.904675,0.912204,0.868555,0.886218
6,0.0724,0.487946,0.912924,0.919519,0.875239,0.893362
7,0.0665,0.503259,0.902841,0.910094,0.867354,0.884602
8,0.0617,0.483012,0.906508,0.913943,0.870183,0.888034
9,0.0573,0.475829,0.912007,0.917893,0.874855,0.892348
10,0.0541,0.479377,0.911091,0.915588,0.874188,0.890852


[I 2025-03-22 17:59:34,690] Trial 143 finished with value: 0.8909826832653365 and parameters: {'learning_rate': 0.004912590001916162, 'weight_decay': 0.001, 'warmup_steps': 14, 'lambda_param': 1.0, 'temperature': 3.5}. Best is trial 44 with value: 0.8969803270673352.


Trial 144 with params: {'learning_rate': 5.8193477735771966e-05, 'weight_decay': 0.009000000000000001, 'warmup_steps': 11, 'lambda_param': 0.0, 'temperature': 5.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,2.9523,2.38472,0.614115,0.541219,0.514571,0.514546
2,1.871,1.82259,0.705775,0.59797,0.602168,0.598156
3,1.4207,1.58034,0.751604,0.631718,0.642889,0.636454
4,1.2011,1.452102,0.773602,0.653303,0.662026,0.656425
5,1.0738,1.375667,0.781852,0.65526,0.670814,0.662056
6,0.9868,1.298091,0.79835,0.673223,0.681719,0.676877
7,0.9208,1.26894,0.809349,0.681503,0.691709,0.686209
8,0.8727,1.235661,0.813932,0.685731,0.694464,0.689765
9,0.8292,1.214123,0.808433,0.679933,0.691271,0.685252
10,0.7977,1.187417,0.813932,0.684445,0.696018,0.690063


[I 2025-03-22 18:01:47,179] Trial 144 pruned. 


Trial 145 with params: {'learning_rate': 0.0020347623994134193, 'weight_decay': 0.0, 'warmup_steps': 20, 'lambda_param': 0.8, 'temperature': 3.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0452,0.764728,0.872594,0.874577,0.79956,0.818936
2,0.2638,0.635741,0.890009,0.900342,0.85717,0.874722
3,0.1466,0.626788,0.893676,0.901093,0.860861,0.876895
4,0.1104,0.583624,0.898258,0.907003,0.863717,0.881424
5,0.0931,0.607631,0.898258,0.905781,0.863259,0.880482


[I 2025-03-22 18:02:51,291] Trial 145 pruned. 


Trial 146 with params: {'learning_rate': 0.0015168380796626668, 'weight_decay': 0.002, 'warmup_steps': 18, 'lambda_param': 0.9, 'temperature': 2.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.1131,0.826819,0.866178,0.849523,0.801677,0.81752
2,0.3116,0.683252,0.879927,0.889989,0.840225,0.858917
3,0.167,0.647538,0.888176,0.895358,0.857814,0.872234
4,0.1225,0.631918,0.892759,0.901802,0.860094,0.876883
5,0.1004,0.616938,0.893676,0.902802,0.860314,0.877468
6,0.087,0.61063,0.887259,0.898053,0.855191,0.872568
7,0.0762,0.592632,0.893676,0.903282,0.860706,0.878078
8,0.0706,0.585574,0.901008,0.908878,0.86656,0.883795
9,0.0654,0.59508,0.892759,0.902126,0.860387,0.877357
10,0.0616,0.579928,0.897342,0.906611,0.863602,0.881177


[I 2025-03-22 18:04:54,325] Trial 146 pruned. 


Trial 147 with params: {'learning_rate': 0.0024718944541401266, 'weight_decay': 0.001, 'warmup_steps': 20, 'lambda_param': 1.0, 'temperature': 4.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.9875,0.727207,0.87626,0.884256,0.819955,0.84074
2,0.2364,0.610044,0.895509,0.904383,0.862237,0.879278
3,0.1362,0.5609,0.899175,0.906744,0.865318,0.881917
4,0.104,0.533372,0.901925,0.909462,0.86726,0.884206
5,0.0889,0.542489,0.903758,0.912101,0.868642,0.886376
6,0.0775,0.549944,0.900092,0.907673,0.866056,0.882616
7,0.0698,0.526233,0.897342,0.9053,0.85512,0.874407
8,0.0642,0.525974,0.901925,0.910572,0.867421,0.88509
9,0.0603,0.515864,0.900092,0.908682,0.866169,0.883448
10,0.0572,0.517378,0.902841,0.910174,0.858816,0.878703


[I 2025-03-22 18:06:59,999] Trial 147 pruned. 


Trial 148 with params: {'learning_rate': 0.003199645143713299, 'weight_decay': 0.007, 'warmup_steps': 0, 'lambda_param': 0.1, 'temperature': 5.0}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.8522,0.688981,0.880843,0.88703,0.814404,0.83618
2,0.2115,0.580507,0.900092,0.896368,0.865038,0.878399
3,0.1303,0.5155,0.909258,0.902953,0.873157,0.885767
4,0.0996,0.540241,0.905591,0.909112,0.861644,0.879553
5,0.0873,0.525096,0.905591,0.912768,0.869743,0.887193
6,0.0774,0.524293,0.913841,0.916021,0.857785,0.87881
7,0.0696,0.513525,0.905591,0.908721,0.852382,0.872522
8,0.0639,0.509558,0.907424,0.912745,0.862559,0.881809
9,0.0602,0.512827,0.912007,0.916287,0.866052,0.885402
10,0.0574,0.507933,0.914757,0.917439,0.868418,0.887182


[I 2025-03-22 18:09:04,671] Trial 148 pruned. 


Trial 149 with params: {'learning_rate': 0.0017795655138595975, 'weight_decay': 0.002, 'warmup_steps': 15, 'lambda_param': 1.0, 'temperature': 4.5}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.0548,0.781391,0.870761,0.860386,0.806849,0.823513
2,0.2836,0.635593,0.887259,0.898147,0.855039,0.87245
3,0.1572,0.61104,0.891842,0.900613,0.859096,0.875734
4,0.1159,0.630078,0.895509,0.904981,0.861712,0.879331
5,0.0959,0.621274,0.890926,0.901536,0.857642,0.875585
6,0.0832,0.592813,0.895509,0.904742,0.861703,0.879275
7,0.0749,0.582884,0.898258,0.908077,0.863218,0.881658
8,0.0682,0.580514,0.897342,0.9064,0.863099,0.880779
9,0.064,0.576732,0.898258,0.907164,0.863849,0.881546
10,0.0605,0.580563,0.899175,0.907542,0.864736,0.882207


[I 2025-03-22 18:11:10,000] Trial 149 pruned. 


In [47]:
# Print the BestRun record held in best_trial4: its run id, objective value,
# and the hyperparameters of that run.
print(best_trial4)

BestRun(run_id='44', objective=0.8969803270673352, hyperparameters={'learning_rate': 0.0037906172026621545, 'weight_decay': 0.0, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 4.0}, run_summary=None)


In [48]:
# Summary of the best run from each of the four hyperparameter searches,
# one line per search. Each line is "<label> <run>": the labels already end
# in a space, so the joining space yields the same double space that
# print(label, run) produces with its default separator.
print(
    *(
        f"{label} {run!s}"
        for label, run in (
            ("Best normal training score: ", best_trial),
            ("Best distilation trianing score: ", best_trial2),
            ("Best normal training score with augmentations: ", best_trial3),
            ("Best distilation trianing score with augmentations: ", best_trial4),
        )
    ),
    sep="\n",
)

Best normal training score:  BestRun(run_id='123', objective=0.8691821658235614, hyperparameters={'learning_rate': 0.004483170022431039, 'weight_decay': 0.0, 'warmup_steps': 3}, run_summary=None)
Best distilation trianing score:  BestRun(run_id='53', objective=0.8696635593759803, hyperparameters={'learning_rate': 0.004269637593700472, 'weight_decay': 0.004, 'warmup_steps': 3, 'lambda_param': 0.1, 'temperature': 3.0}, run_summary=None)
Best normal training score with augmentations:  BestRun(run_id='61', objective=0.8887268625210981, hyperparameters={'learning_rate': 0.004589457243558559, 'weight_decay': 0.01, 'warmup_steps': 22}, run_summary=None)
Best distilation trianing score with augmentations:  BestRun(run_id='44', objective=0.8969803270673352, hyperparameters={'learning_rate': 0.0037906172026621545, 'weight_decay': 0.0, 'warmup_steps': 16, 'lambda_param': 1.0, 'temperature': 4.0}, run_summary=None)
