In [1]:
import datasets
import torch
from transformers import BeitForImageClassification, BeitFeatureExtractor, BeitImageProcessor
import numpy as np

base_model_name = "microsoft/beit-base-patch16-224-pt22k"

processor = BeitImageProcessor.from_pretrained(base_model_name)

def crop_image(img):
    width, height = img.size
    target_size = min(width, height)
    left = max(0, (width - target_size ) // 2)
    right = left + target_size
    top = max(0, (height - target_size ) // 2)
    bottom = top + target_size
    return img.crop((left, top, right, bottom)).convert('RGB')

def process_example(example):
    img = crop_image(example['image'])
    inputs = processor(img, return_tensors='pt')
    inputs['labels'] = example['label']
    return inputs

def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = processor([crop_image(x) for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['label']
    return inputs

ds = datasets.load_from_disk("../../data/diffusion_and_real/")

prepared_ds = ds.with_transform(transform)


def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

metric = datasets.load_metric("f1")
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

labels = ["negative","positive"]

model = BeitForImageClassification.from_pretrained(
    base_model_name,
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)}
).to("cuda")

  metric = datasets.load_metric("f1")
Some weights of the model checkpoint at microsoft/beit-base-patch16-224-pt22k were not used when initializing BeitForImageClassification: ['layernorm.bias', 'lm_head.bias', 'lm_head.weight', 'layernorm.weight']
- This IS expected if you are initializing BeitForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BeitForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-base-patch16-224-pt22k and are newly initialized: ['beit.pooler.layernorm.weight', 'classifier.weight', 'beit.pooler.layernorm.bias', 'classifier.bias']
You

In [2]:
from transformers import TrainingArguments

training_args = TrainingArguments(
  output_dir="./BEiT-diff-detect",
  per_device_train_batch_size=8, # consider 16 or higher
  evaluation_strategy="steps",
  num_train_epochs=4,
  fp16=True,
  save_steps=6000,
  eval_steps=12,
  logging_steps=1125,
  learning_rate=2e-4,
  save_total_limit=3,
  logging_first_step=True,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='tensorboard',
  load_best_model_at_end=True,
)

from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["vali"],
    tokenizer=processor,
)

In [3]:
# start the training
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

  0%|          | 2/9000 [00:09<10:00:28,  4.00s/it]

{'loss': 0.7854, 'learning_rate': 0.0002, 'epoch': 0.0}


                                                   
  0%|          | 13/9000 [00:19<6:39:07,  2.66s/it]

{'eval_loss': 0.8682252168655396, 'eval_f1': 0.0, 'eval_runtime': 8.1714, 'eval_samples_per_second': 122.379, 'eval_steps_per_second': 15.297, 'epoch': 0.01}


                                                   
  0%|          | 25/9000 [00:28<6:06:16,  2.45s/it]

{'eval_loss': 0.687817394733429, 'eval_f1': 0.0, 'eval_runtime': 7.5589, 'eval_samples_per_second': 132.295, 'eval_steps_per_second': 16.537, 'epoch': 0.01}


                                                   
  0%|          | 37/9000 [00:38<6:07:02,  2.46s/it]

{'eval_loss': 0.666903018951416, 'eval_f1': 0.3235747303543914, 'eval_runtime': 7.6012, 'eval_samples_per_second': 131.558, 'eval_steps_per_second': 16.445, 'epoch': 0.02}


                                                   
  1%|          | 49/9000 [00:47<6:05:23,  2.45s/it]

{'eval_loss': 0.6685433387756348, 'eval_f1': 0.14552238805970147, 'eval_runtime': 7.5701, 'eval_samples_per_second': 132.099, 'eval_steps_per_second': 16.512, 'epoch': 0.02}


                                                   
  1%|          | 61/9000 [00:56<6:07:54,  2.47s/it]

{'eval_loss': 0.6357203722000122, 'eval_f1': 0.6953046953046954, 'eval_runtime': 7.6293, 'eval_samples_per_second': 131.074, 'eval_steps_per_second': 16.384, 'epoch': 0.03}


                                                   
  1%|          | 73/9000 [01:06<6:07:10,  2.47s/it]

{'eval_loss': 0.8203582763671875, 'eval_f1': 0.6675749318801089, 'eval_runtime': 7.6434, 'eval_samples_per_second': 130.832, 'eval_steps_per_second': 16.354, 'epoch': 0.03}


                                                   
  1%|          | 85/9000 [01:15<6:10:55,  2.50s/it]

{'eval_loss': 1.04645836353302, 'eval_f1': 0.6929824561403508, 'eval_runtime': 7.7269, 'eval_samples_per_second': 129.418, 'eval_steps_per_second': 16.177, 'epoch': 0.04}


                                                   
  1%|          | 97/9000 [01:25<6:10:31,  2.50s/it]

{'eval_loss': 0.7235434651374817, 'eval_f1': 0.6996996996996997, 'eval_runtime': 7.732, 'eval_samples_per_second': 129.333, 'eval_steps_per_second': 16.167, 'epoch': 0.04}


                                                    
  1%|          | 109/9000 [01:34<6:03:38,  2.45s/it]

{'eval_loss': 0.7608819603919983, 'eval_f1': 0.687637161667886, 'eval_runtime': 7.5864, 'eval_samples_per_second': 131.815, 'eval_steps_per_second': 16.477, 'epoch': 0.05}


                                                    
  1%|▏         | 121/9000 [01:44<6:04:11,  2.46s/it]

{'eval_loss': 0.7956396341323853, 'eval_f1': 0.6893555394641564, 'eval_runtime': 7.6157, 'eval_samples_per_second': 131.309, 'eval_steps_per_second': 16.414, 'epoch': 0.05}


                                                    
  1%|▏         | 133/9000 [01:53<6:11:22,  2.51s/it]

{'eval_loss': 1.5532582998275757, 'eval_f1': 0.0, 'eval_runtime': 7.7721, 'eval_samples_per_second': 128.666, 'eval_steps_per_second': 16.083, 'epoch': 0.06}


                                                    
  2%|▏         | 145/9000 [02:03<6:14:16,  2.54s/it]

{'eval_loss': 0.7279778122901917, 'eval_f1': 0.6754325259515571, 'eval_runtime': 7.8586, 'eval_samples_per_second': 127.249, 'eval_steps_per_second': 15.906, 'epoch': 0.06}


                                                    
  2%|▏         | 157/9000 [02:12<6:12:53,  2.53s/it]

{'eval_loss': 0.6665380597114563, 'eval_f1': 0.3089430894308943, 'eval_runtime': 7.836, 'eval_samples_per_second': 127.617, 'eval_steps_per_second': 15.952, 'epoch': 0.07}


                                                    
  2%|▏         | 169/9000 [02:22<6:10:48,  2.52s/it]

{'eval_loss': 0.789008378982544, 'eval_f1': 0.6919770773638969, 'eval_runtime': 7.7781, 'eval_samples_per_second': 128.566, 'eval_steps_per_second': 16.071, 'epoch': 0.07}


                                                    
  2%|▏         | 181/9000 [02:32<6:15:45,  2.56s/it]

{'eval_loss': 0.6877660155296326, 'eval_f1': 0.39111111111111113, 'eval_runtime': 7.9131, 'eval_samples_per_second': 126.373, 'eval_steps_per_second': 15.797, 'epoch': 0.08}


                                                    
  2%|▏         | 193/9000 [02:41<6:16:14,  2.56s/it]

{'eval_loss': 0.6102913618087769, 'eval_f1': 0.6498353457738749, 'eval_runtime': 7.9268, 'eval_samples_per_second': 126.155, 'eval_steps_per_second': 15.769, 'epoch': 0.09}


                                                    
  2%|▏         | 205/9000 [02:51<6:17:55,  2.58s/it]

{'eval_loss': 0.7139623761177063, 'eval_f1': 0.6904422253922967, 'eval_runtime': 7.9784, 'eval_samples_per_second': 125.338, 'eval_steps_per_second': 15.667, 'epoch': 0.09}


                                                    
  2%|▏         | 217/9000 [03:01<6:15:48,  2.57s/it]

{'eval_loss': 0.5827012062072754, 'eval_f1': 0.730732635585157, 'eval_runtime': 7.9612, 'eval_samples_per_second': 125.608, 'eval_steps_per_second': 15.701, 'epoch': 0.1}


                                                    
  3%|▎         | 229/9000 [03:10<6:14:59,  2.57s/it]

{'eval_loss': 0.7610008716583252, 'eval_f1': 0.3458401305057096, 'eval_runtime': 7.929, 'eval_samples_per_second': 126.119, 'eval_steps_per_second': 15.765, 'epoch': 0.1}


                                                    
  3%|▎         | 240/9000 [03:21<28:19,  5.16it/s]

{'eval_loss': 0.6044796109199524, 'eval_f1': 0.7335375191424196, 'eval_runtime': 8.6642, 'eval_samples_per_second': 115.417, 'eval_steps_per_second': 14.427, 'epoch': 0.11}


                                                    
  3%|▎         | 253/9000 [03:32<7:10:34,  2.95s/it]

{'eval_loss': 0.5280126929283142, 'eval_f1': 0.7360157016683023, 'eval_runtime': 9.1523, 'eval_samples_per_second': 109.262, 'eval_steps_per_second': 13.658, 'epoch': 0.11}


                                                    
  3%|▎         | 265/9000 [03:43<7:05:34,  2.92s/it]

{'eval_loss': 0.5831730961799622, 'eval_f1': 0.717479674796748, 'eval_runtime': 9.0461, 'eval_samples_per_second': 110.545, 'eval_steps_per_second': 13.818, 'epoch': 0.12}


                                                    
  3%|▎         | 277/9000 [03:54<7:02:49,  2.91s/it]

{'eval_loss': 0.9211247563362122, 'eval_f1': 0.3727272727272727, 'eval_runtime': 9.0045, 'eval_samples_per_second': 111.056, 'eval_steps_per_second': 13.882, 'epoch': 0.12}


                                                    
  3%|▎         | 289/9000 [04:05<7:01:57,  2.91s/it]

{'eval_loss': 0.6645998358726501, 'eval_f1': 0.6842105263157895, 'eval_runtime': 9.0103, 'eval_samples_per_second': 110.984, 'eval_steps_per_second': 13.873, 'epoch': 0.13}


                                                    
  3%|▎         | 301/9000 [04:16<7:04:01,  2.92s/it]

{'eval_loss': 0.6402146220207214, 'eval_f1': 0.4587155963302752, 'eval_runtime': 9.0614, 'eval_samples_per_second': 110.359, 'eval_steps_per_second': 13.795, 'epoch': 0.13}


                                                    
  3%|▎         | 313/9000 [04:27<7:01:46,  2.91s/it]

{'eval_loss': 0.5938137173652649, 'eval_f1': 0.72869429241595, 'eval_runtime': 9.0142, 'eval_samples_per_second': 110.936, 'eval_steps_per_second': 13.867, 'epoch': 0.14}


                                                    
  4%|▎         | 325/9000 [04:39<7:02:59,  2.93s/it]

{'eval_loss': 0.5938589572906494, 'eval_f1': 0.6950959488272922, 'eval_runtime': 9.0671, 'eval_samples_per_second': 110.289, 'eval_steps_per_second': 13.786, 'epoch': 0.14}


                                                    
  4%|▎         | 337/9000 [04:50<7:06:44,  2.96s/it]

{'eval_loss': 0.5831770300865173, 'eval_f1': 0.668298653610771, 'eval_runtime': 9.1583, 'eval_samples_per_second': 109.191, 'eval_steps_per_second': 13.649, 'epoch': 0.15}


                                                    
  4%|▍         | 349/9000 [05:01<7:00:23,  2.92s/it]

{'eval_loss': 0.6642481684684753, 'eval_f1': 0.697924673328209, 'eval_runtime': 9.0331, 'eval_samples_per_second': 110.703, 'eval_steps_per_second': 13.838, 'epoch': 0.15}


                                                    
  4%|▍         | 361/9000 [05:12<6:58:07,  2.90s/it]

{'eval_loss': 0.5396855473518372, 'eval_f1': 0.7400194741966893, 'eval_runtime': 8.9953, 'eval_samples_per_second': 111.169, 'eval_steps_per_second': 13.896, 'epoch': 0.16}


                                                    
  4%|▍         | 373/9000 [05:23<7:01:01,  2.93s/it]

{'eval_loss': 0.5269531607627869, 'eval_f1': 0.7692307692307692, 'eval_runtime': 9.0725, 'eval_samples_per_second': 110.223, 'eval_steps_per_second': 13.778, 'epoch': 0.17}


                                                    
  4%|▍         | 385/9000 [05:34<6:57:33,  2.91s/it]

{'eval_loss': 0.5469575524330139, 'eval_f1': 0.693422519509476, 'eval_runtime': 8.9892, 'eval_samples_per_second': 111.245, 'eval_steps_per_second': 13.906, 'epoch': 0.17}


                                                    
  4%|▍         | 397/9000 [05:45<6:58:26,  2.92s/it]

{'eval_loss': 0.5394798517227173, 'eval_f1': 0.7701543739279588, 'eval_runtime': 9.0437, 'eval_samples_per_second': 110.574, 'eval_steps_per_second': 13.822, 'epoch': 0.18}


                                                    
  5%|▍         | 409/9000 [05:56<7:00:25,  2.94s/it]

{'eval_loss': 0.5082908868789673, 'eval_f1': 0.7350835322195703, 'eval_runtime': 9.1005, 'eval_samples_per_second': 109.884, 'eval_steps_per_second': 13.736, 'epoch': 0.18}


                                                    
  5%|▍         | 421/9000 [06:07<6:57:02,  2.92s/it]

{'eval_loss': 0.4257315397262573, 'eval_f1': 0.8029350104821802, 'eval_runtime': 9.0428, 'eval_samples_per_second': 110.585, 'eval_steps_per_second': 13.823, 'epoch': 0.19}


                                                    
  5%|▍         | 433/9000 [06:18<6:55:28,  2.91s/it]

{'eval_loss': 0.5724645853042603, 'eval_f1': 0.44790046656298593, 'eval_runtime': 9.0135, 'eval_samples_per_second': 110.945, 'eval_steps_per_second': 13.868, 'epoch': 0.19}


                                                    
  5%|▍         | 445/9000 [06:29<6:55:53,  2.92s/it]

{'eval_loss': 0.4730542004108429, 'eval_f1': 0.7114914425427873, 'eval_runtime': 9.036, 'eval_samples_per_second': 110.669, 'eval_steps_per_second': 13.834, 'epoch': 0.2}


                                                    
  5%|▌         | 457/9000 [06:40<6:55:21,  2.92s/it]

{'eval_loss': 0.433398962020874, 'eval_f1': 0.7952586206896552, 'eval_runtime': 9.0193, 'eval_samples_per_second': 110.873, 'eval_steps_per_second': 13.859, 'epoch': 0.2}


                                                    
  5%|▌         | 469/9000 [06:51<6:52:44,  2.90s/it]

{'eval_loss': 0.49086079001426697, 'eval_f1': 0.7452830188679246, 'eval_runtime': 8.983, 'eval_samples_per_second': 111.322, 'eval_steps_per_second': 13.915, 'epoch': 0.21}


                                                    
  5%|▌         | 481/9000 [07:02<6:56:58,  2.94s/it]

{'eval_loss': 0.49719589948654175, 'eval_f1': 0.7053140096618358, 'eval_runtime': 9.1093, 'eval_samples_per_second': 109.778, 'eval_steps_per_second': 13.722, 'epoch': 0.21}


                                                    
  5%|▌         | 493/9000 [07:13<6:28:37,  2.74s/it]

{'eval_loss': 0.49658820033073425, 'eval_f1': 0.7475409836065573, 'eval_runtime': 8.4799, 'eval_samples_per_second': 117.926, 'eval_steps_per_second': 14.741, 'epoch': 0.22}


                                                    
  6%|▌         | 505/9000 [07:24<6:58:15,  2.95s/it]

{'eval_loss': 0.44573190808296204, 'eval_f1': 0.7902869757174393, 'eval_runtime': 9.215, 'eval_samples_per_second': 108.519, 'eval_steps_per_second': 13.565, 'epoch': 0.22}


                                                    
  6%|▌         | 517/9000 [07:35<7:05:05,  3.01s/it]

{'eval_loss': 0.5656459927558899, 'eval_f1': 0.7793103448275862, 'eval_runtime': 9.3031, 'eval_samples_per_second': 107.491, 'eval_steps_per_second': 13.436, 'epoch': 0.23}


                                                    
  6%|▌         | 529/9000 [07:47<7:00:55,  2.98s/it]

{'eval_loss': 0.44567444920539856, 'eval_f1': 0.8031037827352085, 'eval_runtime': 9.2289, 'eval_samples_per_second': 108.355, 'eval_steps_per_second': 13.544, 'epoch': 0.23}


                                                    
  6%|▌         | 541/9000 [07:58<7:00:27,  2.98s/it]

{'eval_loss': 0.6065680384635925, 'eval_f1': 0.750195771339076, 'eval_runtime': 9.2392, 'eval_samples_per_second': 108.235, 'eval_steps_per_second': 13.529, 'epoch': 0.24}


                                                    
  6%|▌         | 553/9000 [08:09<7:00:36,  2.99s/it]

{'eval_loss': 0.46167412400245667, 'eval_f1': 0.7494646680942184, 'eval_runtime': 9.2452, 'eval_samples_per_second': 108.164, 'eval_steps_per_second': 13.521, 'epoch': 0.25}


                                                    
  6%|▋         | 565/9000 [08:20<6:54:30,  2.95s/it]

{'eval_loss': 0.7678300738334656, 'eval_f1': 0.39549839228295813, 'eval_runtime': 9.1189, 'eval_samples_per_second': 109.662, 'eval_steps_per_second': 13.708, 'epoch': 0.25}


                                                    
  6%|▋         | 577/9000 [08:32<6:53:27,  2.95s/it]

{'eval_loss': 0.4814533591270447, 'eval_f1': 0.7846291331546024, 'eval_runtime': 9.122, 'eval_samples_per_second': 109.625, 'eval_steps_per_second': 13.703, 'epoch': 0.26}


                                                    
  7%|▋         | 589/9000 [08:43<7:17:42,  3.12s/it]

{'eval_loss': 0.4119221270084381, 'eval_f1': 0.7963152507676562, 'eval_runtime': 9.7087, 'eval_samples_per_second': 103.0, 'eval_steps_per_second': 12.875, 'epoch': 0.26}


                                                    
  7%|▋         | 601/9000 [08:55<7:16:15,  3.12s/it]

{'eval_loss': 0.40741053223609924, 'eval_f1': 0.8047210300429184, 'eval_runtime': 9.6467, 'eval_samples_per_second': 103.662, 'eval_steps_per_second': 12.958, 'epoch': 0.27}


                                                    
  7%|▋         | 613/9000 [09:07<7:20:08,  3.15s/it]

{'eval_loss': 0.6164291501045227, 'eval_f1': 0.7770382695507488, 'eval_runtime': 9.7499, 'eval_samples_per_second': 102.565, 'eval_steps_per_second': 12.821, 'epoch': 0.27}


                                                    
  7%|▋         | 625/9000 [09:19<7:14:38,  3.11s/it]

{'eval_loss': 0.5387335419654846, 'eval_f1': 0.6909547738693468, 'eval_runtime': 9.6598, 'eval_samples_per_second': 103.522, 'eval_steps_per_second': 12.94, 'epoch': 0.28}


                                                    
  7%|▋         | 637/9000 [09:31<7:15:34,  3.13s/it]

{'eval_loss': 0.42523476481437683, 'eval_f1': 0.8228070175438597, 'eval_runtime': 9.6862, 'eval_samples_per_second': 103.239, 'eval_steps_per_second': 12.905, 'epoch': 0.28}


                                                    
  7%|▋         | 649/9000 [09:43<7:13:42,  3.12s/it]

{'eval_loss': 0.3720128834247589, 'eval_f1': 0.8135964912280701, 'eval_runtime': 9.6611, 'eval_samples_per_second': 103.508, 'eval_steps_per_second': 12.939, 'epoch': 0.29}


                                                    
  7%|▋         | 661/9000 [09:54<7:12:45,  3.11s/it]

{'eval_loss': 0.37976714968681335, 'eval_f1': 0.8373015873015873, 'eval_runtime': 9.6463, 'eval_samples_per_second': 103.667, 'eval_steps_per_second': 12.958, 'epoch': 0.29}


                                                    
  7%|▋         | 673/9000 [10:06<7:13:14,  3.12s/it]

{'eval_loss': 0.40124738216400146, 'eval_f1': 0.8172757475083056, 'eval_runtime': 9.6721, 'eval_samples_per_second': 103.39, 'eval_steps_per_second': 12.924, 'epoch': 0.3}


                                                    
  8%|▊         | 685/9000 [10:18<7:10:03,  3.10s/it]

{'eval_loss': 0.3711351454257965, 'eval_f1': 0.8571428571428571, 'eval_runtime': 9.6103, 'eval_samples_per_second': 104.055, 'eval_steps_per_second': 13.007, 'epoch': 0.3}


                                                    
  8%|▊         | 697/9000 [10:30<7:10:37,  3.11s/it]

{'eval_loss': 0.40704649686813354, 'eval_f1': 0.8149732620320855, 'eval_runtime': 9.6368, 'eval_samples_per_second': 103.769, 'eval_steps_per_second': 12.971, 'epoch': 0.31}


                                                    
  8%|▊         | 709/9000 [10:42<7:08:55,  3.10s/it]

{'eval_loss': 0.5639503002166748, 'eval_f1': 0.7846027846027845, 'eval_runtime': 9.6121, 'eval_samples_per_second': 104.035, 'eval_steps_per_second': 13.004, 'epoch': 0.31}


                                                    
  8%|▊         | 721/9000 [10:53<7:10:57,  3.12s/it]

{'eval_loss': 0.41052088141441345, 'eval_f1': 0.8422001803426511, 'eval_runtime': 9.6754, 'eval_samples_per_second': 103.355, 'eval_steps_per_second': 12.919, 'epoch': 0.32}


                                                    
  8%|▊         | 733/9000 [11:05<7:09:04,  3.11s/it]

{'eval_loss': 0.39410802721977234, 'eval_f1': 0.8425841674249318, 'eval_runtime': 9.6638, 'eval_samples_per_second': 103.479, 'eval_steps_per_second': 12.935, 'epoch': 0.33}


                                                    
  8%|▊         | 745/9000 [11:17<6:53:49,  3.01s/it]

{'eval_loss': 0.40142619609832764, 'eval_f1': 0.8291262135922332, 'eval_runtime': 9.3298, 'eval_samples_per_second': 107.183, 'eval_steps_per_second': 13.398, 'epoch': 0.33}


                                                    
  8%|▊         | 756/9000 [11:29<31:18,  4.39it/s]

{'eval_loss': 0.4044800400733948, 'eval_f1': 0.8056155507559395, 'eval_runtime': 10.5658, 'eval_samples_per_second': 94.645, 'eval_steps_per_second': 11.831, 'epoch': 0.34}


                                                    
  9%|▊         | 769/9000 [11:41<7:00:36,  3.07s/it]

{'eval_loss': 0.7497906684875488, 'eval_f1': 0.5565217391304348, 'eval_runtime': 9.4748, 'eval_samples_per_second': 105.543, 'eval_steps_per_second': 13.193, 'epoch': 0.34}


                                                    
  9%|▊         | 781/9000 [11:52<6:34:23,  2.88s/it]

{'eval_loss': 0.3924407362937927, 'eval_f1': 0.8344627299128752, 'eval_runtime': 8.8845, 'eval_samples_per_second': 112.556, 'eval_steps_per_second': 14.069, 'epoch': 0.35}


                                                    
  9%|▉         | 793/9000 [12:03<6:35:59,  2.90s/it]

{'eval_loss': 0.3955797851085663, 'eval_f1': 0.8470588235294118, 'eval_runtime': 8.9587, 'eval_samples_per_second': 111.623, 'eval_steps_per_second': 13.953, 'epoch': 0.35}


                                                    
  9%|▉         | 805/9000 [12:14<6:35:00,  2.89s/it]

{'eval_loss': 0.5376977324485779, 'eval_f1': 0.7970049916805324, 'eval_runtime': 8.94, 'eval_samples_per_second': 111.857, 'eval_steps_per_second': 13.982, 'epoch': 0.36}


                                                    
  9%|▉         | 817/9000 [12:26<7:08:46,  3.14s/it]

{'eval_loss': 0.37814104557037354, 'eval_f1': 0.8273748723186926, 'eval_runtime': 9.783, 'eval_samples_per_second': 102.219, 'eval_steps_per_second': 12.777, 'epoch': 0.36}


                                                    
  9%|▉         | 828/9000 [12:39<33:07,  4.11it/s]

{'eval_loss': 0.3811715841293335, 'eval_f1': 0.8357289527720738, 'eval_runtime': 10.9708, 'eval_samples_per_second': 91.151, 'eval_steps_per_second': 11.394, 'epoch': 0.37}


                                                    
  9%|▉         | 840/9000 [12:52<36:49,  3.69it/s]

{'eval_loss': 0.7075744867324829, 'eval_f1': 0.7715481171548116, 'eval_runtime': 10.5746, 'eval_samples_per_second': 94.567, 'eval_steps_per_second': 11.821, 'epoch': 0.37}


                                                    
  9%|▉         | 852/9000 [13:04<37:10,  3.65it/s]

{'eval_loss': 0.6387742757797241, 'eval_f1': 0.5829846582984658, 'eval_runtime': 10.5146, 'eval_samples_per_second': 95.106, 'eval_steps_per_second': 11.888, 'epoch': 0.38}


                                                    
 10%|▉         | 865/9000 [13:17<7:26:25,  3.29s/it]

{'eval_loss': 0.39876294136047363, 'eval_f1': 0.8026172300981462, 'eval_runtime': 10.1799, 'eval_samples_per_second': 98.233, 'eval_steps_per_second': 12.279, 'epoch': 0.38}


                                                    
 10%|▉         | 877/9000 [13:30<7:28:23,  3.31s/it]

{'eval_loss': 0.5523622035980225, 'eval_f1': 0.8131487889273356, 'eval_runtime': 10.2422, 'eval_samples_per_second': 97.635, 'eval_steps_per_second': 12.204, 'epoch': 0.39}


                                                    
 10%|▉         | 889/9000 [13:42<7:34:07,  3.36s/it]

{'eval_loss': 0.3688977360725403, 'eval_f1': 0.844, 'eval_runtime': 10.3713, 'eval_samples_per_second': 96.42, 'eval_steps_per_second': 12.052, 'epoch': 0.39}


                                                    
 10%|█         | 901/9000 [13:56<7:49:24,  3.48s/it]

{'eval_loss': 0.3781126141548157, 'eval_f1': 0.8453608247422681, 'eval_runtime': 10.8055, 'eval_samples_per_second': 92.545, 'eval_steps_per_second': 11.568, 'epoch': 0.4}


                                                    
 10%|█         | 913/9000 [14:09<7:52:25,  3.51s/it]

{'eval_loss': 0.42074885964393616, 'eval_f1': 0.8180212014134275, 'eval_runtime': 10.8622, 'eval_samples_per_second': 92.063, 'eval_steps_per_second': 11.508, 'epoch': 0.41}


                                                    
 10%|█         | 924/9000 [14:22<34:48,  3.87it/s]

{'eval_loss': 0.4212273359298706, 'eval_f1': 0.8240656335460347, 'eval_runtime': 10.7551, 'eval_samples_per_second': 92.979, 'eval_steps_per_second': 11.622, 'epoch': 0.41}


                                                    
 10%|█         | 936/9000 [14:35<35:01,  3.84it/s]

{'eval_loss': 0.40025395154953003, 'eval_f1': 0.8004246284501061, 'eval_runtime': 10.8418, 'eval_samples_per_second': 92.236, 'eval_steps_per_second': 11.529, 'epoch': 0.42}


                                                    
 11%|█         | 948/9000 [14:48<35:01,  3.83it/s]

{'eval_loss': 0.3596910238265991, 'eval_f1': 0.8486096807415036, 'eval_runtime': 10.6554, 'eval_samples_per_second': 93.849, 'eval_steps_per_second': 11.731, 'epoch': 0.42}


                                                    
 11%|█         | 960/9000 [15:01<35:10,  3.81it/s]

{'eval_loss': 0.4000903069972992, 'eval_f1': 0.810989010989011, 'eval_runtime': 10.9416, 'eval_samples_per_second': 91.394, 'eval_steps_per_second': 11.424, 'epoch': 0.43}


                                                    
 11%|█         | 972/9000 [15:14<34:21,  3.89it/s]

{'eval_loss': 0.3663633465766907, 'eval_f1': 0.8411214953271028, 'eval_runtime': 10.7756, 'eval_samples_per_second': 92.802, 'eval_steps_per_second': 11.6, 'epoch': 0.43}


                                                    
 11%|█         | 985/9000 [15:28<7:50:57,  3.53s/it]

{'eval_loss': 0.42095398902893066, 'eval_f1': 0.8493150684931506, 'eval_runtime': 10.9414, 'eval_samples_per_second': 91.396, 'eval_steps_per_second': 11.424, 'epoch': 0.44}


                                                    
 11%|█         | 996/9000 [15:41<34:23,  3.88it/s]

{'eval_loss': 0.43260425329208374, 'eval_f1': 0.7429963459196102, 'eval_runtime': 10.748, 'eval_samples_per_second': 93.041, 'eval_steps_per_second': 11.63, 'epoch': 0.44}


                                                     
 11%|█         | 1008/9000 [15:54<35:38,  3.74it/s]

{'eval_loss': 0.3397828936576843, 'eval_f1': 0.8639391056137012, 'eval_runtime': 10.662, 'eval_samples_per_second': 93.791, 'eval_steps_per_second': 11.724, 'epoch': 0.45}


                                                     
 11%|█▏        | 1020/9000 [16:07<34:10,  3.89it/s]

{'eval_loss': 0.5449643135070801, 'eval_f1': 0.7775119617224879, 'eval_runtime': 10.7599, 'eval_samples_per_second': 92.938, 'eval_steps_per_second': 11.617, 'epoch': 0.45}


                                                     
 11%|█▏        | 1032/9000 [16:20<34:38,  3.83it/s]

{'eval_loss': 0.7058507204055786, 'eval_f1': 0.7864238410596027, 'eval_runtime': 10.7103, 'eval_samples_per_second': 93.368, 'eval_steps_per_second': 11.671, 'epoch': 0.46}


                                                     
 12%|█▏        | 1044/9000 [16:33<34:35,  3.83it/s]

{'eval_loss': 0.44335728883743286, 'eval_f1': 0.8128654970760234, 'eval_runtime': 10.8075, 'eval_samples_per_second': 92.529, 'eval_steps_per_second': 11.566, 'epoch': 0.46}


                                                     
 12%|█▏        | 1056/9000 [16:46<34:03,  3.89it/s]

{'eval_loss': 0.3457319438457489, 'eval_f1': 0.849846782431052, 'eval_runtime': 10.6378, 'eval_samples_per_second': 94.005, 'eval_steps_per_second': 11.751, 'epoch': 0.47}


                                                     
 12%|█▏        | 1069/9000 [17:00<7:39:49,  3.48s/it]

{'eval_loss': 0.34285059571266174, 'eval_f1': 0.8580121703853956, 'eval_runtime': 10.7833, 'eval_samples_per_second': 92.736, 'eval_steps_per_second': 11.592, 'epoch': 0.47}


                                                     
 12%|█▏        | 1081/9000 [17:13<7:41:36,  3.50s/it]

{'eval_loss': 0.3094126582145691, 'eval_f1': 0.8771929824561403, 'eval_runtime': 10.8441, 'eval_samples_per_second': 92.216, 'eval_steps_per_second': 11.527, 'epoch': 0.48}


                                                     
 12%|█▏        | 1092/9000 [17:26<35:34,  3.70it/s]

{'eval_loss': 0.36678916215896606, 'eval_f1': 0.8488805970149254, 'eval_runtime': 10.6264, 'eval_samples_per_second': 94.105, 'eval_steps_per_second': 11.763, 'epoch': 0.49}


                                                     
 12%|█▏        | 1105/9000 [17:38<6:54:52,  3.15s/it]

{'eval_loss': 0.33401554822921753, 'eval_f1': 0.8455640744797371, 'eval_runtime': 9.7799, 'eval_samples_per_second': 102.25, 'eval_steps_per_second': 12.781, 'epoch': 0.49}


                                                     
 12%|█▏        | 1117/9000 [17:49<6:26:23,  2.94s/it]

{'eval_loss': 0.3499488830566406, 'eval_f1': 0.8340611353711791, 'eval_runtime': 9.0768, 'eval_samples_per_second': 110.171, 'eval_steps_per_second': 13.771, 'epoch': 0.5}


 13%|█▎        | 1126/9000 [17:51<38:21,  3.42it/s]  

{'loss': 0.5534, 'learning_rate': 0.00017511111111111113, 'epoch': 0.5}


                                                   
 13%|█▎        | 1129/9000 [18:01<6:46:26,  3.10s/it]

{'eval_loss': 0.33790120482444763, 'eval_f1': 0.8673978065802592, 'eval_runtime': 9.591, 'eval_samples_per_second': 104.264, 'eval_steps_per_second': 13.033, 'epoch': 0.5}


                                                     
 13%|█▎        | 1141/9000 [18:12<6:10:53,  2.83s/it]

{'eval_loss': 0.35293513536453247, 'eval_f1': 0.8518918918918917, 'eval_runtime': 8.7312, 'eval_samples_per_second': 114.532, 'eval_steps_per_second': 14.316, 'epoch': 0.51}


                                                     
 13%|█▎        | 1153/9000 [18:22<6:07:41,  2.81s/it]

{'eval_loss': 0.3629544973373413, 'eval_f1': 0.8389189189189189, 'eval_runtime': 8.6598, 'eval_samples_per_second': 115.476, 'eval_steps_per_second': 14.434, 'epoch': 0.51}


                                                     
 13%|█▎        | 1165/9000 [18:33<6:02:38,  2.78s/it]

{'eval_loss': 0.33829641342163086, 'eval_f1': 0.8724035608308606, 'eval_runtime': 8.5988, 'eval_samples_per_second': 116.295, 'eval_steps_per_second': 14.537, 'epoch': 0.52}


                                                     
 13%|█▎        | 1177/9000 [18:43<6:05:06,  2.80s/it]

{'eval_loss': 0.4326837956905365, 'eval_f1': 0.780373831775701, 'eval_runtime': 8.6776, 'eval_samples_per_second': 115.239, 'eval_steps_per_second': 14.405, 'epoch': 0.52}


                                                     
 13%|█▎        | 1189/9000 [18:54<6:06:08,  2.81s/it]

{'eval_loss': 0.3487282395362854, 'eval_f1': 0.8562933597621408, 'eval_runtime': 8.7341, 'eval_samples_per_second': 114.493, 'eval_steps_per_second': 14.312, 'epoch': 0.53}


                                                     
 13%|█▎        | 1201/9000 [19:05<6:11:34,  2.86s/it]

{'eval_loss': 0.32205188274383545, 'eval_f1': 0.8671472708547888, 'eval_runtime': 8.8752, 'eval_samples_per_second': 112.674, 'eval_steps_per_second': 14.084, 'epoch': 0.53}


                                                     
 13%|█▎        | 1213/9000 [19:17<6:50:47,  3.17s/it]

{'eval_loss': 0.3447277545928955, 'eval_f1': 0.8641049671977507, 'eval_runtime': 9.8573, 'eval_samples_per_second': 101.448, 'eval_steps_per_second': 12.681, 'epoch': 0.54}


                                                     
 14%|█▎        | 1225/9000 [19:30<7:24:44,  3.43s/it]

{'eval_loss': 0.3127029240131378, 'eval_f1': 0.875251509054326, 'eval_runtime': 10.7366, 'eval_samples_per_second': 93.14, 'eval_steps_per_second': 11.642, 'epoch': 0.54}


                                                     
 14%|█▎        | 1236/9000 [19:42<33:16,  3.89it/s]

{'eval_loss': 0.5359534621238708, 'eval_f1': 0.8053917438921651, 'eval_runtime': 10.1904, 'eval_samples_per_second': 98.131, 'eval_steps_per_second': 12.266, 'epoch': 0.55}


                                                     
 14%|█▍        | 1249/9000 [19:57<8:11:20,  3.80s/it]

{'eval_loss': 0.31674134731292725, 'eval_f1': 0.8766603415559772, 'eval_runtime': 11.783, 'eval_samples_per_second': 84.868, 'eval_steps_per_second': 10.608, 'epoch': 0.55}


                                                     
 14%|█▍        | 1260/9000 [20:11<34:09,  3.78it/s]

{'eval_loss': 0.33416929841041565, 'eval_f1': 0.8259911894273128, 'eval_runtime': 12.0331, 'eval_samples_per_second': 83.104, 'eval_steps_per_second': 10.388, 'epoch': 0.56}


                                                     
 14%|█▍        | 1272/9000 [20:29<51:56,  2.48it/s]

{'eval_loss': 0.4402143955230713, 'eval_f1': 0.8343558282208589, 'eval_runtime': 14.2956, 'eval_samples_per_second': 69.951, 'eval_steps_per_second': 8.744, 'epoch': 0.57}


                                                     
 14%|█▍        | 1285/9000 [20:46<8:51:08,  4.13s/it]

{'eval_loss': 0.3370400667190552, 'eval_f1': 0.8727272727272727, 'eval_runtime': 12.6287, 'eval_samples_per_second': 79.185, 'eval_steps_per_second': 9.898, 'epoch': 0.57}


                                                     
 14%|█▍        | 1296/9000 [21:05<54:07,  2.37it/s]

{'eval_loss': 0.33496275544166565, 'eval_f1': 0.8738049713193117, 'eval_runtime': 16.8489, 'eval_samples_per_second': 59.351, 'eval_steps_per_second': 7.419, 'epoch': 0.58}


                                                      
 15%|█▍        | 1308/9000 [21:26<55:24,  2.31it/s]

{'eval_loss': 0.3495575487613678, 'eval_f1': 0.862453531598513, 'eval_runtime': 16.0975, 'eval_samples_per_second': 62.121, 'eval_steps_per_second': 7.765, 'epoch': 0.58}


                                                      
 15%|█▍        | 1320/9000 [21:46<58:13,  2.20it/s]

{'eval_loss': 0.36624962091445923, 'eval_f1': 0.8476190476190477, 'eval_runtime': 15.9129, 'eval_samples_per_second': 62.842, 'eval_steps_per_second': 7.855, 'epoch': 0.59}


                                                      
 15%|█▍        | 1332/9000 [22:06<58:29,  2.18it/s]

{'eval_loss': 0.32476240396499634, 'eval_f1': 0.8668032786885247, 'eval_runtime': 15.5755, 'eval_samples_per_second': 64.203, 'eval_steps_per_second': 8.025, 'epoch': 0.59}


                                                      
 15%|█▍        | 1344/9000 [22:27<1:06:55,  1.91it/s]

{'eval_loss': 0.29776379466056824, 'eval_f1': 0.8901734104046244, 'eval_runtime': 16.4776, 'eval_samples_per_second': 60.688, 'eval_steps_per_second': 7.586, 'epoch': 0.6}


                                                      
 15%|█▌        | 1356/9000 [22:49<1:09:31,  1.83it/s]

{'eval_loss': 0.3743668794631958, 'eval_f1': 0.8566210045662099, 'eval_runtime': 16.2312, 'eval_samples_per_second': 61.61, 'eval_steps_per_second': 7.701, 'epoch': 0.6}


                                                      
 15%|█▌        | 1368/9000 [23:09<1:06:58,  1.90it/s]

{'eval_loss': 0.37057799100875854, 'eval_f1': 0.8225988700564971, 'eval_runtime': 15.8088, 'eval_samples_per_second': 63.256, 'eval_steps_per_second': 7.907, 'epoch': 0.61}


                                                      
 15%|█▌        | 1380/9000 [23:30<1:04:50,  1.96it/s]

{'eval_loss': 0.3191242814064026, 'eval_f1': 0.8473451327433628, 'eval_runtime': 16.0519, 'eval_samples_per_second': 62.298, 'eval_steps_per_second': 7.787, 'epoch': 0.61}


                                                      
 15%|█▌        | 1392/9000 [23:51<1:10:11,  1.81it/s]

{'eval_loss': 0.3221730887889862, 'eval_f1': 0.8749999999999999, 'eval_runtime': 15.7085, 'eval_samples_per_second': 63.66, 'eval_steps_per_second': 7.957, 'epoch': 0.62}


                                                      
 16%|█▌        | 1404/9000 [24:14<1:02:09,  2.04it/s]

{'eval_loss': 0.373409241437912, 'eval_f1': 0.8198198198198197, 'eval_runtime': 17.3701, 'eval_samples_per_second': 57.57, 'eval_steps_per_second': 7.196, 'epoch': 0.62}


                                                      
 16%|█▌        | 1416/9000 [24:34<59:25,  2.13it/s]

{'eval_loss': 0.3607952892780304, 'eval_f1': 0.8552875695732839, 'eval_runtime': 15.4344, 'eval_samples_per_second': 64.79, 'eval_steps_per_second': 8.099, 'epoch': 0.63}


                                                      
 16%|█▌        | 1428/9000 [24:54<1:00:05,  2.10it/s]

{'eval_loss': 0.41768190264701843, 'eval_f1': 0.7990867579908676, 'eval_runtime': 15.9707, 'eval_samples_per_second': 62.615, 'eval_steps_per_second': 7.827, 'epoch': 0.63}


                                                      
 16%|█▌        | 1440/9000 [25:15<1:07:39,  1.86it/s]

{'eval_loss': 0.5189932584762573, 'eval_f1': 0.8377896613190732, 'eval_runtime': 16.4816, 'eval_samples_per_second': 60.674, 'eval_steps_per_second': 7.584, 'epoch': 0.64}


                                                      
 16%|█▌        | 1452/9000 [25:37<1:05:42,  1.91it/s]

{'eval_loss': 0.4923628568649292, 'eval_f1': 0.8046511627906977, 'eval_runtime': 16.6284, 'eval_samples_per_second': 60.138, 'eval_steps_per_second': 7.517, 'epoch': 0.65}


                                                      
 16%|█▋        | 1464/9000 [25:58<1:05:18,  1.92it/s]

{'eval_loss': 0.36056432127952576, 'eval_f1': 0.8526970954356846, 'eval_runtime': 16.5584, 'eval_samples_per_second': 60.392, 'eval_steps_per_second': 7.549, 'epoch': 0.65}


                                                      
 16%|█▋        | 1476/9000 [26:19<54:20,  2.31it/s]

{'eval_loss': 0.36728915572166443, 'eval_f1': 0.839430894308943, 'eval_runtime': 16.187, 'eval_samples_per_second': 61.778, 'eval_steps_per_second': 7.722, 'epoch': 0.66}


                                                      
 17%|█▋        | 1488/9000 [26:40<51:12,  2.45it/s]

{'eval_loss': 0.38502123951911926, 'eval_f1': 0.8300312825860271, 'eval_runtime': 16.9793, 'eval_samples_per_second': 58.895, 'eval_steps_per_second': 7.362, 'epoch': 0.66}


                                                      
 17%|█▋        | 1500/9000 [26:58<1:10:29,  1.77it/s]

{'eval_loss': 0.36021286249160767, 'eval_f1': 0.8413361169102296, 'eval_runtime': 12.7518, 'eval_samples_per_second': 78.42, 'eval_steps_per_second': 9.803, 'epoch': 0.67}


                                                     
 17%|█▋        | 1512/9000 [27:19<1:03:20,  1.97it/s]

{'eval_loss': 0.44074198603630066, 'eval_f1': 0.7679425837320575, 'eval_runtime': 16.5188, 'eval_samples_per_second': 60.537, 'eval_steps_per_second': 7.567, 'epoch': 0.67}


                                                      
 17%|█▋        | 1524/9000 [27:40<54:18,  2.29it/s]

{'eval_loss': 0.4187784492969513, 'eval_f1': 0.8188976377952756, 'eval_runtime': 17.0361, 'eval_samples_per_second': 58.699, 'eval_steps_per_second': 7.337, 'epoch': 0.68}


                                                      
 17%|█▋        | 1536/9000 [28:02<1:00:47,  2.05it/s]

{'eval_loss': 0.4682459831237793, 'eval_f1': 0.7664670658682634, 'eval_runtime': 16.7321, 'eval_samples_per_second': 59.765, 'eval_steps_per_second': 7.471, 'epoch': 0.68}


                                                      
 17%|█▋        | 1548/9000 [28:22<57:09,  2.17it/s]

{'eval_loss': 0.45313093066215515, 'eval_f1': 0.8007662835249042, 'eval_runtime': 15.7778, 'eval_samples_per_second': 63.38, 'eval_steps_per_second': 7.923, 'epoch': 0.69}


                                                      
 17%|█▋        | 1560/9000 [28:43<1:01:13,  2.03it/s]

{'eval_loss': 0.39475104212760925, 'eval_f1': 0.835707502374169, 'eval_runtime': 16.3846, 'eval_samples_per_second': 61.033, 'eval_steps_per_second': 7.629, 'epoch': 0.69}


                                                      
 17%|█▋        | 1572/9000 [29:04<58:25,  2.12it/s]

{'eval_loss': 0.4811655282974243, 'eval_f1': 0.7070967741935483, 'eval_runtime': 16.3318, 'eval_samples_per_second': 61.23, 'eval_steps_per_second': 7.654, 'epoch': 0.7}


                                                      
 18%|█▊        | 1584/9000 [29:24<54:39,  2.26it/s]

{'eval_loss': 0.5561934113502502, 'eval_f1': 0.8056713928273561, 'eval_runtime': 15.6478, 'eval_samples_per_second': 63.907, 'eval_steps_per_second': 7.988, 'epoch': 0.7}


                                                      
 18%|█▊        | 1596/9000 [29:45<58:46,  2.10it/s]

{'eval_loss': 0.522612988948822, 'eval_f1': 0.6160220994475137, 'eval_runtime': 16.3171, 'eval_samples_per_second': 61.286, 'eval_steps_per_second': 7.661, 'epoch': 0.71}


                                                      
 18%|█▊        | 1608/9000 [30:07<52:08,  2.36it/s]

{'eval_loss': 0.3371990919113159, 'eval_f1': 0.8677150786308973, 'eval_runtime': 17.2204, 'eval_samples_per_second': 58.07, 'eval_steps_per_second': 7.259, 'epoch': 0.71}


                                                      
 18%|█▊        | 1620/9000 [30:26<52:06,  2.36it/s]

{'eval_loss': 0.30665600299835205, 'eval_f1': 0.8826479438314946, 'eval_runtime': 15.8024, 'eval_samples_per_second': 63.282, 'eval_steps_per_second': 7.91, 'epoch': 0.72}


                                                      
 18%|█▊        | 1632/9000 [30:47<53:25,  2.30it/s]

{'eval_loss': 0.32750698924064636, 'eval_f1': 0.8309392265193369, 'eval_runtime': 16.2945, 'eval_samples_per_second': 61.37, 'eval_steps_per_second': 7.671, 'epoch': 0.73}


                                                      
 18%|█▊        | 1644/9000 [31:08<55:58,  2.19it/s]

{'eval_loss': 0.2980591356754303, 'eval_f1': 0.8764278296988577, 'eval_runtime': 16.2761, 'eval_samples_per_second': 61.44, 'eval_steps_per_second': 7.68, 'epoch': 0.73}


                                                      
 18%|█▊        | 1656/9000 [31:29<57:27,  2.13it/s]

{'eval_loss': 0.4037829041481018, 'eval_f1': 0.8330373001776199, 'eval_runtime': 16.2252, 'eval_samples_per_second': 61.633, 'eval_steps_per_second': 7.704, 'epoch': 0.74}


                                                      
 19%|█▊        | 1668/9000 [31:49<57:45,  2.12it/s]

{'eval_loss': 0.2838844656944275, 'eval_f1': 0.8948948948948948, 'eval_runtime': 14.5266, 'eval_samples_per_second': 68.839, 'eval_steps_per_second': 8.605, 'epoch': 0.74}


                                                     
 19%|█▊        | 1680/9000 [32:09<52:29,  2.32it/s]

{'eval_loss': 0.2887594997882843, 'eval_f1': 0.8936170212765958, 'eval_runtime': 16.5307, 'eval_samples_per_second': 60.493, 'eval_steps_per_second': 7.562, 'epoch': 0.75}


                                                      
 19%|█▉        | 1692/9000 [32:31<56:23,  2.16it/s]

{'eval_loss': 0.2740037441253662, 'eval_f1': 0.8922155688622756, 'eval_runtime': 17.0399, 'eval_samples_per_second': 58.686, 'eval_steps_per_second': 7.336, 'epoch': 0.75}


                                                      
 19%|█▉        | 1704/9000 [32:51<54:38,  2.23it/s]

{'eval_loss': 0.3024064302444458, 'eval_f1': 0.8808080808080808, 'eval_runtime': 16.2894, 'eval_samples_per_second': 61.389, 'eval_steps_per_second': 7.674, 'epoch': 0.76}


                                                      
 19%|█▉        | 1716/9000 [33:12<54:39,  2.22it/s]

{'eval_loss': 0.2947736084461212, 'eval_f1': 0.889795918367347, 'eval_runtime': 16.5657, 'eval_samples_per_second': 60.366, 'eval_steps_per_second': 7.546, 'epoch': 0.76}


                                                      
 19%|█▉        | 1728/9000 [33:36<57:10,  2.12it/s]

{'eval_loss': 0.3212447464466095, 'eval_f1': 0.8944820909970959, 'eval_runtime': 19.1674, 'eval_samples_per_second': 52.172, 'eval_steps_per_second': 6.521, 'epoch': 0.77}


                                                      
 19%|█▉        | 1740/9000 [34:07<1:26:32,  1.40it/s]

{'eval_loss': 0.6613250970840454, 'eval_f1': 0.7048346055979643, 'eval_runtime': 23.9802, 'eval_samples_per_second': 41.701, 'eval_steps_per_second': 5.213, 'epoch': 0.77}


                                                      
 19%|█▉        | 1752/9000 [34:34<1:14:48,  1.61it/s]

{'eval_loss': 0.34355780482292175, 'eval_f1': 0.864490603363007, 'eval_runtime': 21.7568, 'eval_samples_per_second': 45.963, 'eval_steps_per_second': 5.745, 'epoch': 0.78}


                                                      
 20%|█▉        | 1764/9000 [35:04<1:30:29,  1.33it/s]

{'eval_loss': 0.39110273122787476, 'eval_f1': 0.8502994011976046, 'eval_runtime': 23.4494, 'eval_samples_per_second': 42.645, 'eval_steps_per_second': 5.331, 'epoch': 0.78}


                                                      
 20%|█▉        | 1776/9000 [35:34<1:16:27,  1.57it/s]

{'eval_loss': 0.4300231337547302, 'eval_f1': 0.8402848423194303, 'eval_runtime': 23.4642, 'eval_samples_per_second': 42.618, 'eval_steps_per_second': 5.327, 'epoch': 0.79}


                                                      
 20%|█▉        | 1788/9000 [36:05<1:21:42,  1.47it/s]

{'eval_loss': 0.35286271572113037, 'eval_f1': 0.8722280887011616, 'eval_runtime': 23.9989, 'eval_samples_per_second': 41.669, 'eval_steps_per_second': 5.209, 'epoch': 0.79}


                                                      
 20%|██        | 1800/9000 [36:30<1:05:07,  1.84it/s]

{'eval_loss': 0.33789679408073425, 'eval_f1': 0.8829365079365078, 'eval_runtime': 20.4612, 'eval_samples_per_second': 48.873, 'eval_steps_per_second': 6.109, 'epoch': 0.8}


                                                      
 20%|██        | 1812/9000 [36:57<1:11:21,  1.68it/s]

{'eval_loss': 0.2939116358757019, 'eval_f1': 0.898898898898899, 'eval_runtime': 20.004, 'eval_samples_per_second': 49.99, 'eval_steps_per_second': 6.249, 'epoch': 0.81}


                                                      
 20%|██        | 1824/9000 [37:24<1:29:35,  1.34it/s]

{'eval_loss': 0.2684990465641022, 'eval_f1': 0.8929663608562691, 'eval_runtime': 20.9826, 'eval_samples_per_second': 47.659, 'eval_steps_per_second': 5.957, 'epoch': 0.81}


                                                      
 20%|██        | 1836/9000 [37:51<1:20:44,  1.48it/s]

{'eval_loss': 0.5189013481140137, 'eval_f1': 0.7286624203821657, 'eval_runtime': 20.8157, 'eval_samples_per_second': 48.041, 'eval_steps_per_second': 6.005, 'epoch': 0.82}


                                                      
 21%|██        | 1848/9000 [38:19<1:06:19,  1.80it/s]

{'eval_loss': 0.3074113428592682, 'eval_f1': 0.8815533980582525, 'eval_runtime': 21.5598, 'eval_samples_per_second': 46.383, 'eval_steps_per_second': 5.798, 'epoch': 0.82}


                                                      
 21%|██        | 1860/9000 [38:47<1:11:42,  1.66it/s]

{'eval_loss': 0.2781936526298523, 'eval_f1': 0.8948453608247424, 'eval_runtime': 22.3798, 'eval_samples_per_second': 44.683, 'eval_steps_per_second': 5.585, 'epoch': 0.83}


                                                      
 21%|██        | 1872/9000 [39:13<1:21:47,  1.45it/s]

{'eval_loss': 0.5957587361335754, 'eval_f1': 0.8265746333045729, 'eval_runtime': 20.9851, 'eval_samples_per_second': 47.653, 'eval_steps_per_second': 5.957, 'epoch': 0.83}


                                                      
 21%|██        | 1884/9000 [39:34<55:05,  2.15it/s]

{'eval_loss': 0.4773429036140442, 'eval_f1': 0.8045267489711934, 'eval_runtime': 15.5257, 'eval_samples_per_second': 64.409, 'eval_steps_per_second': 8.051, 'epoch': 0.84}


                                                      
 21%|██        | 1896/9000 [39:57<53:58,  2.19it/s]

{'eval_loss': 0.5088829398155212, 'eval_f1': 0.7399267399267399, 'eval_runtime': 18.5254, 'eval_samples_per_second': 53.98, 'eval_steps_per_second': 6.748, 'epoch': 0.84}


                                                      
 21%|██        | 1908/9000 [40:19<56:45,  2.08it/s]

{'eval_loss': 0.2915784418582916, 'eval_f1': 0.8778947368421053, 'eval_runtime': 17.6626, 'eval_samples_per_second': 56.617, 'eval_steps_per_second': 7.077, 'epoch': 0.85}


                                                      
 21%|██▏       | 1920/9000 [40:44<1:19:49,  1.48it/s]

{'eval_loss': 0.3517284691333771, 'eval_f1': 0.8353863381858903, 'eval_runtime': 19.3232, 'eval_samples_per_second': 51.751, 'eval_steps_per_second': 6.469, 'epoch': 0.85}


                                                      
 21%|██▏       | 1932/9000 [41:07<54:27,  2.16it/s]

{'eval_loss': 0.2928158640861511, 'eval_f1': 0.8790983606557377, 'eval_runtime': 18.4116, 'eval_samples_per_second': 54.314, 'eval_steps_per_second': 6.789, 'epoch': 0.86}


                                                      
 22%|██▏       | 1944/9000 [41:29<55:59,  2.10it/s]

{'eval_loss': 0.28955328464508057, 'eval_f1': 0.8721174004192872, 'eval_runtime': 17.7665, 'eval_samples_per_second': 56.286, 'eval_steps_per_second': 7.036, 'epoch': 0.86}


                                                      
 22%|██▏       | 1956/9000 [41:52<1:21:25,  1.44it/s]

{'eval_loss': 0.40913403034210205, 'eval_f1': 0.8256880733944953, 'eval_runtime': 17.6537, 'eval_samples_per_second': 56.645, 'eval_steps_per_second': 7.081, 'epoch': 0.87}


                                                      
 22%|██▏       | 1968/9000 [42:16<1:00:16,  1.94it/s]

{'eval_loss': 0.31390202045440674, 'eval_f1': 0.8799149840595112, 'eval_runtime': 18.4232, 'eval_samples_per_second': 54.279, 'eval_steps_per_second': 6.785, 'epoch': 0.87}


                                                      
 22%|██▏       | 1980/9000 [42:39<54:12,  2.16it/s]

{'eval_loss': 0.3427278399467468, 'eval_f1': 0.8711538461538461, 'eval_runtime': 18.1888, 'eval_samples_per_second': 54.979, 'eval_steps_per_second': 6.872, 'epoch': 0.88}


                                                      
 22%|██▏       | 1992/9000 [43:00<1:11:42,  1.63it/s]

{'eval_loss': 0.4311593770980835, 'eval_f1': 0.7581227436823106, 'eval_runtime': 16.9095, 'eval_samples_per_second': 59.138, 'eval_steps_per_second': 7.392, 'epoch': 0.89}


                                                      
 22%|██▏       | 2004/9000 [43:24<1:09:27,  1.68it/s]

{'eval_loss': 0.3106456398963928, 'eval_f1': 0.8583509513742072, 'eval_runtime': 17.5208, 'eval_samples_per_second': 57.075, 'eval_steps_per_second': 7.134, 'epoch': 0.89}


                                                      
 22%|██▏       | 2016/9000 [43:47<51:43,  2.25it/s]

{'eval_loss': 0.3746424913406372, 'eval_f1': 0.8610354223433242, 'eval_runtime': 17.8075, 'eval_samples_per_second': 56.156, 'eval_steps_per_second': 7.019, 'epoch': 0.9}


                                                      
 23%|██▎       | 2028/9000 [44:10<1:01:06,  1.90it/s]

{'eval_loss': 0.3201484978199005, 'eval_f1': 0.8565121412803531, 'eval_runtime': 18.3984, 'eval_samples_per_second': 54.352, 'eval_steps_per_second': 6.794, 'epoch': 0.9}


                                                      
 23%|██▎       | 2040/9000 [44:33<1:13:11,  1.58it/s]

{'eval_loss': 0.2916080951690674, 'eval_f1': 0.8731808731808732, 'eval_runtime': 17.3449, 'eval_samples_per_second': 57.654, 'eval_steps_per_second': 7.207, 'epoch': 0.91}


                                                      
 23%|██▎       | 2052/9000 [44:56<52:02,  2.23it/s]

{'eval_loss': 0.303383469581604, 'eval_f1': 0.8775510204081632, 'eval_runtime': 18.4489, 'eval_samples_per_second': 54.204, 'eval_steps_per_second': 6.775, 'epoch': 0.91}


                                                      
 23%|██▎       | 2064/9000 [45:19<50:46,  2.28it/s]

{'eval_loss': 0.324284166097641, 'eval_f1': 0.8795056642636457, 'eval_runtime': 18.2975, 'eval_samples_per_second': 54.652, 'eval_steps_per_second': 6.832, 'epoch': 0.92}


                                                      
 23%|██▎       | 2076/9000 [45:43<1:15:10,  1.54it/s]

{'eval_loss': 0.3756735622882843, 'eval_f1': 0.8769953051643192, 'eval_runtime': 18.0164, 'eval_samples_per_second': 55.505, 'eval_steps_per_second': 6.938, 'epoch': 0.92}


                                                      
 23%|██▎       | 2088/9000 [46:08<57:51,  1.99it/s]

{'eval_loss': 0.2887902855873108, 'eval_f1': 0.8765690376569037, 'eval_runtime': 19.6885, 'eval_samples_per_second': 50.791, 'eval_steps_per_second': 6.349, 'epoch': 0.93}


                                                      
 23%|██▎       | 2100/9000 [46:30<53:36,  2.15it/s]

{'eval_loss': 0.3004765510559082, 'eval_f1': 0.903353057199211, 'eval_runtime': 17.7815, 'eval_samples_per_second': 56.238, 'eval_steps_per_second': 7.03, 'epoch': 0.93}


                                                      
 23%|██▎       | 2112/9000 [46:56<1:08:45,  1.67it/s]

{'eval_loss': 0.32530736923217773, 'eval_f1': 0.8524590163934426, 'eval_runtime': 20.7581, 'eval_samples_per_second': 48.174, 'eval_steps_per_second': 6.022, 'epoch': 0.94}


                                                      
 24%|██▎       | 2124/9000 [47:21<1:00:32,  1.89it/s]

{'eval_loss': 0.41820603609085083, 'eval_f1': 0.8748841519925857, 'eval_runtime': 19.7172, 'eval_samples_per_second': 50.717, 'eval_steps_per_second': 6.34, 'epoch': 0.94}


                                                      
 24%|██▎       | 2136/9000 [47:46<1:00:17,  1.90it/s]

{'eval_loss': 0.4215988516807556, 'eval_f1': 0.8366445916114792, 'eval_runtime': 20.1908, 'eval_samples_per_second': 49.527, 'eval_steps_per_second': 6.191, 'epoch': 0.95}


                                                      
 24%|██▍       | 2148/9000 [48:12<1:15:48,  1.51it/s]

{'eval_loss': 0.3461383879184723, 'eval_f1': 0.8497297297297297, 'eval_runtime': 20.2587, 'eval_samples_per_second': 49.361, 'eval_steps_per_second': 6.17, 'epoch': 0.95}


                                                      
 24%|██▍       | 2160/9000 [48:40<1:08:14,  1.67it/s]

{'eval_loss': 0.3077264726161957, 'eval_f1': 0.8769074262461852, 'eval_runtime': 21.8058, 'eval_samples_per_second': 45.859, 'eval_steps_per_second': 5.732, 'epoch': 0.96}


                                                      
 24%|██▍       | 2172/9000 [49:07<1:08:37,  1.66it/s]

{'eval_loss': 0.3753894567489624, 'eval_f1': 0.8299776286353467, 'eval_runtime': 20.6144, 'eval_samples_per_second': 48.51, 'eval_steps_per_second': 6.064, 'epoch': 0.97}


                                                      
 24%|██▍       | 2184/9000 [49:32<1:12:33,  1.57it/s]

{'eval_loss': 0.3667590916156769, 'eval_f1': 0.8638059701492538, 'eval_runtime': 18.9522, 'eval_samples_per_second': 52.764, 'eval_steps_per_second': 6.596, 'epoch': 0.97}


                                                      
 24%|██▍       | 2196/9000 [49:56<1:06:26,  1.71it/s]

{'eval_loss': 0.33415713906288147, 'eval_f1': 0.880859375, 'eval_runtime': 18.8827, 'eval_samples_per_second': 52.958, 'eval_steps_per_second': 6.62, 'epoch': 0.98}


                                                      
 25%|██▍       | 2208/9000 [50:20<58:20,  1.94it/s]

{'eval_loss': 0.4235011041164398, 'eval_f1': 0.7933884297520662, 'eval_runtime': 19.567, 'eval_samples_per_second': 51.106, 'eval_steps_per_second': 6.388, 'epoch': 0.98}


                                                      
 25%|██▍       | 2220/9000 [50:45<1:03:19,  1.78it/s]

{'eval_loss': 0.29741033911705017, 'eval_f1': 0.8875621890547264, 'eval_runtime': 19.3381, 'eval_samples_per_second': 51.711, 'eval_steps_per_second': 6.464, 'epoch': 0.99}


                                                      
 25%|██▍       | 2232/9000 [51:11<1:02:55,  1.79it/s]

{'eval_loss': 0.26273325085639954, 'eval_f1': 0.9005128205128206, 'eval_runtime': 20.2016, 'eval_samples_per_second': 49.501, 'eval_steps_per_second': 6.188, 'epoch': 0.99}


                                                      
 25%|██▍       | 2244/9000 [51:36<1:05:14,  1.73it/s]

{'eval_loss': 0.6341702342033386, 'eval_f1': 0.6921052631578947, 'eval_runtime': 19.5904, 'eval_samples_per_second': 51.045, 'eval_steps_per_second': 6.381, 'epoch': 1.0}


 25%|██▌       | 2250/9000 [51:38<2:40:14,  1.42s/it] 

{'loss': 0.3856, 'learning_rate': 0.00015011111111111112, 'epoch': 1.0}


                                                     
 25%|██▌       | 2256/9000 [52:02<1:04:24,  1.75it/s]

{'eval_loss': 0.36946433782577515, 'eval_f1': 0.878731343283582, 'eval_runtime': 20.4652, 'eval_samples_per_second': 48.863, 'eval_steps_per_second': 6.108, 'epoch': 1.0}


                                                      
 25%|██▌       | 2268/9000 [52:27<1:11:21,  1.57it/s]

{'eval_loss': 0.36708366870880127, 'eval_f1': 0.8725868725868726, 'eval_runtime': 20.1264, 'eval_samples_per_second': 49.686, 'eval_steps_per_second': 6.211, 'epoch': 1.01}


                                                      
 25%|██▌       | 2280/9000 [52:50<1:24:42,  1.32it/s]

{'eval_loss': 0.3085925281047821, 'eval_f1': 0.8716904276985743, 'eval_runtime': 16.2405, 'eval_samples_per_second': 61.574, 'eval_steps_per_second': 7.697, 'epoch': 1.01}


                                                      
 25%|██▌       | 2292/9000 [53:11<53:31,  2.09it/s]

{'eval_loss': 0.30629634857177734, 'eval_f1': 0.8764044943820224, 'eval_runtime': 16.5977, 'eval_samples_per_second': 60.249, 'eval_steps_per_second': 7.531, 'epoch': 1.02}


                                                      
 26%|██▌       | 2304/9000 [53:35<57:54,  1.93it/s]

{'eval_loss': 0.31603583693504333, 'eval_f1': 0.8676470588235293, 'eval_runtime': 18.8878, 'eval_samples_per_second': 52.944, 'eval_steps_per_second': 6.618, 'epoch': 1.02}


                                                      
 26%|██▌       | 2316/9000 [53:59<59:52,  1.86it/s]

{'eval_loss': 0.6147627830505371, 'eval_f1': 0.736318407960199, 'eval_runtime': 18.5407, 'eval_samples_per_second': 53.935, 'eval_steps_per_second': 6.742, 'epoch': 1.03}


                                                      
 26%|██▌       | 2328/9000 [54:22<59:36,  1.87it/s]

{'eval_loss': 0.36741772294044495, 'eval_f1': 0.8683957732949088, 'eval_runtime': 18.2761, 'eval_samples_per_second': 54.716, 'eval_steps_per_second': 6.84, 'epoch': 1.03}


                                                      
 26%|██▌       | 2340/9000 [54:45<57:52,  1.92it/s]

{'eval_loss': 0.3413061499595642, 'eval_f1': 0.8582600195503421, 'eval_runtime': 17.6827, 'eval_samples_per_second': 56.553, 'eval_steps_per_second': 7.069, 'epoch': 1.04}


                                                      
 26%|██▌       | 2352/9000 [55:09<1:01:43,  1.80it/s]

{'eval_loss': 0.3463025391101837, 'eval_f1': 0.8536853685368537, 'eval_runtime': 18.8964, 'eval_samples_per_second': 52.92, 'eval_steps_per_second': 6.615, 'epoch': 1.05}


                                                      
 26%|██▋       | 2364/9000 [55:33<57:23,  1.93it/s]

{'eval_loss': 0.37791189551353455, 'eval_f1': 0.8738404452690167, 'eval_runtime': 18.6616, 'eval_samples_per_second': 53.586, 'eval_steps_per_second': 6.698, 'epoch': 1.05}


                                                      
 26%|██▋       | 2376/9000 [55:57<1:03:49,  1.73it/s]

{'eval_loss': 0.3220405876636505, 'eval_f1': 0.8517699115044247, 'eval_runtime': 18.7073, 'eval_samples_per_second': 53.455, 'eval_steps_per_second': 6.682, 'epoch': 1.06}


                                                      
 27%|██▋       | 2388/9000 [56:21<58:46,  1.87it/s]

{'eval_loss': 0.30077624320983887, 'eval_f1': 0.8662280701754387, 'eval_runtime': 19.2418, 'eval_samples_per_second': 51.97, 'eval_steps_per_second': 6.496, 'epoch': 1.06}


                                                      
 27%|██▋       | 2400/9000 [56:44<55:19,  1.99it/s]

{'eval_loss': 0.4388122856616974, 'eval_f1': 0.85, 'eval_runtime': 18.3216, 'eval_samples_per_second': 54.58, 'eval_steps_per_second': 6.823, 'epoch': 1.07}


                                                      
 27%|██▋       | 2412/9000 [57:08<56:25,  1.95it/s]

{'eval_loss': 0.2764367163181305, 'eval_f1': 0.8884297520661157, 'eval_runtime': 18.6628, 'eval_samples_per_second': 53.583, 'eval_steps_per_second': 6.698, 'epoch': 1.07}


                                                      
 27%|██▋       | 2424/9000 [57:30<56:24,  1.94it/s]

{'eval_loss': 0.2812386453151703, 'eval_f1': 0.8997078870496593, 'eval_runtime': 17.4139, 'eval_samples_per_second': 57.425, 'eval_steps_per_second': 7.178, 'epoch': 1.08}


                                                      
 27%|██▋       | 2436/9000 [57:54<55:05,  1.99it/s]

{'eval_loss': 0.2836548089981079, 'eval_f1': 0.8944723618090451, 'eval_runtime': 18.4797, 'eval_samples_per_second': 54.114, 'eval_steps_per_second': 6.764, 'epoch': 1.08}


                                                      
 27%|██▋       | 2448/9000 [58:17<59:01,  1.85it/s]

{'eval_loss': 0.48234882950782776, 'eval_f1': 0.7277353689567431, 'eval_runtime': 18.3069, 'eval_samples_per_second': 54.624, 'eval_steps_per_second': 6.828, 'epoch': 1.09}


                                                      
 27%|██▋       | 2460/9000 [58:42<1:02:18,  1.75it/s]

{'eval_loss': 0.37291380763053894, 'eval_f1': 0.8634280476626949, 'eval_runtime': 18.8546, 'eval_samples_per_second': 53.037, 'eval_steps_per_second': 6.63, 'epoch': 1.09}


                                                      
 27%|██▋       | 2472/9000 [59:03<52:54,  2.06it/s]

{'eval_loss': 0.2706412374973297, 'eval_f1': 0.8884254431699686, 'eval_runtime': 16.4862, 'eval_samples_per_second': 60.657, 'eval_steps_per_second': 7.582, 'epoch': 1.1}


                                                     
 28%|██▊       | 2484/9000 [59:27<56:40,  1.92it/s]

{'eval_loss': 0.2756044268608093, 'eval_f1': 0.893042575285566, 'eval_runtime': 19.1608, 'eval_samples_per_second': 52.19, 'eval_steps_per_second': 6.524, 'epoch': 1.1}


                                                      
 28%|██▊       | 2496/9000 [59:53<59:49,  1.81it/s]

{'eval_loss': 0.3007683753967285, 'eval_f1': 0.8775292864749733, 'eval_runtime': 20.3777, 'eval_samples_per_second': 49.073, 'eval_steps_per_second': 6.134, 'epoch': 1.11}


                                                      
 28%|██▊       | 2508/9000 [1:00:16<1:01:17,  1.77it/s]

{'eval_loss': 0.32512688636779785, 'eval_f1': 0.88911495422177, 'eval_runtime': 18.1736, 'eval_samples_per_second': 55.025, 'eval_steps_per_second': 6.878, 'epoch': 1.11}


                                                        
 28%|██▊       | 2520/9000 [1:00:41<58:41,  1.84it/s]

{'eval_loss': 0.42024311423301697, 'eval_f1': 0.8793103448275862, 'eval_runtime': 19.0614, 'eval_samples_per_second': 52.462, 'eval_steps_per_second': 6.558, 'epoch': 1.12}


                                                        
 28%|██▊       | 2532/9000 [1:01:07<58:24,  1.85it/s]

{'eval_loss': 0.4667767286300659, 'eval_f1': 0.8658777120315582, 'eval_runtime': 20.0754, 'eval_samples_per_second': 49.812, 'eval_steps_per_second': 6.227, 'epoch': 1.13}


                                                        
 28%|██▊       | 2544/9000 [1:01:33<1:00:41,  1.77it/s]

{'eval_loss': 0.47871866822242737, 'eval_f1': 0.8716904276985743, 'eval_runtime': 20.2483, 'eval_samples_per_second': 49.387, 'eval_steps_per_second': 6.173, 'epoch': 1.13}


                                                        
 28%|██▊       | 2556/9000 [1:01:58<1:02:53,  1.71it/s]

{'eval_loss': 0.41245830059051514, 'eval_f1': 0.8778004073319755, 'eval_runtime': 20.4125, 'eval_samples_per_second': 48.99, 'eval_steps_per_second': 6.124, 'epoch': 1.14}


                                                        
 29%|██▊       | 2568/9000 [1:02:24<58:15,  1.84it/s]

{'eval_loss': 0.4212847948074341, 'eval_f1': 0.807467911318553, 'eval_runtime': 20.8699, 'eval_samples_per_second': 47.916, 'eval_steps_per_second': 5.989, 'epoch': 1.14}


                                                        
 29%|██▊       | 2580/9000 [1:02:51<1:05:02,  1.64it/s]

{'eval_loss': 0.28182685375213623, 'eval_f1': 0.8879753340184995, 'eval_runtime': 21.2522, 'eval_samples_per_second': 47.054, 'eval_steps_per_second': 5.882, 'epoch': 1.15}


                                                        
 29%|██▉       | 2592/9000 [1:03:18<1:22:04,  1.30it/s]

{'eval_loss': 0.2973081171512604, 'eval_f1': 0.8752642706131077, 'eval_runtime': 20.2119, 'eval_samples_per_second': 49.476, 'eval_steps_per_second': 6.184, 'epoch': 1.15}


                                                        
 29%|██▉       | 2604/9000 [1:03:44<1:02:55,  1.69it/s]

{'eval_loss': 0.2892146110534668, 'eval_f1': 0.8827726809378186, 'eval_runtime': 19.7723, 'eval_samples_per_second': 50.576, 'eval_steps_per_second': 6.322, 'epoch': 1.16}


                                                        
 29%|██▉       | 2616/9000 [1:04:10<55:31,  1.92it/s]

{'eval_loss': 0.2754286825656891, 'eval_f1': 0.8943089430894309, 'eval_runtime': 19.9807, 'eval_samples_per_second': 50.048, 'eval_steps_per_second': 6.256, 'epoch': 1.16}


                                                        
 29%|██▉       | 2628/9000 [1:04:36<1:03:51,  1.66it/s]

{'eval_loss': 0.36047467589378357, 'eval_f1': 0.8677595628415301, 'eval_runtime': 20.1998, 'eval_samples_per_second': 49.506, 'eval_steps_per_second': 6.188, 'epoch': 1.17}


                                                        
 29%|██▉       | 2640/9000 [1:05:02<54:10,  1.96it/s]

{'eval_loss': 0.5099462270736694, 'eval_f1': 0.8672237697307337, 'eval_runtime': 20.644, 'eval_samples_per_second': 48.44, 'eval_steps_per_second': 6.055, 'epoch': 1.17}


                                                        
 29%|██▉       | 2652/9000 [1:05:28<1:00:58,  1.74it/s]

{'eval_loss': 0.5379915833473206, 'eval_f1': 0.8125, 'eval_runtime': 20.6435, 'eval_samples_per_second': 48.441, 'eval_steps_per_second': 6.055, 'epoch': 1.18}


                                                        
 30%|██▉       | 2664/9000 [1:05:50<59:20,  1.78it/s]

{'eval_loss': 0.5760007500648499, 'eval_f1': 0.8370044052863436, 'eval_runtime': 16.5772, 'eval_samples_per_second': 60.324, 'eval_steps_per_second': 7.54, 'epoch': 1.18}


                                                       
 30%|██▉       | 2676/9000 [1:06:14<52:03,  2.02it/s]

{'eval_loss': 0.38638436794281006, 'eval_f1': 0.8730009407337723, 'eval_runtime': 20.0017, 'eval_samples_per_second': 49.996, 'eval_steps_per_second': 6.249, 'epoch': 1.19}


                                                        
 30%|██▉       | 2688/9000 [1:06:47<1:23:57,  1.25it/s]

{'eval_loss': 0.5049360990524292, 'eval_f1': 0.7745803357314148, 'eval_runtime': 24.4837, 'eval_samples_per_second': 40.844, 'eval_steps_per_second': 5.105, 'epoch': 1.19}


                                                        
 30%|███       | 2700/9000 [1:07:16<1:13:03,  1.44it/s]

{'eval_loss': 0.3205433487892151, 'eval_f1': 0.886576482830385, 'eval_runtime': 22.2285, 'eval_samples_per_second': 44.987, 'eval_steps_per_second': 5.623, 'epoch': 1.2}


                                                        
 30%|███       | 2712/9000 [1:07:46<1:23:34,  1.25it/s]

{'eval_loss': 0.3196086287498474, 'eval_f1': 0.878625134264232, 'eval_runtime': 22.0082, 'eval_samples_per_second': 45.438, 'eval_steps_per_second': 5.68, 'epoch': 1.21}


                                                        
 30%|███       | 2724/9000 [1:08:15<1:01:03,  1.71it/s]

{'eval_loss': 0.33760327100753784, 'eval_f1': 0.8806983511154219, 'eval_runtime': 22.5271, 'eval_samples_per_second': 44.391, 'eval_steps_per_second': 5.549, 'epoch': 1.21}


                                                        
 30%|███       | 2736/9000 [1:08:43<1:04:36,  1.62it/s]

{'eval_loss': 0.27867767214775085, 'eval_f1': 0.8874734607218684, 'eval_runtime': 21.2577, 'eval_samples_per_second': 47.042, 'eval_steps_per_second': 5.88, 'epoch': 1.22}


                                                        
 31%|███       | 2748/9000 [1:09:12<1:04:31,  1.62it/s]

{'eval_loss': 0.3212698996067047, 'eval_f1': 0.8857938718662952, 'eval_runtime': 22.6436, 'eval_samples_per_second': 44.163, 'eval_steps_per_second': 5.52, 'epoch': 1.22}


                                                        
 31%|███       | 2760/9000 [1:09:43<1:12:50,  1.43it/s]

{'eval_loss': 0.27277469635009766, 'eval_f1': 0.8900523560209425, 'eval_runtime': 23.2836, 'eval_samples_per_second': 42.949, 'eval_steps_per_second': 5.369, 'epoch': 1.23}


                                                        
 31%|███       | 2772/9000 [1:10:10<59:03,  1.76it/s]

{'eval_loss': 0.3644863963127136, 'eval_f1': 0.8852459016393442, 'eval_runtime': 21.3993, 'eval_samples_per_second': 46.731, 'eval_steps_per_second': 5.841, 'epoch': 1.23}


                                                        
 31%|███       | 2784/9000 [1:10:35<58:33,  1.77it/s]

{'eval_loss': 0.3479589819908142, 'eval_f1': 0.8796019900497513, 'eval_runtime': 19.4967, 'eval_samples_per_second': 51.291, 'eval_steps_per_second': 6.411, 'epoch': 1.24}


                                                        
 31%|███       | 2796/9000 [1:11:01<55:12,  1.87it/s]

{'eval_loss': 0.29655951261520386, 'eval_f1': 0.88911495422177, 'eval_runtime': 20.9142, 'eval_samples_per_second': 47.814, 'eval_steps_per_second': 5.977, 'epoch': 1.24}


                                                        
 31%|███       | 2808/9000 [1:11:30<59:29,  1.73it/s]

{'eval_loss': 0.35237208008766174, 'eval_f1': 0.853631284916201, 'eval_runtime': 23.1051, 'eval_samples_per_second': 43.281, 'eval_steps_per_second': 5.41, 'epoch': 1.25}


                                                        
 31%|███▏      | 2820/9000 [1:11:57<1:05:47,  1.57it/s]

{'eval_loss': 0.30095943808555603, 'eval_f1': 0.8898043254376931, 'eval_runtime': 20.541, 'eval_samples_per_second': 48.683, 'eval_steps_per_second': 6.085, 'epoch': 1.25}


                                                        
 31%|███▏      | 2832/9000 [1:12:22<1:10:14,  1.46it/s]

{'eval_loss': 0.3052186369895935, 'eval_f1': 0.8982161594963274, 'eval_runtime': 18.5539, 'eval_samples_per_second': 53.897, 'eval_steps_per_second': 6.737, 'epoch': 1.26}


                                                        
 32%|███▏      | 2844/9000 [1:12:45<33:34,  3.06it/s]

{'eval_loss': 0.29877105355262756, 'eval_f1': 0.8937048503611971, 'eval_runtime': 19.83, 'eval_samples_per_second': 50.429, 'eval_steps_per_second': 6.304, 'epoch': 1.26}


                                                        
 32%|███▏      | 2856/9000 [1:13:12<1:06:29,  1.54it/s]

{'eval_loss': 0.2992725670337677, 'eval_f1': 0.8846153846153846, 'eval_runtime': 21.1688, 'eval_samples_per_second': 47.239, 'eval_steps_per_second': 5.905, 'epoch': 1.27}


                                                        
 32%|███▏      | 2868/9000 [1:13:40<1:02:41,  1.63it/s]

{'eval_loss': 0.27113258838653564, 'eval_f1': 0.8975903614457831, 'eval_runtime': 21.5231, 'eval_samples_per_second': 46.462, 'eval_steps_per_second': 5.808, 'epoch': 1.27}


                                                        
 32%|███▏      | 2880/9000 [1:14:07<56:45,  1.80it/s]

{'eval_loss': 0.26456499099731445, 'eval_f1': 0.892116182572614, 'eval_runtime': 21.3045, 'eval_samples_per_second': 46.938, 'eval_steps_per_second': 5.867, 'epoch': 1.28}


                                                        
 32%|███▏      | 2892/9000 [1:14:34<1:00:53,  1.67it/s]

{'eval_loss': 0.2873243987560272, 'eval_f1': 0.896486229819563, 'eval_runtime': 20.8208, 'eval_samples_per_second': 48.029, 'eval_steps_per_second': 6.004, 'epoch': 1.29}


                                                        
 32%|███▏      | 2904/9000 [1:15:01<1:07:56,  1.50it/s]

{'eval_loss': 0.36401501297950745, 'eval_f1': 0.8145539906103285, 'eval_runtime': 21.3151, 'eval_samples_per_second': 46.915, 'eval_steps_per_second': 5.864, 'epoch': 1.29}


                                                        
 32%|███▏      | 2916/9000 [1:15:28<58:40,  1.73it/s]

{'eval_loss': 0.32574811577796936, 'eval_f1': 0.8930936613055819, 'eval_runtime': 21.1347, 'eval_samples_per_second': 47.316, 'eval_steps_per_second': 5.914, 'epoch': 1.3}


                                                        
 33%|███▎      | 2928/9000 [1:15:54<54:50,  1.85it/s]

{'eval_loss': 0.5584105253219604, 'eval_f1': 0.7540173053152038, 'eval_runtime': 20.8244, 'eval_samples_per_second': 48.021, 'eval_steps_per_second': 6.003, 'epoch': 1.3}


                                                        
 33%|███▎      | 2940/9000 [1:16:18<1:07:13,  1.50it/s]

{'eval_loss': 0.32111310958862305, 'eval_f1': 0.8954918032786885, 'eval_runtime': 17.5005, 'eval_samples_per_second': 57.141, 'eval_steps_per_second': 7.143, 'epoch': 1.31}


                                                       
 33%|███▎      | 2952/9000 [1:16:41<51:42,  1.95it/s]

{'eval_loss': 0.3255247473716736, 'eval_f1': 0.8935762224352828, 'eval_runtime': 18.7789, 'eval_samples_per_second': 53.251, 'eval_steps_per_second': 6.656, 'epoch': 1.31}


                                                        
 33%|███▎      | 2964/9000 [1:17:07<54:52,  1.83it/s]

{'eval_loss': 0.3449724018573761, 'eval_f1': 0.8816402609506059, 'eval_runtime': 20.6714, 'eval_samples_per_second': 48.376, 'eval_steps_per_second': 6.047, 'epoch': 1.32}


                                                        
 33%|███▎      | 2976/9000 [1:17:37<1:13:02,  1.37it/s]

{'eval_loss': 0.330923467874527, 'eval_f1': 0.8740581270182993, 'eval_runtime': 22.9334, 'eval_samples_per_second': 43.604, 'eval_steps_per_second': 5.451, 'epoch': 1.32}


                                                        
 33%|███▎      | 2988/9000 [1:18:06<1:06:28,  1.51it/s]

{'eval_loss': 0.27465012669563293, 'eval_f1': 0.904572564612326, 'eval_runtime': 22.2435, 'eval_samples_per_second': 44.957, 'eval_steps_per_second': 5.62, 'epoch': 1.33}


                                                        
 33%|███▎      | 3000/9000 [1:18:35<1:11:40,  1.40it/s]

{'eval_loss': 0.3103673756122589, 'eval_f1': 0.8829891838741396, 'eval_runtime': 22.8511, 'eval_samples_per_second': 43.762, 'eval_steps_per_second': 5.47, 'epoch': 1.33}


                                                        
 33%|███▎      | 3012/9000 [1:19:04<1:02:41,  1.59it/s]

{'eval_loss': 0.3561130166053772, 'eval_f1': 0.8760643330179754, 'eval_runtime': 22.4522, 'eval_samples_per_second': 44.539, 'eval_steps_per_second': 5.567, 'epoch': 1.34}


                                                        
 34%|███▎      | 3024/9000 [1:19:32<1:04:20,  1.55it/s]

{'eval_loss': 0.31192755699157715, 'eval_f1': 0.8905263157894737, 'eval_runtime': 21.7334, 'eval_samples_per_second': 46.012, 'eval_steps_per_second': 5.752, 'epoch': 1.34}


                                                        
 34%|███▎      | 3036/9000 [1:20:00<1:04:57,  1.53it/s]

{'eval_loss': 0.3422681391239166, 'eval_f1': 0.8803827751196173, 'eval_runtime': 21.069, 'eval_samples_per_second': 47.463, 'eval_steps_per_second': 5.933, 'epoch': 1.35}


                                                        
 34%|███▍      | 3048/9000 [1:20:28<1:04:59,  1.53it/s]

{'eval_loss': 0.3119809925556183, 'eval_f1': 0.8875739644970414, 'eval_runtime': 22.309, 'eval_samples_per_second': 44.825, 'eval_steps_per_second': 5.603, 'epoch': 1.35}


                                                        
 34%|███▍      | 3060/9000 [1:20:56<1:03:38,  1.56it/s]

{'eval_loss': 0.3991246819496155, 'eval_f1': 0.8536853685368537, 'eval_runtime': 21.6441, 'eval_samples_per_second': 46.202, 'eval_steps_per_second': 5.775, 'epoch': 1.36}


                                                        
 34%|███▍      | 3072/9000 [1:21:24<1:06:05,  1.49it/s]

{'eval_loss': 0.3114270865917206, 'eval_f1': 0.886517943743938, 'eval_runtime': 21.7549, 'eval_samples_per_second': 45.967, 'eval_steps_per_second': 5.746, 'epoch': 1.37}


                                                        
 34%|███▍      | 3084/9000 [1:21:51<1:04:09,  1.54it/s]

{'eval_loss': 0.30981963872909546, 'eval_f1': 0.8974854932301742, 'eval_runtime': 21.1902, 'eval_samples_per_second': 47.192, 'eval_steps_per_second': 5.899, 'epoch': 1.37}


                                                        
 34%|███▍      | 3096/9000 [1:22:20<1:08:29,  1.44it/s]

{'eval_loss': 0.2964005172252655, 'eval_f1': 0.8943589743589744, 'eval_runtime': 21.7147, 'eval_samples_per_second': 46.052, 'eval_steps_per_second': 5.756, 'epoch': 1.38}


                                                        
 35%|███▍      | 3108/9000 [1:22:48<58:13,  1.69it/s]

{'eval_loss': 0.28556299209594727, 'eval_f1': 0.9006085192697768, 'eval_runtime': 21.8039, 'eval_samples_per_second': 45.863, 'eval_steps_per_second': 5.733, 'epoch': 1.38}


                                                        
 35%|███▍      | 3120/9000 [1:23:15<1:02:34,  1.57it/s]

{'eval_loss': 0.3225967288017273, 'eval_f1': 0.8730853391684902, 'eval_runtime': 21.3001, 'eval_samples_per_second': 46.948, 'eval_steps_per_second': 5.869, 'epoch': 1.39}


                                                        
 35%|███▍      | 3132/9000 [1:23:43<1:08:36,  1.43it/s]

{'eval_loss': 0.2712009847164154, 'eval_f1': 0.898238747553816, 'eval_runtime': 21.1678, 'eval_samples_per_second': 47.242, 'eval_steps_per_second': 5.905, 'epoch': 1.39}


                                                        
 35%|███▍      | 3144/9000 [1:24:11<1:04:38,  1.51it/s]

{'eval_loss': 0.27136483788490295, 'eval_f1': 0.8991185112634672, 'eval_runtime': 21.5699, 'eval_samples_per_second': 46.361, 'eval_steps_per_second': 5.795, 'epoch': 1.4}


                                                        
 35%|███▌      | 3156/9000 [1:24:40<1:04:02,  1.52it/s]

{'eval_loss': 0.2757567763328552, 'eval_f1': 0.8774869109947644, 'eval_runtime': 21.7121, 'eval_samples_per_second': 46.057, 'eval_steps_per_second': 5.757, 'epoch': 1.4}


                                                        
 35%|███▌      | 3168/9000 [1:25:07<57:17,  1.70it/s]

{'eval_loss': 0.3739147484302521, 'eval_f1': 0.8754578754578755, 'eval_runtime': 21.3587, 'eval_samples_per_second': 46.819, 'eval_steps_per_second': 5.852, 'epoch': 1.41}


                                                        
 35%|███▌      | 3180/9000 [1:25:34<1:02:34,  1.55it/s]

{'eval_loss': 0.301964670419693, 'eval_f1': 0.8735135135135135, 'eval_runtime': 21.4984, 'eval_samples_per_second': 46.515, 'eval_steps_per_second': 5.814, 'epoch': 1.41}


                                                        
 35%|███▌      | 3192/9000 [1:25:55<46:08,  2.10it/s]

{'eval_loss': 0.2709614932537079, 'eval_f1': 0.8976697061803444, 'eval_runtime': 15.6511, 'eval_samples_per_second': 63.893, 'eval_steps_per_second': 7.987, 'epoch': 1.42}


                                                       
 36%|███▌      | 3204/9000 [1:26:23<56:57,  1.70it/s]

{'eval_loss': 0.3295722007751465, 'eval_f1': 0.8783638320775026, 'eval_runtime': 21.6095, 'eval_samples_per_second': 46.276, 'eval_steps_per_second': 5.784, 'epoch': 1.42}


                                                        
 36%|███▌      | 3216/9000 [1:26:49<1:04:09,  1.50it/s]

{'eval_loss': 0.3819698989391327, 'eval_f1': 0.8867924528301887, 'eval_runtime': 19.7409, 'eval_samples_per_second': 50.656, 'eval_steps_per_second': 6.332, 'epoch': 1.43}


                                                        
 36%|███▌      | 3228/9000 [1:27:14<59:13,  1.62it/s]

{'eval_loss': 0.37707433104515076, 'eval_f1': 0.8679245283018867, 'eval_runtime': 19.2194, 'eval_samples_per_second': 52.031, 'eval_steps_per_second': 6.504, 'epoch': 1.43}


                                                        
 36%|███▌      | 3240/9000 [1:27:39<59:55,  1.60it/s]

{'eval_loss': 0.31313812732696533, 'eval_f1': 0.889344262295082, 'eval_runtime': 19.1591, 'eval_samples_per_second': 52.195, 'eval_steps_per_second': 6.524, 'epoch': 1.44}


                                                        
 36%|███▌      | 3252/9000 [1:28:04<53:16,  1.80it/s]

{'eval_loss': 0.29819536209106445, 'eval_f1': 0.8914505283381364, 'eval_runtime': 19.7157, 'eval_samples_per_second': 50.721, 'eval_steps_per_second': 6.34, 'epoch': 1.45}


                                                        
 36%|███▋      | 3264/9000 [1:28:31<1:00:33,  1.58it/s]

{'eval_loss': 0.278848260641098, 'eval_f1': 0.8921775898520086, 'eval_runtime': 20.9219, 'eval_samples_per_second': 47.797, 'eval_steps_per_second': 5.975, 'epoch': 1.45}


                                                        
 36%|███▋      | 3276/9000 [1:28:57<1:00:43,  1.57it/s]

{'eval_loss': 0.35414671897888184, 'eval_f1': 0.8741865509761388, 'eval_runtime': 19.9769, 'eval_samples_per_second': 50.058, 'eval_steps_per_second': 6.257, 'epoch': 1.46}


                                                        
 37%|███▋      | 3288/9000 [1:29:23<50:50,  1.87it/s]

{'eval_loss': 0.3633483052253723, 'eval_f1': 0.8861244019138756, 'eval_runtime': 21.0663, 'eval_samples_per_second': 47.469, 'eval_steps_per_second': 5.934, 'epoch': 1.46}


                                                        
 37%|███▋      | 3300/9000 [1:29:48<59:58,  1.58it/s]

{'eval_loss': 0.28422948718070984, 'eval_f1': 0.9014084507042254, 'eval_runtime': 19.3644, 'eval_samples_per_second': 51.641, 'eval_steps_per_second': 6.455, 'epoch': 1.47}


                                                        
 37%|███▋      | 3312/9000 [1:30:13<55:53,  1.70it/s]

{'eval_loss': 0.3033771216869354, 'eval_f1': 0.8832807570977919, 'eval_runtime': 19.1541, 'eval_samples_per_second': 52.208, 'eval_steps_per_second': 6.526, 'epoch': 1.47}


                                                       
 37%|███▋      | 3324/9000 [1:30:37<48:22,  1.96it/s]

{'eval_loss': 0.3220026195049286, 'eval_f1': 0.8986083499005963, 'eval_runtime': 19.1343, 'eval_samples_per_second': 52.262, 'eval_steps_per_second': 6.533, 'epoch': 1.48}


                                                       
 37%|███▋      | 3336/9000 [1:31:04<1:02:06,  1.52it/s]

{'eval_loss': 0.32400500774383545, 'eval_f1': 0.8907563025210085, 'eval_runtime': 19.8667, 'eval_samples_per_second': 50.335, 'eval_steps_per_second': 6.292, 'epoch': 1.48}


                                                        
 37%|███▋      | 3348/9000 [1:31:29<50:35,  1.86it/s]

{'eval_loss': 0.3567418158054352, 'eval_f1': 0.8737270875763747, 'eval_runtime': 20.7999, 'eval_samples_per_second': 48.077, 'eval_steps_per_second': 6.01, 'epoch': 1.49}


                                                        
 37%|███▋      | 3360/9000 [1:31:50<48:10,  1.95it/s]

{'eval_loss': 0.3625836968421936, 'eval_f1': 0.8524229074889867, 'eval_runtime': 15.4541, 'eval_samples_per_second': 64.708, 'eval_steps_per_second': 8.088, 'epoch': 1.49}


                                                       
 37%|███▋      | 3372/9000 [1:32:10<37:19,  2.51it/s]

{'eval_loss': 0.333431214094162, 'eval_f1': 0.8796920115495669, 'eval_runtime': 16.8419, 'eval_samples_per_second': 59.376, 'eval_steps_per_second': 7.422, 'epoch': 1.5}


 38%|███▊      | 3375/9000 [1:32:11<4:30:19,  2.88s/it]

{'loss': 0.3229, 'learning_rate': 0.0001251111111111111, 'epoch': 1.5}


                                                       
 38%|███▊      | 3384/9000 [1:32:32<49:25,  1.89it/s]

{'eval_loss': 0.27589941024780273, 'eval_f1': 0.8913934426229508, 'eval_runtime': 17.3709, 'eval_samples_per_second': 57.567, 'eval_steps_per_second': 7.196, 'epoch': 1.5}


                                                       
 38%|███▊      | 3396/9000 [1:32:55<48:14,  1.94it/s]

{'eval_loss': 0.2824060022830963, 'eval_f1': 0.8931451612903226, 'eval_runtime': 17.8136, 'eval_samples_per_second': 56.137, 'eval_steps_per_second': 7.017, 'epoch': 1.51}


                                                       
 38%|███▊      | 3408/9000 [1:33:17<48:53,  1.91it/s]

{'eval_loss': 0.4790886342525482, 'eval_f1': 0.8538116591928252, 'eval_runtime': 17.5412, 'eval_samples_per_second': 57.009, 'eval_steps_per_second': 7.126, 'epoch': 1.51}


                                                       
 38%|███▊      | 3420/9000 [1:33:39<46:40,  1.99it/s]

{'eval_loss': 0.4108629524707794, 'eval_f1': 0.838487972508591, 'eval_runtime': 17.6758, 'eval_samples_per_second': 56.575, 'eval_steps_per_second': 7.072, 'epoch': 1.52}


                                                       
 38%|███▊      | 3432/9000 [1:34:02<44:40,  2.08it/s]

{'eval_loss': 0.31852349638938904, 'eval_f1': 0.8817891373801917, 'eval_runtime': 18.0, 'eval_samples_per_second': 55.555, 'eval_steps_per_second': 6.944, 'epoch': 1.53}


                                                       
 38%|███▊      | 3444/9000 [1:34:24<39:26,  2.35it/s]

{'eval_loss': 0.2796299457550049, 'eval_f1': 0.896551724137931, 'eval_runtime': 16.9754, 'eval_samples_per_second': 58.909, 'eval_steps_per_second': 7.364, 'epoch': 1.53}


                                                       
 38%|███▊      | 3456/9000 [1:34:46<45:44,  2.02it/s]

{'eval_loss': 0.34690746665000916, 'eval_f1': 0.853631284916201, 'eval_runtime': 17.3144, 'eval_samples_per_second': 57.755, 'eval_steps_per_second': 7.219, 'epoch': 1.54}


                                                       
 39%|███▊      | 3468/9000 [1:35:08<41:05,  2.24it/s]

{'eval_loss': 0.2730313539505005, 'eval_f1': 0.896694214876033, 'eval_runtime': 18.1362, 'eval_samples_per_second': 55.138, 'eval_steps_per_second': 6.892, 'epoch': 1.54}


                                                       
 39%|███▊      | 3480/9000 [1:35:31<46:46,  1.97it/s]

{'eval_loss': 0.2934037148952484, 'eval_f1': 0.8932038834951457, 'eval_runtime': 17.6101, 'eval_samples_per_second': 56.786, 'eval_steps_per_second': 7.098, 'epoch': 1.55}


                                                       
 39%|███▉      | 3492/9000 [1:35:53<46:18,  1.98it/s]

{'eval_loss': 0.26265770196914673, 'eval_f1': 0.8969823100936525, 'eval_runtime': 17.8534, 'eval_samples_per_second': 56.012, 'eval_steps_per_second': 7.001, 'epoch': 1.55}


                                                       
 39%|███▉      | 3504/9000 [1:36:16<45:36,  2.01it/s]

{'eval_loss': 0.3568283021450043, 'eval_f1': 0.85456595264938, 'eval_runtime': 17.8067, 'eval_samples_per_second': 56.159, 'eval_steps_per_second': 7.02, 'epoch': 1.56}


                                                       
 39%|███▉      | 3516/9000 [1:36:38<48:55,  1.87it/s]

{'eval_loss': 0.3346381187438965, 'eval_f1': 0.875968992248062, 'eval_runtime': 17.5667, 'eval_samples_per_second': 56.926, 'eval_steps_per_second': 7.116, 'epoch': 1.56}


                                                       
 39%|███▉      | 3528/9000 [1:37:02<44:28,  2.05it/s]

{'eval_loss': 0.3005273640155792, 'eval_f1': 0.8765027322404371, 'eval_runtime': 18.5853, 'eval_samples_per_second': 53.806, 'eval_steps_per_second': 6.726, 'epoch': 1.57}


                                                       
 39%|███▉      | 3540/9000 [1:37:23<45:14,  2.01it/s]

{'eval_loss': 0.25756126642227173, 'eval_f1': 0.9157581764122894, 'eval_runtime': 16.4648, 'eval_samples_per_second': 60.736, 'eval_steps_per_second': 7.592, 'epoch': 1.57}


                                                       
 39%|███▉      | 3552/9000 [1:37:44<44:15,  2.05it/s]

{'eval_loss': 0.2480469048023224, 'eval_f1': 0.9023638232271325, 'eval_runtime': 16.9806, 'eval_samples_per_second': 58.891, 'eval_steps_per_second': 7.361, 'epoch': 1.58}


                                                       
 40%|███▉      | 3564/9000 [1:38:07<43:08,  2.10it/s]

{'eval_loss': 0.23420587182044983, 'eval_f1': 0.9085365853658536, 'eval_runtime': 17.6643, 'eval_samples_per_second': 56.611, 'eval_steps_per_second': 7.076, 'epoch': 1.58}


                                                       
 40%|███▉      | 3577/9000 [1:38:24<6:12:47,  4.12s/it]

{'eval_loss': 0.25450727343559265, 'eval_f1': 0.9027777777777777, 'eval_runtime': 12.3892, 'eval_samples_per_second': 80.716, 'eval_steps_per_second': 10.089, 'epoch': 1.59}


                                                       
 40%|███▉      | 3589/9000 [1:38:37<5:15:38,  3.50s/it]

{'eval_loss': 0.2615887522697449, 'eval_f1': 0.886339937434828, 'eval_runtime': 10.8897, 'eval_samples_per_second': 91.83, 'eval_steps_per_second': 11.479, 'epoch': 1.59}


                                                       
 40%|████      | 3601/9000 [1:38:49<4:44:24,  3.16s/it]

{'eval_loss': 0.30023008584976196, 'eval_f1': 0.9014354066985646, 'eval_runtime': 9.7452, 'eval_samples_per_second': 102.615, 'eval_steps_per_second': 12.827, 'epoch': 1.6}


                                                       
 40%|████      | 3613/9000 [1:39:00<4:08:23,  2.77s/it]

{'eval_loss': 0.29210734367370605, 'eval_f1': 0.9007936507936507, 'eval_runtime': 8.5505, 'eval_samples_per_second': 116.952, 'eval_steps_per_second': 14.619, 'epoch': 1.61}


                                                       
 40%|████      | 3625/9000 [1:39:10<4:07:20,  2.76s/it]

{'eval_loss': 0.342185378074646, 'eval_f1': 0.8914405010438413, 'eval_runtime': 8.5519, 'eval_samples_per_second': 116.934, 'eval_steps_per_second': 14.617, 'epoch': 1.61}


                                                       
 40%|████      | 3637/9000 [1:39:20<4:06:17,  2.76s/it]

{'eval_loss': 0.3721086382865906, 'eval_f1': 0.8759439050701187, 'eval_runtime': 8.4447, 'eval_samples_per_second': 118.417, 'eval_steps_per_second': 14.802, 'epoch': 1.62}


                                                       
 41%|████      | 3649/9000 [1:39:31<4:03:23,  2.73s/it]

{'eval_loss': 0.3137592375278473, 'eval_f1': 0.903096903096903, 'eval_runtime': 8.4389, 'eval_samples_per_second': 118.498, 'eval_steps_per_second': 14.812, 'epoch': 1.62}


                                                       
 41%|████      | 3661/9000 [1:39:41<4:03:20,  2.73s/it]

{'eval_loss': 0.267780601978302, 'eval_f1': 0.9135300101729399, 'eval_runtime': 8.4466, 'eval_samples_per_second': 118.39, 'eval_steps_per_second': 14.799, 'epoch': 1.63}


                                                       
 41%|████      | 3673/9000 [1:39:52<4:04:39,  2.76s/it]

{'eval_loss': 0.26931408047676086, 'eval_f1': 0.9125628140703518, 'eval_runtime': 8.5384, 'eval_samples_per_second': 117.118, 'eval_steps_per_second': 14.64, 'epoch': 1.63}


                                                       
 41%|████      | 3685/9000 [1:40:02<4:02:50,  2.74s/it]

{'eval_loss': 0.3073907494544983, 'eval_f1': 0.8957055214723927, 'eval_runtime': 8.4674, 'eval_samples_per_second': 118.1, 'eval_steps_per_second': 14.763, 'epoch': 1.64}


                                                       
 41%|████      | 3697/9000 [1:40:12<4:00:26,  2.72s/it]

{'eval_loss': 0.30199873447418213, 'eval_f1': 0.8866189989785496, 'eval_runtime': 8.4317, 'eval_samples_per_second': 118.6, 'eval_steps_per_second': 14.825, 'epoch': 1.64}


                                                       
 41%|████      | 3709/9000 [1:40:23<4:00:30,  2.73s/it]

{'eval_loss': 0.35897162556648254, 'eval_f1': 0.8708133971291866, 'eval_runtime': 8.451, 'eval_samples_per_second': 118.33, 'eval_steps_per_second': 14.791, 'epoch': 1.65}


                                                       
 41%|████▏     | 3721/9000 [1:40:33<3:58:37,  2.71s/it]

{'eval_loss': 0.28974273800849915, 'eval_f1': 0.8877338877338877, 'eval_runtime': 8.4041, 'eval_samples_per_second': 118.99, 'eval_steps_per_second': 14.874, 'epoch': 1.65}


                                                       
 41%|████▏     | 3733/9000 [1:40:43<3:58:34,  2.72s/it]

{'eval_loss': 0.2808511257171631, 'eval_f1': 0.89738430583501, 'eval_runtime': 8.3985, 'eval_samples_per_second': 119.07, 'eval_steps_per_second': 14.884, 'epoch': 1.66}


                                                       
 42%|████▏     | 3745/9000 [1:40:54<3:58:40,  2.73s/it]

{'eval_loss': 0.35688841342926025, 'eval_f1': 0.8491620111731844, 'eval_runtime': 8.4419, 'eval_samples_per_second': 118.456, 'eval_steps_per_second': 14.807, 'epoch': 1.66}


                                                       
 42%|████▏     | 3757/9000 [1:41:04<3:57:00,  2.71s/it]

{'eval_loss': 0.39955762028694153, 'eval_f1': 0.8944911297852474, 'eval_runtime': 8.3525, 'eval_samples_per_second': 119.725, 'eval_steps_per_second': 14.966, 'epoch': 1.67}


                                                       
 42%|████▏     | 3769/9000 [1:41:14<4:04:28,  2.80s/it]

{'eval_loss': 0.3765518069267273, 'eval_f1': 0.8455467869222097, 'eval_runtime': 8.746, 'eval_samples_per_second': 114.338, 'eval_steps_per_second': 14.292, 'epoch': 1.67}


                                                       
 42%|████▏     | 3781/9000 [1:41:26<4:20:20,  2.99s/it]

{'eval_loss': 0.2681735157966614, 'eval_f1': 0.8985507246376812, 'eval_runtime': 9.3296, 'eval_samples_per_second': 107.186, 'eval_steps_per_second': 13.398, 'epoch': 1.68}


                                                       
 42%|████▏     | 3793/9000 [1:41:37<4:23:01,  3.03s/it]

{'eval_loss': 0.2507641315460205, 'eval_f1': 0.9048582995951419, 'eval_runtime': 9.4357, 'eval_samples_per_second': 105.98, 'eval_steps_per_second': 13.248, 'epoch': 1.69}


                                                       
 42%|████▏     | 3805/9000 [1:41:48<4:11:09,  2.90s/it]

{'eval_loss': 0.2504849135875702, 'eval_f1': 0.8950159066808059, 'eval_runtime': 8.9909, 'eval_samples_per_second': 111.224, 'eval_steps_per_second': 13.903, 'epoch': 1.69}


                                                       
 42%|████▏     | 3817/9000 [1:42:00<4:44:14,  3.29s/it]

{'eval_loss': 0.3078482151031494, 'eval_f1': 0.8447488584474887, 'eval_runtime': 10.2338, 'eval_samples_per_second': 97.715, 'eval_steps_per_second': 12.214, 'epoch': 1.7}


                                                       
 43%|████▎     | 3829/9000 [1:42:12<4:24:49,  3.07s/it]

{'eval_loss': 0.27645665407180786, 'eval_f1': 0.8991008991008991, 'eval_runtime': 9.4469, 'eval_samples_per_second': 105.855, 'eval_steps_per_second': 13.232, 'epoch': 1.7}


                                                       
 43%|████▎     | 3841/9000 [1:42:24<4:20:27,  3.03s/it]

{'eval_loss': 0.27095186710357666, 'eval_f1': 0.9024134312696748, 'eval_runtime': 9.3835, 'eval_samples_per_second': 106.57, 'eval_steps_per_second': 13.321, 'epoch': 1.71}


                                                       
 43%|████▎     | 3853/9000 [1:42:35<4:18:56,  3.02s/it]

{'eval_loss': 0.25059136748313904, 'eval_f1': 0.905857740585774, 'eval_runtime': 9.3582, 'eval_samples_per_second': 106.858, 'eval_steps_per_second': 13.357, 'epoch': 1.71}


                                                       
 43%|████▎     | 3865/9000 [1:42:47<4:19:12,  3.03s/it]

{'eval_loss': 0.24086782336235046, 'eval_f1': 0.917766497461929, 'eval_runtime': 9.4028, 'eval_samples_per_second': 106.351, 'eval_steps_per_second': 13.294, 'epoch': 1.72}


                                                       
 43%|████▎     | 3877/9000 [1:42:58<4:17:21,  3.01s/it]

{'eval_loss': 0.38112178444862366, 'eval_f1': 0.8523111612175874, 'eval_runtime': 9.343, 'eval_samples_per_second': 107.032, 'eval_steps_per_second': 13.379, 'epoch': 1.72}


                                                       
 43%|████▎     | 3889/9000 [1:43:09<4:12:36,  2.97s/it]

{'eval_loss': 0.2864253520965576, 'eval_f1': 0.900212314225053, 'eval_runtime': 9.2052, 'eval_samples_per_second': 108.634, 'eval_steps_per_second': 13.579, 'epoch': 1.73}


                                                       
 43%|████▎     | 3901/9000 [1:43:21<4:14:12,  2.99s/it]

{'eval_loss': 0.270770788192749, 'eval_f1': 0.9023354564755839, 'eval_runtime': 9.2816, 'eval_samples_per_second': 107.74, 'eval_steps_per_second': 13.467, 'epoch': 1.73}


                                                       
 43%|████▎     | 3913/9000 [1:43:32<4:13:41,  2.99s/it]

{'eval_loss': 0.2698708474636078, 'eval_f1': 0.9030239833159542, 'eval_runtime': 9.289, 'eval_samples_per_second': 107.654, 'eval_steps_per_second': 13.457, 'epoch': 1.74}


                                                       
 44%|████▎     | 3924/9000 [1:43:43<19:16,  4.39it/s]

{'eval_loss': 0.28548792004585266, 'eval_f1': 0.9109311740890689, 'eval_runtime': 9.2625, 'eval_samples_per_second': 107.962, 'eval_steps_per_second': 13.495, 'epoch': 1.74}


                                                       
 44%|████▎     | 3937/9000 [1:43:54<4:09:48,  2.96s/it]

{'eval_loss': 0.2888699471950531, 'eval_f1': 0.8938428874734607, 'eval_runtime': 9.1727, 'eval_samples_per_second': 109.019, 'eval_steps_per_second': 13.627, 'epoch': 1.75}


                                                       
 44%|████▍     | 3949/9000 [1:44:06<4:08:45,  2.95s/it]

{'eval_loss': 0.24417170882225037, 'eval_f1': 0.9196787148594379, 'eval_runtime': 9.156, 'eval_samples_per_second': 109.218, 'eval_steps_per_second': 13.652, 'epoch': 1.75}


                                                       
 44%|████▍     | 3961/9000 [1:44:17<4:11:10,  2.99s/it]

{'eval_loss': 0.2879883646965027, 'eval_f1': 0.8933901918976547, 'eval_runtime': 9.2778, 'eval_samples_per_second': 107.784, 'eval_steps_per_second': 13.473, 'epoch': 1.76}


                                                       
 44%|████▍     | 3973/9000 [1:44:28<4:11:01,  3.00s/it]

{'eval_loss': 0.28055164217948914, 'eval_f1': 0.905775075987842, 'eval_runtime': 9.301, 'eval_samples_per_second': 107.516, 'eval_steps_per_second': 13.439, 'epoch': 1.77}


                                                       
 44%|████▍     | 3985/9000 [1:44:40<4:11:54,  3.01s/it]

{'eval_loss': 0.26774874329566956, 'eval_f1': 0.907070707070707, 'eval_runtime': 9.3472, 'eval_samples_per_second': 106.984, 'eval_steps_per_second': 13.373, 'epoch': 1.77}


                                                       
 44%|████▍     | 3997/9000 [1:44:51<4:14:01,  3.05s/it]

{'eval_loss': 0.24214869737625122, 'eval_f1': 0.9126016260162602, 'eval_runtime': 9.4491, 'eval_samples_per_second': 105.83, 'eval_steps_per_second': 13.229, 'epoch': 1.78}


                                                       
 45%|████▍     | 4009/9000 [1:45:03<4:19:28,  3.12s/it]

{'eval_loss': 0.3787365257740021, 'eval_f1': 0.8342857142857144, 'eval_runtime': 9.6729, 'eval_samples_per_second': 103.381, 'eval_steps_per_second': 12.923, 'epoch': 1.78}


                                                       
 45%|████▍     | 4021/9000 [1:45:15<4:21:28,  3.15s/it]

{'eval_loss': 0.27516067028045654, 'eval_f1': 0.9007936507936507, 'eval_runtime': 9.7833, 'eval_samples_per_second': 102.215, 'eval_steps_per_second': 12.777, 'epoch': 1.79}


                                                       
 45%|████▍     | 4033/9000 [1:45:27<4:18:53,  3.13s/it]

{'eval_loss': 0.2631443738937378, 'eval_f1': 0.9040247678018576, 'eval_runtime': 9.6707, 'eval_samples_per_second': 103.405, 'eval_steps_per_second': 12.926, 'epoch': 1.79}


                                                       
 45%|████▍     | 4045/9000 [1:45:38<4:11:41,  3.05s/it]

{'eval_loss': 0.28074443340301514, 'eval_f1': 0.9032921810699588, 'eval_runtime': 9.4298, 'eval_samples_per_second': 106.047, 'eval_steps_per_second': 13.256, 'epoch': 1.8}


                                                       
 45%|████▌     | 4057/9000 [1:45:50<4:07:35,  3.01s/it]

{'eval_loss': 0.8298544883728027, 'eval_f1': 0.6746347941567065, 'eval_runtime': 9.3234, 'eval_samples_per_second': 107.258, 'eval_steps_per_second': 13.407, 'epoch': 1.8}


                                                       
 45%|████▌     | 4069/9000 [1:46:01<4:08:19,  3.02s/it]

{'eval_loss': 0.29068732261657715, 'eval_f1': 0.8950749464668094, 'eval_runtime': 9.3392, 'eval_samples_per_second': 107.075, 'eval_steps_per_second': 13.384, 'epoch': 1.81}


                                                       
 45%|████▌     | 4081/9000 [1:46:13<4:06:08,  3.00s/it]

{'eval_loss': 0.257845401763916, 'eval_f1': 0.8981288981288981, 'eval_runtime': 9.2741, 'eval_samples_per_second': 107.827, 'eval_steps_per_second': 13.478, 'epoch': 1.81}


                                                       
 45%|████▌     | 4093/9000 [1:46:24<4:02:07,  2.96s/it]

{'eval_loss': 0.261661559343338, 'eval_f1': 0.9123505976095618, 'eval_runtime': 9.1676, 'eval_samples_per_second': 109.08, 'eval_steps_per_second': 13.635, 'epoch': 1.82}


                                                       
 46%|████▌     | 4105/9000 [1:46:35<4:03:03,  2.98s/it]

{'eval_loss': 0.27525877952575684, 'eval_f1': 0.8976545842217484, 'eval_runtime': 9.25, 'eval_samples_per_second': 108.108, 'eval_steps_per_second': 13.514, 'epoch': 1.82}


                                                       
 46%|████▌     | 4117/9000 [1:46:46<4:02:46,  2.98s/it]

{'eval_loss': 0.27612021565437317, 'eval_f1': 0.9154518950437318, 'eval_runtime': 9.2383, 'eval_samples_per_second': 108.246, 'eval_steps_per_second': 13.531, 'epoch': 1.83}


                                                       
 46%|████▌     | 4129/9000 [1:46:58<4:01:10,  2.97s/it]

{'eval_loss': 0.29324400424957275, 'eval_f1': 0.8765027322404371, 'eval_runtime': 9.1935, 'eval_samples_per_second': 108.772, 'eval_steps_per_second': 13.597, 'epoch': 1.83}


                                                       
 46%|████▌     | 4141/9000 [1:47:09<4:00:06,  2.96s/it]

{'eval_loss': 0.23041106760501862, 'eval_f1': 0.9166666666666666, 'eval_runtime': 9.1875, 'eval_samples_per_second': 108.843, 'eval_steps_per_second': 13.605, 'epoch': 1.84}


                                                       
 46%|████▌     | 4153/9000 [1:47:20<3:58:49,  2.96s/it]

{'eval_loss': 0.2299274504184723, 'eval_f1': 0.917864476386037, 'eval_runtime': 9.1435, 'eval_samples_per_second': 109.367, 'eval_steps_per_second': 13.671, 'epoch': 1.85}


                                                       
 46%|████▋     | 4165/9000 [1:47:32<4:14:02,  3.15s/it]

{'eval_loss': 0.2987383008003235, 'eval_f1': 0.8732083792723263, 'eval_runtime': 9.8093, 'eval_samples_per_second': 101.944, 'eval_steps_per_second': 12.743, 'epoch': 1.85}


                                                       
 46%|████▋     | 4177/9000 [1:47:43<4:02:17,  3.01s/it]

{'eval_loss': 0.2810184955596924, 'eval_f1': 0.900763358778626, 'eval_runtime': 9.3224, 'eval_samples_per_second': 107.268, 'eval_steps_per_second': 13.409, 'epoch': 1.86}


                                                       
 47%|████▋     | 4189/9000 [1:47:55<4:00:47,  3.00s/it]

{'eval_loss': 0.25580811500549316, 'eval_f1': 0.8977871443624867, 'eval_runtime': 9.3149, 'eval_samples_per_second': 107.355, 'eval_steps_per_second': 13.419, 'epoch': 1.86}


                                                       
 47%|████▋     | 4201/9000 [1:48:06<3:58:48,  2.99s/it]

{'eval_loss': 0.2639167308807373, 'eval_f1': 0.8997912317327765, 'eval_runtime': 9.246, 'eval_samples_per_second': 108.155, 'eval_steps_per_second': 13.519, 'epoch': 1.87}


                                                       
 47%|████▋     | 4213/9000 [1:48:17<4:06:33,  3.09s/it]

{'eval_loss': 0.24769771099090576, 'eval_f1': 0.9116751269035532, 'eval_runtime': 9.6086, 'eval_samples_per_second': 104.073, 'eval_steps_per_second': 13.009, 'epoch': 1.87}


                                                       
 47%|████▋     | 4225/9000 [1:48:30<4:20:46,  3.28s/it]

{'eval_loss': 0.2540426552295685, 'eval_f1': 0.9030303030303031, 'eval_runtime': 10.207, 'eval_samples_per_second': 97.972, 'eval_steps_per_second': 12.247, 'epoch': 1.88}


                                                       
 47%|████▋     | 4237/9000 [1:48:42<4:07:37,  3.12s/it]

{'eval_loss': 0.33690470457077026, 'eval_f1': 0.88113413304253, 'eval_runtime': 9.6644, 'eval_samples_per_second': 103.472, 'eval_steps_per_second': 12.934, 'epoch': 1.88}


                                                       
 47%|████▋     | 4249/9000 [1:48:54<4:09:53,  3.16s/it]

{'eval_loss': 0.2964599132537842, 'eval_f1': 0.8985507246376812, 'eval_runtime': 9.7655, 'eval_samples_per_second': 102.401, 'eval_steps_per_second': 12.8, 'epoch': 1.89}


                                                       
 47%|████▋     | 4261/9000 [1:49:05<4:03:50,  3.09s/it]

{'eval_loss': 0.28137293457984924, 'eval_f1': 0.9048086359175662, 'eval_runtime': 9.5781, 'eval_samples_per_second': 104.405, 'eval_steps_per_second': 13.051, 'epoch': 1.89}


                                                       
 47%|████▋     | 4273/9000 [1:49:17<4:03:09,  3.09s/it]

{'eval_loss': 0.302327036857605, 'eval_f1': 0.8822314049586777, 'eval_runtime': 9.5533, 'eval_samples_per_second': 104.675, 'eval_steps_per_second': 13.084, 'epoch': 1.9}


                                                       
 48%|████▊     | 4285/9000 [1:49:29<4:03:50,  3.10s/it]

{'eval_loss': 0.3127783536911011, 'eval_f1': 0.8893320039880359, 'eval_runtime': 9.6295, 'eval_samples_per_second': 103.847, 'eval_steps_per_second': 12.981, 'epoch': 1.9}


                                                       
 48%|████▊     | 4297/9000 [1:49:41<4:10:40,  3.20s/it]

{'eval_loss': 0.32059526443481445, 'eval_f1': 0.8750000000000001, 'eval_runtime': 9.9266, 'eval_samples_per_second': 100.74, 'eval_steps_per_second': 12.592, 'epoch': 1.91}


                                                       
 48%|████▊     | 4308/9000 [1:49:54<19:31,  4.01it/s]

{'eval_loss': 0.2782721519470215, 'eval_f1': 0.8948948948948948, 'eval_runtime': 11.2151, 'eval_samples_per_second': 89.166, 'eval_steps_per_second': 11.146, 'epoch': 1.91}


                                                       
 48%|████▊     | 4321/9000 [1:50:08<4:38:03,  3.57s/it]

{'eval_loss': 0.3275489807128906, 'eval_f1': 0.8802281368821293, 'eval_runtime': 11.0657, 'eval_samples_per_second': 90.37, 'eval_steps_per_second': 11.296, 'epoch': 1.92}


                                                       
 48%|████▊     | 4333/9000 [1:50:22<4:56:54,  3.82s/it]

{'eval_loss': 0.25417155027389526, 'eval_f1': 0.894901144640999, 'eval_runtime': 11.8764, 'eval_samples_per_second': 84.201, 'eval_steps_per_second': 10.525, 'epoch': 1.93}


                                                       
 48%|████▊     | 4345/9000 [1:50:33<3:47:16,  2.93s/it]

{'eval_loss': 0.2894713878631592, 'eval_f1': 0.8991354466858789, 'eval_runtime': 9.0397, 'eval_samples_per_second': 110.623, 'eval_steps_per_second': 13.828, 'epoch': 1.93}


                                                       
 48%|████▊     | 4357/9000 [1:50:45<4:03:32,  3.15s/it]

{'eval_loss': 0.24644115567207336, 'eval_f1': 0.9132231404958677, 'eval_runtime': 9.8105, 'eval_samples_per_second': 101.931, 'eval_steps_per_second': 12.741, 'epoch': 1.94}


                                                       
 49%|████▊     | 4369/9000 [1:50:56<3:47:09,  2.94s/it]

{'eval_loss': 0.2805362641811371, 'eval_f1': 0.9012707722385142, 'eval_runtime': 9.091, 'eval_samples_per_second': 109.999, 'eval_steps_per_second': 13.75, 'epoch': 1.94}


                                                       
 49%|████▊     | 4380/9000 [1:51:09<19:50,  3.88it/s]

{'eval_loss': 0.2655471861362457, 'eval_f1': 0.9164969450101833, 'eval_runtime': 10.54, 'eval_samples_per_second': 94.876, 'eval_steps_per_second': 11.86, 'epoch': 1.95}


                                                       
 49%|████▉     | 4393/9000 [1:51:22<4:29:35,  3.51s/it]

{'eval_loss': 0.273193359375, 'eval_f1': 0.905503634475597, 'eval_runtime': 10.8965, 'eval_samples_per_second': 91.773, 'eval_steps_per_second': 11.472, 'epoch': 1.95}


                                                       
 49%|████▉     | 4404/9000 [1:51:36<19:58,  3.84it/s]

{'eval_loss': 0.2739630341529846, 'eval_f1': 0.9072164948453608, 'eval_runtime': 11.2677, 'eval_samples_per_second': 88.749, 'eval_steps_per_second': 11.094, 'epoch': 1.96}


                                                       
 49%|████▉     | 4416/9000 [1:51:49<19:48,  3.86it/s]

{'eval_loss': 0.2718208134174347, 'eval_f1': 0.9180327868852459, 'eval_runtime': 10.7105, 'eval_samples_per_second': 93.367, 'eval_steps_per_second': 11.671, 'epoch': 1.96}


                                                       
 49%|████▉     | 4429/9000 [1:52:02<4:20:49,  3.42s/it]

{'eval_loss': 0.31680068373680115, 'eval_f1': 0.8927038626609443, 'eval_runtime': 10.6194, 'eval_samples_per_second': 94.167, 'eval_steps_per_second': 11.771, 'epoch': 1.97}


                                                       
 49%|████▉     | 4440/9000 [1:52:15<19:17,  3.94it/s]

{'eval_loss': 0.28651583194732666, 'eval_f1': 0.9066666666666667, 'eval_runtime': 10.6977, 'eval_samples_per_second': 93.478, 'eval_steps_per_second': 11.685, 'epoch': 1.97}


                                                       
 49%|████▉     | 4453/9000 [1:52:28<4:18:44,  3.41s/it]

{'eval_loss': 0.31033527851104736, 'eval_f1': 0.9, 'eval_runtime': 10.601, 'eval_samples_per_second': 94.33, 'eval_steps_per_second': 11.791, 'epoch': 1.98}


                                                       
 50%|████▉     | 4465/9000 [1:52:41<4:18:25,  3.42s/it]

{'eval_loss': 0.2993491291999817, 'eval_f1': 0.8859934853420195, 'eval_runtime': 10.5823, 'eval_samples_per_second': 94.497, 'eval_steps_per_second': 11.812, 'epoch': 1.98}


                                                       
 50%|████▉     | 4476/9000 [1:52:54<18:57,  3.98it/s]

{'eval_loss': 0.29367896914482117, 'eval_f1': 0.9064327485380117, 'eval_runtime': 10.7399, 'eval_samples_per_second': 93.11, 'eval_steps_per_second': 11.639, 'epoch': 1.99}


                                                       
 50%|████▉     | 4488/9000 [1:53:07<19:24,  3.87it/s]

{'eval_loss': 0.2618921399116516, 'eval_f1': 0.9147442326980942, 'eval_runtime': 10.7733, 'eval_samples_per_second': 92.822, 'eval_steps_per_second': 11.603, 'epoch': 1.99}


 50%|█████     | 4500/9000 [1:53:09<18:40,  4.02it/s]  

{'loss': 0.2875, 'learning_rate': 0.0001001111111111111, 'epoch': 2.0}


                                                     
 50%|█████     | 4501/9000 [1:53:19<3:51:28,  3.09s/it]

{'eval_loss': 0.2407284826040268, 'eval_f1': 0.9149377593360997, 'eval_runtime': 9.5375, 'eval_samples_per_second': 104.85, 'eval_steps_per_second': 13.106, 'epoch': 2.0}


                                                       
 50%|█████     | 4513/9000 [1:53:30<3:37:49,  2.91s/it]

{'eval_loss': 0.23634584248065948, 'eval_f1': 0.9148717948717948, 'eval_runtime': 9.0279, 'eval_samples_per_second': 110.768, 'eval_steps_per_second': 13.846, 'epoch': 2.01}


                                                       
 50%|█████     | 4525/9000 [1:53:41<3:45:04,  3.02s/it]

{'eval_loss': 0.2610933780670166, 'eval_f1': 0.9126016260162602, 'eval_runtime': 9.3825, 'eval_samples_per_second': 106.582, 'eval_steps_per_second': 13.323, 'epoch': 2.01}


                                                       
 50%|█████     | 4536/9000 [1:53:52<16:50,  4.42it/s]

{'eval_loss': 0.31481850147247314, 'eval_f1': 0.9026178010471204, 'eval_runtime': 9.6223, 'eval_samples_per_second': 103.926, 'eval_steps_per_second': 12.991, 'epoch': 2.02}


                                                       
 51%|█████     | 4549/9000 [1:54:05<4:02:01,  3.26s/it]

{'eval_loss': 0.3174744248390198, 'eval_f1': 0.9077868852459016, 'eval_runtime': 10.0734, 'eval_samples_per_second': 99.272, 'eval_steps_per_second': 12.409, 'epoch': 2.02}


                                                       
 51%|█████     | 4561/9000 [1:54:16<3:36:50,  2.93s/it]

{'eval_loss': 0.3248598575592041, 'eval_f1': 0.9003021148036253, 'eval_runtime': 9.037, 'eval_samples_per_second': 110.657, 'eval_steps_per_second': 13.832, 'epoch': 2.03}


                                                       
 51%|█████     | 4572/9000 [1:54:27<16:24,  4.50it/s]

{'eval_loss': 0.30468690395355225, 'eval_f1': 0.9085427135678393, 'eval_runtime': 9.0051, 'eval_samples_per_second': 111.048, 'eval_steps_per_second': 13.881, 'epoch': 2.03}


                                                       
 51%|█████     | 4585/9000 [1:54:38<3:32:32,  2.89s/it]

{'eval_loss': 0.28794926404953003, 'eval_f1': 0.9138655462184873, 'eval_runtime': 8.9633, 'eval_samples_per_second': 111.566, 'eval_steps_per_second': 13.946, 'epoch': 2.04}


                                                       
 51%|█████     | 4596/9000 [1:54:51<17:39,  4.16it/s]

{'eval_loss': 0.2786392867565155, 'eval_f1': 0.9204431017119838, 'eval_runtime': 10.3785, 'eval_samples_per_second': 96.353, 'eval_steps_per_second': 12.044, 'epoch': 2.04}


                                                       
 51%|█████     | 4609/9000 [1:55:03<3:43:38,  3.06s/it]

{'eval_loss': 0.2919924259185791, 'eval_f1': 0.914572864321608, 'eval_runtime': 9.4197, 'eval_samples_per_second': 106.16, 'eval_steps_per_second': 13.27, 'epoch': 2.05}


                                                       
 51%|█████▏    | 4621/9000 [1:55:14<3:40:51,  3.03s/it]

{'eval_loss': 0.39333605766296387, 'eval_f1': 0.8914069877242683, 'eval_runtime': 9.4026, 'eval_samples_per_second': 106.353, 'eval_steps_per_second': 13.294, 'epoch': 2.05}


                                                       
 51%|█████▏    | 4633/9000 [1:55:25<3:33:26,  2.93s/it]

{'eval_loss': 0.37637561559677124, 'eval_f1': 0.8703296703296702, 'eval_runtime': 9.1026, 'eval_samples_per_second': 109.859, 'eval_steps_per_second': 13.732, 'epoch': 2.06}


                                                       
 52%|█████▏    | 4645/9000 [1:55:36<3:31:21,  2.91s/it]

{'eval_loss': 0.3353974521160126, 'eval_f1': 0.902845927379784, 'eval_runtime': 9.0376, 'eval_samples_per_second': 110.649, 'eval_steps_per_second': 13.831, 'epoch': 2.06}


                                                       
 52%|█████▏    | 4656/9000 [1:55:49<16:09,  4.48it/s]

{'eval_loss': 0.28279322385787964, 'eval_f1': 0.9127789046653143, 'eval_runtime': 11.4006, 'eval_samples_per_second': 87.714, 'eval_steps_per_second': 10.964, 'epoch': 2.07}


                                                       
 52%|█████▏    | 4668/9000 [1:56:05<21:21,  3.38it/s]

{'eval_loss': 0.2609430253505707, 'eval_f1': 0.9065606361829026, 'eval_runtime': 12.3044, 'eval_samples_per_second': 81.272, 'eval_steps_per_second': 10.159, 'epoch': 2.07}


                                                       
 52%|█████▏    | 4680/9000 [1:56:20<21:25,  3.36it/s]

{'eval_loss': 0.26293516159057617, 'eval_f1': 0.9067713444553482, 'eval_runtime': 12.3606, 'eval_samples_per_second': 80.902, 'eval_steps_per_second': 10.113, 'epoch': 2.08}


                                                       
 52%|█████▏    | 4692/9000 [1:56:35<21:21,  3.36it/s]

{'eval_loss': 0.22443725168704987, 'eval_f1': 0.917766497461929, 'eval_runtime': 12.2355, 'eval_samples_per_second': 81.73, 'eval_steps_per_second': 10.216, 'epoch': 2.09}


                                                       
 52%|█████▏    | 4704/9000 [1:56:50<21:25,  3.34it/s]

{'eval_loss': 0.2579505145549774, 'eval_f1': 0.907843137254902, 'eval_runtime': 12.3288, 'eval_samples_per_second': 81.111, 'eval_steps_per_second': 10.139, 'epoch': 2.09}


                                                       
 52%|█████▏    | 4716/9000 [1:57:04<21:31,  3.32it/s]

{'eval_loss': 0.37445884943008423, 'eval_f1': 0.8938967136150234, 'eval_runtime': 12.0079, 'eval_samples_per_second': 83.278, 'eval_steps_per_second': 10.41, 'epoch': 2.1}


                                                       
 53%|█████▎    | 4728/9000 [1:57:19<20:55,  3.40it/s]

{'eval_loss': 0.29215770959854126, 'eval_f1': 0.9085365853658536, 'eval_runtime': 11.9317, 'eval_samples_per_second': 83.81, 'eval_steps_per_second': 10.476, 'epoch': 2.1}


                                                       
 53%|█████▎    | 4740/9000 [1:57:33<20:07,  3.53it/s]

{'eval_loss': 0.3318229913711548, 'eval_f1': 0.8879310344827586, 'eval_runtime': 11.4501, 'eval_samples_per_second': 87.336, 'eval_steps_per_second': 10.917, 'epoch': 2.11}


                                                       
 53%|█████▎    | 4752/9000 [1:57:47<20:02,  3.53it/s]

{'eval_loss': 0.29319682717323303, 'eval_f1': 0.9160621761658031, 'eval_runtime': 11.3667, 'eval_samples_per_second': 87.976, 'eval_steps_per_second': 10.997, 'epoch': 2.11}


                                                       
 53%|█████▎    | 4765/9000 [1:58:01<4:21:12,  3.70s/it]

{'eval_loss': 0.29156768321990967, 'eval_f1': 0.9122448979591837, 'eval_runtime': 11.459, 'eval_samples_per_second': 87.268, 'eval_steps_per_second': 10.908, 'epoch': 2.12}


                                                       
 53%|█████▎    | 4776/9000 [1:58:15<19:47,  3.56it/s]

{'eval_loss': 0.2772229015827179, 'eval_f1': 0.9148073022312373, 'eval_runtime': 11.432, 'eval_samples_per_second': 87.474, 'eval_steps_per_second': 10.934, 'epoch': 2.12}


                                                       
 53%|█████▎    | 4788/9000 [1:58:29<18:54,  3.71it/s]

{'eval_loss': 0.2666919231414795, 'eval_f1': 0.9187562688064193, 'eval_runtime': 11.2563, 'eval_samples_per_second': 88.839, 'eval_steps_per_second': 11.105, 'epoch': 2.13}


                                                       
 53%|█████▎    | 4800/9000 [1:58:43<19:29,  3.59it/s]

{'eval_loss': 0.28896069526672363, 'eval_f1': 0.9058473736372646, 'eval_runtime': 11.5025, 'eval_samples_per_second': 86.938, 'eval_steps_per_second': 10.867, 'epoch': 2.13}


                                                       
 53%|█████▎    | 4812/9000 [1:58:57<19:49,  3.52it/s]

{'eval_loss': 0.3328806161880493, 'eval_f1': 0.8924387646432376, 'eval_runtime': 11.5125, 'eval_samples_per_second': 86.862, 'eval_steps_per_second': 10.858, 'epoch': 2.14}


                                                       
 54%|█████▎    | 4824/9000 [1:59:11<21:15,  3.27it/s]

{'eval_loss': 0.29450395703315735, 'eval_f1': 0.9099901088031651, 'eval_runtime': 11.8892, 'eval_samples_per_second': 84.11, 'eval_steps_per_second': 10.514, 'epoch': 2.14}


                                                       
 54%|█████▎    | 4837/9000 [1:59:26<4:20:11,  3.75s/it]

{'eval_loss': 0.28273722529411316, 'eval_f1': 0.9087136929460581, 'eval_runtime': 11.6282, 'eval_samples_per_second': 85.998, 'eval_steps_per_second': 10.75, 'epoch': 2.15}


                                                       
 54%|█████▍    | 4848/9000 [1:59:40<19:45,  3.50it/s]

{'eval_loss': 0.26322034001350403, 'eval_f1': 0.9139465875370919, 'eval_runtime': 11.7423, 'eval_samples_per_second': 85.163, 'eval_steps_per_second': 10.645, 'epoch': 2.15}


                                                       
 54%|█████▍    | 4860/9000 [1:59:54<20:44,  3.33it/s]

{'eval_loss': 0.2514099180698395, 'eval_f1': 0.9196337741607324, 'eval_runtime': 11.3793, 'eval_samples_per_second': 87.879, 'eval_steps_per_second': 10.985, 'epoch': 2.16}


                                                       
 54%|█████▍    | 4872/9000 [2:00:08<20:12,  3.40it/s]

{'eval_loss': 0.2663588225841522, 'eval_f1': 0.9128205128205128, 'eval_runtime': 11.4625, 'eval_samples_per_second': 87.241, 'eval_steps_per_second': 10.905, 'epoch': 2.17}


                                                       
 54%|█████▍    | 4884/9000 [2:00:22<18:56,  3.62it/s]

{'eval_loss': 0.3124798536300659, 'eval_f1': 0.9050387596899224, 'eval_runtime': 11.5382, 'eval_samples_per_second': 86.668, 'eval_steps_per_second': 10.834, 'epoch': 2.17}


                                                       
 54%|█████▍    | 4896/9000 [2:00:36<18:59,  3.60it/s]

{'eval_loss': 0.3120502829551697, 'eval_f1': 0.8942917547568711, 'eval_runtime': 11.624, 'eval_samples_per_second': 86.029, 'eval_steps_per_second': 10.754, 'epoch': 2.18}


                                                       
 55%|█████▍    | 4908/9000 [2:00:50<19:19,  3.53it/s]

{'eval_loss': 0.270368367433548, 'eval_f1': 0.9105367793240556, 'eval_runtime': 11.2966, 'eval_samples_per_second': 88.522, 'eval_steps_per_second': 11.065, 'epoch': 2.18}


                                                       
 55%|█████▍    | 4920/9000 [2:01:04<18:13,  3.73it/s]

{'eval_loss': 0.3279954791069031, 'eval_f1': 0.8884165781083952, 'eval_runtime': 11.6747, 'eval_samples_per_second': 85.655, 'eval_steps_per_second': 10.707, 'epoch': 2.19}


                                                       
 55%|█████▍    | 4932/9000 [2:01:18<19:15,  3.52it/s]

{'eval_loss': 0.32615941762924194, 'eval_f1': 0.9044834307992202, 'eval_runtime': 11.7021, 'eval_samples_per_second': 85.455, 'eval_steps_per_second': 10.682, 'epoch': 2.19}


                                                       
 55%|█████▍    | 4944/9000 [2:01:32<19:00,  3.56it/s]

{'eval_loss': 0.33568334579467773, 'eval_f1': 0.9107142857142858, 'eval_runtime': 11.5853, 'eval_samples_per_second': 86.316, 'eval_steps_per_second': 10.789, 'epoch': 2.2}


                                                       
 55%|█████▌    | 4956/9000 [2:01:46<19:06,  3.53it/s]

{'eval_loss': 0.387647807598114, 'eval_f1': 0.9019230769230769, 'eval_runtime': 11.4562, 'eval_samples_per_second': 87.289, 'eval_steps_per_second': 10.911, 'epoch': 2.2}


                                                       
 55%|█████▌    | 4968/9000 [2:02:00<18:57,  3.55it/s]

{'eval_loss': 0.2942119538784027, 'eval_f1': 0.9173387096774193, 'eval_runtime': 11.368, 'eval_samples_per_second': 87.966, 'eval_steps_per_second': 10.996, 'epoch': 2.21}


                                                       
 55%|█████▌    | 4981/9000 [2:02:15<4:15:52,  3.82s/it]

{'eval_loss': 0.30550146102905273, 'eval_f1': 0.9026915113871635, 'eval_runtime': 11.8778, 'eval_samples_per_second': 84.19, 'eval_steps_per_second': 10.524, 'epoch': 2.21}


                                                       
 55%|█████▌    | 4992/9000 [2:02:29<18:07,  3.69it/s]

{'eval_loss': 0.28494226932525635, 'eval_f1': 0.9156626506024097, 'eval_runtime': 11.5024, 'eval_samples_per_second': 86.938, 'eval_steps_per_second': 10.867, 'epoch': 2.22}


                                                       
 56%|█████▌    | 5004/9000 [2:02:43<18:19,  3.64it/s]

{'eval_loss': 0.2905387878417969, 'eval_f1': 0.8891257995735609, 'eval_runtime': 11.7109, 'eval_samples_per_second': 85.391, 'eval_steps_per_second': 10.674, 'epoch': 2.22}


                                                       
 56%|█████▌    | 5017/9000 [2:02:57<4:06:47,  3.72s/it]

{'eval_loss': 0.2791866362094879, 'eval_f1': 0.886509635974304, 'eval_runtime': 11.5195, 'eval_samples_per_second': 86.81, 'eval_steps_per_second': 10.851, 'epoch': 2.23}


                                                       
 56%|█████▌    | 5029/9000 [2:03:12<4:11:05,  3.79s/it]

{'eval_loss': 0.23830604553222656, 'eval_f1': 0.9107883817427386, 'eval_runtime': 11.7813, 'eval_samples_per_second': 84.881, 'eval_steps_per_second': 10.61, 'epoch': 2.23}


                                                       
 56%|█████▌    | 5040/9000 [2:03:25<18:30,  3.57it/s]

{'eval_loss': 0.2371884435415268, 'eval_f1': 0.9139465875370919, 'eval_runtime': 11.5799, 'eval_samples_per_second': 86.356, 'eval_steps_per_second': 10.795, 'epoch': 2.24}


                                                       
 56%|█████▌    | 5052/9000 [2:03:40<18:23,  3.58it/s]

{'eval_loss': 0.31161555647850037, 'eval_f1': 0.899621212121212, 'eval_runtime': 11.5527, 'eval_samples_per_second': 86.56, 'eval_steps_per_second': 10.82, 'epoch': 2.25}


                                                       
 56%|█████▋    | 5065/9000 [2:03:53<3:58:11,  3.63s/it]

{'eval_loss': 0.2601183354854584, 'eval_f1': 0.9025423728813559, 'eval_runtime': 11.2557, 'eval_samples_per_second': 88.844, 'eval_steps_per_second': 11.105, 'epoch': 2.25}


                                                       
 56%|█████▋    | 5076/9000 [2:04:07<18:20,  3.57it/s]

{'eval_loss': 0.26179882884025574, 'eval_f1': 0.9165829145728643, 'eval_runtime': 11.476, 'eval_samples_per_second': 87.138, 'eval_steps_per_second': 10.892, 'epoch': 2.26}


                                                       
 57%|█████▋    | 5088/9000 [2:04:21<18:02,  3.61it/s]

{'eval_loss': 0.29138898849487305, 'eval_f1': 0.9081527347781218, 'eval_runtime': 11.3901, 'eval_samples_per_second': 87.795, 'eval_steps_per_second': 10.974, 'epoch': 2.26}


                                                       
 57%|█████▋    | 5100/9000 [2:04:35<17:55,  3.63it/s]

{'eval_loss': 0.3254626989364624, 'eval_f1': 0.9096209912536444, 'eval_runtime': 11.6902, 'eval_samples_per_second': 85.542, 'eval_steps_per_second': 10.693, 'epoch': 2.27}


                                                       
 57%|█████▋    | 5113/9000 [2:04:49<3:49:59,  3.55s/it]

{'eval_loss': 0.3195079267024994, 'eval_f1': 0.9119373776908023, 'eval_runtime': 10.9635, 'eval_samples_per_second': 91.212, 'eval_steps_per_second': 11.402, 'epoch': 2.27}


                                                       
 57%|█████▋    | 5124/9000 [2:05:02<17:12,  3.75it/s]

{'eval_loss': 0.3300839364528656, 'eval_f1': 0.90382626680455, 'eval_runtime': 11.0365, 'eval_samples_per_second': 90.608, 'eval_steps_per_second': 11.326, 'epoch': 2.28}


                                                       
 57%|█████▋    | 5136/9000 [2:05:16<18:46,  3.43it/s]

{'eval_loss': 0.2956523299217224, 'eval_f1': 0.9062821833161689, 'eval_runtime': 11.1694, 'eval_samples_per_second': 89.531, 'eval_steps_per_second': 11.191, 'epoch': 2.28}


                                                       
 57%|█████▋    | 5149/9000 [2:05:30<3:49:59,  3.58s/it]

{'eval_loss': 0.26479572057724, 'eval_f1': 0.9083503054989817, 'eval_runtime': 11.1187, 'eval_samples_per_second': 89.939, 'eval_steps_per_second': 11.242, 'epoch': 2.29}


                                                       
 57%|█████▋    | 5160/9000 [2:05:43<16:59,  3.77it/s]

{'eval_loss': 0.2592414319515228, 'eval_f1': 0.9137577002053389, 'eval_runtime': 10.7498, 'eval_samples_per_second': 93.025, 'eval_steps_per_second': 11.628, 'epoch': 2.29}


                                                       
 57%|█████▋    | 5172/9000 [2:05:56<18:14,  3.50it/s]

{'eval_loss': 0.24865682423114777, 'eval_f1': 0.917093142272262, 'eval_runtime': 10.9564, 'eval_samples_per_second': 91.271, 'eval_steps_per_second': 11.409, 'epoch': 2.3}


                                                       
 58%|█████▊    | 5185/9000 [2:06:10<3:43:42,  3.52s/it]

{'eval_loss': 0.2953903079032898, 'eval_f1': 0.9036402569593147, 'eval_runtime': 10.9002, 'eval_samples_per_second': 91.741, 'eval_steps_per_second': 11.468, 'epoch': 2.3}


                                                       
 58%|█████▊    | 5196/9000 [2:06:23<16:43,  3.79it/s]

{'eval_loss': 0.25601571798324585, 'eval_f1': 0.921025641025641, 'eval_runtime': 10.8803, 'eval_samples_per_second': 91.909, 'eval_steps_per_second': 11.489, 'epoch': 2.31}


                                                       
 58%|█████▊    | 5209/9000 [2:06:37<3:47:20,  3.60s/it]

{'eval_loss': 0.25674736499786377, 'eval_f1': 0.9164086687306502, 'eval_runtime': 11.1666, 'eval_samples_per_second': 89.553, 'eval_steps_per_second': 11.194, 'epoch': 2.31}


                                                       
 58%|█████▊    | 5220/9000 [2:06:50<16:42,  3.77it/s]

{'eval_loss': 0.2540174722671509, 'eval_f1': 0.92570281124498, 'eval_runtime': 11.1666, 'eval_samples_per_second': 89.553, 'eval_steps_per_second': 11.194, 'epoch': 2.32}


                                                       
 58%|█████▊    | 5232/9000 [2:07:04<16:33,  3.79it/s]

{'eval_loss': 0.3081328272819519, 'eval_f1': 0.9042448173741362, 'eval_runtime': 10.9908, 'eval_samples_per_second': 90.985, 'eval_steps_per_second': 11.373, 'epoch': 2.33}


                                                       
 58%|█████▊    | 5244/9000 [2:07:17<16:42,  3.75it/s]

{'eval_loss': 0.38827258348464966, 'eval_f1': 0.8838219326818675, 'eval_runtime': 10.9471, 'eval_samples_per_second': 91.348, 'eval_steps_per_second': 11.419, 'epoch': 2.33}


                                                       
 58%|█████▊    | 5256/9000 [2:07:30<16:28,  3.79it/s]

{'eval_loss': 0.3184383511543274, 'eval_f1': 0.9013806706114398, 'eval_runtime': 11.0379, 'eval_samples_per_second': 90.597, 'eval_steps_per_second': 11.325, 'epoch': 2.34}


                                                       
 59%|█████▊    | 5268/9000 [2:07:44<16:28,  3.78it/s]

{'eval_loss': 0.2771192193031311, 'eval_f1': 0.9114688128772636, 'eval_runtime': 11.0852, 'eval_samples_per_second': 90.21, 'eval_steps_per_second': 11.276, 'epoch': 2.34}


                                                       
 59%|█████▊    | 5280/9000 [2:07:57<16:30,  3.76it/s]

{'eval_loss': 0.30638086795806885, 'eval_f1': 0.9119496855345911, 'eval_runtime': 10.9862, 'eval_samples_per_second': 91.023, 'eval_steps_per_second': 11.378, 'epoch': 2.35}


                                                       
 59%|█████▉    | 5292/9000 [2:08:11<16:56,  3.65it/s]

{'eval_loss': 0.24034680426120758, 'eval_f1': 0.9235352532274081, 'eval_runtime': 11.2829, 'eval_samples_per_second': 88.629, 'eval_steps_per_second': 11.079, 'epoch': 2.35}


                                                       
 59%|█████▉    | 5305/9000 [2:08:25<3:40:26,  3.58s/it]

{'eval_loss': 0.24358715116977692, 'eval_f1': 0.9180990899898888, 'eval_runtime': 11.0869, 'eval_samples_per_second': 90.197, 'eval_steps_per_second': 11.275, 'epoch': 2.36}


                                                       
 59%|█████▉    | 5316/9000 [2:08:38<17:00,  3.61it/s]

{'eval_loss': 0.237592414021492, 'eval_f1': 0.9226006191950464, 'eval_runtime': 11.227, 'eval_samples_per_second': 89.071, 'eval_steps_per_second': 11.134, 'epoch': 2.36}


                                                       
 59%|█████▉    | 5328/9000 [2:08:52<16:29,  3.71it/s]

{'eval_loss': 0.259458065032959, 'eval_f1': 0.9069767441860465, 'eval_runtime': 11.1212, 'eval_samples_per_second': 89.918, 'eval_steps_per_second': 11.24, 'epoch': 2.37}


                                                       
 59%|█████▉    | 5340/9000 [2:09:05<16:18,  3.74it/s]

{'eval_loss': 0.2459934800863266, 'eval_f1': 0.9137577002053389, 'eval_runtime': 11.0121, 'eval_samples_per_second': 90.809, 'eval_steps_per_second': 11.351, 'epoch': 2.37}


                                                       
 59%|█████▉    | 5352/9000 [2:09:19<17:21,  3.50it/s]

{'eval_loss': 0.282356321811676, 'eval_f1': 0.9024651661307609, 'eval_runtime': 10.8149, 'eval_samples_per_second': 92.465, 'eval_steps_per_second': 11.558, 'epoch': 2.38}


                                                       
 60%|█████▉    | 5364/9000 [2:09:32<15:58,  3.79it/s]

{'eval_loss': 0.2643563747406006, 'eval_f1': 0.9173553719008266, 'eval_runtime': 11.2449, 'eval_samples_per_second': 88.929, 'eval_steps_per_second': 11.116, 'epoch': 2.38}


                                                       
 60%|█████▉    | 5376/9000 [2:09:46<16:44,  3.61it/s]

{'eval_loss': 0.31976428627967834, 'eval_f1': 0.8898488120950324, 'eval_runtime': 11.1454, 'eval_samples_per_second': 89.723, 'eval_steps_per_second': 11.215, 'epoch': 2.39}


                                                       
 60%|█████▉    | 5388/9000 [2:10:00<16:44,  3.60it/s]

{'eval_loss': 0.23767662048339844, 'eval_f1': 0.9249492900608519, 'eval_runtime': 11.1746, 'eval_samples_per_second': 89.488, 'eval_steps_per_second': 11.186, 'epoch': 2.39}


                                                       
 60%|██████    | 5400/9000 [2:10:14<16:17,  3.68it/s]

{'eval_loss': 0.25935447216033936, 'eval_f1': 0.9178082191780821, 'eval_runtime': 11.1925, 'eval_samples_per_second': 89.346, 'eval_steps_per_second': 11.168, 'epoch': 2.4}


                                                       
 60%|██████    | 5412/9000 [2:10:27<16:13,  3.68it/s]

{'eval_loss': 0.37165340781211853, 'eval_f1': 0.8695652173913043, 'eval_runtime': 10.7739, 'eval_samples_per_second': 92.817, 'eval_steps_per_second': 11.602, 'epoch': 2.41}


                                                       
 60%|██████    | 5424/9000 [2:10:40<15:40,  3.80it/s]

{'eval_loss': 0.31757721304893494, 'eval_f1': 0.8983402489626556, 'eval_runtime': 10.771, 'eval_samples_per_second': 92.842, 'eval_steps_per_second': 11.605, 'epoch': 2.41}


                                                       
 60%|██████    | 5437/9000 [2:10:53<3:26:14,  3.47s/it]

{'eval_loss': 0.3104061186313629, 'eval_f1': 0.9039433771486349, 'eval_runtime': 10.777, 'eval_samples_per_second': 92.79, 'eval_steps_per_second': 11.599, 'epoch': 2.42}


                                                       
 61%|██████    | 5449/9000 [2:11:07<3:29:58,  3.55s/it]

{'eval_loss': 0.3072718679904938, 'eval_f1': 0.9004149377593361, 'eval_runtime': 10.9943, 'eval_samples_per_second': 90.956, 'eval_steps_per_second': 11.37, 'epoch': 2.42}


                                                       
 61%|██████    | 5461/9000 [2:11:20<3:27:38,  3.52s/it]

{'eval_loss': 0.2978675067424774, 'eval_f1': 0.9017580144777663, 'eval_runtime': 10.9206, 'eval_samples_per_second': 91.57, 'eval_steps_per_second': 11.446, 'epoch': 2.43}


                                                       
 61%|██████    | 5473/9000 [2:11:33<3:21:46,  3.43s/it]

{'eval_loss': 0.33227935433387756, 'eval_f1': 0.9044834307992202, 'eval_runtime': 10.5974, 'eval_samples_per_second': 94.362, 'eval_steps_per_second': 11.795, 'epoch': 2.43}


                                                       
 61%|██████    | 5484/9000 [2:11:46<15:36,  3.76it/s]

{'eval_loss': 0.31806322932243347, 'eval_f1': 0.9013785790031813, 'eval_runtime': 11.0425, 'eval_samples_per_second': 90.559, 'eval_steps_per_second': 11.32, 'epoch': 2.44}


                                                       
 61%|██████    | 5496/9000 [2:12:00<16:35,  3.52it/s]

{'eval_loss': 0.3639117479324341, 'eval_f1': 0.896486229819563, 'eval_runtime': 11.1801, 'eval_samples_per_second': 89.445, 'eval_steps_per_second': 11.181, 'epoch': 2.44}


                                                       
 61%|██████    | 5508/9000 [2:12:14<15:55,  3.66it/s]

{'eval_loss': 0.27901995182037354, 'eval_f1': 0.9235412474849093, 'eval_runtime': 11.3018, 'eval_samples_per_second': 88.481, 'eval_steps_per_second': 11.06, 'epoch': 2.45}


                                                       
 61%|██████▏   | 5520/9000 [2:12:27<15:37,  3.71it/s]

{'eval_loss': 0.3211844265460968, 'eval_f1': 0.904862579281184, 'eval_runtime': 10.8882, 'eval_samples_per_second': 91.843, 'eval_steps_per_second': 11.48, 'epoch': 2.45}


                                                       
 61%|██████▏   | 5533/9000 [2:12:41<3:19:35,  3.45s/it]

{'eval_loss': 0.2895737290382385, 'eval_f1': 0.9137055837563453, 'eval_runtime': 10.7079, 'eval_samples_per_second': 93.389, 'eval_steps_per_second': 11.674, 'epoch': 2.46}


                                                       
 62%|██████▏   | 5545/9000 [2:12:54<3:20:44,  3.49s/it]

{'eval_loss': 0.29761502146720886, 'eval_f1': 0.9029535864978903, 'eval_runtime': 10.7993, 'eval_samples_per_second': 92.599, 'eval_steps_per_second': 11.575, 'epoch': 2.46}


                                                       
 62%|██████▏   | 5557/9000 [2:13:08<3:28:03,  3.63s/it]

{'eval_loss': 0.3599950075149536, 'eval_f1': 0.9034090909090909, 'eval_runtime': 11.2678, 'eval_samples_per_second': 88.749, 'eval_steps_per_second': 11.094, 'epoch': 2.47}


                                                       
 62%|██████▏   | 5569/9000 [2:13:21<3:24:36,  3.58s/it]

{'eval_loss': 0.26014184951782227, 'eval_f1': 0.914466737064414, 'eval_runtime': 11.0843, 'eval_samples_per_second': 90.218, 'eval_steps_per_second': 11.277, 'epoch': 2.47}


                                                       
 62%|██████▏   | 5581/9000 [2:13:35<3:25:13,  3.60s/it]

{'eval_loss': 0.23189643025398254, 'eval_f1': 0.9177939646201874, 'eval_runtime': 11.141, 'eval_samples_per_second': 89.758, 'eval_steps_per_second': 11.22, 'epoch': 2.48}


                                                       
 62%|██████▏   | 5592/9000 [2:13:48<16:11,  3.51it/s]

{'eval_loss': 0.29126501083374023, 'eval_f1': 0.9012464046021094, 'eval_runtime': 10.9142, 'eval_samples_per_second': 91.623, 'eval_steps_per_second': 11.453, 'epoch': 2.49}


                                                       
 62%|██████▏   | 5604/9000 [2:14:01<14:55,  3.79it/s]

{'eval_loss': 0.25333839654922485, 'eval_f1': 0.9126984126984127, 'eval_runtime': 10.9615, 'eval_samples_per_second': 91.228, 'eval_steps_per_second': 11.404, 'epoch': 2.49}


                                                       
 62%|██████▏   | 5616/9000 [2:14:15<15:33,  3.62it/s]

{'eval_loss': 0.259395033121109, 'eval_f1': 0.9135802469135804, 'eval_runtime': 11.2161, 'eval_samples_per_second': 89.158, 'eval_steps_per_second': 11.145, 'epoch': 2.5}


 63%|██████▎   | 5626/9000 [2:14:17<19:04,  2.95it/s]  

{'loss': 0.2385, 'learning_rate': 7.513333333333333e-05, 'epoch': 2.5}


                                                     
 63%|██████▎   | 5628/9000 [2:14:28<15:05,  3.72it/s]

{'eval_loss': 0.32987281680107117, 'eval_f1': 0.8933333333333333, 'eval_runtime': 10.8917, 'eval_samples_per_second': 91.813, 'eval_steps_per_second': 11.477, 'epoch': 2.5}


                                                       
 63%|██████▎   | 5640/9000 [2:14:42<15:05,  3.71it/s]

{'eval_loss': 0.27714085578918457, 'eval_f1': 0.9089068825910931, 'eval_runtime': 11.107, 'eval_samples_per_second': 90.033, 'eval_steps_per_second': 11.254, 'epoch': 2.51}


                                                       
 63%|██████▎   | 5652/9000 [2:14:56<14:59,  3.72it/s]

{'eval_loss': 0.28088054060935974, 'eval_f1': 0.9033570701932859, 'eval_runtime': 11.1093, 'eval_samples_per_second': 90.014, 'eval_steps_per_second': 11.252, 'epoch': 2.51}


                                                       
 63%|██████▎   | 5664/9000 [2:15:09<14:36,  3.81it/s]

{'eval_loss': 0.27134835720062256, 'eval_f1': 0.9089048106448311, 'eval_runtime': 10.9831, 'eval_samples_per_second': 91.049, 'eval_steps_per_second': 11.381, 'epoch': 2.52}


                                                       
 63%|██████▎   | 5677/9000 [2:15:23<3:16:53,  3.55s/it]

{'eval_loss': 0.28311029076576233, 'eval_f1': 0.9090909090909091, 'eval_runtime': 11.0259, 'eval_samples_per_second': 90.696, 'eval_steps_per_second': 11.337, 'epoch': 2.52}


                                                       
 63%|██████▎   | 5688/9000 [2:15:36<14:48,  3.73it/s]

{'eval_loss': 0.28922945261001587, 'eval_f1': 0.8936170212765957, 'eval_runtime': 10.8503, 'eval_samples_per_second': 92.163, 'eval_steps_per_second': 11.52, 'epoch': 2.53}


                                                       
 63%|██████▎   | 5700/9000 [2:15:49<14:36,  3.77it/s]

{'eval_loss': 0.2657748758792877, 'eval_f1': 0.9167502507522568, 'eval_runtime': 10.7455, 'eval_samples_per_second': 93.062, 'eval_steps_per_second': 11.633, 'epoch': 2.53}


                                                       
 63%|██████▎   | 5713/9000 [2:16:02<3:11:25,  3.49s/it]

{'eval_loss': 0.23760558664798737, 'eval_f1': 0.9206030150753769, 'eval_runtime': 10.7979, 'eval_samples_per_second': 92.611, 'eval_steps_per_second': 11.576, 'epoch': 2.54}


                                                       
 64%|██████▎   | 5724/9000 [2:16:16<14:56,  3.65it/s]

{'eval_loss': 0.3164263367652893, 'eval_f1': 0.9045323047251687, 'eval_runtime': 10.9648, 'eval_samples_per_second': 91.201, 'eval_steps_per_second': 11.4, 'epoch': 2.54}


                                                       
 64%|██████▎   | 5736/9000 [2:16:29<14:45,  3.69it/s]

{'eval_loss': 0.3083098232746124, 'eval_f1': 0.9111111111111111, 'eval_runtime': 10.8143, 'eval_samples_per_second': 92.47, 'eval_steps_per_second': 11.559, 'epoch': 2.55}


                                                       
 64%|██████▍   | 5749/9000 [2:16:43<3:20:16,  3.70s/it]

{'eval_loss': 0.25534531474113464, 'eval_f1': 0.9185929648241207, 'eval_runtime': 11.497, 'eval_samples_per_second': 86.979, 'eval_steps_per_second': 10.872, 'epoch': 2.55}


                                                       
 64%|██████▍   | 5760/9000 [2:16:57<15:12,  3.55it/s]

{'eval_loss': 0.26283878087997437, 'eval_f1': 0.9206349206349207, 'eval_runtime': 11.6303, 'eval_samples_per_second': 85.982, 'eval_steps_per_second': 10.748, 'epoch': 2.56}


                                                       
 64%|██████▍   | 5772/9000 [2:17:12<15:21,  3.50it/s]

{'eval_loss': 0.2629457414150238, 'eval_f1': 0.9235537190082644, 'eval_runtime': 11.8114, 'eval_samples_per_second': 84.664, 'eval_steps_per_second': 10.583, 'epoch': 2.57}


                                                       
 64%|██████▍   | 5784/9000 [2:17:26<15:04,  3.55it/s]

{'eval_loss': 0.2813527286052704, 'eval_f1': 0.9216867469879517, 'eval_runtime': 11.7962, 'eval_samples_per_second': 84.773, 'eval_steps_per_second': 10.597, 'epoch': 2.57}


                                                       
 64%|██████▍   | 5796/9000 [2:17:40<14:56,  3.57it/s]

{'eval_loss': 0.2612748146057129, 'eval_f1': 0.9244897959183674, 'eval_runtime': 11.4928, 'eval_samples_per_second': 87.011, 'eval_steps_per_second': 10.876, 'epoch': 2.58}


                                                       
 65%|██████▍   | 5808/9000 [2:17:54<14:58,  3.55it/s]

{'eval_loss': 0.2473236620426178, 'eval_f1': 0.927624872579001, 'eval_runtime': 11.758, 'eval_samples_per_second': 85.049, 'eval_steps_per_second': 10.631, 'epoch': 2.58}


                                                       
 65%|██████▍   | 5820/9000 [2:18:09<14:22,  3.69it/s]

{'eval_loss': 0.2469722032546997, 'eval_f1': 0.9236947791164658, 'eval_runtime': 11.8154, 'eval_samples_per_second': 84.636, 'eval_steps_per_second': 10.579, 'epoch': 2.59}


                                                       
 65%|██████▍   | 5832/9000 [2:18:23<14:33,  3.63it/s]

{'eval_loss': 0.2933741509914398, 'eval_f1': 0.9200779727095517, 'eval_runtime': 11.7166, 'eval_samples_per_second': 85.349, 'eval_steps_per_second': 10.669, 'epoch': 2.59}


                                                       
 65%|██████▍   | 5844/9000 [2:18:37<14:52,  3.54it/s]

{'eval_loss': 0.2521054148674011, 'eval_f1': 0.9173387096774193, 'eval_runtime': 11.8389, 'eval_samples_per_second': 84.467, 'eval_steps_per_second': 10.558, 'epoch': 2.6}


                                                       
 65%|██████▌   | 5857/9000 [2:18:52<3:17:26,  3.77s/it]

{'eval_loss': 0.2893078625202179, 'eval_f1': 0.9114173228346456, 'eval_runtime': 11.6781, 'eval_samples_per_second': 85.631, 'eval_steps_per_second': 10.704, 'epoch': 2.6}


                                                       
 65%|██████▌   | 5868/9000 [2:19:06<14:10,  3.68it/s]

{'eval_loss': 0.3041767477989197, 'eval_f1': 0.9092783505154639, 'eval_runtime': 11.8035, 'eval_samples_per_second': 84.721, 'eval_steps_per_second': 10.59, 'epoch': 2.61}


                                                       
 65%|██████▌   | 5881/9000 [2:19:20<3:12:21,  3.70s/it]

{'eval_loss': 0.3934871554374695, 'eval_f1': 0.8722466960352423, 'eval_runtime': 11.4865, 'eval_samples_per_second': 87.059, 'eval_steps_per_second': 10.882, 'epoch': 2.61}


                                                       
 65%|██████▌   | 5892/9000 [2:19:34<14:09,  3.66it/s]

{'eval_loss': 0.3208394944667816, 'eval_f1': 0.9034146341463415, 'eval_runtime': 11.4701, 'eval_samples_per_second': 87.183, 'eval_steps_per_second': 10.898, 'epoch': 2.62}


                                                       
 66%|██████▌   | 5904/9000 [2:19:48<14:50,  3.48it/s]

{'eval_loss': 0.29514914751052856, 'eval_f1': 0.9043659043659044, 'eval_runtime': 11.8203, 'eval_samples_per_second': 84.6, 'eval_steps_per_second': 10.575, 'epoch': 2.62}


                                                       
 66%|██████▌   | 5916/9000 [2:20:02<14:51,  3.46it/s]

{'eval_loss': 0.2955958843231201, 'eval_f1': 0.9092805005213764, 'eval_runtime': 11.8075, 'eval_samples_per_second': 84.692, 'eval_steps_per_second': 10.587, 'epoch': 2.63}


                                                       
 66%|██████▌   | 5928/9000 [2:20:17<14:26,  3.54it/s]

{'eval_loss': 0.2943824529647827, 'eval_f1': 0.9118236472945892, 'eval_runtime': 11.6864, 'eval_samples_per_second': 85.57, 'eval_steps_per_second': 10.696, 'epoch': 2.63}


                                                       
 66%|██████▌   | 5940/9000 [2:20:31<14:05,  3.62it/s]

{'eval_loss': 0.27643659710884094, 'eval_f1': 0.9138276553106213, 'eval_runtime': 11.5107, 'eval_samples_per_second': 86.876, 'eval_steps_per_second': 10.859, 'epoch': 2.64}


                                                       
 66%|██████▌   | 5952/9000 [2:20:45<14:31,  3.50it/s]

{'eval_loss': 0.3234700560569763, 'eval_f1': 0.8907922912205568, 'eval_runtime': 11.3986, 'eval_samples_per_second': 87.73, 'eval_steps_per_second': 10.966, 'epoch': 2.65}


                                                       
 66%|██████▋   | 5965/9000 [2:20:59<3:05:56,  3.68s/it]

{'eval_loss': 0.27494630217552185, 'eval_f1': 0.9046610169491526, 'eval_runtime': 11.4067, 'eval_samples_per_second': 87.668, 'eval_steps_per_second': 10.958, 'epoch': 2.65}


                                                       
 66%|██████▋   | 5976/9000 [2:21:13<14:32,  3.47it/s]

{'eval_loss': 0.29781657457351685, 'eval_f1': 0.8852459016393442, 'eval_runtime': 11.3937, 'eval_samples_per_second': 87.768, 'eval_steps_per_second': 10.971, 'epoch': 2.66}


                                                       
 67%|██████▋   | 5988/9000 [2:21:27<13:52,  3.62it/s]

{'eval_loss': 0.34186846017837524, 'eval_f1': 0.9022988505747127, 'eval_runtime': 11.45, 'eval_samples_per_second': 87.336, 'eval_steps_per_second': 10.917, 'epoch': 2.66}


                                                       
 67%|██████▋   | 6000/9000 [2:21:40<13:34,  3.68it/s]

{'eval_loss': 0.25489112734794617, 'eval_f1': 0.9198751300728408, 'eval_runtime': 11.2907, 'eval_samples_per_second': 88.568, 'eval_steps_per_second': 11.071, 'epoch': 2.67}


                                                       
 67%|██████▋   | 6013/9000 [2:21:56<3:02:37,  3.67s/it]

{'eval_loss': 0.22722700238227844, 'eval_f1': 0.9282786885245901, 'eval_runtime': 11.3768, 'eval_samples_per_second': 87.898, 'eval_steps_per_second': 10.987, 'epoch': 2.67}


                                                       
 67%|██████▋   | 6024/9000 [2:22:10<13:24,  3.70it/s]

{'eval_loss': 0.2535524070262909, 'eval_f1': 0.915686274509804, 'eval_runtime': 11.4235, 'eval_samples_per_second': 87.539, 'eval_steps_per_second': 10.942, 'epoch': 2.68}


                                                       
 67%|██████▋   | 6036/9000 [2:22:23<13:49,  3.57it/s]

{'eval_loss': 0.24567122757434845, 'eval_f1': 0.9172625127681306, 'eval_runtime': 11.2109, 'eval_samples_per_second': 89.199, 'eval_steps_per_second': 11.15, 'epoch': 2.68}


                                                       
 67%|██████▋   | 6048/9000 [2:22:37<12:46,  3.85it/s]

{'eval_loss': 0.3319692015647888, 'eval_f1': 0.8803512623490669, 'eval_runtime': 11.0823, 'eval_samples_per_second': 90.234, 'eval_steps_per_second': 11.279, 'epoch': 2.69}


                                                       
 67%|██████▋   | 6061/9000 [2:22:51<2:57:59,  3.63s/it]

{'eval_loss': 0.2647216022014618, 'eval_f1': 0.9121756487025948, 'eval_runtime': 11.2887, 'eval_samples_per_second': 88.584, 'eval_steps_per_second': 11.073, 'epoch': 2.69}


                                                       
 67%|██████▋   | 6072/9000 [2:23:04<13:09,  3.71it/s]

{'eval_loss': 0.24164819717407227, 'eval_f1': 0.9204081632653062, 'eval_runtime': 11.3011, 'eval_samples_per_second': 88.487, 'eval_steps_per_second': 11.061, 'epoch': 2.7}


                                                       
 68%|██████▊   | 6084/9000 [2:23:18<13:12,  3.68it/s]

{'eval_loss': 0.2652426064014435, 'eval_f1': 0.9148514851485149, 'eval_runtime': 11.509, 'eval_samples_per_second': 86.888, 'eval_steps_per_second': 10.861, 'epoch': 2.7}


                                                       
 68%|██████▊   | 6096/9000 [2:23:32<12:52,  3.76it/s]

{'eval_loss': 0.32738593220710754, 'eval_f1': 0.9042145593869731, 'eval_runtime': 11.2306, 'eval_samples_per_second': 89.043, 'eval_steps_per_second': 11.13, 'epoch': 2.71}


                                                       
 68%|██████▊   | 6108/9000 [2:23:45<12:44,  3.78it/s]

{'eval_loss': 0.2610713839530945, 'eval_f1': 0.916, 'eval_runtime': 11.163, 'eval_samples_per_second': 89.582, 'eval_steps_per_second': 11.198, 'epoch': 2.71}


                                                       
 68%|██████▊   | 6121/9000 [2:24:00<2:59:59,  3.75s/it]

{'eval_loss': 0.3011346757411957, 'eval_f1': 0.9026737967914438, 'eval_runtime': 11.7033, 'eval_samples_per_second': 85.446, 'eval_steps_per_second': 10.681, 'epoch': 2.72}


                                                       
 68%|██████▊   | 6132/9000 [2:24:13<12:52,  3.71it/s]

{'eval_loss': 0.3096875548362732, 'eval_f1': 0.9074975657254137, 'eval_runtime': 11.2342, 'eval_samples_per_second': 89.014, 'eval_steps_per_second': 11.127, 'epoch': 2.73}


                                                       
 68%|██████▊   | 6144/9000 [2:24:27<12:07,  3.93it/s]

{'eval_loss': 0.28478094935417175, 'eval_f1': 0.9102428722280885, 'eval_runtime': 11.5399, 'eval_samples_per_second': 86.656, 'eval_steps_per_second': 10.832, 'epoch': 2.73}


                                                       
 68%|██████▊   | 6156/9000 [2:24:41<13:07,  3.61it/s]

{'eval_loss': 0.26853686571121216, 'eval_f1': 0.9132602193419742, 'eval_runtime': 11.434, 'eval_samples_per_second': 87.459, 'eval_steps_per_second': 10.932, 'epoch': 2.74}


                                                       
 69%|██████▊   | 6169/9000 [2:24:55<2:52:11,  3.65s/it]

{'eval_loss': 0.2935168445110321, 'eval_f1': 0.9079365079365079, 'eval_runtime': 11.2772, 'eval_samples_per_second': 88.674, 'eval_steps_per_second': 11.084, 'epoch': 2.74}


                                                       
 69%|██████▊   | 6181/9000 [2:25:08<2:48:00,  3.58s/it]

{'eval_loss': 0.3291271924972534, 'eval_f1': 0.8941684665226781, 'eval_runtime': 11.1259, 'eval_samples_per_second': 89.881, 'eval_steps_per_second': 11.235, 'epoch': 2.75}


                                                       
 69%|██████▉   | 6193/9000 [2:25:19<2:19:00,  2.97s/it]

{'eval_loss': 0.27891263365745544, 'eval_f1': 0.9171842650103521, 'eval_runtime': 9.1865, 'eval_samples_per_second': 108.856, 'eval_steps_per_second': 13.607, 'epoch': 2.75}


                                                       
 69%|██████▉   | 6205/9000 [2:25:31<2:17:40,  2.96s/it]

{'eval_loss': 0.2845061719417572, 'eval_f1': 0.9194029850746269, 'eval_runtime': 9.1556, 'eval_samples_per_second': 109.223, 'eval_steps_per_second': 13.653, 'epoch': 2.76}


                                                       
 69%|██████▉   | 6217/9000 [2:25:42<2:16:43,  2.95s/it]

{'eval_loss': 0.3455117344856262, 'eval_f1': 0.8862660944206009, 'eval_runtime': 9.1233, 'eval_samples_per_second': 109.61, 'eval_steps_per_second': 13.701, 'epoch': 2.76}


                                                       
 69%|██████▉   | 6229/9000 [2:25:53<2:16:29,  2.96s/it]

{'eval_loss': 0.27394211292266846, 'eval_f1': 0.9223007063572148, 'eval_runtime': 9.1156, 'eval_samples_per_second': 109.702, 'eval_steps_per_second': 13.713, 'epoch': 2.77}


                                                       
 69%|██████▉   | 6240/9000 [2:26:07<10:52,  4.23it/s]

{'eval_loss': 0.2757325768470764, 'eval_f1': 0.9221674876847291, 'eval_runtime': 12.2245, 'eval_samples_per_second': 81.803, 'eval_steps_per_second': 10.225, 'epoch': 2.77}


                                                       
 69%|██████▉   | 6252/9000 [2:26:21<12:00,  3.81it/s]

{'eval_loss': 0.3132050335407257, 'eval_f1': 0.8854961832061069, 'eval_runtime': 11.2823, 'eval_samples_per_second': 88.634, 'eval_steps_per_second': 11.079, 'epoch': 2.78}


                                                       
 70%|██████▉   | 6264/9000 [2:26:35<12:19,  3.70it/s]

{'eval_loss': 0.2617208957672119, 'eval_f1': 0.9210789210789211, 'eval_runtime': 11.9641, 'eval_samples_per_second': 83.584, 'eval_steps_per_second': 10.448, 'epoch': 2.78}


                                                       
 70%|██████▉   | 6277/9000 [2:26:49<2:45:47,  3.65s/it]

{'eval_loss': 0.27069103717803955, 'eval_f1': 0.91875, 'eval_runtime': 11.3182, 'eval_samples_per_second': 88.353, 'eval_steps_per_second': 11.044, 'epoch': 2.79}


                                                       
 70%|██████▉   | 6289/9000 [2:27:02<2:24:27,  3.20s/it]

{'eval_loss': 0.30563884973526, 'eval_f1': 0.9039070749736008, 'eval_runtime': 9.8556, 'eval_samples_per_second': 101.466, 'eval_steps_per_second': 12.683, 'epoch': 2.79}


                                                       
 70%|███████   | 6301/9000 [2:27:13<2:18:12,  3.07s/it]

{'eval_loss': 0.30361345410346985, 'eval_f1': 0.9138276553106213, 'eval_runtime': 9.54, 'eval_samples_per_second': 104.822, 'eval_steps_per_second': 13.103, 'epoch': 2.8}


                                                       
 70%|███████   | 6313/9000 [2:27:25<2:13:50,  2.99s/it]

{'eval_loss': 0.3018478751182556, 'eval_f1': 0.9087221095334687, 'eval_runtime': 9.2665, 'eval_samples_per_second': 107.916, 'eval_steps_per_second': 13.489, 'epoch': 2.81}


                                                       
 70%|███████   | 6325/9000 [2:27:36<2:11:41,  2.95s/it]

{'eval_loss': 0.30330997705459595, 'eval_f1': 0.9175769612711023, 'eval_runtime': 9.1658, 'eval_samples_per_second': 109.101, 'eval_steps_per_second': 13.638, 'epoch': 2.81}


                                                       
 70%|███████   | 6337/9000 [2:27:47<2:14:30,  3.03s/it]

{'eval_loss': 0.27121588587760925, 'eval_f1': 0.92123629112662, 'eval_runtime': 9.3942, 'eval_samples_per_second': 106.449, 'eval_steps_per_second': 13.306, 'epoch': 2.82}


                                                       
 71%|███████   | 6349/9000 [2:27:59<2:17:33,  3.11s/it]

{'eval_loss': 0.2561803460121155, 'eval_f1': 0.9158878504672897, 'eval_runtime': 9.68, 'eval_samples_per_second': 103.306, 'eval_steps_per_second': 12.913, 'epoch': 2.82}


                                                       
 71%|███████   | 6361/9000 [2:28:11<2:24:18,  3.28s/it]

{'eval_loss': 0.3350987434387207, 'eval_f1': 0.8839779005524863, 'eval_runtime': 10.2106, 'eval_samples_per_second': 97.938, 'eval_steps_per_second': 12.242, 'epoch': 2.83}


                                                       
 71%|███████   | 6373/9000 [2:28:23<2:14:43,  3.08s/it]

{'eval_loss': 0.2557174861431122, 'eval_f1': 0.9257375381485249, 'eval_runtime': 9.5245, 'eval_samples_per_second': 104.992, 'eval_steps_per_second': 13.124, 'epoch': 2.83}


                                                       
 71%|███████   | 6385/9000 [2:28:34<2:07:17,  2.92s/it]

{'eval_loss': 0.3356703519821167, 'eval_f1': 0.9129186602870814, 'eval_runtime': 9.0301, 'eval_samples_per_second': 110.741, 'eval_steps_per_second': 13.843, 'epoch': 2.84}


                                                       
 71%|███████   | 6397/9000 [2:28:45<2:08:04,  2.95s/it]

{'eval_loss': 0.24287624657154083, 'eval_f1': 0.9279835390946503, 'eval_runtime': 9.1716, 'eval_samples_per_second': 109.032, 'eval_steps_per_second': 13.629, 'epoch': 2.84}


                                                       
 71%|███████   | 6409/9000 [2:28:57<2:10:14,  3.02s/it]

{'eval_loss': 0.26223108172416687, 'eval_f1': 0.9195402298850575, 'eval_runtime': 9.335, 'eval_samples_per_second': 107.124, 'eval_steps_per_second': 13.39, 'epoch': 2.85}


                                                       
 71%|███████▏  | 6421/9000 [2:29:08<2:13:36,  3.11s/it]

{'eval_loss': 0.29586806893348694, 'eval_f1': 0.9152215799614644, 'eval_runtime': 9.6182, 'eval_samples_per_second': 103.969, 'eval_steps_per_second': 12.996, 'epoch': 2.85}


                                                       
 71%|███████▏  | 6433/9000 [2:29:19<2:04:58,  2.92s/it]

{'eval_loss': 0.270496666431427, 'eval_f1': 0.9197651663405089, 'eval_runtime': 9.0552, 'eval_samples_per_second': 110.433, 'eval_steps_per_second': 13.804, 'epoch': 2.86}


                                                       
 72%|███████▏  | 6445/9000 [2:29:30<2:06:30,  2.97s/it]

{'eval_loss': 0.23571641743183136, 'eval_f1': 0.9302788844621513, 'eval_runtime': 9.2337, 'eval_samples_per_second': 108.299, 'eval_steps_per_second': 13.537, 'epoch': 2.86}


                                                       
 72%|███████▏  | 6457/9000 [2:29:41<2:00:59,  2.85s/it]

{'eval_loss': 0.2315731942653656, 'eval_f1': 0.9266802443991853, 'eval_runtime': 8.8757, 'eval_samples_per_second': 112.667, 'eval_steps_per_second': 14.083, 'epoch': 2.87}


                                                       
 72%|███████▏  | 6468/9000 [2:29:52<08:27,  4.99it/s]

{'eval_loss': 0.23787428438663483, 'eval_f1': 0.9218106995884774, 'eval_runtime': 9.0369, 'eval_samples_per_second': 110.657, 'eval_steps_per_second': 13.832, 'epoch': 2.87}


                                                       
 72%|███████▏  | 6481/9000 [2:30:06<2:29:18,  3.56s/it]

{'eval_loss': 0.2632176876068115, 'eval_f1': 0.9159663865546218, 'eval_runtime': 11.0505, 'eval_samples_per_second': 90.494, 'eval_steps_per_second': 11.312, 'epoch': 2.88}


                                                       
 72%|███████▏  | 6492/9000 [2:30:20<10:48,  3.87it/s]

{'eval_loss': 0.28817614912986755, 'eval_f1': 0.9172482552342971, 'eval_runtime': 11.7464, 'eval_samples_per_second': 85.132, 'eval_steps_per_second': 10.642, 'epoch': 2.89}


                                                       
 72%|███████▏  | 6504/9000 [2:30:35<12:45,  3.26it/s]

{'eval_loss': 0.2935342490673065, 'eval_f1': 0.9158110882956879, 'eval_runtime': 12.6529, 'eval_samples_per_second': 79.033, 'eval_steps_per_second': 9.879, 'epoch': 2.89}


                                                       
 72%|███████▏  | 6516/9000 [2:30:51<12:45,  3.24it/s]

{'eval_loss': 0.2771344780921936, 'eval_f1': 0.9156378600823045, 'eval_runtime': 12.9222, 'eval_samples_per_second': 77.386, 'eval_steps_per_second': 9.673, 'epoch': 2.9}


                                                       
 73%|███████▎  | 6528/9000 [2:31:07<12:47,  3.22it/s]

{'eval_loss': 0.3611094653606415, 'eval_f1': 0.8798235942668137, 'eval_runtime': 12.8155, 'eval_samples_per_second': 78.03, 'eval_steps_per_second': 9.754, 'epoch': 2.9}


                                                       
 73%|███████▎  | 6540/9000 [2:31:22<14:02,  2.92it/s]

{'eval_loss': 0.2678290903568268, 'eval_f1': 0.917093142272262, 'eval_runtime': 12.689, 'eval_samples_per_second': 78.808, 'eval_steps_per_second': 9.851, 'epoch': 2.91}


                                                       
 73%|███████▎  | 6552/9000 [2:31:37<13:42,  2.98it/s]

{'eval_loss': 0.28861361742019653, 'eval_f1': 0.9098445595854923, 'eval_runtime': 12.2606, 'eval_samples_per_second': 81.562, 'eval_steps_per_second': 10.195, 'epoch': 2.91}


                                                       
 73%|███████▎  | 6564/9000 [2:31:52<11:52,  3.42it/s]

{'eval_loss': 0.2810938358306885, 'eval_f1': 0.9134515119916579, 'eval_runtime': 12.212, 'eval_samples_per_second': 81.887, 'eval_steps_per_second': 10.236, 'epoch': 2.92}


                                                       
 73%|███████▎  | 6577/9000 [2:32:07<2:31:01,  3.74s/it]

{'eval_loss': 0.2493850141763687, 'eval_f1': 0.9299492385786803, 'eval_runtime': 11.53, 'eval_samples_per_second': 86.73, 'eval_steps_per_second': 10.841, 'epoch': 2.92}


                                                       
 73%|███████▎  | 6589/9000 [2:32:20<2:24:06,  3.59s/it]

{'eval_loss': 0.2811034917831421, 'eval_f1': 0.9130890052356022, 'eval_runtime': 11.1054, 'eval_samples_per_second': 90.046, 'eval_steps_per_second': 11.256, 'epoch': 2.93}


                                                       
 73%|███████▎  | 6601/9000 [2:32:35<2:30:43,  3.77s/it]

{'eval_loss': 0.2524794638156891, 'eval_f1': 0.923391215526047, 'eval_runtime': 11.7385, 'eval_samples_per_second': 85.19, 'eval_steps_per_second': 10.649, 'epoch': 2.93}


                                                       
 73%|███████▎  | 6612/9000 [2:32:48<10:31,  3.78it/s]

{'eval_loss': 0.24149484932422638, 'eval_f1': 0.9232343909928352, 'eval_runtime': 11.0839, 'eval_samples_per_second': 90.221, 'eval_steps_per_second': 11.278, 'epoch': 2.94}


                                                       
 74%|███████▎  | 6624/9000 [2:33:03<10:31,  3.76it/s]

{'eval_loss': 0.2443075031042099, 'eval_f1': 0.9223007063572148, 'eval_runtime': 12.2872, 'eval_samples_per_second': 81.385, 'eval_steps_per_second': 10.173, 'epoch': 2.94}


                                                       
 74%|███████▎  | 6636/9000 [2:33:16<10:58,  3.59it/s]

{'eval_loss': 0.33353951573371887, 'eval_f1': 0.8967741935483872, 'eval_runtime': 11.3927, 'eval_samples_per_second': 87.776, 'eval_steps_per_second': 10.972, 'epoch': 2.95}


                                                       
 74%|███████▍  | 6649/9000 [2:33:30<2:23:16,  3.66s/it]

{'eval_loss': 0.24774737656116486, 'eval_f1': 0.9305555555555555, 'eval_runtime': 11.356, 'eval_samples_per_second': 88.059, 'eval_steps_per_second': 11.007, 'epoch': 2.95}


                                                       
 74%|███████▍  | 6661/9000 [2:33:44<2:17:13,  3.52s/it]

{'eval_loss': 0.2658202648162842, 'eval_f1': 0.9261083743842364, 'eval_runtime': 10.8502, 'eval_samples_per_second': 92.165, 'eval_steps_per_second': 11.521, 'epoch': 2.96}


                                                       
 74%|███████▍  | 6672/9000 [2:33:58<10:40,  3.63it/s]

{'eval_loss': 0.3136904239654541, 'eval_f1': 0.8967741935483872, 'eval_runtime': 11.7231, 'eval_samples_per_second': 85.302, 'eval_steps_per_second': 10.663, 'epoch': 2.97}


                                                       
 74%|███████▍  | 6684/9000 [2:34:10<09:12,  4.19it/s]

{'eval_loss': 0.3051739037036896, 'eval_f1': 0.9026737967914438, 'eval_runtime': 9.8555, 'eval_samples_per_second': 101.466, 'eval_steps_per_second': 12.683, 'epoch': 2.97}


                                                       
 74%|███████▍  | 6697/9000 [2:34:22<2:03:11,  3.21s/it]

{'eval_loss': 0.2724027633666992, 'eval_f1': 0.9176225234619395, 'eval_runtime': 10.0009, 'eval_samples_per_second': 99.991, 'eval_steps_per_second': 12.499, 'epoch': 2.98}


                                                       
 75%|███████▍  | 6709/9000 [2:34:34<1:54:52,  3.01s/it]

{'eval_loss': 0.24249841272830963, 'eval_f1': 0.9213483146067416, 'eval_runtime': 9.3416, 'eval_samples_per_second': 107.048, 'eval_steps_per_second': 13.381, 'epoch': 2.98}


                                                       
 75%|███████▍  | 6721/9000 [2:34:45<1:51:52,  2.95s/it]

{'eval_loss': 0.2539224326610565, 'eval_f1': 0.9175991861648017, 'eval_runtime': 9.1189, 'eval_samples_per_second': 109.662, 'eval_steps_per_second': 13.708, 'epoch': 2.99}


                                                       
 75%|███████▍  | 6733/9000 [2:34:56<1:50:13,  2.92s/it]

{'eval_loss': 0.25889402627944946, 'eval_f1': 0.9206030150753769, 'eval_runtime': 9.0286, 'eval_samples_per_second': 110.759, 'eval_steps_per_second': 13.845, 'epoch': 2.99}


                                                       
 75%|███████▍  | 6745/9000 [2:35:07<1:52:39,  3.00s/it]

{'eval_loss': 0.2634168565273285, 'eval_f1': 0.9102296450939458, 'eval_runtime': 9.3173, 'eval_samples_per_second': 107.327, 'eval_steps_per_second': 13.416, 'epoch': 3.0}


 75%|███████▌  | 6751/9000 [2:35:08<18:46,  2.00it/s]  

{'loss': 0.2466, 'learning_rate': 5.0155555555555556e-05, 'epoch': 3.0}


                                                     
 75%|███████▌  | 6756/9000 [2:35:18<08:20,  4.48it/s]

{'eval_loss': 0.2460857480764389, 'eval_f1': 0.9243523316062177, 'eval_runtime': 9.3364, 'eval_samples_per_second': 107.108, 'eval_steps_per_second': 13.388, 'epoch': 3.0}


                                                       
 75%|███████▌  | 6769/9000 [2:35:30<1:53:31,  3.05s/it]

{'eval_loss': 0.2466498464345932, 'eval_f1': 0.9297520661157025, 'eval_runtime': 9.4369, 'eval_samples_per_second': 105.967, 'eval_steps_per_second': 13.246, 'epoch': 3.01}


                                                       
 75%|███████▌  | 6781/9000 [2:35:41<1:48:43,  2.94s/it]

{'eval_loss': 0.28857702016830444, 'eval_f1': 0.9141039236479321, 'eval_runtime': 9.1158, 'eval_samples_per_second': 109.699, 'eval_steps_per_second': 13.712, 'epoch': 3.01}


                                                       
 75%|███████▌  | 6793/9000 [2:35:52<1:47:45,  2.93s/it]

{'eval_loss': 0.2762499153614044, 'eval_f1': 0.925281473899693, 'eval_runtime': 9.0862, 'eval_samples_per_second': 110.058, 'eval_steps_per_second': 13.757, 'epoch': 3.02}


                                                       
 76%|███████▌  | 6805/9000 [2:36:03<1:48:24,  2.96s/it]

{'eval_loss': 0.2748369872570038, 'eval_f1': 0.9257375381485249, 'eval_runtime': 9.2059, 'eval_samples_per_second': 108.626, 'eval_steps_per_second': 13.578, 'epoch': 3.02}


                                                       
 76%|███████▌  | 6817/9000 [2:36:14<1:47:00,  2.94s/it]

{'eval_loss': 0.3039960265159607, 'eval_f1': 0.9184890656063619, 'eval_runtime': 9.1282, 'eval_samples_per_second': 109.55, 'eval_steps_per_second': 13.694, 'epoch': 3.03}


                                                       
 76%|███████▌  | 6829/9000 [2:36:26<1:46:22,  2.94s/it]

{'eval_loss': 0.2985783517360687, 'eval_f1': 0.9172625127681306, 'eval_runtime': 9.119, 'eval_samples_per_second': 109.661, 'eval_steps_per_second': 13.708, 'epoch': 3.03}


                                                       
 76%|███████▌  | 6841/9000 [2:36:37<1:47:03,  2.98s/it]

{'eval_loss': 0.32614660263061523, 'eval_f1': 0.912906610703043, 'eval_runtime': 9.2349, 'eval_samples_per_second': 108.285, 'eval_steps_per_second': 13.536, 'epoch': 3.04}


                                                       
 76%|███████▌  | 6853/9000 [2:36:48<1:44:10,  2.91s/it]

{'eval_loss': 0.3144979774951935, 'eval_f1': 0.9163179916317992, 'eval_runtime': 9.0287, 'eval_samples_per_second': 110.758, 'eval_steps_per_second': 13.845, 'epoch': 3.05}


                                                       
 76%|███████▋  | 6865/9000 [2:36:59<1:46:45,  3.00s/it]

{'eval_loss': 0.32585737109184265, 'eval_f1': 0.9183266932270917, 'eval_runtime': 9.2983, 'eval_samples_per_second': 107.547, 'eval_steps_per_second': 13.443, 'epoch': 3.05}


                                                       
 76%|███████▋  | 6877/9000 [2:37:12<2:01:23,  3.43s/it]

{'eval_loss': 0.3137628138065338, 'eval_f1': 0.9118852459016392, 'eval_runtime': 10.7282, 'eval_samples_per_second': 93.213, 'eval_steps_per_second': 11.652, 'epoch': 3.06}


                                                       
 77%|███████▋  | 6889/9000 [2:37:24<1:55:07,  3.27s/it]

{'eval_loss': 0.3088018298149109, 'eval_f1': 0.9155370177267989, 'eval_runtime': 10.1528, 'eval_samples_per_second': 98.495, 'eval_steps_per_second': 12.312, 'epoch': 3.06}


                                                       
 77%|███████▋  | 6901/9000 [2:37:36<1:42:46,  2.94s/it]

{'eval_loss': 0.2729783356189728, 'eval_f1': 0.9193054136874361, 'eval_runtime': 9.0899, 'eval_samples_per_second': 110.012, 'eval_steps_per_second': 13.752, 'epoch': 3.07}


                                                       
 77%|███████▋  | 6913/9000 [2:37:47<1:46:57,  3.07s/it]

{'eval_loss': 0.2789231836795807, 'eval_f1': 0.9219562955254942, 'eval_runtime': 9.5361, 'eval_samples_per_second': 104.864, 'eval_steps_per_second': 13.108, 'epoch': 3.07}


                                                       
 77%|███████▋  | 6925/9000 [2:37:59<1:49:23,  3.16s/it]

{'eval_loss': 0.2793787121772766, 'eval_f1': 0.9201213346814965, 'eval_runtime': 9.8095, 'eval_samples_per_second': 101.942, 'eval_steps_per_second': 12.743, 'epoch': 3.08}


                                                       
 77%|███████▋  | 6937/9000 [2:38:11<1:46:28,  3.10s/it]

{'eval_loss': 0.2738555073738098, 'eval_f1': 0.9262295081967212, 'eval_runtime': 9.5981, 'eval_samples_per_second': 104.188, 'eval_steps_per_second': 13.023, 'epoch': 3.08}


                                                       
 77%|███████▋  | 6949/9000 [2:38:22<1:45:06,  3.07s/it]

{'eval_loss': 0.2766226530075073, 'eval_f1': 0.9237199582027168, 'eval_runtime': 9.5421, 'eval_samples_per_second': 104.799, 'eval_steps_per_second': 13.1, 'epoch': 3.09}


                                                       
 77%|███████▋  | 6961/9000 [2:38:34<1:44:30,  3.08s/it]

{'eval_loss': 0.2736396789550781, 'eval_f1': 0.9263803680981595, 'eval_runtime': 9.5454, 'eval_samples_per_second': 104.763, 'eval_steps_per_second': 13.095, 'epoch': 3.09}


                                                       
 77%|███████▋  | 6973/9000 [2:38:46<1:46:15,  3.15s/it]

{'eval_loss': 0.2878822088241577, 'eval_f1': 0.9237199582027168, 'eval_runtime': 9.7736, 'eval_samples_per_second': 102.317, 'eval_steps_per_second': 12.79, 'epoch': 3.1}


                                                       
 78%|███████▊  | 6985/9000 [2:38:57<1:42:05,  3.04s/it]

{'eval_loss': 0.2897655963897705, 'eval_f1': 0.9267803410230692, 'eval_runtime': 9.4072, 'eval_samples_per_second': 106.302, 'eval_steps_per_second': 13.288, 'epoch': 3.1}


                                                       
 78%|███████▊  | 6997/9000 [2:39:09<1:40:56,  3.02s/it]

{'eval_loss': 0.28754517436027527, 'eval_f1': 0.9281997918834548, 'eval_runtime': 9.3809, 'eval_samples_per_second': 106.6, 'eval_steps_per_second': 13.325, 'epoch': 3.11}


                                                       
 78%|███████▊  | 7009/9000 [2:39:20<1:41:16,  3.05s/it]

{'eval_loss': 0.2881242036819458, 'eval_f1': 0.9272349272349273, 'eval_runtime': 9.4755, 'eval_samples_per_second': 105.536, 'eval_steps_per_second': 13.192, 'epoch': 3.11}


                                                       
 78%|███████▊  | 7021/9000 [2:39:32<1:42:38,  3.11s/it]

{'eval_loss': 0.29831597208976746, 'eval_f1': 0.9249011857707509, 'eval_runtime': 9.6417, 'eval_samples_per_second': 103.716, 'eval_steps_per_second': 12.964, 'epoch': 3.12}


                                                       
 78%|███████▊  | 7033/9000 [2:39:44<1:43:52,  3.17s/it]

{'eval_loss': 0.28176361322402954, 'eval_f1': 0.9314516129032258, 'eval_runtime': 9.8376, 'eval_samples_per_second': 101.651, 'eval_steps_per_second': 12.706, 'epoch': 3.13}


                                                       
 78%|███████▊  | 7045/9000 [2:39:56<1:43:20,  3.17s/it]

{'eval_loss': 0.28848692774772644, 'eval_f1': 0.9292307692307693, 'eval_runtime': 9.8207, 'eval_samples_per_second': 101.826, 'eval_steps_per_second': 12.728, 'epoch': 3.13}


                                                       
 78%|███████▊  | 7057/9000 [2:40:08<1:42:37,  3.17s/it]

{'eval_loss': 0.312171071767807, 'eval_f1': 0.9243027888446216, 'eval_runtime': 9.8193, 'eval_samples_per_second': 101.84, 'eval_steps_per_second': 12.73, 'epoch': 3.14}


                                                       
 79%|███████▊  | 7069/9000 [2:40:20<1:42:14,  3.18s/it]

{'eval_loss': 0.3128918409347534, 'eval_f1': 0.9250749250749252, 'eval_runtime': 9.8557, 'eval_samples_per_second': 101.464, 'eval_steps_per_second': 12.683, 'epoch': 3.14}


                                                       
 79%|███████▊  | 7081/9000 [2:40:32<1:41:55,  3.19s/it]

{'eval_loss': 0.31145963072776794, 'eval_f1': 0.9243353783231084, 'eval_runtime': 9.8894, 'eval_samples_per_second': 101.119, 'eval_steps_per_second': 12.64, 'epoch': 3.15}


                                                       
 79%|███████▉  | 7093/9000 [2:40:44<1:40:05,  3.15s/it]

{'eval_loss': 0.3281807601451874, 'eval_f1': 0.9239999999999999, 'eval_runtime': 9.7597, 'eval_samples_per_second': 102.462, 'eval_steps_per_second': 12.808, 'epoch': 3.15}


                                                       
 79%|███████▉  | 7105/9000 [2:40:56<1:39:35,  3.15s/it]

{'eval_loss': 0.32982197403907776, 'eval_f1': 0.9149377593360997, 'eval_runtime': 9.7712, 'eval_samples_per_second': 102.341, 'eval_steps_per_second': 12.793, 'epoch': 3.16}


                                                       
 79%|███████▉  | 7117/9000 [2:41:08<1:39:24,  3.17s/it]

{'eval_loss': 0.32606786489486694, 'eval_f1': 0.9184726522187822, 'eval_runtime': 9.8294, 'eval_samples_per_second': 101.736, 'eval_steps_per_second': 12.717, 'epoch': 3.16}


                                                       
 79%|███████▉  | 7129/9000 [2:41:20<1:39:23,  3.19s/it]

{'eval_loss': 0.313884973526001, 'eval_f1': 0.9188078108941418, 'eval_runtime': 9.8828, 'eval_samples_per_second': 101.186, 'eval_steps_per_second': 12.648, 'epoch': 3.17}


                                                       
 79%|███████▉  | 7141/9000 [2:41:32<1:38:14,  3.17s/it]

{'eval_loss': 0.3156190812587738, 'eval_f1': 0.9203722854188212, 'eval_runtime': 9.8241, 'eval_samples_per_second': 101.791, 'eval_steps_per_second': 12.724, 'epoch': 3.17}


                                                       
 79%|███████▉  | 7153/9000 [2:41:44<1:36:05,  3.12s/it]

{'eval_loss': 0.32243138551712036, 'eval_f1': 0.9182643794147326, 'eval_runtime': 9.6898, 'eval_samples_per_second': 103.201, 'eval_steps_per_second': 12.9, 'epoch': 3.18}


                                                       
 80%|███████▉  | 7165/9000 [2:41:56<1:37:03,  3.17s/it]

{'eval_loss': 0.30882593989372253, 'eval_f1': 0.9218106995884774, 'eval_runtime': 9.8511, 'eval_samples_per_second': 101.512, 'eval_steps_per_second': 12.689, 'epoch': 3.18}


                                                       
 80%|███████▉  | 7177/9000 [2:42:08<1:36:47,  3.19s/it]

{'eval_loss': 0.2987123429775238, 'eval_f1': 0.9238578680203045, 'eval_runtime': 9.8839, 'eval_samples_per_second': 101.174, 'eval_steps_per_second': 12.647, 'epoch': 3.19}


                                                       
 80%|███████▉  | 7189/9000 [2:42:20<1:35:13,  3.15s/it]

{'eval_loss': 0.29688724875450134, 'eval_f1': 0.9198396793587175, 'eval_runtime': 9.7832, 'eval_samples_per_second': 102.216, 'eval_steps_per_second': 12.777, 'epoch': 3.19}


                                                       
 80%|████████  | 7201/9000 [2:42:32<1:34:29,  3.15s/it]

{'eval_loss': 0.27937713265419006, 'eval_f1': 0.9283551967709384, 'eval_runtime': 9.7716, 'eval_samples_per_second': 102.337, 'eval_steps_per_second': 12.792, 'epoch': 3.2}


                                                       
 80%|████████  | 7213/9000 [2:42:43<1:31:02,  3.06s/it]

{'eval_loss': 0.29270946979522705, 'eval_f1': 0.9238790406673618, 'eval_runtime': 9.4461, 'eval_samples_per_second': 105.864, 'eval_steps_per_second': 13.233, 'epoch': 3.21}


                                                       
 80%|████████  | 7225/9000 [2:42:55<1:33:02,  3.15s/it]

{'eval_loss': 0.28969240188598633, 'eval_f1': 0.9219712525667351, 'eval_runtime': 9.7717, 'eval_samples_per_second': 102.337, 'eval_steps_per_second': 12.792, 'epoch': 3.21}


                                                       
 80%|████████  | 7237/9000 [2:43:07<1:32:53,  3.16s/it]

{'eval_loss': 0.2910744547843933, 'eval_f1': 0.9244897959183674, 'eval_runtime': 9.7708, 'eval_samples_per_second': 102.346, 'eval_steps_per_second': 12.793, 'epoch': 3.22}


                                                       
 81%|████████  | 7249/9000 [2:43:19<1:30:57,  3.12s/it]

{'eval_loss': 0.28710928559303284, 'eval_f1': 0.9238790406673618, 'eval_runtime': 9.6591, 'eval_samples_per_second': 103.529, 'eval_steps_per_second': 12.941, 'epoch': 3.22}


                                                       
 81%|████████  | 7261/9000 [2:43:31<1:31:04,  3.14s/it]

{'eval_loss': 0.28994160890579224, 'eval_f1': 0.9265770423991727, 'eval_runtime': 9.7216, 'eval_samples_per_second': 102.864, 'eval_steps_per_second': 12.858, 'epoch': 3.23}


                                                       
 81%|████████  | 7273/9000 [2:43:43<1:35:01,  3.30s/it]

{'eval_loss': 0.29622888565063477, 'eval_f1': 0.9221435793731041, 'eval_runtime': 10.2723, 'eval_samples_per_second': 97.349, 'eval_steps_per_second': 12.169, 'epoch': 3.23}


                                                       
 81%|████████  | 7285/9000 [2:43:55<1:30:18,  3.16s/it]

{'eval_loss': 0.2897797226905823, 'eval_f1': 0.920570264765784, 'eval_runtime': 9.8025, 'eval_samples_per_second': 102.015, 'eval_steps_per_second': 12.752, 'epoch': 3.24}


                                                       
 81%|████████  | 7297/9000 [2:44:07<1:30:25,  3.19s/it]

{'eval_loss': 0.31470435857772827, 'eval_f1': 0.9112050739957717, 'eval_runtime': 9.8822, 'eval_samples_per_second': 101.192, 'eval_steps_per_second': 12.649, 'epoch': 3.24}


                                                       
 81%|████████  | 7309/9000 [2:44:20<1:31:15,  3.24s/it]

{'eval_loss': 0.3153701722621918, 'eval_f1': 0.9110169491525424, 'eval_runtime': 10.0408, 'eval_samples_per_second': 99.594, 'eval_steps_per_second': 12.449, 'epoch': 3.25}


                                                       
 81%|████████▏ | 7321/9000 [2:44:32<1:28:28,  3.16s/it]

{'eval_loss': 0.2971823215484619, 'eval_f1': 0.9171038824763904, 'eval_runtime': 9.8117, 'eval_samples_per_second': 101.919, 'eval_steps_per_second': 12.74, 'epoch': 3.25}


                                                       
 81%|████████▏ | 7333/9000 [2:44:44<1:28:56,  3.20s/it]

{'eval_loss': 0.2874535918235779, 'eval_f1': 0.9210526315789475, 'eval_runtime': 9.9529, 'eval_samples_per_second': 100.473, 'eval_steps_per_second': 12.559, 'epoch': 3.26}


                                                       
 82%|████████▏ | 7345/9000 [2:44:56<1:27:12,  3.16s/it]

{'eval_loss': 0.2638666331768036, 'eval_f1': 0.9275362318840581, 'eval_runtime': 9.8044, 'eval_samples_per_second': 101.995, 'eval_steps_per_second': 12.749, 'epoch': 3.26}


                                                       
 82%|████████▏ | 7357/9000 [2:45:08<1:27:30,  3.20s/it]

{'eval_loss': 0.3526483178138733, 'eval_f1': 0.8947368421052632, 'eval_runtime': 9.8983, 'eval_samples_per_second': 101.028, 'eval_steps_per_second': 12.628, 'epoch': 3.27}


                                                       
 82%|████████▏ | 7369/9000 [2:45:20<1:25:25,  3.14s/it]

{'eval_loss': 0.2906225621700287, 'eval_f1': 0.918200408997955, 'eval_runtime': 9.739, 'eval_samples_per_second': 102.68, 'eval_steps_per_second': 12.835, 'epoch': 3.27}


                                                       
 82%|████████▏ | 7381/9000 [2:45:32<1:26:16,  3.20s/it]

{'eval_loss': 0.30031538009643555, 'eval_f1': 0.9140461215932913, 'eval_runtime': 9.9246, 'eval_samples_per_second': 100.759, 'eval_steps_per_second': 12.595, 'epoch': 3.28}


                                                       
 82%|████████▏ | 7393/9000 [2:45:44<1:24:31,  3.16s/it]

{'eval_loss': 0.29432111978530884, 'eval_f1': 0.9164021164021164, 'eval_runtime': 9.7905, 'eval_samples_per_second': 102.14, 'eval_steps_per_second': 12.767, 'epoch': 3.29}


                                                       
 82%|████████▏ | 7405/9000 [2:45:56<1:22:49,  3.12s/it]

{'eval_loss': 0.2802702784538269, 'eval_f1': 0.9229188078108941, 'eval_runtime': 9.6536, 'eval_samples_per_second': 103.589, 'eval_steps_per_second': 12.949, 'epoch': 3.29}


                                                       
 82%|████████▏ | 7417/9000 [2:46:07<1:21:58,  3.11s/it]

{'eval_loss': 0.2742445170879364, 'eval_f1': 0.9186991869918699, 'eval_runtime': 9.6459, 'eval_samples_per_second': 103.671, 'eval_steps_per_second': 12.959, 'epoch': 3.3}


                                                       
 83%|████████▎ | 7429/9000 [2:46:19<1:23:36,  3.19s/it]

{'eval_loss': 0.2766968309879303, 'eval_f1': 0.9200819672131147, 'eval_runtime': 9.8754, 'eval_samples_per_second': 101.261, 'eval_steps_per_second': 12.658, 'epoch': 3.3}


                                                       
 83%|████████▎ | 7440/9000 [2:46:31<06:07,  4.25it/s]

{'eval_loss': 0.2857995629310608, 'eval_f1': 0.9184726522187822, 'eval_runtime': 9.9283, 'eval_samples_per_second': 100.722, 'eval_steps_per_second': 12.59, 'epoch': 3.31}


                                                       
 83%|████████▎ | 7452/9000 [2:46:43<06:08,  4.20it/s]

{'eval_loss': 0.28381261229515076, 'eval_f1': 0.9158878504672897, 'eval_runtime': 10.0343, 'eval_samples_per_second': 99.658, 'eval_steps_per_second': 12.457, 'epoch': 3.31}


                                                       
 83%|████████▎ | 7465/9000 [2:46:56<1:23:06,  3.25s/it]

{'eval_loss': 0.275505006313324, 'eval_f1': 0.9186906019007393, 'eval_runtime': 10.0729, 'eval_samples_per_second': 99.276, 'eval_steps_per_second': 12.41, 'epoch': 3.32}


                                                       
 83%|████████▎ | 7477/9000 [2:47:08<1:21:35,  3.21s/it]

{'eval_loss': 0.2934105098247528, 'eval_f1': 0.9204771371769384, 'eval_runtime': 9.9614, 'eval_samples_per_second': 100.388, 'eval_steps_per_second': 12.548, 'epoch': 3.32}


                                                       
 83%|████████▎ | 7488/9000 [2:47:20<06:10,  4.08it/s]

{'eval_loss': 0.27777624130249023, 'eval_f1': 0.9209209209209208, 'eval_runtime': 9.7267, 'eval_samples_per_second': 102.809, 'eval_steps_per_second': 12.851, 'epoch': 3.33}


                                                       
 83%|████████▎ | 7501/9000 [2:47:32<1:18:43,  3.15s/it]

{'eval_loss': 0.2828899025917053, 'eval_f1': 0.923076923076923, 'eval_runtime': 9.7785, 'eval_samples_per_second': 102.265, 'eval_steps_per_second': 12.783, 'epoch': 3.33}


                                                       
 83%|████████▎ | 7513/9000 [2:47:44<1:18:19,  3.16s/it]

{'eval_loss': 0.2884179651737213, 'eval_f1': 0.9201596806387227, 'eval_runtime': 9.7913, 'eval_samples_per_second': 102.132, 'eval_steps_per_second': 12.766, 'epoch': 3.34}


                                                       
 84%|████████▎ | 7525/9000 [2:47:56<1:17:49,  3.17s/it]

{'eval_loss': 0.2815394401550293, 'eval_f1': 0.9252525252525252, 'eval_runtime': 9.8102, 'eval_samples_per_second': 101.934, 'eval_steps_per_second': 12.742, 'epoch': 3.34}


                                                       
 84%|████████▎ | 7537/9000 [2:48:08<1:18:43,  3.23s/it]

{'eval_loss': 0.30013391375541687, 'eval_f1': 0.9155370177267989, 'eval_runtime': 10.0297, 'eval_samples_per_second': 99.704, 'eval_steps_per_second': 12.463, 'epoch': 3.35}


                                                       
 84%|████████▍ | 7549/9000 [2:48:20<1:17:46,  3.22s/it]

{'eval_loss': 0.3118073046207428, 'eval_f1': 0.9073684210526316, 'eval_runtime': 9.9874, 'eval_samples_per_second': 100.126, 'eval_steps_per_second': 12.516, 'epoch': 3.35}


                                                       
 84%|████████▍ | 7561/9000 [2:48:32<1:16:01,  3.17s/it]

{'eval_loss': 0.29867833852767944, 'eval_f1': 0.9190871369294605, 'eval_runtime': 9.8365, 'eval_samples_per_second': 101.662, 'eval_steps_per_second': 12.708, 'epoch': 3.36}


                                                       
 84%|████████▍ | 7573/9000 [2:48:44<1:14:51,  3.15s/it]

{'eval_loss': 0.3184046149253845, 'eval_f1': 0.9165009940357853, 'eval_runtime': 9.7705, 'eval_samples_per_second': 102.349, 'eval_steps_per_second': 12.794, 'epoch': 3.37}


                                                       
 84%|████████▍ | 7585/9000 [2:48:56<1:14:38,  3.17s/it]

{'eval_loss': 0.29979845881462097, 'eval_f1': 0.9183040330920373, 'eval_runtime': 9.7764, 'eval_samples_per_second': 102.287, 'eval_steps_per_second': 12.786, 'epoch': 3.37}


                                                       
 84%|████████▍ | 7597/9000 [2:49:08<1:14:39,  3.19s/it]

{'eval_loss': 0.32664141058921814, 'eval_f1': 0.9150743099787686, 'eval_runtime': 9.905, 'eval_samples_per_second': 100.959, 'eval_steps_per_second': 12.62, 'epoch': 3.38}


                                                       
 85%|████████▍ | 7609/9000 [2:49:20<1:14:04,  3.20s/it]

{'eval_loss': 0.32566022872924805, 'eval_f1': 0.9163265306122449, 'eval_runtime': 9.9064, 'eval_samples_per_second': 100.945, 'eval_steps_per_second': 12.618, 'epoch': 3.38}


                                                       
 85%|████████▍ | 7621/9000 [2:49:32<1:12:26,  3.15s/it]

{'eval_loss': 0.35082951188087463, 'eval_f1': 0.9135300101729399, 'eval_runtime': 9.7612, 'eval_samples_per_second': 102.447, 'eval_steps_per_second': 12.806, 'epoch': 3.39}


                                                       
 85%|████████▍ | 7633/9000 [2:49:44<1:13:03,  3.21s/it]

{'eval_loss': 0.3623914420604706, 'eval_f1': 0.9127789046653143, 'eval_runtime': 9.9429, 'eval_samples_per_second': 100.574, 'eval_steps_per_second': 12.572, 'epoch': 3.39}


                                                       
 85%|████████▍ | 7645/9000 [2:49:56<1:12:32,  3.21s/it]

{'eval_loss': 0.3691425025463104, 'eval_f1': 0.9110889110889111, 'eval_runtime': 9.9588, 'eval_samples_per_second': 100.414, 'eval_steps_per_second': 12.552, 'epoch': 3.4}


                                                       
 85%|████████▌ | 7657/9000 [2:50:09<1:13:26,  3.28s/it]

{'eval_loss': 0.34210556745529175, 'eval_f1': 0.9127789046653143, 'eval_runtime': 10.2109, 'eval_samples_per_second': 97.934, 'eval_steps_per_second': 12.242, 'epoch': 3.4}


                                                       
 85%|████████▌ | 7668/9000 [2:50:21<05:25,  4.09it/s]

{'eval_loss': 0.3351778984069824, 'eval_f1': 0.9142857142857143, 'eval_runtime': 9.7358, 'eval_samples_per_second': 102.713, 'eval_steps_per_second': 12.839, 'epoch': 3.41}


                                                       
 85%|████████▌ | 7681/9000 [2:50:33<1:09:09,  3.15s/it]

{'eval_loss': 0.32079246640205383, 'eval_f1': 0.9173553719008266, 'eval_runtime': 9.7663, 'eval_samples_per_second': 102.393, 'eval_steps_per_second': 12.799, 'epoch': 3.41}


                                                       
 85%|████████▌ | 7693/9000 [2:50:45<1:10:39,  3.24s/it]

{'eval_loss': 0.32561609148979187, 'eval_f1': 0.9177939646201874, 'eval_runtime': 10.0798, 'eval_samples_per_second': 99.208, 'eval_steps_per_second': 12.401, 'epoch': 3.42}


                                                       
 86%|████████▌ | 7704/9000 [2:50:57<05:14,  4.12it/s]

{'eval_loss': 0.33401018381118774, 'eval_f1': 0.9141104294478528, 'eval_runtime': 9.9942, 'eval_samples_per_second': 100.058, 'eval_steps_per_second': 12.507, 'epoch': 3.42}


                                                       
 86%|████████▌ | 7717/9000 [2:51:09<1:08:18,  3.19s/it]

{'eval_loss': 0.33752965927124023, 'eval_f1': 0.9067201604814443, 'eval_runtime': 9.9197, 'eval_samples_per_second': 100.809, 'eval_steps_per_second': 12.601, 'epoch': 3.43}


                                                       
 86%|████████▌ | 7728/9000 [2:51:21<05:05,  4.16it/s]

{'eval_loss': 0.31001123785972595, 'eval_f1': 0.9151832460732985, 'eval_runtime': 9.9743, 'eval_samples_per_second': 100.257, 'eval_steps_per_second': 12.532, 'epoch': 3.43}


                                                       
 86%|████████▌ | 7741/9000 [2:51:33<1:07:14,  3.20s/it]

{'eval_loss': 0.3029831647872925, 'eval_f1': 0.9117341640706127, 'eval_runtime': 9.9356, 'eval_samples_per_second': 100.648, 'eval_steps_per_second': 12.581, 'epoch': 3.44}


                                                       
 86%|████████▌ | 7753/9000 [2:51:46<1:07:16,  3.24s/it]

{'eval_loss': 0.3007189631462097, 'eval_f1': 0.9133537206931702, 'eval_runtime': 10.0293, 'eval_samples_per_second': 99.708, 'eval_steps_per_second': 12.463, 'epoch': 3.45}


                                                       
 86%|████████▋ | 7764/9000 [2:51:58<05:08,  4.01it/s]

{'eval_loss': 0.31328287720680237, 'eval_f1': 0.9154929577464789, 'eval_runtime': 9.9556, 'eval_samples_per_second': 100.446, 'eval_steps_per_second': 12.556, 'epoch': 3.45}


                                                       
 86%|████████▋ | 7777/9000 [2:52:10<1:04:45,  3.18s/it]

{'eval_loss': 0.3131464719772339, 'eval_f1': 0.9176706827309238, 'eval_runtime': 9.8509, 'eval_samples_per_second': 101.513, 'eval_steps_per_second': 12.689, 'epoch': 3.46}


                                                       
 87%|████████▋ | 7789/9000 [2:52:22<1:03:51,  3.16s/it]

{'eval_loss': 0.29632487893104553, 'eval_f1': 0.9202453987730062, 'eval_runtime': 9.8156, 'eval_samples_per_second': 101.879, 'eval_steps_per_second': 12.735, 'epoch': 3.46}


                                                       
 87%|████████▋ | 7800/9000 [2:52:34<05:48,  3.45it/s]

{'eval_loss': 0.34246641397476196, 'eval_f1': 0.9092849519743863, 'eval_runtime': 9.8812, 'eval_samples_per_second': 101.202, 'eval_steps_per_second': 12.65, 'epoch': 3.47}


                                                       
 87%|████████▋ | 7813/9000 [2:52:46<1:02:32,  3.16s/it]

{'eval_loss': 0.2945884168148041, 'eval_f1': 0.9175991861648017, 'eval_runtime': 9.8025, 'eval_samples_per_second': 102.015, 'eval_steps_per_second': 12.752, 'epoch': 3.47}


                                                       
 87%|████████▋ | 7825/9000 [2:52:58<1:03:26,  3.24s/it]

{'eval_loss': 0.27560657262802124, 'eval_f1': 0.9232343909928352, 'eval_runtime': 10.0556, 'eval_samples_per_second': 99.448, 'eval_steps_per_second': 12.431, 'epoch': 3.48}


                                                       
 87%|████████▋ | 7837/9000 [2:53:10<1:02:29,  3.22s/it]

{'eval_loss': 0.2696530222892761, 'eval_f1': 0.9294605809128632, 'eval_runtime': 9.9957, 'eval_samples_per_second': 100.043, 'eval_steps_per_second': 12.505, 'epoch': 3.48}


                                                       
 87%|████████▋ | 7849/9000 [2:53:23<1:01:56,  3.23s/it]

{'eval_loss': 0.2605600953102112, 'eval_f1': 0.9282786885245901, 'eval_runtime': 10.0213, 'eval_samples_per_second': 99.787, 'eval_steps_per_second': 12.473, 'epoch': 3.49}


                                                       
 87%|████████▋ | 7861/9000 [2:53:35<1:01:00,  3.21s/it]

{'eval_loss': 0.26208963990211487, 'eval_f1': 0.9240121580547113, 'eval_runtime': 9.9726, 'eval_samples_per_second': 100.275, 'eval_steps_per_second': 12.534, 'epoch': 3.49}


                                                       
 87%|████████▋ | 7872/9000 [2:53:47<04:34,  4.11it/s]

{'eval_loss': 0.35207441449165344, 'eval_f1': 0.9094412331406553, 'eval_runtime': 10.1348, 'eval_samples_per_second': 98.67, 'eval_steps_per_second': 12.334, 'epoch': 3.5}


 88%|████████▊ | 7876/9000 [2:53:48<23:16,  1.24s/it]  

{'loss': 0.1775, 'learning_rate': 2.5155555555555555e-05, 'epoch': 3.5}


                                                     
 88%|████████▊ | 7885/9000 [2:53:59<1:00:30,  3.26s/it]

{'eval_loss': 0.3283838629722595, 'eval_f1': 0.9126213592233009, 'eval_runtime': 10.1092, 'eval_samples_per_second': 98.92, 'eval_steps_per_second': 12.365, 'epoch': 3.5}


                                                       
 88%|████████▊ | 7897/9000 [2:54:11<58:15,  3.17s/it]

{'eval_loss': 0.2633083462715149, 'eval_f1': 0.9277721261444558, 'eval_runtime': 9.819, 'eval_samples_per_second': 101.844, 'eval_steps_per_second': 12.73, 'epoch': 3.51}


                                                     
 88%|████████▊ | 7909/9000 [2:54:24<58:37,  3.22s/it]

{'eval_loss': 0.2595618963241577, 'eval_f1': 0.9235537190082644, 'eval_runtime': 10.0095, 'eval_samples_per_second': 99.905, 'eval_steps_per_second': 12.488, 'epoch': 3.51}


                                                     
 88%|████████▊ | 7921/9000 [2:54:36<57:22,  3.19s/it]

{'eval_loss': 0.2703193724155426, 'eval_f1': 0.9208333333333334, 'eval_runtime': 9.8935, 'eval_samples_per_second': 101.077, 'eval_steps_per_second': 12.635, 'epoch': 3.52}


                                                     
 88%|████████▊ | 7933/9000 [2:54:48<57:47,  3.25s/it]

{'eval_loss': 0.2690010368824005, 'eval_f1': 0.9208633093525179, 'eval_runtime': 10.0919, 'eval_samples_per_second': 99.089, 'eval_steps_per_second': 12.386, 'epoch': 3.53}


                                                     
 88%|████████▊ | 7945/9000 [2:55:00<56:53,  3.24s/it]

{'eval_loss': 0.2910345494747162, 'eval_f1': 0.9181636726546906, 'eval_runtime': 10.0107, 'eval_samples_per_second': 99.893, 'eval_steps_per_second': 12.487, 'epoch': 3.53}


                                                     
 88%|████████▊ | 7957/9000 [2:55:13<55:16,  3.18s/it]

{'eval_loss': 0.29241910576820374, 'eval_f1': 0.9216867469879517, 'eval_runtime': 9.8273, 'eval_samples_per_second': 101.757, 'eval_steps_per_second': 12.72, 'epoch': 3.54}


                                                     
 89%|████████▊ | 7969/9000 [2:55:24<53:47,  3.13s/it]

{'eval_loss': 0.2978975772857666, 'eval_f1': 0.9206349206349207, 'eval_runtime': 9.6923, 'eval_samples_per_second': 103.175, 'eval_steps_per_second': 12.897, 'epoch': 3.54}


                                                     
 89%|████████▊ | 7981/9000 [2:55:36<54:18,  3.20s/it]

{'eval_loss': 0.2834533751010895, 'eval_f1': 0.9238578680203045, 'eval_runtime': 9.922, 'eval_samples_per_second': 100.786, 'eval_steps_per_second': 12.598, 'epoch': 3.55}


                                                     
 89%|████████▉ | 7993/9000 [2:55:48<53:16,  3.17s/it]

{'eval_loss': 0.29994314908981323, 'eval_f1': 0.9157894736842105, 'eval_runtime': 9.8648, 'eval_samples_per_second': 101.37, 'eval_steps_per_second': 12.671, 'epoch': 3.55}


                                                     
 89%|████████▉ | 8005/9000 [2:56:01<52:47,  3.18s/it]

{'eval_loss': 0.38782966136932373, 'eval_f1': 0.8862144420131292, 'eval_runtime': 9.8317, 'eval_samples_per_second': 101.712, 'eval_steps_per_second': 12.714, 'epoch': 3.56}


                                                     
 89%|████████▉ | 8017/9000 [2:56:13<52:30,  3.21s/it]

{'eval_loss': 0.2931179106235504, 'eval_f1': 0.9177215189873418, 'eval_runtime': 9.9422, 'eval_samples_per_second': 100.582, 'eval_steps_per_second': 12.573, 'epoch': 3.56}


                                                     
 89%|████████▉ | 8029/9000 [2:56:25<52:32,  3.25s/it]

{'eval_loss': 0.28085216879844666, 'eval_f1': 0.9204431017119838, 'eval_runtime': 10.0805, 'eval_samples_per_second': 99.201, 'eval_steps_per_second': 12.4, 'epoch': 3.57}


                                                     
 89%|████████▉ | 8041/9000 [2:56:37<51:56,  3.25s/it]

{'eval_loss': 0.2709205746650696, 'eval_f1': 0.9249492900608519, 'eval_runtime': 10.0728, 'eval_samples_per_second': 99.278, 'eval_steps_per_second': 12.41, 'epoch': 3.57}


                                                     
 89%|████████▉ | 8053/9000 [2:56:49<51:09,  3.24s/it]

{'eval_loss': 0.27231365442276, 'eval_f1': 0.9252525252525252, 'eval_runtime': 10.0677, 'eval_samples_per_second': 99.328, 'eval_steps_per_second': 12.416, 'epoch': 3.58}


                                                     
 90%|████████▉ | 8065/9000 [2:57:02<50:20,  3.23s/it]

{'eval_loss': 0.28855326771736145, 'eval_f1': 0.92123629112662, 'eval_runtime': 10.0157, 'eval_samples_per_second': 99.844, 'eval_steps_per_second': 12.48, 'epoch': 3.58}


                                                     
 90%|████████▉ | 8077/9000 [2:57:14<48:55,  3.18s/it]

{'eval_loss': 0.2735676169395447, 'eval_f1': 0.9272727272727272, 'eval_runtime': 9.8641, 'eval_samples_per_second': 101.378, 'eval_steps_per_second': 12.672, 'epoch': 3.59}


                                                     
 90%|████████▉ | 8089/9000 [2:57:26<49:04,  3.23s/it]

{'eval_loss': 0.26516157388687134, 'eval_f1': 0.928497409326425, 'eval_runtime': 10.0429, 'eval_samples_per_second': 99.573, 'eval_steps_per_second': 12.447, 'epoch': 3.59}


                                                     
 90%|█████████ | 8101/9000 [2:57:38<48:30,  3.24s/it]

{'eval_loss': 0.2741556763648987, 'eval_f1': 0.9238790406673618, 'eval_runtime': 10.0001, 'eval_samples_per_second': 99.999, 'eval_steps_per_second': 12.5, 'epoch': 3.6}


                                                     
 90%|█████████ | 8113/9000 [2:57:50<48:02,  3.25s/it]

{'eval_loss': 0.26831990480422974, 'eval_f1': 0.9192546583850932, 'eval_runtime': 10.023, 'eval_samples_per_second': 99.771, 'eval_steps_per_second': 12.471, 'epoch': 3.61}


                                                     
 90%|█████████ | 8125/9000 [2:58:03<46:42,  3.20s/it]

{'eval_loss': 0.26534292101860046, 'eval_f1': 0.9248197734294542, 'eval_runtime': 9.9156, 'eval_samples_per_second': 100.851, 'eval_steps_per_second': 12.606, 'epoch': 3.61}


                                                     
 90%|█████████ | 8137/9000 [2:58:15<46:28,  3.23s/it]

{'eval_loss': 0.27496767044067383, 'eval_f1': 0.9218436873747495, 'eval_runtime': 10.0091, 'eval_samples_per_second': 99.909, 'eval_steps_per_second': 12.489, 'epoch': 3.62}


                                                     
 91%|█████████ | 8148/9000 [2:58:27<03:24,  4.17it/s]

{'eval_loss': 0.26936468482017517, 'eval_f1': 0.9271794871794872, 'eval_runtime': 10.0244, 'eval_samples_per_second': 99.757, 'eval_steps_per_second': 12.47, 'epoch': 3.62}


                                                     
 91%|█████████ | 8161/9000 [2:58:39<44:58,  3.22s/it]

{'eval_loss': 0.2778259515762329, 'eval_f1': 0.9213250517598345, 'eval_runtime': 9.9776, 'eval_samples_per_second': 100.224, 'eval_steps_per_second': 12.528, 'epoch': 3.63}


                                                     
 91%|█████████ | 8172/9000 [2:58:51<03:21,  4.11it/s]

{'eval_loss': 0.2785264253616333, 'eval_f1': 0.926530612244898, 'eval_runtime': 9.9098, 'eval_samples_per_second': 100.91, 'eval_steps_per_second': 12.614, 'epoch': 3.63}


                                                     
 91%|█████████ | 8185/9000 [2:59:04<43:26,  3.20s/it]

{'eval_loss': 0.287777841091156, 'eval_f1': 0.9240246406570842, 'eval_runtime': 9.897, 'eval_samples_per_second': 101.041, 'eval_steps_per_second': 12.63, 'epoch': 3.64}


                                                     
 91%|█████████ | 8197/9000 [2:59:16<42:39,  3.19s/it]

{'eval_loss': 0.30755725502967834, 'eval_f1': 0.9148264984227129, 'eval_runtime': 9.904, 'eval_samples_per_second': 100.969, 'eval_steps_per_second': 12.621, 'epoch': 3.64}


                                                     
 91%|█████████ | 8209/9000 [2:59:28<42:18,  3.21s/it]

{'eval_loss': 0.2905600666999817, 'eval_f1': 0.9260780287474334, 'eval_runtime': 9.9557, 'eval_samples_per_second': 100.445, 'eval_steps_per_second': 12.556, 'epoch': 3.65}


                                                     
 91%|█████████▏| 8221/9000 [2:59:40<41:51,  3.22s/it]

{'eval_loss': 0.31665751338005066, 'eval_f1': 0.9161676646706586, 'eval_runtime': 10.0087, 'eval_samples_per_second': 99.913, 'eval_steps_per_second': 12.489, 'epoch': 3.65}


                                                     
 91%|█████████▏| 8233/9000 [2:59:52<40:45,  3.19s/it]

{'eval_loss': 0.2951987385749817, 'eval_f1': 0.9290060851926978, 'eval_runtime': 9.8872, 'eval_samples_per_second': 101.141, 'eval_steps_per_second': 12.643, 'epoch': 3.66}


                                                     
 92%|█████████▏| 8245/9000 [3:00:04<40:53,  3.25s/it]

{'eval_loss': 0.2906816303730011, 'eval_f1': 0.9269776876267749, 'eval_runtime': 10.0751, 'eval_samples_per_second': 99.255, 'eval_steps_per_second': 12.407, 'epoch': 3.66}


                                                     
 92%|█████████▏| 8257/9000 [3:00:17<40:27,  3.27s/it]

{'eval_loss': 0.2822228968143463, 'eval_f1': 0.9284253578732107, 'eval_runtime': 10.1271, 'eval_samples_per_second': 98.745, 'eval_steps_per_second': 12.343, 'epoch': 3.67}


                                                     
 92%|█████████▏| 8269/9000 [3:00:29<39:28,  3.24s/it]

{'eval_loss': 0.29611286520957947, 'eval_f1': 0.9208333333333334, 'eval_runtime': 10.0607, 'eval_samples_per_second': 99.396, 'eval_steps_per_second': 12.425, 'epoch': 3.67}


                                                     
 92%|█████████▏| 8281/9000 [3:00:41<38:56,  3.25s/it]

{'eval_loss': 0.2861478328704834, 'eval_f1': 0.9293756397134083, 'eval_runtime': 10.0887, 'eval_samples_per_second': 99.121, 'eval_steps_per_second': 12.39, 'epoch': 3.68}


                                                     
 92%|█████████▏| 8293/9000 [3:00:53<37:57,  3.22s/it]

{'eval_loss': 0.28331875801086426, 'eval_f1': 0.9274770173646578, 'eval_runtime': 9.9918, 'eval_samples_per_second': 100.082, 'eval_steps_per_second': 12.51, 'epoch': 3.69}


                                                     
 92%|█████████▏| 8304/9000 [3:01:05<02:45,  4.20it/s]

{'eval_loss': 0.28694412112236023, 'eval_f1': 0.9281314168377824, 'eval_runtime': 10.1909, 'eval_samples_per_second': 98.127, 'eval_steps_per_second': 12.266, 'epoch': 3.69}


                                                     
 92%|█████████▏| 8317/9000 [3:01:18<37:02,  3.25s/it]

{'eval_loss': 0.2929799258708954, 'eval_f1': 0.9273285568065507, 'eval_runtime': 10.0881, 'eval_samples_per_second': 99.127, 'eval_steps_per_second': 12.391, 'epoch': 3.7}


                                                     
 93%|█████████▎| 8329/9000 [3:01:30<35:30,  3.17s/it]

{'eval_loss': 0.2987024486064911, 'eval_f1': 0.9226069246435845, 'eval_runtime': 9.8416, 'eval_samples_per_second': 101.609, 'eval_steps_per_second': 12.701, 'epoch': 3.7}


                                                     
 93%|█████████▎| 8340/9000 [3:01:42<02:37,  4.20it/s]

{'eval_loss': 0.30455780029296875, 'eval_f1': 0.9229208924949289, 'eval_runtime': 9.9651, 'eval_samples_per_second': 100.35, 'eval_steps_per_second': 12.544, 'epoch': 3.71}


                                                     
 93%|█████████▎| 8353/9000 [3:01:54<33:54,  3.14s/it]

{'eval_loss': 0.31186169385910034, 'eval_f1': 0.9238578680203045, 'eval_runtime': 9.7496, 'eval_samples_per_second': 102.569, 'eval_steps_per_second': 12.821, 'epoch': 3.71}


                                                     
 93%|█████████▎| 8365/9000 [3:02:06<33:50,  3.20s/it]

{'eval_loss': 0.31686216592788696, 'eval_f1': 0.9206680584551149, 'eval_runtime': 9.923, 'eval_samples_per_second': 100.776, 'eval_steps_per_second': 12.597, 'epoch': 3.72}


                                                     
 93%|█████████▎| 8376/9000 [3:02:18<02:24,  4.32it/s]

{'eval_loss': 0.3046802282333374, 'eval_f1': 0.9214659685863874, 'eval_runtime': 9.92, 'eval_samples_per_second': 100.806, 'eval_steps_per_second': 12.601, 'epoch': 3.72}


                                                     
 93%|█████████▎| 8389/9000 [3:02:30<32:41,  3.21s/it]

{'eval_loss': 0.2886894941329956, 'eval_f1': 0.925, 'eval_runtime': 9.951, 'eval_samples_per_second': 100.492, 'eval_steps_per_second': 12.562, 'epoch': 3.73}


                                                     
 93%|█████████▎| 8401/9000 [3:02:42<32:20,  3.24s/it]

{'eval_loss': 0.2902122735977173, 'eval_f1': 0.9254658385093169, 'eval_runtime': 10.0203, 'eval_samples_per_second': 99.798, 'eval_steps_per_second': 12.475, 'epoch': 3.73}


                                                     
 93%|█████████▎| 8413/9000 [3:02:55<32:51,  3.36s/it]

{'eval_loss': 0.29333531856536865, 'eval_f1': 0.9262295081967212, 'eval_runtime': 10.4306, 'eval_samples_per_second': 95.872, 'eval_steps_per_second': 11.984, 'epoch': 3.74}


                                                     
 94%|█████████▎| 8425/9000 [3:03:07<31:18,  3.27s/it]

{'eval_loss': 0.2960669994354248, 'eval_f1': 0.9215086646279307, 'eval_runtime': 10.1318, 'eval_samples_per_second': 98.699, 'eval_steps_per_second': 12.337, 'epoch': 3.74}


                                                     
 94%|█████████▎| 8437/9000 [3:03:20<30:10,  3.22s/it]

{'eval_loss': 0.29400700330734253, 'eval_f1': 0.9224489795918366, 'eval_runtime': 9.9685, 'eval_samples_per_second': 100.316, 'eval_steps_per_second': 12.54, 'epoch': 3.75}


                                                     
 94%|█████████▍| 8449/9000 [3:03:32<29:41,  3.23s/it]

{'eval_loss': 0.2930707335472107, 'eval_f1': 0.9257731958762886, 'eval_runtime': 10.0337, 'eval_samples_per_second': 99.664, 'eval_steps_per_second': 12.458, 'epoch': 3.75}


                                                     
 94%|█████████▍| 8461/9000 [3:03:44<28:22,  3.16s/it]

{'eval_loss': 0.2965116500854492, 'eval_f1': 0.9254658385093169, 'eval_runtime': 9.7912, 'eval_samples_per_second': 102.132, 'eval_steps_per_second': 12.767, 'epoch': 3.76}


                                                     
 94%|█████████▍| 8473/9000 [3:03:56<27:47,  3.16s/it]

{'eval_loss': 0.30483439564704895, 'eval_f1': 0.921793534932221, 'eval_runtime': 9.8212, 'eval_samples_per_second': 101.821, 'eval_steps_per_second': 12.728, 'epoch': 3.77}


                                                     
 94%|█████████▍| 8485/9000 [3:04:08<27:11,  3.17s/it]

{'eval_loss': 0.2995989918708801, 'eval_f1': 0.9241952232606438, 'eval_runtime': 9.8343, 'eval_samples_per_second': 101.685, 'eval_steps_per_second': 12.711, 'epoch': 3.77}


                                                     
 94%|█████████▍| 8497/9000 [3:04:20<26:43,  3.19s/it]

{'eval_loss': 0.2873036563396454, 'eval_f1': 0.9287925696594427, 'eval_runtime': 9.8949, 'eval_samples_per_second': 101.062, 'eval_steps_per_second': 12.633, 'epoch': 3.78}


                                                     
 95%|█████████▍| 8509/9000 [3:04:32<26:24,  3.23s/it]

{'eval_loss': 0.2836112082004547, 'eval_f1': 0.926530612244898, 'eval_runtime': 10.0217, 'eval_samples_per_second': 99.783, 'eval_steps_per_second': 12.473, 'epoch': 3.78}


                                                     
 95%|█████████▍| 8521/9000 [3:04:44<25:50,  3.24s/it]

{'eval_loss': 0.2994033694267273, 'eval_f1': 0.9223107569721116, 'eval_runtime': 10.0345, 'eval_samples_per_second': 99.657, 'eval_steps_per_second': 12.457, 'epoch': 3.79}


                                                     
 95%|█████████▍| 8532/9000 [3:04:56<01:50,  4.25it/s]

{'eval_loss': 0.2984905540943146, 'eval_f1': 0.9232303090727816, 'eval_runtime': 10.096, 'eval_samples_per_second': 99.049, 'eval_steps_per_second': 12.381, 'epoch': 3.79}


                                                     
 95%|█████████▍| 8545/9000 [3:05:09<24:44,  3.26s/it]

{'eval_loss': 0.28410765528678894, 'eval_f1': 0.9241658240647118, 'eval_runtime': 10.1312, 'eval_samples_per_second': 98.705, 'eval_steps_per_second': 12.338, 'epoch': 3.8}


                                                     
 95%|█████████▌| 8557/9000 [3:05:21<23:21,  3.16s/it]

{'eval_loss': 0.2825010120868683, 'eval_f1': 0.9279187817258884, 'eval_runtime': 9.7976, 'eval_samples_per_second': 102.066, 'eval_steps_per_second': 12.758, 'epoch': 3.8}


                                                     
 95%|█████████▌| 8569/9000 [3:05:33<22:59,  3.20s/it]

{'eval_loss': 0.28501951694488525, 'eval_f1': 0.9258883248730966, 'eval_runtime': 9.9083, 'eval_samples_per_second': 100.926, 'eval_steps_per_second': 12.616, 'epoch': 3.81}


                                                     
 95%|█████████▌| 8580/9000 [3:05:44<01:37,  4.30it/s]

{'eval_loss': 0.2861744463443756, 'eval_f1': 0.9247967479674797, 'eval_runtime': 9.9218, 'eval_samples_per_second': 100.788, 'eval_steps_per_second': 12.599, 'epoch': 3.81}


                                                     
 95%|█████████▌| 8593/9000 [3:05:57<21:42,  3.20s/it]

{'eval_loss': 0.288311243057251, 'eval_f1': 0.9226069246435845, 'eval_runtime': 9.9393, 'eval_samples_per_second': 100.611, 'eval_steps_per_second': 12.576, 'epoch': 3.82}


                                                     
 96%|█████████▌| 8604/9000 [3:06:09<01:38,  4.01it/s]

{'eval_loss': 0.2912978231906891, 'eval_f1': 0.9202825428859738, 'eval_runtime': 9.9193, 'eval_samples_per_second': 100.814, 'eval_steps_per_second': 12.602, 'epoch': 3.82}


                                                     
 96%|█████████▌| 8616/9000 [3:06:21<01:29,  4.29it/s]

{'eval_loss': 0.29443249106407166, 'eval_f1': 0.9199999999999999, 'eval_runtime': 9.7869, 'eval_samples_per_second': 102.177, 'eval_steps_per_second': 12.772, 'epoch': 3.83}


                                                     
 96%|█████████▌| 8628/9000 [3:06:33<01:30,  4.10it/s]

{'eval_loss': 0.2898702919483185, 'eval_f1': 0.9204431017119838, 'eval_runtime': 10.0389, 'eval_samples_per_second': 99.612, 'eval_steps_per_second': 12.452, 'epoch': 3.83}


                                                     
 96%|█████████▌| 8641/9000 [3:06:45<19:20,  3.23s/it]

{'eval_loss': 0.28267499804496765, 'eval_f1': 0.9237029501525942, 'eval_runtime': 10.0209, 'eval_samples_per_second': 99.791, 'eval_steps_per_second': 12.474, 'epoch': 3.84}


                                                     
 96%|█████████▌| 8653/9000 [3:06:58<18:37,  3.22s/it]

{'eval_loss': 0.2820640206336975, 'eval_f1': 0.9243353783231084, 'eval_runtime': 9.9979, 'eval_samples_per_second': 100.021, 'eval_steps_per_second': 12.503, 'epoch': 3.85}


                                                     
 96%|█████████▋| 8665/9000 [3:07:10<18:19,  3.28s/it]

{'eval_loss': 0.28474146127700806, 'eval_f1': 0.9229208924949289, 'eval_runtime': 10.2252, 'eval_samples_per_second': 97.798, 'eval_steps_per_second': 12.225, 'epoch': 3.85}


                                                     
 96%|█████████▋| 8677/9000 [3:07:22<17:14,  3.20s/it]

{'eval_loss': 0.2898378372192383, 'eval_f1': 0.9209209209209208, 'eval_runtime': 9.9293, 'eval_samples_per_second': 100.712, 'eval_steps_per_second': 12.589, 'epoch': 3.86}


                                                     
 97%|█████████▋| 8689/9000 [3:07:34<16:39,  3.21s/it]

{'eval_loss': 0.2902623116970062, 'eval_f1': 0.9185929648241207, 'eval_runtime': 9.9478, 'eval_samples_per_second': 100.524, 'eval_steps_per_second': 12.566, 'epoch': 3.86}


                                                     
 97%|█████████▋| 8701/9000 [3:07:46<16:13,  3.26s/it]

{'eval_loss': 0.28515005111694336, 'eval_f1': 0.9186991869918699, 'eval_runtime': 10.0964, 'eval_samples_per_second': 99.045, 'eval_steps_per_second': 12.381, 'epoch': 3.87}


                                                     
 97%|█████████▋| 8713/9000 [3:07:59<15:34,  3.25s/it]

{'eval_loss': 0.2803420424461365, 'eval_f1': 0.9224489795918366, 'eval_runtime': 10.1072, 'eval_samples_per_second': 98.94, 'eval_steps_per_second': 12.367, 'epoch': 3.87}


                                                     
 97%|█████████▋| 8724/9000 [3:08:11<01:09,  3.97it/s]

{'eval_loss': 0.277709424495697, 'eval_f1': 0.9259259259259259, 'eval_runtime': 9.9588, 'eval_samples_per_second': 100.413, 'eval_steps_per_second': 12.552, 'epoch': 3.88}


                                                     
 97%|█████████▋| 8737/9000 [3:08:23<14:10,  3.23s/it]

{'eval_loss': 0.2820344865322113, 'eval_f1': 0.9254658385093169, 'eval_runtime': 10.0372, 'eval_samples_per_second': 99.63, 'eval_steps_per_second': 12.454, 'epoch': 3.88}


                                                     
 97%|█████████▋| 8749/9000 [3:08:35<13:11,  3.15s/it]

{'eval_loss': 0.2790440618991852, 'eval_f1': 0.9249743062692704, 'eval_runtime': 9.7942, 'eval_samples_per_second': 102.101, 'eval_steps_per_second': 12.763, 'epoch': 3.89}


                                                     
 97%|█████████▋| 8761/9000 [3:08:47<12:42,  3.19s/it]

{'eval_loss': 0.2796717882156372, 'eval_f1': 0.9260780287474334, 'eval_runtime': 9.9133, 'eval_samples_per_second': 100.874, 'eval_steps_per_second': 12.609, 'epoch': 3.89}


                                                     
 97%|█████████▋| 8772/9000 [3:08:59<00:53,  4.23it/s]

{'eval_loss': 0.2815045118331909, 'eval_f1': 0.9245087900723887, 'eval_runtime': 9.8991, 'eval_samples_per_second': 101.019, 'eval_steps_per_second': 12.627, 'epoch': 3.9}


                                                     
 98%|█████████▊| 8785/9000 [3:09:11<11:15,  3.14s/it]

{'eval_loss': 0.27894726395606995, 'eval_f1': 0.9276859504132231, 'eval_runtime': 9.7318, 'eval_samples_per_second': 102.756, 'eval_steps_per_second': 12.845, 'epoch': 3.9}


                                                     
 98%|█████████▊| 8797/9000 [3:09:23<10:50,  3.21s/it]

{'eval_loss': 0.2777577340602875, 'eval_f1': 0.9282786885245901, 'eval_runtime': 9.9486, 'eval_samples_per_second': 100.517, 'eval_steps_per_second': 12.565, 'epoch': 3.91}


                                                     
 98%|█████████▊| 8809/9000 [3:09:35<10:17,  3.23s/it]

{'eval_loss': 0.27973711490631104, 'eval_f1': 0.9227642276422764, 'eval_runtime': 10.0243, 'eval_samples_per_second': 99.758, 'eval_steps_per_second': 12.47, 'epoch': 3.91}


                                                     
 98%|█████████▊| 8821/9000 [3:09:48<09:34,  3.21s/it]

{'eval_loss': 0.2785564064979553, 'eval_f1': 0.9237029501525942, 'eval_runtime': 9.9488, 'eval_samples_per_second': 100.515, 'eval_steps_per_second': 12.564, 'epoch': 3.92}


                                                     
 98%|█████████▊| 8833/9000 [3:10:00<08:44,  3.14s/it]

{'eval_loss': 0.27747654914855957, 'eval_f1': 0.9227642276422764, 'eval_runtime': 9.7504, 'eval_samples_per_second': 102.56, 'eval_steps_per_second': 12.82, 'epoch': 3.93}


                                                     
 98%|█████████▊| 8845/9000 [3:10:12<08:15,  3.20s/it]

{'eval_loss': 0.27733784914016724, 'eval_f1': 0.9218274111675128, 'eval_runtime': 9.9057, 'eval_samples_per_second': 100.952, 'eval_steps_per_second': 12.619, 'epoch': 3.93}


                                                     
 98%|█████████▊| 8856/9000 [3:10:24<00:34,  4.23it/s]

{'eval_loss': 0.2762141227722168, 'eval_f1': 0.9274770173646578, 'eval_runtime': 10.0667, 'eval_samples_per_second': 99.337, 'eval_steps_per_second': 12.417, 'epoch': 3.94}


                                                     
 99%|█████████▊| 8868/9000 [3:10:37<00:35,  3.67it/s]

{'eval_loss': 0.27824121713638306, 'eval_f1': 0.9292307692307693, 'eval_runtime': 11.2202, 'eval_samples_per_second': 89.125, 'eval_steps_per_second': 11.141, 'epoch': 3.94}


                                                     
 99%|█████████▊| 8880/9000 [3:10:51<00:31,  3.84it/s]

{'eval_loss': 0.2798178195953369, 'eval_f1': 0.9248197734294542, 'eval_runtime': 11.0488, 'eval_samples_per_second': 90.508, 'eval_steps_per_second': 11.313, 'epoch': 3.95}


                                                     
 99%|█████████▉| 8893/9000 [3:11:04<06:09,  3.45s/it]

{'eval_loss': 0.27752435207366943, 'eval_f1': 0.9281314168377824, 'eval_runtime': 10.7112, 'eval_samples_per_second': 93.361, 'eval_steps_per_second': 11.67, 'epoch': 3.95}


                                                     
 99%|█████████▉| 8905/9000 [3:11:18<05:32,  3.51s/it]

{'eval_loss': 0.27761492133140564, 'eval_f1': 0.9292307692307693, 'eval_runtime': 10.8844, 'eval_samples_per_second': 91.875, 'eval_steps_per_second': 11.484, 'epoch': 3.96}


                                                     
 99%|█████████▉| 8916/9000 [3:11:31<00:22,  3.68it/s]

{'eval_loss': 0.2783431112766266, 'eval_f1': 0.9281314168377824, 'eval_runtime': 11.0785, 'eval_samples_per_second': 90.265, 'eval_steps_per_second': 11.283, 'epoch': 3.96}


                                                     
 99%|█████████▉| 8928/9000 [3:11:44<00:19,  3.68it/s]

{'eval_loss': 0.2798994183540344, 'eval_f1': 0.9270298047276465, 'eval_runtime': 10.7613, 'eval_samples_per_second': 92.925, 'eval_steps_per_second': 11.616, 'epoch': 3.97}


                                                     
 99%|█████████▉| 8940/9000 [3:11:58<00:16,  3.65it/s]

{'eval_loss': 0.2810879647731781, 'eval_f1': 0.9257731958762886, 'eval_runtime': 11.3781, 'eval_samples_per_second': 87.888, 'eval_steps_per_second': 10.986, 'epoch': 3.97}


                                                     
 99%|█████████▉| 8952/9000 [3:12:12<00:13,  3.67it/s]

{'eval_loss': 0.2799675762653351, 'eval_f1': 0.9279835390946503, 'eval_runtime': 11.2654, 'eval_samples_per_second': 88.768, 'eval_steps_per_second': 11.096, 'epoch': 3.98}


                                                     
100%|█████████▉| 8964/9000 [3:12:25<00:09,  3.71it/s]

{'eval_loss': 0.2791776657104492, 'eval_f1': 0.9279835390946503, 'eval_runtime': 11.2678, 'eval_samples_per_second': 88.749, 'eval_steps_per_second': 11.094, 'epoch': 3.98}


                                                     
100%|█████████▉| 8976/9000 [3:12:39<00:06,  3.71it/s]

{'eval_loss': 0.2787468433380127, 'eval_f1': 0.9279835390946503, 'eval_runtime': 11.1157, 'eval_samples_per_second': 89.963, 'eval_steps_per_second': 11.245, 'epoch': 3.99}


                                                     
100%|█████████▉| 8989/9000 [3:12:53<00:40,  3.64s/it]

{'eval_loss': 0.27897191047668457, 'eval_f1': 0.9279835390946503, 'eval_runtime': 11.309, 'eval_samples_per_second': 88.425, 'eval_steps_per_second': 11.053, 'epoch': 3.99}


100%|██████████| 9000/9000 [3:12:55<00:00,  3.70it/s]

{'loss': 0.1737, 'learning_rate': 1.5555555555555556e-07, 'epoch': 4.0}


                                                     
100%|██████████| 9000/9000 [3:13:07<00:00,  3.70it/s]

{'eval_loss': 0.27891668677330017, 'eval_f1': 0.9268795056642637, 'eval_runtime': 11.2633, 'eval_samples_per_second': 88.784, 'eval_steps_per_second': 11.098, 'epoch': 4.0}


100%|██████████| 9000/9000 [3:13:07<00:00,  1.29s/it]


{'train_runtime': 11587.4068, 'train_samples_per_second': 6.214, 'train_steps_per_second': 0.777, 'train_loss': 0.2982212431165907, 'epoch': 4.0}
***** train metrics *****
  epoch                    =        4.0
  train_loss               =     0.2982
  train_runtime            = 3:13:07.40
  train_samples_per_second =      6.214
  train_steps_per_second   =      0.777


In [4]:
# eval
metrics = trainer.evaluate(prepared_ds['test'])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

100%|██████████| 125/125 [00:11<00:00, 11.24it/s]

***** eval metrics *****
  epoch                   =        4.0
  eval_f1                 =     0.9256
  eval_loss               =     0.2209
  eval_runtime            = 0:00:11.19
  eval_samples_per_second =     89.296
  eval_steps_per_second   =     11.162



