pip install -q git+https://github.com/huggingface/trl

In [1]:
import os

# Set the `tokenizers` parallelism switch to "false" before any tokenizer is created.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# --- Hugging Face Hub identifiers -------------------------------------------
# Base checkpoint that will be fine-tuned.
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
# Instruction dataset used for training and evaluation.
dataset_name = "StarkWizard/cairo-instruct"
# Name of the fine-tuned model; used as the trainer output directory and as
# the repository id of the final push.
new_model = "StarkWizard/Mistral-7b-instruct-cairo-PEFT"
# Alternative Hub repository name (not referenced by the training cells shown here).
hub_name = "StarkWizard/Mistral-7b-instruct-cairo-instruct"

# --- Training length ---------------------------------------------------------
# Number of passes over the training split.
nb_epochs = 3
# Step budget to tweak to get the best out of the model.
# NOTE(review): the visible TrainingArguments call uses `nb_epochs`, not this value.
max_steps = 1000

In [2]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import SFTTrainer
import os
 
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Reuse the end-of-sequence token for padding so batches can be padded.
# NOTE(review): with pad == eos, collators that mask padding in the labels may
# also mask genuine EOS tokens — confirm this is acceptable for this fine-tune.
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization; matmuls are computed in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

# Load the quantized base model onto device 0.
# `token=True` replaces the deprecated `use_auth_token=True`; both mean "use the
# locally stored Hugging Face login token".
# NOTE(review): `trust_remote_code=True` permits running code shipped in the
# model repository; it is probably unnecessary for an architecture that
# transformers implements natively — consider dropping it.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    token=True,
    device_map={"": 0},
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [3]:
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer
import os

# Prepare the quantized base model for k-bit (LoRA) training.
# Disable the generation cache flag; it is typically switched off when
# gradient checkpointing is used during training.
model.config.use_cache=False
# NOTE(review): `pretraining_tp` looks like a Llama-config option; on a Mistral
# config this assignment may have no effect — confirm.
model.config.pretraining_tp=1
# NOTE(review): `window` does not appear to be a Mistral config field (the
# documented one is `sliding_window`). As written this probably just attaches an
# unused attribute. Renaming it would change attention behaviour, so confirm
# the intent before touching it.
model.config.window = 256 
# Trade compute for memory by recomputing activations in the backward pass.
model.gradient_checkpointing_enable()
# Let PEFT apply its k-bit training preparation and rebind `model` to the result.
model = prepare_model_for_kbit_training(model)


Loading Dataset

In [4]:
from datasets import load_dataset

# Load the dataset once and pick the two splits from the returned mapping.
# The previous version called `load_dataset` twice with
# `download_mode='force_redownload'`, so every file was downloaded and every
# split regenerated twice. `verification_mode="no_checks"` is the current
# spelling of the deprecated `ignore_verifications=True`.
dataset_splits = load_dataset(
    dataset_name,
    download_mode="force_redownload",
    verification_mode="no_checks",
)
dataset_train = dataset_splits["train"]
dataset_test = dataset_splits["eval"]



Downloading readme:   0%|          | 0.00/381 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/79.6k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.42k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/805 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/5 [00:00<?, ? examples/s]

Downloading readme:   0%|          | 0.00/381 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/79.6k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.42k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/805 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/5 [00:00<?, ? examples/s]

In [5]:



# Linear layers that receive LoRA adapters: the attention projections, the MLP
# projections and the output head.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "lm_head",
]

# LoRA hyper-parameters for causal language modelling.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
    inference_mode=False,
)

# Wrap the prepared base model with the LoRA adapters.
model = get_peft_model(model, peft_config)

training_arguments = TrainingArguments(
    # Where checkpoints go, and Hub publishing.
    output_dir=new_model,
    push_to_hub=True,
    save_strategy="epoch",
    # Batch shape and memory.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    fp16=False,
    # Optimizer and learning-rate schedule.
    optim="paged_adamw_32bit",
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    # Training length.
    num_train_epochs=nb_epochs,
    # Logging and evaluation. No `eval_steps` is given; the captured output shows
    # an evaluation after every training log line, i.e. presumably every
    # `logging_steps` steps.
    logging_dir="./logs",
    logging_steps=5,
    evaluation_strategy="steps",
)

# NOTE(review): `model` is already a PEFT model here and `peft_config` is passed
# again; how the trainer treats that combination depends on the installed TRL
# version — confirm.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    tokenizer=tokenizer,
    peft_config=peft_config,
    train_dataset=dataset_train,
    eval_dataset=dataset_test,
    dataset_text_field="text",
    max_seq_length=512,
    packing=False,
    neftune_noise_alpha=5,
)


Map:   0%|          | 0/805 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]



In [6]:

# Run the fine-tuning loop with the trainer built in the previous cell; training
# and evaluation metrics are printed to the cell output.
trainer.train()
# Upload the model held by the trainer to the Hub repository named by `new_model`.
trainer.model.push_to_hub(new_model)

  0%|          | 0/2415 [00:00<?, ?it/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 4.3051, 'learning_rate': 1.3698630136986302e-06, 'epoch': 0.01}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.651289463043213, 'eval_runtime': 0.973, 'eval_samples_per_second': 5.139, 'eval_steps_per_second': 1.028, 'epoch': 0.01}
{'loss': 6.0984, 'learning_rate': 2.7397260273972604e-06, 'epoch': 0.01}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.645524024963379, 'eval_runtime': 0.9662, 'eval_samples_per_second': 5.175, 'eval_steps_per_second': 1.035, 'epoch': 0.01}
{'loss': 4.5181, 'learning_rate': 4.109589041095891e-06, 'epoch': 0.02}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.63462495803833, 'eval_runtime': 0.9752, 'eval_samples_per_second': 5.127, 'eval_steps_per_second': 1.025, 'epoch': 0.02}
{'loss': 5.0098, 'learning_rate': 5.479452054794521e-06, 'epoch': 0.02}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.619684219360352, 'eval_runtime': 1.0001, 'eval_samples_per_second': 4.999, 'eval_steps_per_second': 1.0, 'epoch': 0.02}
{'loss': 4.8269, 'learning_rate': 6.849315068493151e-06, 'epoch': 0.03}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.5942702293396, 'eval_runtime': 0.9782, 'eval_samples_per_second': 5.111, 'eval_steps_per_second': 1.022, 'epoch': 0.03}
{'loss': 4.3197, 'learning_rate': 8.219178082191782e-06, 'epoch': 0.04}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.550282001495361, 'eval_runtime': 1.0956, 'eval_samples_per_second': 4.564, 'eval_steps_per_second': 0.913, 'epoch': 0.04}
{'loss': 4.3894, 'learning_rate': 9.589041095890411e-06, 'epoch': 0.04}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.47790002822876, 'eval_runtime': 1.0404, 'eval_samples_per_second': 4.806, 'eval_steps_per_second': 0.961, 'epoch': 0.04}
{'loss': 3.8205, 'learning_rate': 1.0958904109589042e-05, 'epoch': 0.05}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.369492530822754, 'eval_runtime': 0.9832, 'eval_samples_per_second': 5.086, 'eval_steps_per_second': 1.017, 'epoch': 0.05}
{'loss': 3.8474, 'learning_rate': 1.2328767123287673e-05, 'epoch': 0.06}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.217759609222412, 'eval_runtime': 1.0958, 'eval_samples_per_second': 4.563, 'eval_steps_per_second': 0.913, 'epoch': 0.06}
{'loss': 4.5352, 'learning_rate': 1.3698630136986302e-05, 'epoch': 0.06}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 4.059573650360107, 'eval_runtime': 1.023, 'eval_samples_per_second': 4.888, 'eval_steps_per_second': 0.978, 'epoch': 0.06}
{'loss': 3.7626, 'learning_rate': 1.5068493150684933e-05, 'epoch': 0.07}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.8719234466552734, 'eval_runtime': 1.0847, 'eval_samples_per_second': 4.61, 'eval_steps_per_second': 0.922, 'epoch': 0.07}
{'loss': 3.0298, 'learning_rate': 1.6438356164383563e-05, 'epoch': 0.07}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.681899309158325, 'eval_runtime': 1.0931, 'eval_samples_per_second': 4.574, 'eval_steps_per_second': 0.915, 'epoch': 0.07}
{'loss': 3.3875, 'learning_rate': 1.7808219178082194e-05, 'epoch': 0.08}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.5443577766418457, 'eval_runtime': 1.0146, 'eval_samples_per_second': 4.928, 'eval_steps_per_second': 0.986, 'epoch': 0.08}
{'loss': 2.8085, 'learning_rate': 1.9178082191780822e-05, 'epoch': 0.09}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.470305919647217, 'eval_runtime': 0.9877, 'eval_samples_per_second': 5.062, 'eval_steps_per_second': 1.012, 'epoch': 0.09}
{'loss': 2.8103, 'learning_rate': 1.9999964012166784e-05, 'epoch': 0.09}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.4166626930236816, 'eval_runtime': 0.974, 'eval_samples_per_second': 5.133, 'eval_steps_per_second': 1.027, 'epoch': 0.09}
{'loss': 2.8745, 'learning_rate': 1.9999559152017842e-05, 'epoch': 0.1}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.339308500289917, 'eval_runtime': 0.9905, 'eval_samples_per_second': 5.048, 'eval_steps_per_second': 1.01, 'epoch': 0.1}
{'loss': 2.6028, 'learning_rate': 1.999870446520163e-05, 'epoch': 0.11}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.28226900100708, 'eval_runtime': 1.0981, 'eval_samples_per_second': 4.553, 'eval_steps_per_second': 0.911, 'epoch': 0.11}
{'loss': 2.7059, 'learning_rate': 1.9997399990165947e-05, 'epoch': 0.11}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.202972412109375, 'eval_runtime': 1.0907, 'eval_samples_per_second': 4.584, 'eval_steps_per_second': 0.917, 'epoch': 0.11}
{'loss': 2.7878, 'learning_rate': 1.9995645785592137e-05, 'epoch': 0.12}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.864076614379883, 'eval_runtime': 1.0899, 'eval_samples_per_second': 4.587, 'eval_steps_per_second': 0.917, 'epoch': 0.12}
{'loss': 2.7932, 'learning_rate': 1.999344193039248e-05, 'epoch': 0.12}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.819537401199341, 'eval_runtime': 0.9873, 'eval_samples_per_second': 5.064, 'eval_steps_per_second': 1.013, 'epoch': 0.12}
{'loss': 2.4089, 'learning_rate': 1.9990788523706636e-05, 'epoch': 0.13}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.770174264907837, 'eval_runtime': 1.0332, 'eval_samples_per_second': 4.839, 'eval_steps_per_second': 0.968, 'epoch': 0.13}
{'loss': 2.3262, 'learning_rate': 1.998768568489717e-05, 'epoch': 0.14}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.7181313037872314, 'eval_runtime': 0.9905, 'eval_samples_per_second': 5.048, 'eval_steps_per_second': 1.01, 'epoch': 0.14}
{'loss': 2.3868, 'learning_rate': 1.9984133553544204e-05, 'epoch': 0.14}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.651160717010498, 'eval_runtime': 1.0625, 'eval_samples_per_second': 4.706, 'eval_steps_per_second': 0.941, 'epoch': 0.14}
{'loss': 2.1462, 'learning_rate': 1.998013228943912e-05, 'epoch': 0.15}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.5865116119384766, 'eval_runtime': 1.0867, 'eval_samples_per_second': 4.601, 'eval_steps_per_second': 0.92, 'epoch': 0.15}
{'loss': 2.0003, 'learning_rate': 1.997568207257738e-05, 'epoch': 0.16}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.5381879806518555, 'eval_runtime': 1.1052, 'eval_samples_per_second': 4.524, 'eval_steps_per_second': 0.905, 'epoch': 0.16}
{'loss': 1.9509, 'learning_rate': 1.9970783103150434e-05, 'epoch': 0.16}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.469899892807007, 'eval_runtime': 1.0258, 'eval_samples_per_second': 4.874, 'eval_steps_per_second': 0.975, 'epoch': 0.16}
{'loss': 1.949, 'learning_rate': 1.996543560153671e-05, 'epoch': 0.17}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.41989803314209, 'eval_runtime': 1.0763, 'eval_samples_per_second': 4.646, 'eval_steps_per_second': 0.929, 'epoch': 0.17}
{'loss': 1.8966, 'learning_rate': 1.9959639808291694e-05, 'epoch': 0.17}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.367833375930786, 'eval_runtime': 1.0802, 'eval_samples_per_second': 4.629, 'eval_steps_per_second': 0.926, 'epoch': 0.17}
{'loss': 2.799, 'learning_rate': 1.9953395984137113e-05, 'epoch': 0.18}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.3347058296203613, 'eval_runtime': 1.0211, 'eval_samples_per_second': 4.897, 'eval_steps_per_second': 0.979, 'epoch': 0.18}
{'loss': 2.2398, 'learning_rate': 1.994670440994921e-05, 'epoch': 0.19}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.3252034187316895, 'eval_runtime': 1.0401, 'eval_samples_per_second': 4.807, 'eval_steps_per_second': 0.961, 'epoch': 0.19}
{'loss': 1.941, 'learning_rate': 1.993956538674611e-05, 'epoch': 0.19}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.3223793506622314, 'eval_runtime': 1.0136, 'eval_samples_per_second': 4.933, 'eval_steps_per_second': 0.987, 'epoch': 0.19}
{'loss': 2.013, 'learning_rate': 1.9931979235674274e-05, 'epoch': 0.2}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.314272880554199, 'eval_runtime': 1.0866, 'eval_samples_per_second': 4.602, 'eval_steps_per_second': 0.92, 'epoch': 0.2}
{'loss': 2.0168, 'learning_rate': 1.9923946297994044e-05, 'epoch': 0.2}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.297389268875122, 'eval_runtime': 0.9759, 'eval_samples_per_second': 5.123, 'eval_steps_per_second': 1.025, 'epoch': 0.2}
{'loss': 1.8037, 'learning_rate': 1.991546693506432e-05, 'epoch': 0.21}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.269404411315918, 'eval_runtime': 0.9768, 'eval_samples_per_second': 5.119, 'eval_steps_per_second': 1.024, 'epoch': 0.21}
{'loss': 2.4074, 'learning_rate': 1.9906541528326266e-05, 'epoch': 0.22}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.230644941329956, 'eval_runtime': 0.9996, 'eval_samples_per_second': 5.002, 'eval_steps_per_second': 1.0, 'epoch': 0.22}
{'loss': 1.9955, 'learning_rate': 1.9897170479286178e-05, 'epoch': 0.22}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.1970455646514893, 'eval_runtime': 1.0922, 'eval_samples_per_second': 4.578, 'eval_steps_per_second': 0.916, 'epoch': 0.22}
{'loss': 1.6786, 'learning_rate': 1.988735420949742e-05, 'epoch': 0.23}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.174060344696045, 'eval_runtime': 1.0586, 'eval_samples_per_second': 4.723, 'eval_steps_per_second': 0.945, 'epoch': 0.23}
{'loss': 2.2734, 'learning_rate': 1.9877093160541452e-05, 'epoch': 0.24}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.1545302867889404, 'eval_runtime': 0.9957, 'eval_samples_per_second': 5.022, 'eval_steps_per_second': 1.004, 'epoch': 0.24}
{'loss': 1.7815, 'learning_rate': 1.9866387794007968e-05, 'epoch': 0.24}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.1268558502197266, 'eval_runtime': 1.0686, 'eval_samples_per_second': 4.679, 'eval_steps_per_second': 0.936, 'epoch': 0.24}
{'loss': 1.8702, 'learning_rate': 1.9855238591474132e-05, 'epoch': 0.25}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.107708215713501, 'eval_runtime': 1.0888, 'eval_samples_per_second': 4.592, 'eval_steps_per_second': 0.918, 'epoch': 0.25}
{'loss': 1.679, 'learning_rate': 1.9843646054482914e-05, 'epoch': 0.25}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.0930638313293457, 'eval_runtime': 0.9792, 'eval_samples_per_second': 5.106, 'eval_steps_per_second': 1.021, 'epoch': 0.25}
{'loss': 1.764, 'learning_rate': 1.9831610704520537e-05, 'epoch': 0.26}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.068228006362915, 'eval_runtime': 1.008, 'eval_samples_per_second': 4.961, 'eval_steps_per_second': 0.992, 'epoch': 0.26}
{'loss': 1.8928, 'learning_rate': 1.9819133082993e-05, 'epoch': 0.27}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.046252489089966, 'eval_runtime': 1.0836, 'eval_samples_per_second': 4.614, 'eval_steps_per_second': 0.923, 'epoch': 0.27}
{'loss': 1.6704, 'learning_rate': 1.9806213751201746e-05, 'epoch': 0.27}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 2.0148766040802, 'eval_runtime': 1.089, 'eval_samples_per_second': 4.592, 'eval_steps_per_second': 0.918, 'epoch': 0.27}
{'loss': 1.6566, 'learning_rate': 1.9792853290318384e-05, 'epoch': 0.28}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9985826015472412, 'eval_runtime': 0.9911, 'eval_samples_per_second': 5.045, 'eval_steps_per_second': 1.009, 'epoch': 0.28}
{'loss': 1.6265, 'learning_rate': 1.977905230135857e-05, 'epoch': 0.29}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9878942966461182, 'eval_runtime': 1.0812, 'eval_samples_per_second': 4.624, 'eval_steps_per_second': 0.925, 'epoch': 0.29}
{'loss': 1.619, 'learning_rate': 1.9764811405154965e-05, 'epoch': 0.29}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9748655557632446, 'eval_runtime': 1.0875, 'eval_samples_per_second': 4.598, 'eval_steps_per_second': 0.92, 'epoch': 0.29}
{'loss': 1.9639, 'learning_rate': 1.9750131242329296e-05, 'epoch': 0.3}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9534509181976318, 'eval_runtime': 1.0096, 'eval_samples_per_second': 4.953, 'eval_steps_per_second': 0.991, 'epoch': 0.3}
{'loss': 1.9363, 'learning_rate': 1.9735012473263545e-05, 'epoch': 0.3}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9344098567962646, 'eval_runtime': 0.9877, 'eval_samples_per_second': 5.062, 'eval_steps_per_second': 1.012, 'epoch': 0.3}
{'loss': 1.1618, 'learning_rate': 1.971945577807025e-05, 'epoch': 0.31}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9247429370880127, 'eval_runtime': 1.0426, 'eval_samples_per_second': 4.796, 'eval_steps_per_second': 0.959, 'epoch': 0.31}
{'loss': 1.8425, 'learning_rate': 1.970346185656189e-05, 'epoch': 0.32}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.91989004611969, 'eval_runtime': 1.0962, 'eval_samples_per_second': 4.561, 'eval_steps_per_second': 0.912, 'epoch': 0.32}
{'loss': 1.5395, 'learning_rate': 1.9687031428219432e-05, 'epoch': 0.32}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9240295886993408, 'eval_runtime': 1.0893, 'eval_samples_per_second': 4.59, 'eval_steps_per_second': 0.918, 'epoch': 0.32}
{'loss': 1.6621, 'learning_rate': 1.9670165232159938e-05, 'epoch': 0.33}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9276336431503296, 'eval_runtime': 1.0312, 'eval_samples_per_second': 4.849, 'eval_steps_per_second': 0.97, 'epoch': 0.33}
{'loss': 1.7791, 'learning_rate': 1.965286402710333e-05, 'epoch': 0.34}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.928799033164978, 'eval_runtime': 1.0423, 'eval_samples_per_second': 4.797, 'eval_steps_per_second': 0.959, 'epoch': 0.34}
{'loss': 1.8282, 'learning_rate': 1.9635128591338265e-05, 'epoch': 0.34}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.929003119468689, 'eval_runtime': 1.1012, 'eval_samples_per_second': 4.541, 'eval_steps_per_second': 0.908, 'epoch': 0.34}
{'loss': 1.4037, 'learning_rate': 1.961695972268711e-05, 'epoch': 0.35}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9306358098983765, 'eval_runtime': 0.9807, 'eval_samples_per_second': 5.098, 'eval_steps_per_second': 1.02, 'epoch': 0.35}
{'loss': 1.4468, 'learning_rate': 1.9598358238470058e-05, 'epoch': 0.35}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9216468334197998, 'eval_runtime': 1.0526, 'eval_samples_per_second': 4.75, 'eval_steps_per_second': 0.95, 'epoch': 0.35}
{'loss': 1.5645, 'learning_rate': 1.9579324975468363e-05, 'epoch': 0.36}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.9026925563812256, 'eval_runtime': 1.064, 'eval_samples_per_second': 4.699, 'eval_steps_per_second': 0.94, 'epoch': 0.36}
{'loss': 1.3587, 'learning_rate': 1.9559860789886697e-05, 'epoch': 0.37}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8776752948760986, 'eval_runtime': 0.9915, 'eval_samples_per_second': 5.043, 'eval_steps_per_second': 1.009, 'epoch': 0.37}
{'loss': 1.5783, 'learning_rate': 1.9539966557314634e-05, 'epoch': 0.37}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8569786548614502, 'eval_runtime': 0.9872, 'eval_samples_per_second': 5.065, 'eval_steps_per_second': 1.013, 'epoch': 0.37}
{'loss': 1.3736, 'learning_rate': 1.9519643172687263e-05, 'epoch': 0.38}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8367040157318115, 'eval_runtime': 1.0892, 'eval_samples_per_second': 4.59, 'eval_steps_per_second': 0.918, 'epoch': 0.38}
{'loss': 1.3657, 'learning_rate': 1.949889155024492e-05, 'epoch': 0.39}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8224834203720093, 'eval_runtime': 1.0464, 'eval_samples_per_second': 4.778, 'eval_steps_per_second': 0.956, 'epoch': 0.39}
{'loss': 1.6, 'learning_rate': 1.9477712623492083e-05, 'epoch': 0.39}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8208906650543213, 'eval_runtime': 1.0642, 'eval_samples_per_second': 4.698, 'eval_steps_per_second': 0.94, 'epoch': 0.39}
{'loss': 1.3556, 'learning_rate': 1.9456107345155346e-05, 'epoch': 0.4}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8269269466400146, 'eval_runtime': 1.045, 'eval_samples_per_second': 4.785, 'eval_steps_per_second': 0.957, 'epoch': 0.4}
{'loss': 1.3053, 'learning_rate': 1.94340766871406e-05, 'epoch': 0.4}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8399032354354858, 'eval_runtime': 1.001, 'eval_samples_per_second': 4.995, 'eval_steps_per_second': 0.999, 'epoch': 0.4}
{'loss': 1.3685, 'learning_rate': 1.941162164048928e-05, 'epoch': 0.41}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.858776330947876, 'eval_runtime': 1.076, 'eval_samples_per_second': 4.647, 'eval_steps_per_second': 0.929, 'epoch': 0.41}
{'loss': 1.5458, 'learning_rate': 1.9388743215333787e-05, 'epoch': 0.42}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8713802099227905, 'eval_runtime': 1.0591, 'eval_samples_per_second': 4.721, 'eval_steps_per_second': 0.944, 'epoch': 0.42}
{'loss': 1.2776, 'learning_rate': 1.9365442440852078e-05, 'epoch': 0.42}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8865792751312256, 'eval_runtime': 1.0057, 'eval_samples_per_second': 4.972, 'eval_steps_per_second': 0.994, 'epoch': 0.42}
{'loss': 1.9151, 'learning_rate': 1.934172036522133e-05, 'epoch': 0.43}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8839794397354126, 'eval_runtime': 1.0538, 'eval_samples_per_second': 4.745, 'eval_steps_per_second': 0.949, 'epoch': 0.43}
{'loss': 1.3067, 'learning_rate': 1.9317578055570812e-05, 'epoch': 0.43}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8786346912384033, 'eval_runtime': 1.103, 'eval_samples_per_second': 4.533, 'eval_steps_per_second': 0.907, 'epoch': 0.43}
{'loss': 1.4648, 'learning_rate': 1.929301659793387e-05, 'epoch': 0.44}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8800042867660522, 'eval_runtime': 0.9893, 'eval_samples_per_second': 5.054, 'eval_steps_per_second': 1.011, 'epoch': 0.44}
{'loss': 1.3578, 'learning_rate': 1.9268037097199074e-05, 'epoch': 0.45}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8745524883270264, 'eval_runtime': 1.0911, 'eval_samples_per_second': 4.582, 'eval_steps_per_second': 0.916, 'epoch': 0.45}
{'loss': 1.8549, 'learning_rate': 1.924264067706052e-05, 'epoch': 0.45}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8681930303573608, 'eval_runtime': 1.091, 'eval_samples_per_second': 4.583, 'eval_steps_per_second': 0.917, 'epoch': 0.45}
{'loss': 1.3957, 'learning_rate': 1.9216828479967274e-05, 'epoch': 0.46}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8491700887680054, 'eval_runtime': 1.0126, 'eval_samples_per_second': 4.938, 'eval_steps_per_second': 0.988, 'epoch': 0.46}
{'loss': 1.4982, 'learning_rate': 1.9190601667071983e-05, 'epoch': 0.47}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8256248235702515, 'eval_runtime': 1.0199, 'eval_samples_per_second': 4.903, 'eval_steps_per_second': 0.981, 'epoch': 0.47}
{'loss': 1.384, 'learning_rate': 1.916396141817865e-05, 'epoch': 0.47}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.8096163272857666, 'eval_runtime': 1.0952, 'eval_samples_per_second': 4.566, 'eval_steps_per_second': 0.913, 'epoch': 0.47}
{'loss': 1.3536, 'learning_rate': 1.9136908931689535e-05, 'epoch': 0.48}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7986438274383545, 'eval_runtime': 1.0754, 'eval_samples_per_second': 4.65, 'eval_steps_per_second': 0.93, 'epoch': 0.48}
{'loss': 1.2109, 'learning_rate': 1.9109445424551274e-05, 'epoch': 0.48}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7756812572479248, 'eval_runtime': 1.091, 'eval_samples_per_second': 4.583, 'eval_steps_per_second': 0.917, 'epoch': 0.48}
{'loss': 1.46, 'learning_rate': 1.9081572132200122e-05, 'epoch': 0.49}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7699886560440063, 'eval_runtime': 1.0314, 'eval_samples_per_second': 4.848, 'eval_steps_per_second': 0.97, 'epoch': 0.49}
{'loss': 1.6749, 'learning_rate': 1.905329030850637e-05, 'epoch': 0.5}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7528040409088135, 'eval_runtime': 0.9925, 'eval_samples_per_second': 5.038, 'eval_steps_per_second': 1.008, 'epoch': 0.5}
{'loss': 1.7566, 'learning_rate': 1.902460122571796e-05, 'epoch': 0.5}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7424724102020264, 'eval_runtime': 1.0867, 'eval_samples_per_second': 4.601, 'eval_steps_per_second': 0.92, 'epoch': 0.5}
{'loss': 1.6968, 'learning_rate': 1.8995506174403235e-05, 'epoch': 0.51}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7398145198822021, 'eval_runtime': 1.0709, 'eval_samples_per_second': 4.669, 'eval_steps_per_second': 0.934, 'epoch': 0.51}
{'loss': 1.7462, 'learning_rate': 1.896600646339289e-05, 'epoch': 0.52}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7415310144424438, 'eval_runtime': 1.0893, 'eval_samples_per_second': 4.59, 'eval_steps_per_second': 0.918, 'epoch': 0.52}
{'loss': 1.2093, 'learning_rate': 1.89361034197211e-05, 'epoch': 0.52}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7434028387069702, 'eval_runtime': 1.0907, 'eval_samples_per_second': 4.584, 'eval_steps_per_second': 0.917, 'epoch': 0.52}
{'loss': 1.4057, 'learning_rate': 1.890579838856581e-05, 'epoch': 0.53}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7503207921981812, 'eval_runtime': 0.9797, 'eval_samples_per_second': 5.103, 'eval_steps_per_second': 1.021, 'epoch': 0.53}
{'loss': 1.3066, 'learning_rate': 1.8875092733188232e-05, 'epoch': 0.53}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7555660009384155, 'eval_runtime': 1.0158, 'eval_samples_per_second': 4.922, 'eval_steps_per_second': 0.984, 'epoch': 0.53}
{'loss': 1.3021, 'learning_rate': 1.8843987834871532e-05, 'epoch': 0.54}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7512025833129883, 'eval_runtime': 0.9815, 'eval_samples_per_second': 5.094, 'eval_steps_per_second': 1.019, 'epoch': 0.54}
{'loss': 1.3985, 'learning_rate': 1.8812485092858662e-05, 'epoch': 0.55}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7461583614349365, 'eval_runtime': 1.0926, 'eval_samples_per_second': 4.576, 'eval_steps_per_second': 0.915, 'epoch': 0.55}
{'loss': 1.3506, 'learning_rate': 1.8780585924289443e-05, 'epoch': 0.55}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7455589771270752, 'eval_runtime': 0.9794, 'eval_samples_per_second': 5.105, 'eval_steps_per_second': 1.021, 'epoch': 0.55}
{'loss': 1.3965, 'learning_rate': 1.874829176413681e-05, 'epoch': 0.56}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7457339763641357, 'eval_runtime': 1.0131, 'eval_samples_per_second': 4.935, 'eval_steps_per_second': 0.987, 'epoch': 0.56}
{'loss': 1.1251, 'learning_rate': 1.8715604065142243e-05, 'epoch': 0.57}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7519242763519287, 'eval_runtime': 1.079, 'eval_samples_per_second': 4.634, 'eval_steps_per_second': 0.927, 'epoch': 0.57}
{'loss': 1.2288, 'learning_rate': 1.8682524297750436e-05, 'epoch': 0.57}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7622143030166626, 'eval_runtime': 0.9917, 'eval_samples_per_second': 5.042, 'eval_steps_per_second': 1.008, 'epoch': 0.57}
{'loss': 0.9003, 'learning_rate': 1.864905395004315e-05, 'epoch': 0.58}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7738120555877686, 'eval_runtime': 1.0137, 'eval_samples_per_second': 4.932, 'eval_steps_per_second': 0.986, 'epoch': 0.58}
{'loss': 1.4472, 'learning_rate': 1.8615194527672247e-05, 'epoch': 0.58}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7770884037017822, 'eval_runtime': 1.0185, 'eval_samples_per_second': 4.909, 'eval_steps_per_second': 0.982, 'epoch': 0.58}
{'loss': 1.4742, 'learning_rate': 1.8580947553791996e-05, 'epoch': 0.59}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7725193500518799, 'eval_runtime': 1.0489, 'eval_samples_per_second': 4.767, 'eval_steps_per_second': 0.953, 'epoch': 0.59}
{'loss': 1.2695, 'learning_rate': 1.8546314568990524e-05, 'epoch': 0.6}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7587652206420898, 'eval_runtime': 1.0787, 'eval_samples_per_second': 4.635, 'eval_steps_per_second': 0.927, 'epoch': 0.6}
{'loss': 1.3834, 'learning_rate': 1.8511297131220523e-05, 'epoch': 0.6}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7411916255950928, 'eval_runtime': 1.0889, 'eval_samples_per_second': 4.592, 'eval_steps_per_second': 0.918, 'epoch': 0.6}
{'loss': 1.0527, 'learning_rate': 1.847589681572917e-05, 'epoch': 0.61}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.7208983898162842, 'eval_runtime': 1.0949, 'eval_samples_per_second': 4.567, 'eval_steps_per_second': 0.913, 'epoch': 0.61}
{'loss': 1.2608, 'learning_rate': 1.8440115214987266e-05, 'epoch': 0.61}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6998379230499268, 'eval_runtime': 1.0234, 'eval_samples_per_second': 4.886, 'eval_steps_per_second': 0.977, 'epoch': 0.61}
{'loss': 1.2654, 'learning_rate': 1.8403953938617592e-05, 'epoch': 0.62}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6884139776229858, 'eval_runtime': 0.9792, 'eval_samples_per_second': 5.106, 'eval_steps_per_second': 1.021, 'epoch': 0.62}
{'loss': 1.6226, 'learning_rate': 1.8367414613322504e-05, 'epoch': 0.63}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6855242252349854, 'eval_runtime': 1.0426, 'eval_samples_per_second': 4.796, 'eval_steps_per_second': 0.959, 'epoch': 0.63}
{'loss': 1.6562, 'learning_rate': 1.8330498882810753e-05, 'epoch': 0.63}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.675217628479004, 'eval_runtime': 1.0913, 'eval_samples_per_second': 4.582, 'eval_steps_per_second': 0.916, 'epoch': 0.63}
{'loss': 1.2738, 'learning_rate': 1.8293208407723556e-05, 'epoch': 0.64}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.666461706161499, 'eval_runtime': 0.9869, 'eval_samples_per_second': 5.066, 'eval_steps_per_second': 1.013, 'epoch': 0.64}
{'loss': 1.2114, 'learning_rate': 1.825554486555987e-05, 'epoch': 0.65}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6647484302520752, 'eval_runtime': 1.0837, 'eval_samples_per_second': 4.614, 'eval_steps_per_second': 0.923, 'epoch': 0.65}
{'loss': 1.6291, 'learning_rate': 1.821750995060096e-05, 'epoch': 0.65}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.667297124862671, 'eval_runtime': 1.0022, 'eval_samples_per_second': 4.989, 'eval_steps_per_second': 0.998, 'epoch': 0.65}
{'loss': 1.4349, 'learning_rate': 1.8179105373834156e-05, 'epoch': 0.66}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6655232906341553, 'eval_runtime': 1.0764, 'eval_samples_per_second': 4.645, 'eval_steps_per_second': 0.929, 'epoch': 0.66}
{'loss': 1.347, 'learning_rate': 1.8140332862875896e-05, 'epoch': 0.66}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6618177890777588, 'eval_runtime': 1.0877, 'eval_samples_per_second': 4.597, 'eval_steps_per_second': 0.919, 'epoch': 0.66}
{'loss': 1.6406, 'learning_rate': 1.8101194161894006e-05, 'epoch': 0.67}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6557680368423462, 'eval_runtime': 0.9782, 'eval_samples_per_second': 5.111, 'eval_steps_per_second': 1.022, 'epoch': 0.67}
{'loss': 1.3534, 'learning_rate': 1.8061691031529255e-05, 'epoch': 0.68}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6536409854888916, 'eval_runtime': 1.0717, 'eval_samples_per_second': 4.665, 'eval_steps_per_second': 0.933, 'epoch': 0.68}
{'loss': 1.4293, 'learning_rate': 1.8021825248816124e-05, 'epoch': 0.68}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6572669744491577, 'eval_runtime': 0.9826, 'eval_samples_per_second': 5.089, 'eval_steps_per_second': 1.018, 'epoch': 0.68}
{'loss': 1.2952, 'learning_rate': 1.7981598607102895e-05, 'epoch': 0.69}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6686570644378662, 'eval_runtime': 1.0913, 'eval_samples_per_second': 4.582, 'eval_steps_per_second': 0.916, 'epoch': 0.69}
{'loss': 1.4691, 'learning_rate': 1.7941012915970956e-05, 'epoch': 0.7}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.682003378868103, 'eval_runtime': 0.9761, 'eval_samples_per_second': 5.123, 'eval_steps_per_second': 1.025, 'epoch': 0.7}
{'loss': 1.5472, 'learning_rate': 1.790007000115341e-05, 'epoch': 0.7}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.691192388534546, 'eval_runtime': 0.9963, 'eval_samples_per_second': 5.019, 'eval_steps_per_second': 1.004, 'epoch': 0.7}
{'loss': 1.2394, 'learning_rate': 1.7858771704452955e-05, 'epoch': 0.71}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6782310009002686, 'eval_runtime': 1.0948, 'eval_samples_per_second': 4.567, 'eval_steps_per_second': 0.913, 'epoch': 0.71}
{'loss': 1.4195, 'learning_rate': 1.7817119883658995e-05, 'epoch': 0.71}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6646335124969482, 'eval_runtime': 1.0957, 'eval_samples_per_second': 4.563, 'eval_steps_per_second': 0.913, 'epoch': 0.71}
{'loss': 1.3744, 'learning_rate': 1.7775116412464116e-05, 'epoch': 0.72}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6571228504180908, 'eval_runtime': 0.9955, 'eval_samples_per_second': 5.022, 'eval_steps_per_second': 1.004, 'epoch': 0.72}
{'loss': 1.3335, 'learning_rate': 1.773276318037976e-05, 'epoch': 0.73}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6551954746246338, 'eval_runtime': 1.0917, 'eval_samples_per_second': 4.58, 'eval_steps_per_second': 0.916, 'epoch': 0.73}
{'loss': 1.418, 'learning_rate': 1.7690062092651237e-05, 'epoch': 0.73}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6552705764770508, 'eval_runtime': 1.087, 'eval_samples_per_second': 4.6, 'eval_steps_per_second': 0.92, 'epoch': 0.73}
{'loss': 1.7578, 'learning_rate': 1.764701507017203e-05, 'epoch': 0.74}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.656011939048767, 'eval_runtime': 1.0844, 'eval_samples_per_second': 4.611, 'eval_steps_per_second': 0.922, 'epoch': 0.74}
{'loss': 1.2953, 'learning_rate': 1.7603624049397377e-05, 'epoch': 0.75}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.652624487876892, 'eval_runtime': 1.0509, 'eval_samples_per_second': 4.758, 'eval_steps_per_second': 0.952, 'epoch': 0.75}
{'loss': 1.256, 'learning_rate': 1.7559890982257153e-05, 'epoch': 0.75}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.648772954940796, 'eval_runtime': 0.9827, 'eval_samples_per_second': 5.088, 'eval_steps_per_second': 1.018, 'epoch': 0.75}
{'loss': 2.055, 'learning_rate': 1.751581783606807e-05, 'epoch': 0.76}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6432838439941406, 'eval_runtime': 1.0792, 'eval_samples_per_second': 4.633, 'eval_steps_per_second': 0.927, 'epoch': 0.76}
{'loss': 1.2331, 'learning_rate': 1.7471406593445177e-05, 'epoch': 0.76}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6445213556289673, 'eval_runtime': 1.024, 'eval_samples_per_second': 4.883, 'eval_steps_per_second': 0.977, 'epoch': 0.76}
{'loss': 1.3865, 'learning_rate': 1.7426659252212674e-05, 'epoch': 0.77}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6510207653045654, 'eval_runtime': 1.0929, 'eval_samples_per_second': 4.575, 'eval_steps_per_second': 0.915, 'epoch': 0.77}
{'loss': 1.59, 'learning_rate': 1.738157782531404e-05, 'epoch': 0.78}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6480634212493896, 'eval_runtime': 1.0199, 'eval_samples_per_second': 4.903, 'eval_steps_per_second': 0.981, 'epoch': 0.78}
{'loss': 1.2692, 'learning_rate': 1.7336164340721476e-05, 'epoch': 0.78}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6313998699188232, 'eval_runtime': 1.008, 'eval_samples_per_second': 4.96, 'eval_steps_per_second': 0.992, 'epoch': 0.78}
{'loss': 1.3702, 'learning_rate': 1.7290420841344692e-05, 'epoch': 0.79}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.6084171533584595, 'eval_runtime': 1.0847, 'eval_samples_per_second': 4.609, 'eval_steps_per_second': 0.922, 'epoch': 0.79}
{'loss': 1.4938, 'learning_rate': 1.7244349384938986e-05, 'epoch': 0.8}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5955030918121338, 'eval_runtime': 0.9719, 'eval_samples_per_second': 5.144, 'eval_steps_per_second': 1.029, 'epoch': 0.8}
{'loss': 1.2065, 'learning_rate': 1.7197952044012695e-05, 'epoch': 0.8}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5799518823623657, 'eval_runtime': 1.0334, 'eval_samples_per_second': 4.838, 'eval_steps_per_second': 0.968, 'epoch': 0.8}
{'loss': 1.1527, 'learning_rate': 1.7151230905733955e-05, 'epoch': 0.81}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5759665966033936, 'eval_runtime': 0.971, 'eval_samples_per_second': 5.15, 'eval_steps_per_second': 1.03, 'epoch': 0.81}
{'loss': 1.2793, 'learning_rate': 1.710418807183681e-05, 'epoch': 0.81}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5743463039398193, 'eval_runtime': 1.0728, 'eval_samples_per_second': 4.661, 'eval_steps_per_second': 0.932, 'epoch': 0.81}
{'loss': 1.4041, 'learning_rate': 1.7056825658526674e-05, 'epoch': 0.82}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5649956464767456, 'eval_runtime': 1.0247, 'eval_samples_per_second': 4.879, 'eval_steps_per_second': 0.976, 'epoch': 0.82}
{'loss': 1.5386, 'learning_rate': 1.700914579638513e-05, 'epoch': 0.83}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.543363332748413, 'eval_runtime': 0.9799, 'eval_samples_per_second': 5.103, 'eval_steps_per_second': 1.021, 'epoch': 0.83}
{'loss': 1.3738, 'learning_rate': 1.6961150630274078e-05, 'epoch': 0.83}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5359852313995361, 'eval_runtime': 1.0186, 'eval_samples_per_second': 4.909, 'eval_steps_per_second': 0.982, 'epoch': 0.83}
{'loss': 1.0602, 'learning_rate': 1.691284231923926e-05, 'epoch': 0.84}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5322778224945068, 'eval_runtime': 1.0929, 'eval_samples_per_second': 4.575, 'eval_steps_per_second': 0.915, 'epoch': 0.84}
{'loss': 1.394, 'learning_rate': 1.6864223036413136e-05, 'epoch': 0.84}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5330110788345337, 'eval_runtime': 1.0821, 'eval_samples_per_second': 4.621, 'eval_steps_per_second': 0.924, 'epoch': 0.84}
{'loss': 1.1428, 'learning_rate': 1.681529496891712e-05, 'epoch': 0.85}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5363935232162476, 'eval_runtime': 0.9748, 'eval_samples_per_second': 5.129, 'eval_steps_per_second': 1.026, 'epoch': 0.85}
{'loss': 0.9016, 'learning_rate': 1.6766060317763196e-05, 'epoch': 0.86}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5411089658737183, 'eval_runtime': 1.0068, 'eval_samples_per_second': 4.966, 'eval_steps_per_second': 0.993, 'epoch': 0.86}
{'loss': 1.0839, 'learning_rate': 1.671652129775491e-05, 'epoch': 0.86}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5436511039733887, 'eval_runtime': 1.0448, 'eval_samples_per_second': 4.785, 'eval_steps_per_second': 0.957, 'epoch': 0.86}
{'loss': 1.2129, 'learning_rate': 1.6666680137387724e-05, 'epoch': 0.87}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5464731454849243, 'eval_runtime': 1.0885, 'eval_samples_per_second': 4.593, 'eval_steps_per_second': 0.919, 'epoch': 0.87}
{'loss': 1.4112, 'learning_rate': 1.6616539078748798e-05, 'epoch': 0.88}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.538158655166626, 'eval_runtime': 1.0855, 'eval_samples_per_second': 4.606, 'eval_steps_per_second': 0.921, 'epoch': 0.88}
{'loss': 1.1929, 'learning_rate': 1.656610037741609e-05, 'epoch': 0.88}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5318892002105713, 'eval_runtime': 0.9805, 'eval_samples_per_second': 5.1, 'eval_steps_per_second': 1.02, 'epoch': 0.88}
{'loss': 1.0578, 'learning_rate': 1.651536630235692e-05, 'epoch': 0.89}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5377695560455322, 'eval_runtime': 0.9788, 'eval_samples_per_second': 5.108, 'eval_steps_per_second': 1.022, 'epoch': 0.89}
{'loss': 0.9349, 'learning_rate': 1.6464339135825895e-05, 'epoch': 0.89}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5383481979370117, 'eval_runtime': 0.9799, 'eval_samples_per_second': 5.103, 'eval_steps_per_second': 1.021, 'epoch': 0.89}
{'loss': 1.1202, 'learning_rate': 1.641302117326223e-05, 'epoch': 0.9}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5548624992370605, 'eval_runtime': 1.0987, 'eval_samples_per_second': 4.551, 'eval_steps_per_second': 0.91, 'epoch': 0.9}
{'loss': 1.2484, 'learning_rate': 1.6361414723186506e-05, 'epoch': 0.91}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5638097524642944, 'eval_runtime': 1.0036, 'eval_samples_per_second': 4.982, 'eval_steps_per_second': 0.996, 'epoch': 0.91}
{'loss': 1.4358, 'learning_rate': 1.630952210709681e-05, 'epoch': 0.91}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5506116151809692, 'eval_runtime': 1.0898, 'eval_samples_per_second': 4.588, 'eval_steps_per_second': 0.918, 'epoch': 0.91}
{'loss': 1.1894, 'learning_rate': 1.6257345659364308e-05, 'epoch': 0.92}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5455845594406128, 'eval_runtime': 1.0887, 'eval_samples_per_second': 4.593, 'eval_steps_per_second': 0.919, 'epoch': 0.92}
{'loss': 1.1115, 'learning_rate': 1.6204887727128233e-05, 'epoch': 0.93}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.543280005455017, 'eval_runtime': 1.0121, 'eval_samples_per_second': 4.94, 'eval_steps_per_second': 0.988, 'epoch': 0.93}
{'loss': 1.4962, 'learning_rate': 1.615215067019029e-05, 'epoch': 0.93}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5399060249328613, 'eval_runtime': 1.0906, 'eval_samples_per_second': 4.585, 'eval_steps_per_second': 0.917, 'epoch': 0.93}
{'loss': 1.1677, 'learning_rate': 1.6099136860908538e-05, 'epoch': 0.94}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5390346050262451, 'eval_runtime': 1.0878, 'eval_samples_per_second': 4.596, 'eval_steps_per_second': 0.919, 'epoch': 0.94}
{'loss': 1.3113, 'learning_rate': 1.604584868409061e-05, 'epoch': 0.94}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5357248783111572, 'eval_runtime': 1.015, 'eval_samples_per_second': 4.926, 'eval_steps_per_second': 0.985, 'epoch': 0.94}
{'loss': 1.226, 'learning_rate': 1.599228853688649e-05, 'epoch': 0.95}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5299360752105713, 'eval_runtime': 0.9874, 'eval_samples_per_second': 5.064, 'eval_steps_per_second': 1.013, 'epoch': 0.95}
{'loss': 1.1655, 'learning_rate': 1.5938458828680642e-05, 'epoch': 0.96}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.528918981552124, 'eval_runtime': 1.094, 'eval_samples_per_second': 4.571, 'eval_steps_per_second': 0.914, 'epoch': 0.96}
{'loss': 1.3207, 'learning_rate': 1.5884361980983645e-05, 'epoch': 0.96}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5315684080123901, 'eval_runtime': 1.0223, 'eval_samples_per_second': 4.891, 'eval_steps_per_second': 0.978, 'epoch': 0.96}
{'loss': 1.3359, 'learning_rate': 1.5830000427323252e-05, 'epoch': 0.97}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5292631387710571, 'eval_runtime': 1.0852, 'eval_samples_per_second': 4.607, 'eval_steps_per_second': 0.921, 'epoch': 0.97}
{'loss': 1.1929, 'learning_rate': 1.5775376613134922e-05, 'epoch': 0.98}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5282151699066162, 'eval_runtime': 0.9802, 'eval_samples_per_second': 5.101, 'eval_steps_per_second': 1.02, 'epoch': 0.98}
{'loss': 1.3694, 'learning_rate': 1.5720492995651804e-05, 'epoch': 0.98}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5311630964279175, 'eval_runtime': 1.0733, 'eval_samples_per_second': 4.659, 'eval_steps_per_second': 0.932, 'epoch': 0.98}
{'loss': 1.417, 'learning_rate': 1.5665352043794222e-05, 'epoch': 0.99}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5379153490066528, 'eval_runtime': 1.0641, 'eval_samples_per_second': 4.699, 'eval_steps_per_second': 0.94, 'epoch': 0.99}
{'loss': 1.0495, 'learning_rate': 1.5609956238058584e-05, 'epoch': 0.99}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5467944145202637, 'eval_runtime': 1.0955, 'eval_samples_per_second': 4.564, 'eval_steps_per_second': 0.913, 'epoch': 0.99}
{'loss': 1.5054, 'learning_rate': 1.5554308070405815e-05, 'epoch': 1.0}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5506576299667358, 'eval_runtime': 1.0689, 'eval_samples_per_second': 4.678, 'eval_steps_per_second': 0.936, 'epoch': 1.0}




{'loss': 0.8766, 'learning_rate': 1.5498410044149252e-05, 'epoch': 1.01}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5557861328125, 'eval_runtime': 1.0786, 'eval_samples_per_second': 4.636, 'eval_steps_per_second': 0.927, 'epoch': 1.01}
{'loss': 1.1715, 'learning_rate': 1.5442264673842036e-05, 'epoch': 1.01}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5584745407104492, 'eval_runtime': 0.9891, 'eval_samples_per_second': 5.055, 'eval_steps_per_second': 1.011, 'epoch': 1.01}
{'loss': 1.0015, 'learning_rate': 1.5385874485163992e-05, 'epoch': 1.02}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5560115575790405, 'eval_runtime': 1.0869, 'eval_samples_per_second': 4.6, 'eval_steps_per_second': 0.92, 'epoch': 1.02}
{'loss': 1.2362, 'learning_rate': 1.5329242014808013e-05, 'epoch': 1.02}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5569560527801514, 'eval_runtime': 0.9928, 'eval_samples_per_second': 5.036, 'eval_steps_per_second': 1.007, 'epoch': 1.02}
{'loss': 0.8876, 'learning_rate': 1.5272369810365946e-05, 'epoch': 1.03}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5502047538757324, 'eval_runtime': 1.0651, 'eval_samples_per_second': 4.695, 'eval_steps_per_second': 0.939, 'epoch': 1.03}
{'loss': 0.9681, 'learning_rate': 1.5215260430214003e-05, 'epoch': 1.04}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5372345447540283, 'eval_runtime': 0.9747, 'eval_samples_per_second': 5.13, 'eval_steps_per_second': 1.026, 'epoch': 1.04}
{'loss': 0.6658, 'learning_rate': 1.5157916443397644e-05, 'epoch': 1.04}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5276415348052979, 'eval_runtime': 1.0047, 'eval_samples_per_second': 4.977, 'eval_steps_per_second': 0.995, 'epoch': 1.04}
{'loss': 1.0693, 'learning_rate': 1.5100340429516046e-05, 'epoch': 1.05}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5245782136917114, 'eval_runtime': 1.0859, 'eval_samples_per_second': 4.604, 'eval_steps_per_second': 0.921, 'epoch': 1.05}
{'loss': 0.7984, 'learning_rate': 1.504253497860603e-05, 'epoch': 1.06}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5187751054763794, 'eval_runtime': 1.0444, 'eval_samples_per_second': 4.788, 'eval_steps_per_second': 0.958, 'epoch': 1.06}
{'loss': 0.8178, 'learning_rate': 1.4984502691025566e-05, 'epoch': 1.06}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5191706418991089, 'eval_runtime': 0.9972, 'eval_samples_per_second': 5.014, 'eval_steps_per_second': 1.003, 'epoch': 1.06}
{'loss': 1.0403, 'learning_rate': 1.4926246177336792e-05, 'epoch': 1.07}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5226659774780273, 'eval_runtime': 1.0888, 'eval_samples_per_second': 4.592, 'eval_steps_per_second': 0.918, 'epoch': 1.07}
{'loss': 1.2257, 'learning_rate': 1.4867768058188581e-05, 'epoch': 1.07}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.525286078453064, 'eval_runtime': 1.0118, 'eval_samples_per_second': 4.942, 'eval_steps_per_second': 0.988, 'epoch': 1.07}
{'loss': 0.9451, 'learning_rate': 1.4809070964198644e-05, 'epoch': 1.08}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5253374576568604, 'eval_runtime': 1.0926, 'eval_samples_per_second': 4.576, 'eval_steps_per_second': 0.915, 'epoch': 1.08}
{'loss': 1.1614, 'learning_rate': 1.4750157535835201e-05, 'epoch': 1.09}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.529249668121338, 'eval_runtime': 0.9729, 'eval_samples_per_second': 5.139, 'eval_steps_per_second': 1.028, 'epoch': 1.09}
{'loss': 1.2161, 'learning_rate': 1.4691030423298208e-05, 'epoch': 1.09}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5268988609313965, 'eval_runtime': 1.1099, 'eval_samples_per_second': 4.505, 'eval_steps_per_second': 0.901, 'epoch': 1.09}
{'loss': 1.1047, 'learning_rate': 1.4631692286400107e-05, 'epoch': 1.1}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5269309282302856, 'eval_runtime': 1.0911, 'eval_samples_per_second': 4.582, 'eval_steps_per_second': 0.916, 'epoch': 1.1}
{'loss': 1.1248, 'learning_rate': 1.4572145794446222e-05, 'epoch': 1.11}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5264365673065186, 'eval_runtime': 1.0967, 'eval_samples_per_second': 4.559, 'eval_steps_per_second': 0.912, 'epoch': 1.11}
{'loss': 0.8808, 'learning_rate': 1.4512393626114638e-05, 'epoch': 1.11}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.52687406539917, 'eval_runtime': 1.0235, 'eval_samples_per_second': 4.885, 'eval_steps_per_second': 0.977, 'epoch': 1.11}
{'loss': 1.1161, 'learning_rate': 1.4452438469335726e-05, 'epoch': 1.12}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5339560508728027, 'eval_runtime': 1.0036, 'eval_samples_per_second': 4.982, 'eval_steps_per_second': 0.996, 'epoch': 1.12}
{'loss': 1.1226, 'learning_rate': 1.4392283021171226e-05, 'epoch': 1.12}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5424354076385498, 'eval_runtime': 1.0509, 'eval_samples_per_second': 4.758, 'eval_steps_per_second': 0.952, 'epoch': 1.12}
{'loss': 0.9059, 'learning_rate': 1.4331929987692907e-05, 'epoch': 1.13}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5483802556991577, 'eval_runtime': 0.9831, 'eval_samples_per_second': 5.086, 'eval_steps_per_second': 1.017, 'epoch': 1.13}
{'loss': 1.0149, 'learning_rate': 1.4271382083860847e-05, 'epoch': 1.14}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5453850030899048, 'eval_runtime': 1.0988, 'eval_samples_per_second': 4.55, 'eval_steps_per_second': 0.91, 'epoch': 1.14}
{'loss': 1.0749, 'learning_rate': 1.4210642033401305e-05, 'epoch': 1.14}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5473790168762207, 'eval_runtime': 0.9898, 'eval_samples_per_second': 5.051, 'eval_steps_per_second': 1.01, 'epoch': 1.14}
{'loss': 0.8647, 'learning_rate': 1.4149712568684182e-05, 'epoch': 1.15}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5545125007629395, 'eval_runtime': 1.0954, 'eval_samples_per_second': 4.564, 'eval_steps_per_second': 0.913, 'epoch': 1.15}
{'loss': 1.3308, 'learning_rate': 1.408859643060011e-05, 'epoch': 1.16}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.560271143913269, 'eval_runtime': 0.9843, 'eval_samples_per_second': 5.08, 'eval_steps_per_second': 1.016, 'epoch': 1.16}
{'loss': 0.9661, 'learning_rate': 1.4027296368437168e-05, 'epoch': 1.16}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5642237663269043, 'eval_runtime': 1.0553, 'eval_samples_per_second': 4.738, 'eval_steps_per_second': 0.948, 'epoch': 1.16}
{'loss': 1.038, 'learning_rate': 1.396581513975719e-05, 'epoch': 1.17}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5621440410614014, 'eval_runtime': 1.0574, 'eval_samples_per_second': 4.728, 'eval_steps_per_second': 0.946, 'epoch': 1.17}
{'loss': 0.96, 'learning_rate': 1.3904155510271712e-05, 'epoch': 1.17}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5550427436828613, 'eval_runtime': 0.9767, 'eval_samples_per_second': 5.119, 'eval_steps_per_second': 1.024, 'epoch': 1.17}
{'loss': 1.2275, 'learning_rate': 1.3842320253717589e-05, 'epoch': 1.18}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5399024486541748, 'eval_runtime': 1.0328, 'eval_samples_per_second': 4.841, 'eval_steps_per_second': 0.968, 'epoch': 1.18}
{'loss': 0.9022, 'learning_rate': 1.3780312151732178e-05, 'epoch': 1.19}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.528873085975647, 'eval_runtime': 1.0894, 'eval_samples_per_second': 4.59, 'eval_steps_per_second': 0.918, 'epoch': 1.19}
{'loss': 0.7934, 'learning_rate': 1.3718133993728239e-05, 'epoch': 1.19}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5261849164962769, 'eval_runtime': 1.0908, 'eval_samples_per_second': 4.584, 'eval_steps_per_second': 0.917, 'epoch': 1.19}
{'loss': 1.1376, 'learning_rate': 1.365578857676844e-05, 'epoch': 1.2}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.524802803993225, 'eval_runtime': 1.0119, 'eval_samples_per_second': 4.941, 'eval_steps_per_second': 0.988, 'epoch': 1.2}
{'loss': 1.1455, 'learning_rate': 1.359327870543953e-05, 'epoch': 1.2}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5206401348114014, 'eval_runtime': 1.02, 'eval_samples_per_second': 4.902, 'eval_steps_per_second': 0.98, 'epoch': 1.2}
{'loss': 1.3663, 'learning_rate': 1.353060719172619e-05, 'epoch': 1.21}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5178329944610596, 'eval_runtime': 1.0184, 'eval_samples_per_second': 4.91, 'eval_steps_per_second': 0.982, 'epoch': 1.21}
{'loss': 1.3473, 'learning_rate': 1.3467776854884519e-05, 'epoch': 1.22}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5169669389724731, 'eval_runtime': 1.0117, 'eval_samples_per_second': 4.942, 'eval_steps_per_second': 0.988, 'epoch': 1.22}
{'loss': 0.8251, 'learning_rate': 1.3404790521315211e-05, 'epoch': 1.22}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5200896263122559, 'eval_runtime': 1.0069, 'eval_samples_per_second': 4.966, 'eval_steps_per_second': 0.993, 'epoch': 1.22}
{'loss': 1.2475, 'learning_rate': 1.3341651024436442e-05, 'epoch': 1.23}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5232032537460327, 'eval_runtime': 1.0946, 'eval_samples_per_second': 4.568, 'eval_steps_per_second': 0.914, 'epoch': 1.23}
{'loss': 0.7259, 'learning_rate': 1.3278361204556364e-05, 'epoch': 1.24}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5230334997177124, 'eval_runtime': 0.9906, 'eval_samples_per_second': 5.047, 'eval_steps_per_second': 1.009, 'epoch': 1.24}
{'loss': 0.947, 'learning_rate': 1.3214923908745365e-05, 'epoch': 1.24}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5244520902633667, 'eval_runtime': 0.991, 'eval_samples_per_second': 5.045, 'eval_steps_per_second': 1.009, 'epoch': 1.24}
{'loss': 0.9272, 'learning_rate': 1.3151341990707988e-05, 'epoch': 1.25}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.526963710784912, 'eval_runtime': 1.0288, 'eval_samples_per_second': 4.86, 'eval_steps_per_second': 0.972, 'epoch': 1.25}
{'loss': 1.1042, 'learning_rate': 1.3087618310654554e-05, 'epoch': 1.25}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5285227298736572, 'eval_runtime': 0.9962, 'eval_samples_per_second': 5.019, 'eval_steps_per_second': 1.004, 'epoch': 1.25}
{'loss': 0.8819, 'learning_rate': 1.3023755735172491e-05, 'epoch': 1.26}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5287413597106934, 'eval_runtime': 1.0173, 'eval_samples_per_second': 4.915, 'eval_steps_per_second': 0.983, 'epoch': 1.26}
{'loss': 1.31, 'learning_rate': 1.2959757137097409e-05, 'epoch': 1.27}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5299806594848633, 'eval_runtime': 1.0886, 'eval_samples_per_second': 4.593, 'eval_steps_per_second': 0.919, 'epoch': 1.27}
{'loss': 0.8495, 'learning_rate': 1.2895625395383823e-05, 'epoch': 1.27}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5318371057510376, 'eval_runtime': 1.086, 'eval_samples_per_second': 4.604, 'eval_steps_per_second': 0.921, 'epoch': 1.27}
{'loss': 1.1606, 'learning_rate': 1.2831363394975686e-05, 'epoch': 1.28}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5288714170455933, 'eval_runtime': 0.9805, 'eval_samples_per_second': 5.099, 'eval_steps_per_second': 1.02, 'epoch': 1.28}
{'loss': 1.1184, 'learning_rate': 1.2766974026676585e-05, 'epoch': 1.29}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5251197814941406, 'eval_runtime': 1.0007, 'eval_samples_per_second': 4.997, 'eval_steps_per_second': 0.999, 'epoch': 1.29}
{'loss': 1.0988, 'learning_rate': 1.2702460187019702e-05, 'epoch': 1.29}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.523463249206543, 'eval_runtime': 1.0059, 'eval_samples_per_second': 4.971, 'eval_steps_per_second': 0.994, 'epoch': 1.29}
{'loss': 0.7879, 'learning_rate': 1.2637824778137532e-05, 'epoch': 1.3}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.526428461074829, 'eval_runtime': 1.0807, 'eval_samples_per_second': 4.627, 'eval_steps_per_second': 0.925, 'epoch': 1.3}
{'loss': 1.1713, 'learning_rate': 1.2573070707631314e-05, 'epoch': 1.3}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.532978892326355, 'eval_runtime': 1.0236, 'eval_samples_per_second': 4.885, 'eval_steps_per_second': 0.977, 'epoch': 1.3}
{'loss': 1.1114, 'learning_rate': 1.2508200888440232e-05, 'epoch': 1.31}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5384104251861572, 'eval_runtime': 1.0147, 'eval_samples_per_second': 4.927, 'eval_steps_per_second': 0.985, 'epoch': 1.31}
{'loss': 0.8324, 'learning_rate': 1.2443218238710389e-05, 'epoch': 1.32}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5401854515075684, 'eval_runtime': 1.0698, 'eval_samples_per_second': 4.674, 'eval_steps_per_second': 0.935, 'epoch': 1.32}
{'loss': 1.166, 'learning_rate': 1.2378125681663535e-05, 'epoch': 1.32}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5358541011810303, 'eval_runtime': 1.087, 'eval_samples_per_second': 4.6, 'eval_steps_per_second': 0.92, 'epoch': 1.32}
{'loss': 0.8529, 'learning_rate': 1.2312926145465554e-05, 'epoch': 1.33}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5341371297836304, 'eval_runtime': 0.9732, 'eval_samples_per_second': 5.138, 'eval_steps_per_second': 1.028, 'epoch': 1.33}
{'loss': 0.9945, 'learning_rate': 1.2247622563094752e-05, 'epoch': 1.34}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5271883010864258, 'eval_runtime': 1.09, 'eval_samples_per_second': 4.587, 'eval_steps_per_second': 0.917, 'epoch': 1.34}
{'loss': 0.8763, 'learning_rate': 1.218221787220992e-05, 'epoch': 1.34}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5165103673934937, 'eval_runtime': 1.0174, 'eval_samples_per_second': 4.915, 'eval_steps_per_second': 0.983, 'epoch': 1.34}
{'loss': 1.0143, 'learning_rate': 1.2116715015018179e-05, 'epoch': 1.35}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5109026432037354, 'eval_runtime': 0.9868, 'eval_samples_per_second': 5.067, 'eval_steps_per_second': 1.013, 'epoch': 1.35}
{'loss': 0.9774, 'learning_rate': 1.2051116938142623e-05, 'epoch': 1.35}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5055822134017944, 'eval_runtime': 1.0804, 'eval_samples_per_second': 4.628, 'eval_steps_per_second': 0.926, 'epoch': 1.35}
{'loss': 0.7308, 'learning_rate': 1.1985426592489767e-05, 'epoch': 1.36}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.503867268562317, 'eval_runtime': 1.0498, 'eval_samples_per_second': 4.763, 'eval_steps_per_second': 0.953, 'epoch': 1.36}
{'loss': 0.996, 'learning_rate': 1.1919646933116817e-05, 'epoch': 1.37}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5089094638824463, 'eval_runtime': 0.9776, 'eval_samples_per_second': 5.114, 'eval_steps_per_second': 1.023, 'epoch': 1.37}
{'loss': 0.8008, 'learning_rate': 1.1853780919098714e-05, 'epoch': 1.37}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5140308141708374, 'eval_runtime': 1.1014, 'eval_samples_per_second': 4.54, 'eval_steps_per_second': 0.908, 'epoch': 1.37}
{'loss': 1.1051, 'learning_rate': 1.1787831513395035e-05, 'epoch': 1.38}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.5141633749008179, 'eval_runtime': 1.0009, 'eval_samples_per_second': 4.996, 'eval_steps_per_second': 0.999, 'epoch': 1.38}
{'loss': 0.897, 'learning_rate': 1.1721801682716714e-05, 'epoch': 1.39}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.512342929840088, 'eval_runtime': 1.0051, 'eval_samples_per_second': 4.974, 'eval_steps_per_second': 0.995, 'epoch': 1.39}
{'loss': 1.2073, 'learning_rate': 1.1655694397392552e-05, 'epoch': 1.39}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.496107816696167, 'eval_runtime': 0.9933, 'eval_samples_per_second': 5.034, 'eval_steps_per_second': 1.007, 'epoch': 1.39}
{'loss': 1.1065, 'learning_rate': 1.158951263123564e-05, 'epoch': 1.4}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4871606826782227, 'eval_runtime': 1.0931, 'eval_samples_per_second': 4.574, 'eval_steps_per_second': 0.915, 'epoch': 1.4}
{'loss': 0.8795, 'learning_rate': 1.1523259361409558e-05, 'epoch': 1.4}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4779441356658936, 'eval_runtime': 1.0058, 'eval_samples_per_second': 4.971, 'eval_steps_per_second': 0.994, 'epoch': 1.4}
{'loss': 1.263, 'learning_rate': 1.1456937568294447e-05, 'epoch': 1.41}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4692983627319336, 'eval_runtime': 1.004, 'eval_samples_per_second': 4.98, 'eval_steps_per_second': 0.996, 'epoch': 1.41}
{'loss': 1.0561, 'learning_rate': 1.1390550235352948e-05, 'epoch': 1.42}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4605125188827515, 'eval_runtime': 1.0888, 'eval_samples_per_second': 4.592, 'eval_steps_per_second': 0.918, 'epoch': 1.42}
{'loss': 1.0662, 'learning_rate': 1.1324100348995994e-05, 'epoch': 1.42}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4566149711608887, 'eval_runtime': 1.0824, 'eval_samples_per_second': 4.619, 'eval_steps_per_second': 0.924, 'epoch': 1.42}
{'loss': 1.0599, 'learning_rate': 1.125759089844845e-05, 'epoch': 1.43}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4571419954299927, 'eval_runtime': 1.0771, 'eval_samples_per_second': 4.642, 'eval_steps_per_second': 0.928, 'epoch': 1.43}
{'loss': 1.1485, 'learning_rate': 1.1191024875614669e-05, 'epoch': 1.43}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4589860439300537, 'eval_runtime': 0.9731, 'eval_samples_per_second': 5.138, 'eval_steps_per_second': 1.028, 'epoch': 1.43}
{'loss': 0.8074, 'learning_rate': 1.1124405274943878e-05, 'epoch': 1.44}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4618772268295288, 'eval_runtime': 0.9737, 'eval_samples_per_second': 5.135, 'eval_steps_per_second': 1.027, 'epoch': 1.44}
{'loss': 0.7329, 'learning_rate': 1.105773509329548e-05, 'epoch': 1.45}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4645063877105713, 'eval_runtime': 1.0398, 'eval_samples_per_second': 4.809, 'eval_steps_per_second': 0.962, 'epoch': 1.45}
{'loss': 0.8916, 'learning_rate': 1.0991017329804267e-05, 'epoch': 1.45}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4641586542129517, 'eval_runtime': 1.084, 'eval_samples_per_second': 4.613, 'eval_steps_per_second': 0.923, 'epoch': 1.45}
{'loss': 1.0222, 'learning_rate': 1.0924254985745462e-05, 'epoch': 1.46}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4618903398513794, 'eval_runtime': 1.0305, 'eval_samples_per_second': 4.852, 'eval_steps_per_second': 0.97, 'epoch': 1.46}
{'loss': 1.0245, 'learning_rate': 1.0857451064399729e-05, 'epoch': 1.47}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4607359170913696, 'eval_runtime': 1.0654, 'eval_samples_per_second': 4.693, 'eval_steps_per_second': 0.939, 'epoch': 1.47}
{'loss': 0.879, 'learning_rate': 1.0790608570918092e-05, 'epoch': 1.47}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4631487131118774, 'eval_runtime': 1.0878, 'eval_samples_per_second': 4.597, 'eval_steps_per_second': 0.919, 'epoch': 1.47}
{'loss': 1.1893, 'learning_rate': 1.0723730512186712e-05, 'epoch': 1.48}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.464677095413208, 'eval_runtime': 0.974, 'eval_samples_per_second': 5.134, 'eval_steps_per_second': 1.027, 'epoch': 1.48}
{'loss': 0.9292, 'learning_rate': 1.0656819896691644e-05, 'epoch': 1.48}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4652906656265259, 'eval_runtime': 1.0926, 'eval_samples_per_second': 4.576, 'eval_steps_per_second': 0.915, 'epoch': 1.48}
{'loss': 1.1015, 'learning_rate': 1.0589879734383499e-05, 'epoch': 1.49}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4670722484588623, 'eval_runtime': 1.018, 'eval_samples_per_second': 4.911, 'eval_steps_per_second': 0.982, 'epoch': 1.49}
{'loss': 1.0534, 'learning_rate': 1.052291303654204e-05, 'epoch': 1.5}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.447419285774231, 'eval_runtime': 1.0536, 'eval_samples_per_second': 4.746, 'eval_steps_per_second': 0.949, 'epoch': 1.5}
{'loss': 0.965, 'learning_rate': 1.0455922815640718e-05, 'epoch': 1.5}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4360127449035645, 'eval_runtime': 1.0233, 'eval_samples_per_second': 4.886, 'eval_steps_per_second': 0.977, 'epoch': 1.5}
{'loss': 0.9106, 'learning_rate': 1.0388912085211173e-05, 'epoch': 1.51}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4301927089691162, 'eval_runtime': 0.9806, 'eval_samples_per_second': 5.099, 'eval_steps_per_second': 1.02, 'epoch': 1.51}
{'loss': 1.0588, 'learning_rate': 1.0321883859707647e-05, 'epoch': 1.52}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4255096912384033, 'eval_runtime': 1.0487, 'eval_samples_per_second': 4.768, 'eval_steps_per_second': 0.954, 'epoch': 1.52}
{'loss': 1.1597, 'learning_rate': 1.0254841154371391e-05, 'epoch': 1.52}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4200654029846191, 'eval_runtime': 1.0057, 'eval_samples_per_second': 4.972, 'eval_steps_per_second': 0.994, 'epoch': 1.52}
{'loss': 1.0652, 'learning_rate': 1.0187786985095036e-05, 'epoch': 1.53}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4181239604949951, 'eval_runtime': 1.0925, 'eval_samples_per_second': 4.577, 'eval_steps_per_second': 0.915, 'epoch': 1.53}
{'loss': 0.7934, 'learning_rate': 1.0120724368286907e-05, 'epoch': 1.53}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4181008338928223, 'eval_runtime': 0.9756, 'eval_samples_per_second': 5.125, 'eval_steps_per_second': 1.025, 'epoch': 1.53}
{'loss': 0.7962, 'learning_rate': 1.005365632073534e-05, 'epoch': 1.54}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.41569185256958, 'eval_runtime': 1.0979, 'eval_samples_per_second': 4.554, 'eval_steps_per_second': 0.911, 'epoch': 1.54}
{'loss': 1.0976, 'learning_rate': 9.986585859472972e-06, 'epoch': 1.55}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4091764688491821, 'eval_runtime': 0.9793, 'eval_samples_per_second': 5.106, 'eval_steps_per_second': 1.021, 'epoch': 1.55}
{'loss': 1.1634, 'learning_rate': 9.91951600164102e-06, 'epoch': 1.55}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4030431509017944, 'eval_runtime': 1.0297, 'eval_samples_per_second': 4.856, 'eval_steps_per_second': 0.971, 'epoch': 1.55}
{'loss': 0.8001, 'learning_rate': 9.852449764353553e-06, 'epoch': 1.56}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4014668464660645, 'eval_runtime': 1.0006, 'eval_samples_per_second': 4.997, 'eval_steps_per_second': 0.999, 'epoch': 1.56}
{'loss': 1.2195, 'learning_rate': 9.785390164561773e-06, 'epoch': 1.57}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4024829864501953, 'eval_runtime': 1.0134, 'eval_samples_per_second': 4.934, 'eval_steps_per_second': 0.987, 'epoch': 1.57}
{'loss': 0.7794, 'learning_rate': 9.7183402189183e-06, 'epoch': 1.57}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4050424098968506, 'eval_runtime': 1.0749, 'eval_samples_per_second': 4.652, 'eval_steps_per_second': 0.93, 'epoch': 1.57}
{'loss': 0.8764, 'learning_rate': 9.65130294364146e-06, 'epoch': 1.58}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4085304737091064, 'eval_runtime': 1.0491, 'eval_samples_per_second': 4.766, 'eval_steps_per_second': 0.953, 'epoch': 1.58}
{'loss': 0.6804, 'learning_rate': 9.584281354379607e-06, 'epoch': 1.58}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.41546630859375, 'eval_runtime': 1.0864, 'eval_samples_per_second': 4.602, 'eval_steps_per_second': 0.92, 'epoch': 1.58}
{'loss': 1.0895, 'learning_rate': 9.517278466075474e-06, 'epoch': 1.59}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.418289065361023, 'eval_runtime': 1.0915, 'eval_samples_per_second': 4.581, 'eval_steps_per_second': 0.916, 'epoch': 1.59}
{'loss': 1.2551, 'learning_rate': 9.450297292830523e-06, 'epoch': 1.6}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4191187620162964, 'eval_runtime': 0.9945, 'eval_samples_per_second': 5.028, 'eval_steps_per_second': 1.006, 'epoch': 1.6}
{'loss': 1.0739, 'learning_rate': 9.383340847769383e-06, 'epoch': 1.6}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.41947603225708, 'eval_runtime': 1.0926, 'eval_samples_per_second': 4.576, 'eval_steps_per_second': 0.915, 'epoch': 1.6}
{'loss': 1.2041, 'learning_rate': 9.3164121429043e-06, 'epoch': 1.61}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4143977165222168, 'eval_runtime': 0.9824, 'eval_samples_per_second': 5.089, 'eval_steps_per_second': 1.018, 'epoch': 1.61}
{'loss': 1.0646, 'learning_rate': 9.249514188999615e-06, 'epoch': 1.61}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4086552858352661, 'eval_runtime': 1.0911, 'eval_samples_per_second': 4.582, 'eval_steps_per_second': 0.916, 'epoch': 1.61}
{'loss': 1.1827, 'learning_rate': 9.182649995436365e-06, 'epoch': 1.62}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4035686254501343, 'eval_runtime': 1.0865, 'eval_samples_per_second': 4.602, 'eval_steps_per_second': 0.92, 'epoch': 1.62}
{'loss': 0.9524, 'learning_rate': 9.115822570076896e-06, 'epoch': 1.63}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4000192880630493, 'eval_runtime': 1.0288, 'eval_samples_per_second': 4.86, 'eval_steps_per_second': 0.972, 'epoch': 1.63}
{'loss': 0.8674, 'learning_rate': 9.049034919129524e-06, 'epoch': 1.63}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3936936855316162, 'eval_runtime': 1.0227, 'eval_samples_per_second': 4.889, 'eval_steps_per_second': 0.978, 'epoch': 1.63}
{'loss': 0.8317, 'learning_rate': 8.98229004701335e-06, 'epoch': 1.64}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3891435861587524, 'eval_runtime': 0.976, 'eval_samples_per_second': 5.123, 'eval_steps_per_second': 1.025, 'epoch': 1.64}
{'loss': 0.8822, 'learning_rate': 8.915590956223081e-06, 'epoch': 1.65}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3874361515045166, 'eval_runtime': 0.9915, 'eval_samples_per_second': 5.043, 'eval_steps_per_second': 1.009, 'epoch': 1.65}
{'loss': 0.9407, 'learning_rate': 8.848940647193952e-06, 'epoch': 1.65}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.388383150100708, 'eval_runtime': 1.0969, 'eval_samples_per_second': 4.558, 'eval_steps_per_second': 0.912, 'epoch': 1.65}
{'loss': 1.1155, 'learning_rate': 8.782342118166784e-06, 'epoch': 1.66}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3890262842178345, 'eval_runtime': 0.9803, 'eval_samples_per_second': 5.1, 'eval_steps_per_second': 1.02, 'epoch': 1.66}
{'loss': 1.0777, 'learning_rate': 8.71579836505309e-06, 'epoch': 1.66}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3900648355484009, 'eval_runtime': 1.0769, 'eval_samples_per_second': 4.643, 'eval_steps_per_second': 0.929, 'epoch': 1.66}
{'loss': 0.7964, 'learning_rate': 8.649312381300297e-06, 'epoch': 1.67}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3916683197021484, 'eval_runtime': 0.9744, 'eval_samples_per_second': 5.131, 'eval_steps_per_second': 1.026, 'epoch': 1.67}
{'loss': 0.9459, 'learning_rate': 8.58288715775711e-06, 'epoch': 1.68}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.396971583366394, 'eval_runtime': 1.0901, 'eval_samples_per_second': 4.587, 'eval_steps_per_second': 0.917, 'epoch': 1.68}
{'loss': 1.1347, 'learning_rate': 8.516525682538951e-06, 'epoch': 1.68}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4000415802001953, 'eval_runtime': 1.0227, 'eval_samples_per_second': 4.889, 'eval_steps_per_second': 0.978, 'epoch': 1.68}
{'loss': 1.1125, 'learning_rate': 8.450230940893549e-06, 'epoch': 1.69}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4026038646697998, 'eval_runtime': 1.081, 'eval_samples_per_second': 4.625, 'eval_steps_per_second': 0.925, 'epoch': 1.69}
{'loss': 0.8664, 'learning_rate': 8.384005915066642e-06, 'epoch': 1.7}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.403929352760315, 'eval_runtime': 1.0694, 'eval_samples_per_second': 4.676, 'eval_steps_per_second': 0.935, 'epoch': 1.7}
{'loss': 0.9891, 'learning_rate': 8.317853584167833e-06, 'epoch': 1.7}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4046827554702759, 'eval_runtime': 0.9779, 'eval_samples_per_second': 5.113, 'eval_steps_per_second': 1.023, 'epoch': 1.7}
{'loss': 0.7963, 'learning_rate': 8.25177692403656e-06, 'epoch': 1.71}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4058372974395752, 'eval_runtime': 1.0919, 'eval_samples_per_second': 4.579, 'eval_steps_per_second': 0.916, 'epoch': 1.71}
{'loss': 0.7781, 'learning_rate': 8.185778907108248e-06, 'epoch': 1.71}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4077239036560059, 'eval_runtime': 1.0807, 'eval_samples_per_second': 4.627, 'eval_steps_per_second': 0.925, 'epoch': 1.71}
{'loss': 1.0027, 'learning_rate': 8.11986250228058e-06, 'epoch': 1.72}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4114809036254883, 'eval_runtime': 1.0191, 'eval_samples_per_second': 4.906, 'eval_steps_per_second': 0.981, 'epoch': 1.72}
{'loss': 1.2933, 'learning_rate': 8.054030674779942e-06, 'epoch': 1.73}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4139294624328613, 'eval_runtime': 0.986, 'eval_samples_per_second': 5.071, 'eval_steps_per_second': 1.014, 'epoch': 1.73}
{'loss': 1.1318, 'learning_rate': 7.98828638602805e-06, 'epoch': 1.73}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4172484874725342, 'eval_runtime': 1.0885, 'eval_samples_per_second': 4.593, 'eval_steps_per_second': 0.919, 'epoch': 1.73}
{'loss': 0.7032, 'learning_rate': 7.922632593508714e-06, 'epoch': 1.74}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4205299615859985, 'eval_runtime': 0.9948, 'eval_samples_per_second': 5.026, 'eval_steps_per_second': 1.005, 'epoch': 1.74}
{'loss': 1.0389, 'learning_rate': 7.857072250634799e-06, 'epoch': 1.75}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4193096160888672, 'eval_runtime': 1.0351, 'eval_samples_per_second': 4.83, 'eval_steps_per_second': 0.966, 'epoch': 1.75}
{'loss': 1.1092, 'learning_rate': 7.791608306615377e-06, 'epoch': 1.75}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.416553258895874, 'eval_runtime': 0.9822, 'eval_samples_per_second': 5.091, 'eval_steps_per_second': 1.018, 'epoch': 1.75}
{'loss': 0.9949, 'learning_rate': 7.72624370632305e-06, 'epoch': 1.76}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4166232347488403, 'eval_runtime': 1.0882, 'eval_samples_per_second': 4.595, 'eval_steps_per_second': 0.919, 'epoch': 1.76}
{'loss': 1.0366, 'learning_rate': 7.660981390161471e-06, 'epoch': 1.76}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4166637659072876, 'eval_runtime': 1.0957, 'eval_samples_per_second': 4.563, 'eval_steps_per_second': 0.913, 'epoch': 1.76}
{'loss': 0.7156, 'learning_rate': 7.595824293933085e-06, 'epoch': 1.77}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4130487442016602, 'eval_runtime': 1.0044, 'eval_samples_per_second': 4.978, 'eval_steps_per_second': 0.996, 'epoch': 1.77}
{'loss': 0.9926, 'learning_rate': 7.530775348707055e-06, 'epoch': 1.78}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.408982753753662, 'eval_runtime': 0.9947, 'eval_samples_per_second': 5.027, 'eval_steps_per_second': 1.005, 'epoch': 1.78}
{'loss': 0.9607, 'learning_rate': 7.465837480687399e-06, 'epoch': 1.78}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.404409408569336, 'eval_runtime': 1.0382, 'eval_samples_per_second': 4.816, 'eval_steps_per_second': 0.963, 'epoch': 1.78}
{'loss': 1.0713, 'learning_rate': 7.401013611081378e-06, 'epoch': 1.79}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.402556300163269, 'eval_runtime': 1.0608, 'eval_samples_per_second': 4.713, 'eval_steps_per_second': 0.943, 'epoch': 1.79}
{'loss': 1.1619, 'learning_rate': 7.336306655968069e-06, 'epoch': 1.8}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4017138481140137, 'eval_runtime': 1.079, 'eval_samples_per_second': 4.634, 'eval_steps_per_second': 0.927, 'epoch': 1.8}
{'loss': 0.8559, 'learning_rate': 7.271719526167189e-06, 'epoch': 1.8}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.405207872390747, 'eval_runtime': 1.0237, 'eval_samples_per_second': 4.884, 'eval_steps_per_second': 0.977, 'epoch': 1.8}
{'loss': 1.1135, 'learning_rate': 7.207255127108159e-06, 'epoch': 1.81}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4075253009796143, 'eval_runtime': 1.0777, 'eval_samples_per_second': 4.64, 'eval_steps_per_second': 0.928, 'epoch': 1.81}
{'loss': 0.8101, 'learning_rate': 7.142916358699404e-06, 'epoch': 1.81}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.409855842590332, 'eval_runtime': 0.9719, 'eval_samples_per_second': 5.145, 'eval_steps_per_second': 1.029, 'epoch': 1.81}
{'loss': 1.26, 'learning_rate': 7.0787061151978855e-06, 'epoch': 1.82}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4110848903656006, 'eval_runtime': 1.0794, 'eval_samples_per_second': 4.632, 'eval_steps_per_second': 0.926, 'epoch': 1.82}
{'loss': 0.9592, 'learning_rate': 7.014627285078928e-06, 'epoch': 1.83}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4125028848648071, 'eval_runtime': 1.0333, 'eval_samples_per_second': 4.839, 'eval_steps_per_second': 0.968, 'epoch': 1.83}
{'loss': 0.8096, 'learning_rate': 6.950682750906275e-06, 'epoch': 1.83}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4118257761001587, 'eval_runtime': 1.0836, 'eval_samples_per_second': 4.614, 'eval_steps_per_second': 0.923, 'epoch': 1.83}
{'loss': 1.0516, 'learning_rate': 6.886875389202399e-06, 'epoch': 1.84}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4100087881088257, 'eval_runtime': 1.0954, 'eval_samples_per_second': 4.565, 'eval_steps_per_second': 0.913, 'epoch': 1.84}
{'loss': 1.1262, 'learning_rate': 6.823208070319124e-06, 'epoch': 1.84}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.408296823501587, 'eval_runtime': 1.0913, 'eval_samples_per_second': 4.582, 'eval_steps_per_second': 0.916, 'epoch': 1.84}
{'loss': 1.0242, 'learning_rate': 6.759683658308509e-06, 'epoch': 1.85}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4044864177703857, 'eval_runtime': 0.9732, 'eval_samples_per_second': 5.138, 'eval_steps_per_second': 1.028, 'epoch': 1.85}
{'loss': 0.6799, 'learning_rate': 6.6963050107939705e-06, 'epoch': 1.86}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4025285243988037, 'eval_runtime': 1.0866, 'eval_samples_per_second': 4.602, 'eval_steps_per_second': 0.92, 'epoch': 1.86}
{'loss': 1.0506, 'learning_rate': 6.633074978841788e-06, 'epoch': 1.86}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.403306007385254, 'eval_runtime': 1.0022, 'eval_samples_per_second': 4.989, 'eval_steps_per_second': 0.998, 'epoch': 1.86}
{'loss': 0.992, 'learning_rate': 6.56999640683281e-06, 'epoch': 1.87}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4057178497314453, 'eval_runtime': 1.0778, 'eval_samples_per_second': 4.639, 'eval_steps_per_second': 0.928, 'epoch': 1.87}
{'loss': 0.7451, 'learning_rate': 6.507072132334502e-06, 'epoch': 1.88}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4075323343276978, 'eval_runtime': 1.0645, 'eval_samples_per_second': 4.697, 'eval_steps_per_second': 0.939, 'epoch': 1.88}
{'loss': 0.7043, 'learning_rate': 6.444304985973329e-06, 'epoch': 1.88}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4082764387130737, 'eval_runtime': 0.9663, 'eval_samples_per_second': 5.175, 'eval_steps_per_second': 1.035, 'epoch': 1.88}
{'loss': 0.9484, 'learning_rate': 6.381697791307394e-06, 'epoch': 1.89}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4102715253829956, 'eval_runtime': 0.9796, 'eval_samples_per_second': 5.104, 'eval_steps_per_second': 1.021, 'epoch': 1.89}
{'loss': 1.1098, 'learning_rate': 6.319253364699419e-06, 'epoch': 1.89}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4110969305038452, 'eval_runtime': 1.0971, 'eval_samples_per_second': 4.557, 'eval_steps_per_second': 0.911, 'epoch': 1.89}
{'loss': 1.1356, 'learning_rate': 6.25697451519008e-06, 'epoch': 1.9}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4106968641281128, 'eval_runtime': 1.0728, 'eval_samples_per_second': 4.661, 'eval_steps_per_second': 0.932, 'epoch': 1.9}
{'loss': 0.7968, 'learning_rate': 6.194864044371614e-06, 'epoch': 1.91}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4106745719909668, 'eval_runtime': 1.0654, 'eval_samples_per_second': 4.693, 'eval_steps_per_second': 0.939, 'epoch': 1.91}
{'loss': 1.2059, 'learning_rate': 6.132924746261802e-06, 'epoch': 1.91}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.410481572151184, 'eval_runtime': 0.9973, 'eval_samples_per_second': 5.013, 'eval_steps_per_second': 1.003, 'epoch': 1.91}
{'loss': 1.1356, 'learning_rate': 6.071159407178282e-06, 'epoch': 1.92}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4111816883087158, 'eval_runtime': 1.0463, 'eval_samples_per_second': 4.779, 'eval_steps_per_second': 0.956, 'epoch': 1.92}
{'loss': 1.0667, 'learning_rate': 6.009570805613211e-06, 'epoch': 1.93}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4098402261734009, 'eval_runtime': 0.9996, 'eval_samples_per_second': 5.002, 'eval_steps_per_second': 1.0, 'epoch': 1.93}
{'loss': 1.1734, 'learning_rate': 5.94816171210826e-06, 'epoch': 1.93}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4074913263320923, 'eval_runtime': 0.9884, 'eval_samples_per_second': 5.059, 'eval_steps_per_second': 1.012, 'epoch': 1.93}
{'loss': 0.9053, 'learning_rate': 5.886934889130003e-06, 'epoch': 1.94}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.407052993774414, 'eval_runtime': 1.0399, 'eval_samples_per_second': 4.808, 'eval_steps_per_second': 0.962, 'epoch': 1.94}
{'loss': 1.3082, 'learning_rate': 5.825893090945636e-06, 'epoch': 1.94}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4061154127120972, 'eval_runtime': 0.9814, 'eval_samples_per_second': 5.095, 'eval_steps_per_second': 1.019, 'epoch': 1.94}
{'loss': 0.9878, 'learning_rate': 5.765039063499067e-06, 'epoch': 1.95}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4042011499404907, 'eval_runtime': 0.9816, 'eval_samples_per_second': 5.094, 'eval_steps_per_second': 1.019, 'epoch': 1.95}
{'loss': 0.903, 'learning_rate': 5.704375544287424e-06, 'epoch': 1.96}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.401579737663269, 'eval_runtime': 1.0873, 'eval_samples_per_second': 4.598, 'eval_steps_per_second': 0.92, 'epoch': 1.96}
{'loss': 1.0473, 'learning_rate': 5.643905262237881e-06, 'epoch': 1.96}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3993839025497437, 'eval_runtime': 0.9747, 'eval_samples_per_second': 5.13, 'eval_steps_per_second': 1.026, 'epoch': 1.96}
{'loss': 0.8369, 'learning_rate': 5.58363093758489e-06, 'epoch': 1.97}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3987492322921753, 'eval_runtime': 1.0927, 'eval_samples_per_second': 4.576, 'eval_steps_per_second': 0.915, 'epoch': 1.97}
{'loss': 0.7455, 'learning_rate': 5.52355528174785e-06, 'epoch': 1.98}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.399860143661499, 'eval_runtime': 0.9757, 'eval_samples_per_second': 5.125, 'eval_steps_per_second': 1.025, 'epoch': 1.98}
{'loss': 0.8768, 'learning_rate': 5.46368099720911e-06, 'epoch': 1.98}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.401487112045288, 'eval_runtime': 0.9712, 'eval_samples_per_second': 5.148, 'eval_steps_per_second': 1.03, 'epoch': 1.98}
{'loss': 0.889, 'learning_rate': 5.404010777392381e-06, 'epoch': 1.99}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4021739959716797, 'eval_runtime': 1.0208, 'eval_samples_per_second': 4.898, 'eval_steps_per_second': 0.98, 'epoch': 1.99}
{'loss': 1.0272, 'learning_rate': 5.344547306541619e-06, 'epoch': 1.99}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.403574824333191, 'eval_runtime': 0.9927, 'eval_samples_per_second': 5.037, 'eval_steps_per_second': 1.007, 'epoch': 1.99}
{'loss': 1.1686, 'learning_rate': 5.285293259600243e-06, 'epoch': 2.0}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4060789346694946, 'eval_runtime': 1.0797, 'eval_samples_per_second': 4.631, 'eval_steps_per_second': 0.926, 'epoch': 2.0}




{'loss': 0.8942, 'learning_rate': 5.226251302090797e-06, 'epoch': 2.01}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4076279401779175, 'eval_runtime': 0.9764, 'eval_samples_per_second': 5.121, 'eval_steps_per_second': 1.024, 'epoch': 2.01}
{'loss': 0.6767, 'learning_rate': 5.167424089995073e-06, 'epoch': 2.01}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.408293604850769, 'eval_runtime': 1.0876, 'eval_samples_per_second': 4.597, 'eval_steps_per_second': 0.919, 'epoch': 2.01}
{'loss': 0.5968, 'learning_rate': 5.1088142696346124e-06, 'epoch': 2.02}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4082757234573364, 'eval_runtime': 1.0757, 'eval_samples_per_second': 4.648, 'eval_steps_per_second': 0.93, 'epoch': 2.02}
{'loss': 0.8588, 'learning_rate': 5.0504244775516596e-06, 'epoch': 2.02}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4084057807922363, 'eval_runtime': 1.025, 'eval_samples_per_second': 4.878, 'eval_steps_per_second': 0.976, 'epoch': 2.02}
{'loss': 0.8567, 'learning_rate': 4.992257340390564e-06, 'epoch': 2.03}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4102401733398438, 'eval_runtime': 0.987, 'eval_samples_per_second': 5.066, 'eval_steps_per_second': 1.013, 'epoch': 2.03}
{'loss': 0.5956, 'learning_rate': 4.9343154747796315e-06, 'epoch': 2.04}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4135726690292358, 'eval_runtime': 1.0531, 'eval_samples_per_second': 4.748, 'eval_steps_per_second': 0.95, 'epoch': 2.04}
{'loss': 0.757, 'learning_rate': 4.876601487213397e-06, 'epoch': 2.04}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4168967008590698, 'eval_runtime': 0.9811, 'eval_samples_per_second': 5.096, 'eval_steps_per_second': 1.019, 'epoch': 2.04}
{'loss': 0.862, 'learning_rate': 4.819117973935377e-06, 'epoch': 2.05}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4160550832748413, 'eval_runtime': 0.9699, 'eval_samples_per_second': 5.155, 'eval_steps_per_second': 1.031, 'epoch': 2.05}
{'loss': 0.6203, 'learning_rate': 4.761867520821298e-06, 'epoch': 2.06}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.415329933166504, 'eval_runtime': 1.089, 'eval_samples_per_second': 4.591, 'eval_steps_per_second': 0.918, 'epoch': 2.06}
{'loss': 0.9204, 'learning_rate': 4.704852703262746e-06, 'epoch': 2.06}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.414621353149414, 'eval_runtime': 1.0897, 'eval_samples_per_second': 4.588, 'eval_steps_per_second': 0.918, 'epoch': 2.06}
{'loss': 0.7921, 'learning_rate': 4.648076086051319e-06, 'epoch': 2.07}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4122353792190552, 'eval_runtime': 1.0972, 'eval_samples_per_second': 4.557, 'eval_steps_per_second': 0.911, 'epoch': 2.07}
{'loss': 0.6917, 'learning_rate': 4.591540223263272e-06, 'epoch': 2.07}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4074933528900146, 'eval_runtime': 1.0704, 'eval_samples_per_second': 4.671, 'eval_steps_per_second': 0.934, 'epoch': 2.07}
{'loss': 0.5652, 'learning_rate': 4.5352476581445945e-06, 'epoch': 2.08}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.4044829607009888, 'eval_runtime': 1.0772, 'eval_samples_per_second': 4.642, 'eval_steps_per_second': 0.928, 'epoch': 2.08}
{'loss': 0.8806, 'learning_rate': 4.479200922996614e-06, 'epoch': 2.09}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.400391697883606, 'eval_runtime': 1.0595, 'eval_samples_per_second': 4.719, 'eval_steps_per_second': 0.944, 'epoch': 2.09}
{'loss': 0.7212, 'learning_rate': 4.423402539062094e-06, 'epoch': 2.09}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3974095582962036, 'eval_runtime': 1.0093, 'eval_samples_per_second': 4.954, 'eval_steps_per_second': 0.991, 'epoch': 2.09}
{'loss': 0.8111, 'learning_rate': 4.367855016411795e-06, 'epoch': 2.1}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.395443081855774, 'eval_runtime': 1.0264, 'eval_samples_per_second': 4.871, 'eval_steps_per_second': 0.974, 'epoch': 2.1}
{'loss': 0.6394, 'learning_rate': 4.312560853831572e-06, 'epoch': 2.11}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3931056261062622, 'eval_runtime': 1.0918, 'eval_samples_per_second': 4.58, 'eval_steps_per_second': 0.916, 'epoch': 2.11}
{'loss': 0.9279, 'learning_rate': 4.257522538709975e-06, 'epoch': 2.11}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3904080390930176, 'eval_runtime': 1.0369, 'eval_samples_per_second': 4.822, 'eval_steps_per_second': 0.964, 'epoch': 2.11}
{'loss': 0.619, 'learning_rate': 4.202742546926335e-06, 'epoch': 2.12}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3882348537445068, 'eval_runtime': 1.0357, 'eval_samples_per_second': 4.828, 'eval_steps_per_second': 0.966, 'epoch': 2.12}
{'loss': 0.8465, 'learning_rate': 4.1482233427393985e-06, 'epoch': 2.12}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3864129781723022, 'eval_runtime': 1.0761, 'eval_samples_per_second': 4.647, 'eval_steps_per_second': 0.929, 'epoch': 2.12}
{'loss': 0.6146, 'learning_rate': 4.093967378676485e-06, 'epoch': 2.13}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3875218629837036, 'eval_runtime': 0.9979, 'eval_samples_per_second': 5.01, 'eval_steps_per_second': 1.002, 'epoch': 2.13}
{'loss': 0.795, 'learning_rate': 4.039977095423134e-06, 'epoch': 2.14}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3885136842727661, 'eval_runtime': 1.0847, 'eval_samples_per_second': 4.61, 'eval_steps_per_second': 0.922, 'epoch': 2.14}
{'loss': 0.9125, 'learning_rate': 3.986254921713335e-06, 'epoch': 2.14}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3903658390045166, 'eval_runtime': 1.0961, 'eval_samples_per_second': 4.562, 'eval_steps_per_second': 0.912, 'epoch': 2.14}
{'loss': 0.5573, 'learning_rate': 3.932803274220266e-06, 'epoch': 2.15}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3912466764450073, 'eval_runtime': 1.0125, 'eval_samples_per_second': 4.938, 'eval_steps_per_second': 0.988, 'epoch': 2.15}
{'loss': 0.6867, 'learning_rate': 3.879624557447573e-06, 'epoch': 2.16}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3931418657302856, 'eval_runtime': 1.0886, 'eval_samples_per_second': 4.593, 'eval_steps_per_second': 0.919, 'epoch': 2.16}
{'loss': 0.6366, 'learning_rate': 3.8267211636212054e-06, 'epoch': 2.16}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3943567276000977, 'eval_runtime': 1.0142, 'eval_samples_per_second': 4.93, 'eval_steps_per_second': 0.986, 'epoch': 2.16}
{'loss': 0.6824, 'learning_rate': 3.774095472581818e-06, 'epoch': 2.17}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3952370882034302, 'eval_runtime': 0.9735, 'eval_samples_per_second': 5.136, 'eval_steps_per_second': 1.027, 'epoch': 2.17}
{'loss': 0.8181, 'learning_rate': 3.721749851677693e-06, 'epoch': 2.17}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3946666717529297, 'eval_runtime': 1.0591, 'eval_samples_per_second': 4.721, 'eval_steps_per_second': 0.944, 'epoch': 2.17}
{'loss': 0.5716, 'learning_rate': 3.6696866556582557e-06, 'epoch': 2.18}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3941539525985718, 'eval_runtime': 0.9822, 'eval_samples_per_second': 5.09, 'eval_steps_per_second': 1.018, 'epoch': 2.18}
{'loss': 0.5816, 'learning_rate': 3.6179082265681563e-06, 'epoch': 2.19}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3944365978240967, 'eval_runtime': 1.0088, 'eval_samples_per_second': 4.956, 'eval_steps_per_second': 0.991, 'epoch': 2.19}
{'loss': 0.9485, 'learning_rate': 3.566416893641891e-06, 'epoch': 2.19}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3939166069030762, 'eval_runtime': 1.0086, 'eval_samples_per_second': 4.957, 'eval_steps_per_second': 0.991, 'epoch': 2.19}
{'loss': 0.7967, 'learning_rate': 3.5152149731990504e-06, 'epoch': 2.2}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3937489986419678, 'eval_runtime': 0.9696, 'eval_samples_per_second': 5.157, 'eval_steps_per_second': 1.031, 'epoch': 2.2}
{'loss': 0.9598, 'learning_rate': 3.4643047685400944e-06, 'epoch': 2.2}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3940595388412476, 'eval_runtime': 0.981, 'eval_samples_per_second': 5.097, 'eval_steps_per_second': 1.019, 'epoch': 2.2}
{'loss': 0.6564, 'learning_rate': 3.4136885698427503e-06, 'epoch': 2.21}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.393999457359314, 'eval_runtime': 1.0869, 'eval_samples_per_second': 4.6, 'eval_steps_per_second': 0.92, 'epoch': 2.21}
{'loss': 0.6725, 'learning_rate': 3.3633686540590004e-06, 'epoch': 2.22}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3943543434143066, 'eval_runtime': 1.0355, 'eval_samples_per_second': 4.828, 'eval_steps_per_second': 0.966, 'epoch': 2.22}
{'loss': 0.8079, 'learning_rate': 3.3133472848126346e-06, 'epoch': 2.22}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3949272632598877, 'eval_runtime': 0.975, 'eval_samples_per_second': 5.128, 'eval_steps_per_second': 1.026, 'epoch': 2.22}
{'loss': 0.9235, 'learning_rate': 3.263626712297431e-06, 'epoch': 2.23}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.395322561264038, 'eval_runtime': 1.0934, 'eval_samples_per_second': 4.573, 'eval_steps_per_second': 0.915, 'epoch': 2.23}
{'loss': 0.7717, 'learning_rate': 3.214209173175943e-06, 'epoch': 2.24}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3949023485183716, 'eval_runtime': 0.9691, 'eval_samples_per_second': 5.159, 'eval_steps_per_second': 1.032, 'epoch': 2.24}
{'loss': 0.8211, 'learning_rate': 3.1650968904788594e-06, 'epoch': 2.24}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.393385648727417, 'eval_runtime': 1.0605, 'eval_samples_per_second': 4.715, 'eval_steps_per_second': 0.943, 'epoch': 2.24}
{'loss': 0.6028, 'learning_rate': 3.1162920735050206e-06, 'epoch': 2.25}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3924421072006226, 'eval_runtime': 1.0437, 'eval_samples_per_second': 4.791, 'eval_steps_per_second': 0.958, 'epoch': 2.25}
{'loss': 0.789, 'learning_rate': 3.0677969177220344e-06, 'epoch': 2.25}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3913921117782593, 'eval_runtime': 1.0431, 'eval_samples_per_second': 4.793, 'eval_steps_per_second': 0.959, 'epoch': 2.25}
{'loss': 0.6252, 'learning_rate': 3.0196136046675027e-06, 'epoch': 2.26}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3904385566711426, 'eval_runtime': 0.976, 'eval_samples_per_second': 5.123, 'eval_steps_per_second': 1.025, 'epoch': 2.26}
{'loss': 0.6481, 'learning_rate': 2.9717443018508873e-06, 'epoch': 2.27}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3902541399002075, 'eval_runtime': 0.9708, 'eval_samples_per_second': 5.15, 'eval_steps_per_second': 1.03, 'epoch': 2.27}
{'loss': 0.6745, 'learning_rate': 2.9241911626560226e-06, 'epoch': 2.27}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3920315504074097, 'eval_runtime': 1.0903, 'eval_samples_per_second': 4.586, 'eval_steps_per_second': 0.917, 'epoch': 2.27}
{'loss': 0.7478, 'learning_rate': 2.8769563262442224e-06, 'epoch': 2.28}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.393359661102295, 'eval_runtime': 1.0005, 'eval_samples_per_second': 4.998, 'eval_steps_per_second': 1.0, 'epoch': 2.28}
{'loss': 0.6099, 'learning_rate': 2.8300419174580597e-06, 'epoch': 2.29}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3941717147827148, 'eval_runtime': 1.0148, 'eval_samples_per_second': 4.927, 'eval_steps_per_second': 0.985, 'epoch': 2.29}
{'loss': 0.7457, 'learning_rate': 2.783450046725794e-06, 'epoch': 2.29}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3951934576034546, 'eval_runtime': 1.0589, 'eval_samples_per_second': 4.722, 'eval_steps_per_second': 0.944, 'epoch': 2.29}
{'loss': 0.7358, 'learning_rate': 2.7371828099664142e-06, 'epoch': 2.3}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3960233926773071, 'eval_runtime': 1.0682, 'eval_samples_per_second': 4.681, 'eval_steps_per_second': 0.936, 'epoch': 2.3}
{'loss': 0.7607, 'learning_rate': 2.6912422884953625e-06, 'epoch': 2.3}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3964647054672241, 'eval_runtime': 0.985, 'eval_samples_per_second': 5.076, 'eval_steps_per_second': 1.015, 'epoch': 2.3}
{'loss': 0.6915, 'learning_rate': 2.645630548930919e-06, 'epoch': 2.31}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3961141109466553, 'eval_runtime': 1.0773, 'eval_samples_per_second': 4.641, 'eval_steps_per_second': 0.928, 'epoch': 2.31}
{'loss': 0.6381, 'learning_rate': 2.6003496431012133e-06, 'epoch': 2.32}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3962520360946655, 'eval_runtime': 1.032, 'eval_samples_per_second': 4.845, 'eval_steps_per_second': 0.969, 'epoch': 2.32}
{'loss': 0.9491, 'learning_rate': 2.5554016079519373e-06, 'epoch': 2.32}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3965317010879517, 'eval_runtime': 1.026, 'eval_samples_per_second': 4.873, 'eval_steps_per_second': 0.975, 'epoch': 2.32}
{'loss': 0.9499, 'learning_rate': 2.510788465454721e-06, 'epoch': 2.33}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3975390195846558, 'eval_runtime': 0.9685, 'eval_samples_per_second': 5.163, 'eval_steps_per_second': 1.033, 'epoch': 2.33}
{'loss': 0.7074, 'learning_rate': 2.4665122225161553e-06, 'epoch': 2.34}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3985168933868408, 'eval_runtime': 1.025, 'eval_samples_per_second': 4.878, 'eval_steps_per_second': 0.976, 'epoch': 2.34}
{'loss': 1.1003, 'learning_rate': 2.422574870887523e-06, 'epoch': 2.34}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.398653507232666, 'eval_runtime': 1.0737, 'eval_samples_per_second': 4.657, 'eval_steps_per_second': 0.931, 'epoch': 2.34}
{'loss': 0.5827, 'learning_rate': 2.3789783870752102e-06, 'epoch': 2.35}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3987693786621094, 'eval_runtime': 1.0796, 'eval_samples_per_second': 4.632, 'eval_steps_per_second': 0.926, 'epoch': 2.35}
{'loss': 0.6399, 'learning_rate': 2.3357247322517728e-06, 'epoch': 2.35}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3990591764450073, 'eval_runtime': 1.0831, 'eval_samples_per_second': 4.616, 'eval_steps_per_second': 0.923, 'epoch': 2.35}
{'loss': 0.7142, 'learning_rate': 2.292815852167726e-06, 'epoch': 2.36}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3994929790496826, 'eval_runtime': 1.0304, 'eval_samples_per_second': 4.853, 'eval_steps_per_second': 0.971, 'epoch': 2.36}
{'loss': 0.5856, 'learning_rate': 2.250253677064018e-06, 'epoch': 2.37}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3982336521148682, 'eval_runtime': 1.0692, 'eval_samples_per_second': 4.676, 'eval_steps_per_second': 0.935, 'epoch': 2.37}
{'loss': 0.6575, 'learning_rate': 2.208040121585201e-06, 'epoch': 2.37}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.396020531654358, 'eval_runtime': 0.9771, 'eval_samples_per_second': 5.117, 'eval_steps_per_second': 1.023, 'epoch': 2.37}
{'loss': 0.78, 'learning_rate': 2.166177084693276e-06, 'epoch': 2.38}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3949834108352661, 'eval_runtime': 0.9977, 'eval_samples_per_second': 5.012, 'eval_steps_per_second': 1.002, 'epoch': 2.38}
{'loss': 0.7159, 'learning_rate': 2.1246664495823046e-06, 'epoch': 2.39}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3944823741912842, 'eval_runtime': 1.0193, 'eval_samples_per_second': 4.905, 'eval_steps_per_second': 0.981, 'epoch': 2.39}
{'loss': 0.9519, 'learning_rate': 2.0835100835936785e-06, 'epoch': 2.39}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3941943645477295, 'eval_runtime': 0.9686, 'eval_samples_per_second': 5.162, 'eval_steps_per_second': 1.032, 'epoch': 2.39}
{'loss': 0.6286, 'learning_rate': 2.042709838132101e-06, 'epoch': 2.4}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3938854932785034, 'eval_runtime': 1.0561, 'eval_samples_per_second': 4.734, 'eval_steps_per_second': 0.947, 'epoch': 2.4}
{'loss': 1.0793, 'learning_rate': 2.002267548582335e-06, 'epoch': 2.4}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3930014371871948, 'eval_runtime': 1.0935, 'eval_samples_per_second': 4.573, 'eval_steps_per_second': 0.915, 'epoch': 2.4}
{'loss': 0.7853, 'learning_rate': 1.962185034226619e-06, 'epoch': 2.41}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3925325870513916, 'eval_runtime': 1.0456, 'eval_samples_per_second': 4.782, 'eval_steps_per_second': 0.956, 'epoch': 2.41}
{'loss': 0.6497, 'learning_rate': 1.9224640981628186e-06, 'epoch': 2.42}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3922550678253174, 'eval_runtime': 1.0161, 'eval_samples_per_second': 4.921, 'eval_steps_per_second': 0.984, 'epoch': 2.42}
{'loss': 0.4736, 'learning_rate': 1.8831065272233373e-06, 'epoch': 2.42}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3920106887817383, 'eval_runtime': 1.0763, 'eval_samples_per_second': 4.645, 'eval_steps_per_second': 0.929, 'epoch': 2.42}
{'loss': 0.657, 'learning_rate': 1.844114091894732e-06, 'epoch': 2.43}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3923872709274292, 'eval_runtime': 1.0059, 'eval_samples_per_second': 4.971, 'eval_steps_per_second': 0.994, 'epoch': 2.43}
{'loss': 0.908, 'learning_rate': 1.80548854623804e-06, 'epoch': 2.43}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3886948823928833, 'eval_runtime': 0.9741, 'eval_samples_per_second': 5.133, 'eval_steps_per_second': 1.027, 'epoch': 2.43}
{'loss': 0.597, 'learning_rate': 1.7672316278099167e-06, 'epoch': 2.44}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3870669603347778, 'eval_runtime': 1.0563, 'eval_samples_per_second': 4.733, 'eval_steps_per_second': 0.947, 'epoch': 2.44}
{'loss': 0.8178, 'learning_rate': 1.7293450575844417e-06, 'epoch': 2.45}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3859232664108276, 'eval_runtime': 0.9974, 'eval_samples_per_second': 5.013, 'eval_steps_per_second': 1.003, 'epoch': 2.45}
{'loss': 0.5872, 'learning_rate': 1.6918305398757118e-06, 'epoch': 2.45}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3857296705245972, 'eval_runtime': 1.0856, 'eval_samples_per_second': 4.606, 'eval_steps_per_second': 0.921, 'epoch': 2.45}
{'loss': 0.8146, 'learning_rate': 1.6546897622611658e-06, 'epoch': 2.46}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3859739303588867, 'eval_runtime': 0.99, 'eval_samples_per_second': 5.051, 'eval_steps_per_second': 1.01, 'epoch': 2.46}
{'loss': 0.4944, 'learning_rate': 1.6179243955056879e-06, 'epoch': 2.47}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3857879638671875, 'eval_runtime': 1.0064, 'eval_samples_per_second': 4.968, 'eval_steps_per_second': 0.994, 'epoch': 2.47}
{'loss': 1.0914, 'learning_rate': 1.5815360934864243e-06, 'epoch': 2.47}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3860331773757935, 'eval_runtime': 1.093, 'eval_samples_per_second': 4.575, 'eval_steps_per_second': 0.915, 'epoch': 2.47}
{'loss': 0.9455, 'learning_rate': 1.5455264931183988e-06, 'epoch': 2.48}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3860489130020142, 'eval_runtime': 1.0349, 'eval_samples_per_second': 4.832, 'eval_steps_per_second': 0.966, 'epoch': 2.48}
{'loss': 1.1623, 'learning_rate': 1.509897214280881e-06, 'epoch': 2.48}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3855500221252441, 'eval_runtime': 1.0751, 'eval_samples_per_second': 4.651, 'eval_steps_per_second': 0.93, 'epoch': 2.48}
{'loss': 0.6237, 'learning_rate': 1.4746498597445015e-06, 'epoch': 2.49}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.385473370552063, 'eval_runtime': 1.0159, 'eval_samples_per_second': 4.922, 'eval_steps_per_second': 0.984, 'epoch': 2.49}
{'loss': 0.679, 'learning_rate': 1.4397860150991604e-06, 'epoch': 2.5}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3851712942123413, 'eval_runtime': 0.9974, 'eval_samples_per_second': 5.013, 'eval_steps_per_second': 1.003, 'epoch': 2.5}
{'loss': 0.649, 'learning_rate': 1.4053072486827079e-06, 'epoch': 2.5}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3851433992385864, 'eval_runtime': 0.9918, 'eval_samples_per_second': 5.041, 'eval_steps_per_second': 1.008, 'epoch': 2.5}
{'loss': 0.5921, 'learning_rate': 1.3712151115103744e-06, 'epoch': 2.51}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3846049308776855, 'eval_runtime': 1.0272, 'eval_samples_per_second': 4.868, 'eval_steps_per_second': 0.974, 'epoch': 2.51}
{'loss': 1.0773, 'learning_rate': 1.3375111372050143e-06, 'epoch': 2.52}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3840011358261108, 'eval_runtime': 1.0812, 'eval_samples_per_second': 4.624, 'eval_steps_per_second': 0.925, 'epoch': 2.52}
{'loss': 0.6607, 'learning_rate': 1.3041968419281127e-06, 'epoch': 2.52}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3838449716567993, 'eval_runtime': 1.0179, 'eval_samples_per_second': 4.912, 'eval_steps_per_second': 0.982, 'epoch': 2.52}
{'loss': 0.9613, 'learning_rate': 1.2712737243115768e-06, 'epoch': 2.53}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3839726448059082, 'eval_runtime': 1.0855, 'eval_samples_per_second': 4.606, 'eval_steps_per_second': 0.921, 'epoch': 2.53}
{'loss': 0.794, 'learning_rate': 1.2387432653903242e-06, 'epoch': 2.53}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3840677738189697, 'eval_runtime': 1.0787, 'eval_samples_per_second': 4.635, 'eval_steps_per_second': 0.927, 'epoch': 2.53}
{'loss': 0.6393, 'learning_rate': 1.2066069285356596e-06, 'epoch': 2.54}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3839677572250366, 'eval_runtime': 1.0799, 'eval_samples_per_second': 4.63, 'eval_steps_per_second': 0.926, 'epoch': 2.54}
{'loss': 0.7945, 'learning_rate': 1.1748661593894506e-06, 'epoch': 2.55}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.384163737297058, 'eval_runtime': 1.0875, 'eval_samples_per_second': 4.598, 'eval_steps_per_second': 0.92, 'epoch': 2.55}
{'loss': 0.7556, 'learning_rate': 1.143522385799074e-06, 'epoch': 2.55}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3843492269515991, 'eval_runtime': 1.0854, 'eval_samples_per_second': 4.607, 'eval_steps_per_second': 0.921, 'epoch': 2.55}
{'loss': 0.7464, 'learning_rate': 1.1125770177532136e-06, 'epoch': 2.56}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3847498893737793, 'eval_runtime': 1.0804, 'eval_samples_per_second': 4.628, 'eval_steps_per_second': 0.926, 'epoch': 2.56}
{'loss': 0.6802, 'learning_rate': 1.0820314473184224e-06, 'epoch': 2.57}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3848944902420044, 'eval_runtime': 1.0175, 'eval_samples_per_second': 4.914, 'eval_steps_per_second': 0.983, 'epoch': 2.57}
{'loss': 0.6375, 'learning_rate': 1.051887048576481e-06, 'epoch': 2.57}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3848495483398438, 'eval_runtime': 0.9736, 'eval_samples_per_second': 5.135, 'eval_steps_per_second': 1.027, 'epoch': 2.57}
{'loss': 0.6056, 'learning_rate': 1.022145177562618e-06, 'epoch': 2.58}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.38498055934906, 'eval_runtime': 1.0891, 'eval_samples_per_second': 4.591, 'eval_steps_per_second': 0.918, 'epoch': 2.58}
{'loss': 0.666, 'learning_rate': 9.928071722044897e-07, 'epoch': 2.58}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3853120803833008, 'eval_runtime': 0.978, 'eval_samples_per_second': 5.112, 'eval_steps_per_second': 1.022, 'epoch': 2.58}
{'loss': 0.8134, 'learning_rate': 9.638743522619876e-07, 'epoch': 2.59}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3852458000183105, 'eval_runtime': 0.9854, 'eval_samples_per_second': 5.074, 'eval_steps_per_second': 1.015, 'epoch': 2.59}
{'loss': 0.6919, 'learning_rate': 9.35348019267891e-07, 'epoch': 2.6}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.385074257850647, 'eval_runtime': 1.0847, 'eval_samples_per_second': 4.609, 'eval_steps_per_second': 0.922, 'epoch': 2.6}
{'loss': 0.4644, 'learning_rate': 9.072294564693041e-07, 'epoch': 2.6}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3851649761199951, 'eval_runtime': 1.0856, 'eval_samples_per_second': 4.606, 'eval_steps_per_second': 0.921, 'epoch': 2.6}
{'loss': 0.4804, 'learning_rate': 8.79519928769923e-07, 'epoch': 2.61}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3849847316741943, 'eval_runtime': 1.0172, 'eval_samples_per_second': 4.916, 'eval_steps_per_second': 0.983, 'epoch': 2.61}
{'loss': 0.614, 'learning_rate': 8.522206826731516e-07, 'epoch': 2.61}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3846338987350464, 'eval_runtime': 1.0124, 'eval_samples_per_second': 4.939, 'eval_steps_per_second': 0.988, 'epoch': 2.61}
{'loss': 0.6064, 'learning_rate': 8.253329462260207e-07, 'epoch': 2.62}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.384263277053833, 'eval_runtime': 1.0492, 'eval_samples_per_second': 4.765, 'eval_steps_per_second': 0.953, 'epoch': 2.62}
{'loss': 0.7512, 'learning_rate': 7.988579289639365e-07, 'epoch': 2.63}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3838021755218506, 'eval_runtime': 0.9857, 'eval_samples_per_second': 5.072, 'eval_steps_per_second': 1.014, 'epoch': 2.63}
{'loss': 0.8527, 'learning_rate': 7.727968218562798e-07, 'epoch': 2.63}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3834104537963867, 'eval_runtime': 0.9849, 'eval_samples_per_second': 5.076, 'eval_steps_per_second': 1.015, 'epoch': 2.63}
{'loss': 0.7765, 'learning_rate': 7.471507972528325e-07, 'epoch': 2.64}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3829768896102905, 'eval_runtime': 1.0111, 'eval_samples_per_second': 4.945, 'eval_steps_per_second': 0.989, 'epoch': 2.64}
{'loss': 0.9656, 'learning_rate': 7.219210088310291e-07, 'epoch': 2.65}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3828803300857544, 'eval_runtime': 1.0188, 'eval_samples_per_second': 4.908, 'eval_steps_per_second': 0.982, 'epoch': 2.65}
{'loss': 0.622, 'learning_rate': 6.971085915440679e-07, 'epoch': 2.65}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3827247619628906, 'eval_runtime': 1.082, 'eval_samples_per_second': 4.621, 'eval_steps_per_second': 0.924, 'epoch': 2.65}
{'loss': 0.8135, 'learning_rate': 6.727146615698555e-07, 'epoch': 2.66}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3823875188827515, 'eval_runtime': 0.9696, 'eval_samples_per_second': 5.157, 'eval_steps_per_second': 1.031, 'epoch': 2.66}
{'loss': 0.8374, 'learning_rate': 6.487403162607875e-07, 'epoch': 2.66}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3821152448654175, 'eval_runtime': 1.0956, 'eval_samples_per_second': 4.564, 'eval_steps_per_second': 0.913, 'epoch': 2.66}
{'loss': 0.7105, 'learning_rate': 6.251866340943946e-07, 'epoch': 2.67}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.38202702999115, 'eval_runtime': 1.0802, 'eval_samples_per_second': 4.629, 'eval_steps_per_second': 0.926, 'epoch': 2.67}
{'loss': 0.5934, 'learning_rate': 6.020546746248224e-07, 'epoch': 2.68}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3818461894989014, 'eval_runtime': 0.9905, 'eval_samples_per_second': 5.048, 'eval_steps_per_second': 1.01, 'epoch': 2.68}
{'loss': 0.9445, 'learning_rate': 5.793454784351693e-07, 'epoch': 2.68}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3816282749176025, 'eval_runtime': 1.0452, 'eval_samples_per_second': 4.784, 'eval_steps_per_second': 0.957, 'epoch': 2.68}
{'loss': 0.605, 'learning_rate': 5.570600670906712e-07, 'epoch': 2.69}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3811533451080322, 'eval_runtime': 0.9988, 'eval_samples_per_second': 5.006, 'eval_steps_per_second': 1.001, 'epoch': 2.69}
{'loss': 0.8405, 'learning_rate': 5.351994430927588e-07, 'epoch': 2.7}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3803166151046753, 'eval_runtime': 1.0883, 'eval_samples_per_second': 4.594, 'eval_steps_per_second': 0.919, 'epoch': 2.7}
{'loss': 0.7281, 'learning_rate': 5.137645898339472e-07, 'epoch': 2.7}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3798474073410034, 'eval_runtime': 0.9936, 'eval_samples_per_second': 5.032, 'eval_steps_per_second': 1.006, 'epoch': 2.7}
{'loss': 0.7559, 'learning_rate': 4.927564715536015e-07, 'epoch': 2.71}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3797167539596558, 'eval_runtime': 1.0123, 'eval_samples_per_second': 4.939, 'eval_steps_per_second': 0.988, 'epoch': 2.71}
{'loss': 0.5854, 'learning_rate': 4.721760332945724e-07, 'epoch': 2.71}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3796675205230713, 'eval_runtime': 1.0867, 'eval_samples_per_second': 4.601, 'eval_steps_per_second': 0.92, 'epoch': 2.71}
{'loss': 0.7068, 'learning_rate': 4.5202420086066566e-07, 'epoch': 2.72}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3796354532241821, 'eval_runtime': 1.0948, 'eval_samples_per_second': 4.567, 'eval_steps_per_second': 0.913, 'epoch': 2.72}
{'loss': 0.8, 'learning_rate': 4.323018807750079e-07, 'epoch': 2.73}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3796699047088623, 'eval_runtime': 1.0046, 'eval_samples_per_second': 4.977, 'eval_steps_per_second': 0.995, 'epoch': 2.73}
{'loss': 0.7997, 'learning_rate': 4.1300996023926344e-07, 'epoch': 2.73}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3795286417007446, 'eval_runtime': 1.0804, 'eval_samples_per_second': 4.628, 'eval_steps_per_second': 0.926, 'epoch': 2.73}
{'loss': 0.5858, 'learning_rate': 3.9414930709372414e-07, 'epoch': 2.74}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3792821168899536, 'eval_runtime': 0.9797, 'eval_samples_per_second': 5.104, 'eval_steps_per_second': 1.021, 'epoch': 2.74}
{'loss': 0.648, 'learning_rate': 3.757207697782639e-07, 'epoch': 2.75}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3785005807876587, 'eval_runtime': 1.0955, 'eval_samples_per_second': 4.564, 'eval_steps_per_second': 0.913, 'epoch': 2.75}
{'loss': 0.8049, 'learning_rate': 3.5772517729418475e-07, 'epoch': 2.75}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3782371282577515, 'eval_runtime': 1.0205, 'eval_samples_per_second': 4.899, 'eval_steps_per_second': 0.98, 'epoch': 2.75}
{'loss': 0.5493, 'learning_rate': 3.4016333916691344e-07, 'epoch': 2.76}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780090808868408, 'eval_runtime': 1.0376, 'eval_samples_per_second': 4.819, 'eval_steps_per_second': 0.964, 'epoch': 2.76}
{'loss': 0.7989, 'learning_rate': 3.230360454095871e-07, 'epoch': 2.76}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3778949975967407, 'eval_runtime': 0.9653, 'eval_samples_per_second': 5.18, 'eval_steps_per_second': 1.036, 'epoch': 2.76}
{'loss': 0.6234, 'learning_rate': 3.063440664875217e-07, 'epoch': 2.77}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3779195547103882, 'eval_runtime': 1.0744, 'eval_samples_per_second': 4.654, 'eval_steps_per_second': 0.931, 'epoch': 2.77}
{'loss': 0.7596, 'learning_rate': 2.90088153283542e-07, 'epoch': 2.78}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.377978801727295, 'eval_runtime': 1.0658, 'eval_samples_per_second': 4.691, 'eval_steps_per_second': 0.938, 'epoch': 2.78}
{'loss': 0.5708, 'learning_rate': 2.7426903706421207e-07, 'epoch': 2.78}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780144453048706, 'eval_runtime': 0.9848, 'eval_samples_per_second': 5.077, 'eval_steps_per_second': 1.015, 'epoch': 2.78}
{'loss': 0.9401, 'learning_rate': 2.588874294469368e-07, 'epoch': 2.79}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780722618103027, 'eval_runtime': 1.0683, 'eval_samples_per_second': 4.68, 'eval_steps_per_second': 0.936, 'epoch': 2.79}
{'loss': 0.676, 'learning_rate': 2.43944022367949e-07, 'epoch': 2.8}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3781332969665527, 'eval_runtime': 1.015, 'eval_samples_per_second': 4.926, 'eval_steps_per_second': 0.985, 'epoch': 2.8}
{'loss': 0.7896, 'learning_rate': 2.294394880511852e-07, 'epoch': 2.8}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3782211542129517, 'eval_runtime': 0.9874, 'eval_samples_per_second': 5.064, 'eval_steps_per_second': 1.013, 'epoch': 2.8}
{'loss': 0.8766, 'learning_rate': 2.153744789780421e-07, 'epoch': 2.81}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3782110214233398, 'eval_runtime': 1.0037, 'eval_samples_per_second': 4.982, 'eval_steps_per_second': 0.996, 'epoch': 2.81}
{'loss': 0.9507, 'learning_rate': 2.0174962785802665e-07, 'epoch': 2.81}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3781801462173462, 'eval_runtime': 1.0578, 'eval_samples_per_second': 4.727, 'eval_steps_per_second': 0.945, 'epoch': 2.81}
{'loss': 0.7748, 'learning_rate': 1.8856554760029788e-07, 'epoch': 2.82}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3781421184539795, 'eval_runtime': 1.085, 'eval_samples_per_second': 4.608, 'eval_steps_per_second': 0.922, 'epoch': 2.82}
{'loss': 1.0402, 'learning_rate': 1.758228312860899e-07, 'epoch': 2.83}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3781309127807617, 'eval_runtime': 0.9805, 'eval_samples_per_second': 5.1, 'eval_steps_per_second': 1.02, 'epoch': 2.83}
{'loss': 0.8382, 'learning_rate': 1.6352205214203332e-07, 'epoch': 2.83}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.378152847290039, 'eval_runtime': 1.0912, 'eval_samples_per_second': 4.582, 'eval_steps_per_second': 0.916, 'epoch': 2.83}
{'loss': 0.7858, 'learning_rate': 1.516637635143725e-07, 'epoch': 2.84}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3781901597976685, 'eval_runtime': 0.9758, 'eval_samples_per_second': 5.124, 'eval_steps_per_second': 1.025, 'epoch': 2.84}
{'loss': 0.6741, 'learning_rate': 1.402484988440689e-07, 'epoch': 2.84}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3781920671463013, 'eval_runtime': 1.0766, 'eval_samples_per_second': 4.644, 'eval_steps_per_second': 0.929, 'epoch': 2.84}
{'loss': 0.5981, 'learning_rate': 1.2927677164280562e-07, 'epoch': 2.85}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3781449794769287, 'eval_runtime': 1.0164, 'eval_samples_per_second': 4.919, 'eval_steps_per_second': 0.984, 'epoch': 2.85}
{'loss': 0.6955, 'learning_rate': 1.1874907546989057e-07, 'epoch': 2.86}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780920505523682, 'eval_runtime': 1.088, 'eval_samples_per_second': 4.595, 'eval_steps_per_second': 0.919, 'epoch': 2.86}
{'loss': 0.7908, 'learning_rate': 1.0866588391004962e-07, 'epoch': 2.86}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780628442764282, 'eval_runtime': 0.9935, 'eval_samples_per_second': 5.033, 'eval_steps_per_second': 1.007, 'epoch': 2.86}
{'loss': 0.5134, 'learning_rate': 9.902765055212483e-08, 'epoch': 2.87}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780561685562134, 'eval_runtime': 1.0775, 'eval_samples_per_second': 4.641, 'eval_steps_per_second': 0.928, 'epoch': 2.87}
{'loss': 0.5937, 'learning_rate': 8.983480896866847e-08, 'epoch': 2.88}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780620098114014, 'eval_runtime': 1.0849, 'eval_samples_per_second': 4.609, 'eval_steps_per_second': 0.922, 'epoch': 2.88}
{'loss': 0.7548, 'learning_rate': 8.108777269644097e-08, 'epoch': 2.88}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780748844146729, 'eval_runtime': 1.0249, 'eval_samples_per_second': 4.879, 'eval_steps_per_second': 0.976, 'epoch': 2.88}
{'loss': 0.7145, 'learning_rate': 7.278693521780566e-08, 'epoch': 2.89}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780843019485474, 'eval_runtime': 0.976, 'eval_samples_per_second': 5.123, 'eval_steps_per_second': 1.025, 'epoch': 2.89}
{'loss': 0.5834, 'learning_rate': 6.493266994303082e-08, 'epoch': 2.89}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.378085732460022, 'eval_runtime': 1.0292, 'eval_samples_per_second': 4.858, 'eval_steps_per_second': 0.972, 'epoch': 2.89}
{'loss': 0.4265, 'learning_rate': 5.7525330193489715e-08, 'epoch': 2.9}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.378101110458374, 'eval_runtime': 0.9935, 'eval_samples_per_second': 5.033, 'eval_steps_per_second': 1.007, 'epoch': 2.9}
{'loss': 0.6834, 'learning_rate': 5.056524918576777e-08, 'epoch': 2.91}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780971765518188, 'eval_runtime': 1.0924, 'eval_samples_per_second': 4.577, 'eval_steps_per_second': 0.915, 'epoch': 2.91}
{'loss': 0.6806, 'learning_rate': 4.405274001667126e-08, 'epoch': 2.91}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780922889709473, 'eval_runtime': 0.9996, 'eval_samples_per_second': 5.002, 'eval_steps_per_second': 1.0, 'epoch': 2.91}
{'loss': 0.6396, 'learning_rate': 3.7988095649146294e-08, 'epoch': 2.92}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.378089427947998, 'eval_runtime': 1.0622, 'eval_samples_per_second': 4.707, 'eval_steps_per_second': 0.941, 'epoch': 2.92}
{'loss': 0.5687, 'learning_rate': 3.237158889909386e-08, 'epoch': 2.93}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780953884124756, 'eval_runtime': 1.0859, 'eval_samples_per_second': 4.604, 'eval_steps_per_second': 0.921, 'epoch': 2.93}
{'loss': 0.5708, 'learning_rate': 2.7203472423106282e-08, 'epoch': 2.93}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780986070632935, 'eval_runtime': 1.0087, 'eval_samples_per_second': 4.957, 'eval_steps_per_second': 0.991, 'epoch': 2.93}
{'loss': 0.8792, 'learning_rate': 2.2483978707091846e-08, 'epoch': 2.94}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.378091812133789, 'eval_runtime': 1.0785, 'eval_samples_per_second': 4.636, 'eval_steps_per_second': 0.927, 'epoch': 2.94}
{'loss': 0.9435, 'learning_rate': 1.8213320055824324e-08, 'epoch': 2.94}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780773878097534, 'eval_runtime': 1.0001, 'eval_samples_per_second': 4.999, 'eval_steps_per_second': 1.0, 'epoch': 2.94}
{'loss': 0.5223, 'learning_rate': 1.4391688583386133e-08, 'epoch': 2.95}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780698776245117, 'eval_runtime': 0.9771, 'eval_samples_per_second': 5.117, 'eval_steps_per_second': 1.023, 'epoch': 2.95}
{'loss': 0.7868, 'learning_rate': 1.1019256204533036e-08, 'epoch': 2.96}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780648708343506, 'eval_runtime': 1.0164, 'eval_samples_per_second': 4.919, 'eval_steps_per_second': 0.984, 'epoch': 2.96}
{'loss': 0.5674, 'learning_rate': 8.096174626952558e-09, 'epoch': 2.96}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780648708343506, 'eval_runtime': 1.0242, 'eval_samples_per_second': 4.882, 'eval_steps_per_second': 0.976, 'epoch': 2.96}
{'loss': 0.6398, 'learning_rate': 5.622575344448322e-09, 'epoch': 2.97}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780649900436401, 'eval_runtime': 0.9969, 'eval_samples_per_second': 5.015, 'eval_steps_per_second': 1.003, 'epoch': 2.97}
{'loss': 0.4459, 'learning_rate': 3.5985696310159024e-09, 'epoch': 2.98}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.378064751625061, 'eval_runtime': 0.986, 'eval_samples_per_second': 5.071, 'eval_steps_per_second': 1.014, 'epoch': 2.98}
{'loss': 0.6396, 'learning_rate': 2.0242485358445974e-09, 'epoch': 2.98}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780651092529297, 'eval_runtime': 1.0441, 'eval_samples_per_second': 4.789, 'eval_steps_per_second': 0.958, 'epoch': 2.98}
{'loss': 0.8958, 'learning_rate': 8.996828792184886e-10, 'epoch': 2.99}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780642747879028, 'eval_runtime': 0.9774, 'eval_samples_per_second': 5.116, 'eval_steps_per_second': 1.023, 'epoch': 2.99}
{'loss': 0.5641, 'learning_rate': 2.2492324932787968e-10, 'epoch': 2.99}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780640363693237, 'eval_runtime': 1.0232, 'eval_samples_per_second': 4.887, 'eval_steps_per_second': 0.977, 'epoch': 2.99}
{'loss': 0.7367, 'learning_rate': 0.0, 'epoch': 3.0}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 1.3780639171600342, 'eval_runtime': 1.0988, 'eval_samples_per_second': 4.551, 'eval_steps_per_second': 0.91, 'epoch': 3.0}
{'train_runtime': 4477.1981, 'train_samples_per_second': 0.539, 'train_steps_per_second': 0.539, 'train_loss': 1.1731945971771303, 'epoch': 3.0}


adapter_model.bin:   0%|          | 0.00/170M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/StarkWizard/Mistral-7b-instruct-cairo-PEFT/commit/0ba48a99f126d5d3ddc56b4c73e8aac835ecc5f3', commit_message='Upload model', commit_description='', oid='0ba48a99f126d5d3ddc56b4c73e8aac835ecc5f3', pr_url=None, pr_revision=None, pr_num=None)