In [None]:
# Install the third-party packages this notebook imports: neologdn,
# transformers with its Japanese extras ([ja]), and wandb.
!pip install neologdn 'transformers[ja]' wandb

Collecting neologdn
  Downloading neologdn-0.5.2.tar.gz (86 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/86.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.2/86.2 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting wandb
  Downloading wandb-0.16.4-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
Collecting fugashi>=1.0 (from transformers[ja])
  Downloading fugashi-1.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (600 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m600.9/600.9 kB[0m [31m34.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ipadic<2.0,>=1.0.0 (from transformers[ja])
  Downloading ipadic-1.0.0.tar.gz (13.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.4/13.4 MB[0m [

In [None]:
# Mount Google Drive at /content/drive (Colab only); a later cell reads the
# corpus JSON from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import json
import neologdn
import unicodedata
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForMaskedLM
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import wandb
import random

In [None]:
class DataPreprocessor:
    """Load a JSON corpus, normalize it, and build train/test MLM dataloaders.

    Construction does all the work: the tokenizer is loaded, the corpus is
    read and normalized, consecutive texts are paired, the pairs are split
    into train/test, and both dataloaders are stored on the instance as
    ``train_dataloader`` and ``test_dataloader``.

    Args:
        file_path: Path to a UTF-8 JSON file. The loaded value is iterated and
            every element is passed to ``cleaning_text`` (presumably a list of
            strings -- confirm against the corpus file).
        tokenizer_name: Name or path passed to ``AutoTokenizer.from_pretrained``.
        max_length: Maximum tokenized sequence length, forwarded to ``BERTDataset``.
        batch_size: Batch size for both dataloaders.
        test_size: Test fraction forwarded to ``train_test_split``.
        mlm_probability: Masking probability forwarded to ``BERTDataset``.
    """

    def __init__(self, file_path, tokenizer_name="tohoku-nlp/bert-base-japanese-whole-word-masking", max_length=512, batch_size=32, test_size=0.2, mlm_probability=0.15):
        self.file_path = file_path
        self.tokenizer_name = tokenizer_name
        self.max_length = max_length
        self.batch_size = batch_size
        self.test_size = test_size
        self.mlm_probability = mlm_probability
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        self.train_dataloader, self.test_dataloader = self.prepare_data()

    def cleaning_text(self, text):
        """Return ``text`` after neologdn normalization followed by Unicode NFKC."""
        normalized = neologdn.normalize(text)
        cleaned_text = unicodedata.normalize("NFKC", normalized)
        return cleaned_text

    def load_data(self):
        """Read the JSON file at ``self.file_path`` and return its elements, each normalized."""
        with open(self.file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        # Normalization does not need the file handle, so it runs after the file is closed.
        return [self.cleaning_text(text) for text in data]

    def split_data(self, data):
        """Pair consecutive texts and split the pairs into train and test lists.

        Pairs are (data[0], data[1]), (data[2], data[3]), ...; when ``data``
        has an odd number of elements the last one is left unpaired and
        dropped. The split uses a fixed ``random_state`` of 42.
        """
        data_pairs = [(data[i], data[i + 1]) for i in range(0, len(data) - 1, 2)]
        train_pairs, test_pairs = train_test_split(data_pairs, test_size=self.test_size, random_state=42)
        return train_pairs, test_pairs

    def create_data_loader(self, text_pairs, shuffle=True):
        """Wrap ``text_pairs`` in a ``BERTDataset`` and return a ``DataLoader`` over it.

        Args:
            text_pairs: Sequence of (text1, text2) tuples.
            shuffle: Whether the loader reshuffles the pairs on every pass.
        """
        dataset = BERTDataset(
            text_pairs,
            self.tokenizer,
            max_length=self.max_length,
            mlm_probability=self.mlm_probability,
        )
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle)

    def prepare_data(self):
        """Run load -> split -> dataloader construction; return ``(train_dataloader, test_dataloader)``."""
        data = self.load_data()
        train_pairs, test_pairs = self.split_data(data)
        train_dataloader = self.create_data_loader(train_pairs, shuffle=True)
        # Shuffling adds nothing for a held-out set, so keep its order stable.
        test_dataloader = self.create_data_loader(test_pairs, shuffle=False)
        return train_dataloader, test_dataloader

class BERTDataset(Dataset):
    """Dataset of text pairs that applies masked-language-model masking on access.

    Each item is tokenized as a (text1, text2) pair, padded/truncated to
    ``max_length``, and masked afresh on every ``__getitem__`` call, so the
    same index yields different masks on different accesses.

    Args:
        texts: Indexable collection of (text1, text2) tuples.
        tokenizer: Callable tokenizer exposing ``vocab``, ``all_special_ids``
            and ``mask_token_id``.
        max_length: Length every sequence is padded or truncated to.
        mlm_probability: Per-token probability that a non-special token is
            selected for prediction.
    """

    def __init__(self, texts, tokenizer, max_length=512, mlm_probability=0.15):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.mlm_probability = mlm_probability
        self.vocab_items = list(tokenizer.vocab.items())
        # Built once here so random replacement does not rebuild the full
        # vocabulary list for every replaced token.
        self._vocab_ids = [token_id for _, token_id in self.vocab_items]
        # Set membership replaces a per-token property read plus linear scan.
        self._special_ids = frozenset(tokenizer.all_special_ids)

    def __len__(self):
        return len(self.texts)

    def random_masking(self, tokens):
        """Select tokens for prediction and corrupt the input accordingly.

        Args:
            tokens: 1-D tensor of token ids; it is not modified.

        Returns:
            ``(masked_tokens, labels)``: ``masked_tokens`` is a cloned tensor
            with the corruptions applied; ``labels`` is a Python list holding
            the original id at selected positions and -100 everywhere else.
        """
        labels = [-100] * len(tokens)
        masked_tokens = tokens.clone()
        for i, token_id in enumerate(tokens.tolist()):
            # Special tokens (including the mask token) are never selected.
            if token_id in self._special_ids:
                continue
            if random.random() < self.mlm_probability:
                labels[i] = token_id  # the model must recover the original id
                prob = random.random()
                if prob < 0.8:
                    # 80%: replace with the mask token.
                    masked_tokens[i] = self.tokenizer.mask_token_id
                elif prob < 0.9:
                    # 10%: replace with a random vocabulary id.
                    masked_tokens[i] = random.choice(self._vocab_ids)
                # Remaining 10%: keep the original token in the input.
        return masked_tokens, labels

    def __getitem__(self, idx):
        """Return ``(input_ids, labels, attention_mask, token_type_ids)`` tensors for pair ``idx``."""
        text1, text2 = self.texts[idx]
        encoded = self.tokenizer(text1, text2, truncation="longest_first", max_length=self.max_length, return_tensors="pt", padding="max_length")
        # return_tensors="pt" adds a leading batch axis of size 1; drop it.
        input_ids = encoded["input_ids"][0]
        token_type_ids = encoded["token_type_ids"][0]
        attention_mask = encoded["attention_mask"][0]
        masked_input_ids, labels = self.random_masking(input_ids)
        # These are already tensors; re-wrapping them in torch.tensor() only
        # triggers PyTorch's copy-construct UserWarning. Only the label list
        # needs converting.
        return masked_input_ids, torch.tensor(labels, dtype=torch.long), attention_mask, token_type_ids

def compute_metrics(pred_labels, true_labels):
    """Score predictions against ground truth.

    Returns a 4-tuple ``(accuracy, precision, recall, f1)``. Precision,
    recall and F1 come from ``precision_recall_fscore_support`` with
    ``average='binary'``; accuracy comes from ``accuracy_score``.
    """
    prf_scores = precision_recall_fscore_support(true_labels, pred_labels, average='binary')
    # The fourth element returned by sklearn is not used.
    precision, recall, f1 = prf_scores[0], prf_scores[1], prf_scores[2]
    return accuracy_score(true_labels, pred_labels), precision, recall, f1

def compute_perplexity(loss):
    """Return the perplexity for a loss value, i.e. ``exp(loss)``."""
    perplexity = np.exp(loss)
    return perplexity

def evaluate(model, dataloader, device):
    """Compute the mean loss and perplexity of ``model`` over ``dataloader``.

    The model is switched to eval mode and gradients are disabled for the
    whole pass. The mode is not restored afterwards.

    Args:
        model: Callable accepting ``input_ids``, ``attention_mask``,
            ``token_type_ids`` and ``labels`` keyword arguments and returning
            an object with a ``loss`` attribute.
        dataloader: Sized iterable of 4-tuples
            ``(inputs, labels, attention_mask, token_type_ids)``.
        device: Device every batch tensor is moved to.

    Returns:
        ``(avg_loss, perplexity)``: the mean of the per-batch losses and
        ``compute_perplexity`` of that mean.

    Raises:
        ValueError: If ``dataloader`` contains no batches.
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("evaluate() requires a dataloader with at least one batch")

    model.eval()
    total_loss = 0.0

    with torch.no_grad():
        for batch in dataloader:
            inputs, labels_batch, attention_mask, token_type_ids = (tensor.to(device) for tensor in batch)
            outputs = model(input_ids=inputs, attention_mask=attention_mask, token_type_ids=token_type_ids, labels=labels_batch)
            # Only the loss feeds the returned metrics; per-token predictions
            # are not collected because nothing here consumes them.
            total_loss += outputs.loss.item()

    avg_loss = total_loss / num_batches
    perplexity = compute_perplexity(avg_loss)
    return avg_loss, perplexity

def train(model, train_dataloader, optimizer, device, epochs=4):
    """Train ``model`` for ``epochs`` passes over ``train_dataloader``.

    Every step logs the batch loss to wandb under ``"train_loss"`` and prints
    it. After each epoch the mean batch loss and its perplexity are logged
    under ``"epoch_train_loss"`` and ``"train_perplexity"`` and printed.

    Args:
        model: Callable accepting ``input_ids``, ``attention_mask``,
            ``token_type_ids`` and ``labels`` keyword arguments and returning
            an object with a ``loss`` attribute.
        train_dataloader: Sized iterable of 4-tuples
            ``(inputs, labels, attention_mask, token_type_ids)``.
        optimizer: Optimizer over the model's parameters.
        device: Device every batch tensor is moved to.
        epochs: Number of passes over ``train_dataloader``.

    Raises:
        ValueError: If ``train_dataloader`` contains no batches.
    """
    num_batches = len(train_dataloader)
    if num_batches == 0:
        raise ValueError("train() requires a dataloader with at least one batch")

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for batch in train_dataloader:
            inputs, labels, attention_mask, token_type_ids = (tensor.to(device) for tensor in batch)

            optimizer.zero_grad()
            outputs = model(input_ids=inputs, attention_mask=attention_mask, token_type_ids=token_type_ids, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

            step_loss = loss.item()
            total_loss += step_loss

            # Per-step wandb logging.
            wandb.log({
                "train_loss": step_loss,
            })
            print(f"Epoch: {epoch}, Train Loss: {step_loss}")

        avg_loss = total_loss / num_batches
        perplexity = compute_perplexity(avg_loss)

        # The epoch average gets its own key so it does not add averaged
        # points to the per-step "train_loss" series.
        wandb.log({
            "epoch_train_loss": avg_loss,
            "train_perplexity": perplexity
        })
        print(f"Epoch: {epoch}, Train Loss: {avg_loss}, Train Perplexity: {perplexity}")


In [None]:

# Authenticate with Weights & Biases and start a run whose config holds the
# hyperparameters for this notebook.
wandb.login()
wandb.init(project="Continual Pretraining of BERT on Legal Domain For all sentence", config={
    "epochs": 2,
    "learning_rate": 1e-4,
    "batch_size": 13,
    "max_length": 512,
    "mlm_probability": 0.15
})
config = wandb.config  # Access the configuration settings via wandb.config

# Copy the settings used below into plain variables.
# NOTE(review): "mlm_probability" is recorded in the run config but is not
# read in this cell or passed to DataPreprocessor.
bs = config["batch_size"]
lr = config["learning_rate"]
epochs = config["epochs"]
max_length = config["max_length"]
# Initialize and configure the DataPreprocessor; constructing it loads the
# corpus and builds the train/test dataloaders.

data_preprocessor = DataPreprocessor(file_path="/content/drive/MyDrive/corpus_Pretraing_400.json", max_length=max_length, batch_size=bs)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mxiangchitian26[0m. Use [1m`wandb login --relogin`[0m to force relogin


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/479 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/258k [00:00<?, ?B/s]

In [None]:
# Initialize the model and the optimizer.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForMaskedLM.from_pretrained("tohoku-nlp/bert-base-japanese-whole-word-masking").to(device)
optimizer = AdamW(model.parameters(), lr=lr)

# Run training (evaluate() is not called in this cell).
train(model, data_preprocessor.train_dataloader, optimizer, device, epochs=epochs)

# Write the checkpoint before registering it with wandb: wandb.save() can only
# pick up files that already exist. save_pretrained() creates a *directory*
# named "model.pt", so the glob targets the files inside it.
model.save_pretrained("model.pt")
wandb.save("model.pt/*")
wandb.finish()


pytorch_model.bin:   0%|          | 0.00/445M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()
Some weights of the model checkpoint at tohoku-nlp/bert-base-japanese-whole-word-masking were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  return torch.tensor(tokens), torch.tensor(labels), torch.tensor(attention_mask), torch.tensor(token_type_ids)


Epoch: 0, Train Loss: 2.431859254837036
Epoch: 0, Train Loss: 1.996899962425232
Epoch: 0, Train Loss: 1.9113715887069702
Epoch: 0, Train Loss: 1.6816939115524292
Epoch: 0, Train Loss: 1.8227672576904297
Epoch: 0, Train Loss: 2.305180072784424
Epoch: 0, Train Loss: 2.02392315864563
Epoch: 0, Train Loss: 1.878844141960144
Epoch: 0, Train Loss: 1.704481601715088
Epoch: 0, Train Loss: 1.647470235824585
Epoch: 0, Train Loss: 1.9806363582611084
Epoch: 0, Train Loss: 1.8009886741638184
Epoch: 0, Train Loss: 1.5992401838302612
Epoch: 0, Train Loss: 2.3208019733428955
Epoch: 0, Train Loss: 2.5154879093170166
Epoch: 0, Train Loss: 1.9381310939788818
Epoch: 0, Train Loss: 2.2130324840545654
Epoch: 0, Train Loss: 2.082097291946411
Epoch: 0, Train Loss: 1.9919264316558838
Epoch: 0, Train Loss: 1.7295446395874023
Epoch: 0, Train Loss: 2.304593086242676
Epoch: 0, Train Loss: 1.7757214307785034
Epoch: 0, Train Loss: 1.9914647340774536
Epoch: 0, Train Loss: 2.1206085681915283
Epoch: 0, Train Loss: 1.78

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3969547748565674
Epoch: 0, Train Loss: 2.20181941986084
Epoch: 0, Train Loss: 1.6054140329360962
Epoch: 0, Train Loss: 1.876668930053711
Epoch: 0, Train Loss: 1.5826747417449951
Epoch: 0, Train Loss: 1.5433157682418823
Epoch: 0, Train Loss: 1.6494665145874023
Epoch: 0, Train Loss: 1.4434504508972168
Epoch: 0, Train Loss: 2.038516044616699
Epoch: 0, Train Loss: 1.6606464385986328
Epoch: 0, Train Loss: 1.5433472394943237
Epoch: 0, Train Loss: 1.6799206733703613
Epoch: 0, Train Loss: 1.2732597589492798
Epoch: 0, Train Loss: 1.3617662191390991
Epoch: 0, Train Loss: 1.5200047492980957
Epoch: 0, Train Loss: 1.5704904794692993
Epoch: 0, Train Loss: 1.7868998050689697
Epoch: 0, Train Loss: 1.5912582874298096
Epoch: 0, Train Loss: 1.5575876235961914
Epoch: 0, Train Loss: 1.4394185543060303
Epoch: 0, Train Loss: 1.5194510221481323
Epoch: 0, Train Loss: 1.5270079374313354
Epoch: 0, Train Loss: 1.507008671760559
Epoch: 0, Train Loss: 1.6898456811904907
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5400397777557373
Epoch: 0, Train Loss: 1.523104190826416
Epoch: 0, Train Loss: 1.9420065879821777
Epoch: 0, Train Loss: 2.0094802379608154
Epoch: 0, Train Loss: 1.4531922340393066
Epoch: 0, Train Loss: 1.4709415435791016
Epoch: 0, Train Loss: 1.4977689981460571
Epoch: 0, Train Loss: 1.5511244535446167
Epoch: 0, Train Loss: 1.677855372428894
Epoch: 0, Train Loss: 1.5325807332992554
Epoch: 0, Train Loss: 1.2471826076507568
Epoch: 0, Train Loss: 1.484159231185913
Epoch: 0, Train Loss: 1.7363123893737793
Epoch: 0, Train Loss: 1.4829078912734985
Epoch: 0, Train Loss: 1.9801548719406128
Epoch: 0, Train Loss: 1.3779819011688232
Epoch: 0, Train Loss: 1.6993471384048462
Epoch: 0, Train Loss: 1.643324375152588
Epoch: 0, Train Loss: 1.4550825357437134
Epoch: 0, Train Loss: 1.6431713104248047
Epoch: 0, Train Loss: 1.8091968297958374
Epoch: 0, Train Loss: 1.4435173273086548
Epoch: 0, Train Loss: 1.63527250289917
Epoch: 0, Train Loss: 1.4590895175933838
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5128988027572632
Epoch: 0, Train Loss: 1.300464391708374
Epoch: 0, Train Loss: 1.9051355123519897
Epoch: 0, Train Loss: 1.1594114303588867
Epoch: 0, Train Loss: 1.636552333831787
Epoch: 0, Train Loss: 1.2597416639328003
Epoch: 0, Train Loss: 1.554352879524231
Epoch: 0, Train Loss: 1.3855081796646118
Epoch: 0, Train Loss: 1.3587009906768799
Epoch: 0, Train Loss: 1.5361849069595337
Epoch: 0, Train Loss: 1.3449177742004395
Epoch: 0, Train Loss: 1.682550072669983
Epoch: 0, Train Loss: 1.5793194770812988
Epoch: 0, Train Loss: 1.5395203828811646
Epoch: 0, Train Loss: 1.5388790369033813
Epoch: 0, Train Loss: 1.5349420309066772
Epoch: 0, Train Loss: 1.989262342453003
Epoch: 0, Train Loss: 1.335807204246521
Epoch: 0, Train Loss: 1.5744215250015259
Epoch: 0, Train Loss: 2.008824586868286
Epoch: 0, Train Loss: 1.2873514890670776
Epoch: 0, Train Loss: 1.5031541585922241
Epoch: 0, Train Loss: 1.37488853931427
Epoch: 0, Train Loss: 1.472695231437683
Epoch: 0, Train Loss: 1.50

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4875411987304688
Epoch: 0, Train Loss: 1.5303878784179688
Epoch: 0, Train Loss: 2.0405986309051514
Epoch: 0, Train Loss: 1.6330772638320923
Epoch: 0, Train Loss: 1.3132554292678833
Epoch: 0, Train Loss: 1.5084141492843628
Epoch: 0, Train Loss: 1.6729063987731934
Epoch: 0, Train Loss: 1.3650715351104736
Epoch: 0, Train Loss: 1.3708308935165405
Epoch: 0, Train Loss: 1.6143299341201782
Epoch: 0, Train Loss: 1.300158143043518
Epoch: 0, Train Loss: 1.7698286771774292
Epoch: 0, Train Loss: 1.5638798475265503
Epoch: 0, Train Loss: 1.4944491386413574
Epoch: 0, Train Loss: 1.810258388519287
Epoch: 0, Train Loss: 1.6173127889633179
Epoch: 0, Train Loss: 1.7222219705581665
Epoch: 0, Train Loss: 1.3247791528701782
Epoch: 0, Train Loss: 1.2849746942520142
Epoch: 0, Train Loss: 1.488591194152832
Epoch: 0, Train Loss: 1.7068583965301514
Epoch: 0, Train Loss: 1.5395479202270508
Epoch: 0, Train Loss: 1.6723556518554688
Epoch: 0, Train Loss: 1.705468773841858
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.1385222673416138
Epoch: 0, Train Loss: 1.2582037448883057
Epoch: 0, Train Loss: 1.5922534465789795
Epoch: 0, Train Loss: 1.3809080123901367
Epoch: 0, Train Loss: 1.5365819931030273
Epoch: 0, Train Loss: 1.7656292915344238
Epoch: 0, Train Loss: 1.6033021211624146
Epoch: 0, Train Loss: 1.579918622970581
Epoch: 0, Train Loss: 1.6078675985336304
Epoch: 0, Train Loss: 1.3744698762893677
Epoch: 0, Train Loss: 1.434958577156067
Epoch: 0, Train Loss: 1.4614918231964111
Epoch: 0, Train Loss: 1.5455384254455566
Epoch: 0, Train Loss: 1.822813868522644
Epoch: 0, Train Loss: 1.7507728338241577
Epoch: 0, Train Loss: 1.7039414644241333
Epoch: 0, Train Loss: 1.3558086156845093
Epoch: 0, Train Loss: 1.4255049228668213
Epoch: 0, Train Loss: 1.3820164203643799
Epoch: 0, Train Loss: 1.329984188079834
Epoch: 0, Train Loss: 1.3765811920166016
Epoch: 0, Train Loss: 1.3936281204223633
Epoch: 0, Train Loss: 1.48667573928833
Epoch: 0, Train Loss: 1.6286901235580444
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.8011661767959595
Epoch: 0, Train Loss: 1.2962220907211304
Epoch: 0, Train Loss: 1.6890426874160767
Epoch: 0, Train Loss: 1.722299337387085
Epoch: 0, Train Loss: 2.1693735122680664
Epoch: 0, Train Loss: 1.3181296586990356
Epoch: 0, Train Loss: 1.460711121559143
Epoch: 0, Train Loss: 1.213441014289856
Epoch: 0, Train Loss: 1.5304795503616333
Epoch: 0, Train Loss: 1.71995210647583
Epoch: 0, Train Loss: 1.3345320224761963
Epoch: 0, Train Loss: 1.3013404607772827
Epoch: 0, Train Loss: 1.3015892505645752
Epoch: 0, Train Loss: 1.4968456029891968
Epoch: 0, Train Loss: 1.7155544757843018
Epoch: 0, Train Loss: 1.5935276746749878
Epoch: 0, Train Loss: 1.4481171369552612
Epoch: 0, Train Loss: 1.4632397890090942
Epoch: 0, Train Loss: 1.7487177848815918
Epoch: 0, Train Loss: 1.191584825515747
Epoch: 0, Train Loss: 1.4606308937072754
Epoch: 0, Train Loss: 1.6435966491699219
Epoch: 0, Train Loss: 1.4179409742355347
Epoch: 0, Train Loss: 1.340651512145996
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6911860704421997
Epoch: 0, Train Loss: 1.1662406921386719
Epoch: 0, Train Loss: 1.623669981956482
Epoch: 0, Train Loss: 1.4769272804260254
Epoch: 0, Train Loss: 1.4331779479980469
Epoch: 0, Train Loss: 1.2803868055343628
Epoch: 0, Train Loss: 1.5346673727035522
Epoch: 0, Train Loss: 1.4987220764160156
Epoch: 0, Train Loss: 1.9123671054840088
Epoch: 0, Train Loss: 1.985146164894104
Epoch: 0, Train Loss: 1.3407458066940308
Epoch: 0, Train Loss: 1.382944941520691
Epoch: 0, Train Loss: 1.6077080965042114
Epoch: 0, Train Loss: 1.504006028175354
Epoch: 0, Train Loss: 1.974460244178772
Epoch: 0, Train Loss: 1.3139293193817139
Epoch: 0, Train Loss: 1.3991340398788452
Epoch: 0, Train Loss: 1.495052695274353
Epoch: 0, Train Loss: 1.5313456058502197
Epoch: 0, Train Loss: 2.0752651691436768
Epoch: 0, Train Loss: 1.962419033050537
Epoch: 0, Train Loss: 2.2535412311553955
Epoch: 0, Train Loss: 1.732558012008667
Epoch: 0, Train Loss: 1.7243620157241821
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4238340854644775
Epoch: 0, Train Loss: 1.545353651046753
Epoch: 0, Train Loss: 1.3585067987442017
Epoch: 0, Train Loss: 1.8473269939422607
Epoch: 0, Train Loss: 1.4920027256011963
Epoch: 0, Train Loss: 1.3312402963638306
Epoch: 0, Train Loss: 1.5253468751907349
Epoch: 0, Train Loss: 1.8572286367416382
Epoch: 0, Train Loss: 1.4070285558700562
Epoch: 0, Train Loss: 1.7363827228546143
Epoch: 0, Train Loss: 1.8141529560089111
Epoch: 0, Train Loss: 1.393772006034851
Epoch: 0, Train Loss: 1.4576352834701538
Epoch: 0, Train Loss: 1.858404517173767
Epoch: 0, Train Loss: 1.397375226020813


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.428649663925171
Epoch: 0, Train Loss: 1.313961148262024
Epoch: 0, Train Loss: 1.4306907653808594
Epoch: 0, Train Loss: 1.4536933898925781
Epoch: 0, Train Loss: 1.0622810125350952
Epoch: 0, Train Loss: 1.9095739126205444
Epoch: 0, Train Loss: 1.61526620388031
Epoch: 0, Train Loss: 1.3840868473052979
Epoch: 0, Train Loss: 1.8312206268310547
Epoch: 0, Train Loss: 1.517054557800293
Epoch: 0, Train Loss: 1.499464988708496
Epoch: 0, Train Loss: 1.6487175226211548
Epoch: 0, Train Loss: 1.529100775718689
Epoch: 0, Train Loss: 1.6308122873306274
Epoch: 0, Train Loss: 1.6251716613769531
Epoch: 0, Train Loss: 1.341065526008606
Epoch: 0, Train Loss: 1.4051527976989746
Epoch: 0, Train Loss: 1.5912158489227295
Epoch: 0, Train Loss: 1.597105622291565
Epoch: 0, Train Loss: 2.1481246948242188
Epoch: 0, Train Loss: 1.998465895652771
Epoch: 0, Train Loss: 1.9483858346939087
Epoch: 0, Train Loss: 1.4163010120391846
Epoch: 0, Train Loss: 1.378947138786316
Epoch: 0, Train Loss: 1.533

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7620848417282104
Epoch: 0, Train Loss: 0.9972124695777893
Epoch: 0, Train Loss: 1.1009546518325806
Epoch: 0, Train Loss: 1.3977919816970825
Epoch: 0, Train Loss: 1.5499162673950195
Epoch: 0, Train Loss: 1.5614885091781616
Epoch: 0, Train Loss: 1.5783518552780151
Epoch: 0, Train Loss: 1.5391343832015991
Epoch: 0, Train Loss: 1.4307578802108765
Epoch: 0, Train Loss: 1.5765897035598755
Epoch: 0, Train Loss: 1.8511968851089478
Epoch: 0, Train Loss: 1.6017013788223267
Epoch: 0, Train Loss: 1.3716708421707153
Epoch: 0, Train Loss: 1.887715458869934
Epoch: 0, Train Loss: 1.4880553483963013
Epoch: 0, Train Loss: 1.3111212253570557
Epoch: 0, Train Loss: 1.5658605098724365
Epoch: 0, Train Loss: 1.4404979944229126
Epoch: 0, Train Loss: 1.712045431137085
Epoch: 0, Train Loss: 1.6286362409591675
Epoch: 0, Train Loss: 1.1882133483886719
Epoch: 0, Train Loss: 1.311028242111206
Epoch: 0, Train Loss: 1.5135574340820312
Epoch: 0, Train Loss: 1.975528359413147
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3336377143859863
Epoch: 0, Train Loss: 1.2542208433151245
Epoch: 0, Train Loss: 1.3594857454299927
Epoch: 0, Train Loss: 1.8846684694290161
Epoch: 0, Train Loss: 1.3454049825668335


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4611506462097168
Epoch: 0, Train Loss: 1.2402029037475586
Epoch: 0, Train Loss: 2.037714958190918
Epoch: 0, Train Loss: 1.6545261144638062
Epoch: 0, Train Loss: 1.1678708791732788
Epoch: 0, Train Loss: 1.421265721321106
Epoch: 0, Train Loss: 1.1628148555755615
Epoch: 0, Train Loss: 1.7879611253738403
Epoch: 0, Train Loss: 1.4198449850082397
Epoch: 0, Train Loss: 1.6073554754257202
Epoch: 0, Train Loss: 1.23805832862854
Epoch: 0, Train Loss: 1.0165706872940063
Epoch: 0, Train Loss: 1.3365836143493652
Epoch: 0, Train Loss: 1.6019887924194336
Epoch: 0, Train Loss: 1.5319819450378418
Epoch: 0, Train Loss: 1.3816097974777222
Epoch: 0, Train Loss: 1.4941020011901855
Epoch: 0, Train Loss: 1.334443211555481
Epoch: 0, Train Loss: 1.6467283964157104
Epoch: 0, Train Loss: 1.544846534729004
Epoch: 0, Train Loss: 1.7374424934387207
Epoch: 0, Train Loss: 1.5277342796325684
Epoch: 0, Train Loss: 1.4534293413162231
Epoch: 0, Train Loss: 1.7689145803451538
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.700975775718689
Epoch: 0, Train Loss: 1.4257330894470215
Epoch: 0, Train Loss: 1.533426284790039
Epoch: 0, Train Loss: 1.5445921421051025
Epoch: 0, Train Loss: 1.1824806928634644
Epoch: 0, Train Loss: 1.0034782886505127
Epoch: 0, Train Loss: 1.57786226272583
Epoch: 0, Train Loss: 1.7760077714920044
Epoch: 0, Train Loss: 1.445268988609314
Epoch: 0, Train Loss: 1.491638422012329
Epoch: 0, Train Loss: 1.7783180475234985
Epoch: 0, Train Loss: 1.6574280261993408
Epoch: 0, Train Loss: 1.290106177330017
Epoch: 0, Train Loss: 1.2022651433944702
Epoch: 0, Train Loss: 1.1102676391601562
Epoch: 0, Train Loss: 1.3506125211715698
Epoch: 0, Train Loss: 1.6338046789169312
Epoch: 0, Train Loss: 1.4679312705993652
Epoch: 0, Train Loss: 1.7375434637069702
Epoch: 0, Train Loss: 1.219216227531433
Epoch: 0, Train Loss: 1.5775806903839111
Epoch: 0, Train Loss: 1.6044037342071533
Epoch: 0, Train Loss: 1.6049660444259644
Epoch: 0, Train Loss: 1.4862873554229736
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4953761100769043
Epoch: 0, Train Loss: 1.1166162490844727
Epoch: 0, Train Loss: 1.387284517288208
Epoch: 0, Train Loss: 1.7766128778457642
Epoch: 0, Train Loss: 1.3913356065750122
Epoch: 0, Train Loss: 1.2877442836761475
Epoch: 0, Train Loss: 1.515500783920288
Epoch: 0, Train Loss: 1.7875334024429321
Epoch: 0, Train Loss: 1.6759947538375854
Epoch: 0, Train Loss: 1.3868328332901
Epoch: 0, Train Loss: 1.4687288999557495
Epoch: 0, Train Loss: 1.6104592084884644
Epoch: 0, Train Loss: 1.4201529026031494
Epoch: 0, Train Loss: 1.722554326057434


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2722798585891724
Epoch: 0, Train Loss: 1.3352019786834717
Epoch: 0, Train Loss: 1.4531233310699463
Epoch: 0, Train Loss: 1.5830835103988647
Epoch: 0, Train Loss: 1.6738916635513306
Epoch: 0, Train Loss: 1.3072750568389893
Epoch: 0, Train Loss: 1.1945796012878418
Epoch: 0, Train Loss: 1.6526931524276733
Epoch: 0, Train Loss: 1.2508692741394043
Epoch: 0, Train Loss: 1.7898564338684082
Epoch: 0, Train Loss: 1.4820103645324707
Epoch: 0, Train Loss: 1.325424313545227
Epoch: 0, Train Loss: 1.4983900785446167
Epoch: 0, Train Loss: 1.3864593505859375
Epoch: 0, Train Loss: 1.1755175590515137
Epoch: 0, Train Loss: 1.376768708229065
Epoch: 0, Train Loss: 1.7753292322158813
Epoch: 0, Train Loss: 1.623963475227356
Epoch: 0, Train Loss: 1.7253905534744263
Epoch: 0, Train Loss: 1.531959056854248
Epoch: 0, Train Loss: 1.5983772277832031
Epoch: 0, Train Loss: 1.4735329151153564
Epoch: 0, Train Loss: 1.6785491704940796
Epoch: 0, Train Loss: 1.573784351348877
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7528655529022217
Epoch: 0, Train Loss: 1.305182933807373
Epoch: 0, Train Loss: 1.3002630472183228
Epoch: 0, Train Loss: 1.7588392496109009
Epoch: 0, Train Loss: 1.512967824935913
Epoch: 0, Train Loss: 1.4392633438110352
Epoch: 0, Train Loss: 1.2812391519546509
Epoch: 0, Train Loss: 1.4255714416503906
Epoch: 0, Train Loss: 1.4999561309814453
Epoch: 0, Train Loss: 1.8350145816802979
Epoch: 0, Train Loss: 1.3221503496170044
Epoch: 0, Train Loss: 1.8068965673446655
Epoch: 0, Train Loss: 1.517059564590454
Epoch: 0, Train Loss: 1.4104082584381104
Epoch: 0, Train Loss: 1.4589918851852417
Epoch: 0, Train Loss: 1.4567389488220215
Epoch: 0, Train Loss: 1.6668771505355835
Epoch: 0, Train Loss: 1.6790311336517334
Epoch: 0, Train Loss: 1.2799750566482544
Epoch: 0, Train Loss: 1.7972997426986694
Epoch: 0, Train Loss: 1.7447924613952637
Epoch: 0, Train Loss: 1.28682279586792
Epoch: 0, Train Loss: 1.596326231956482
Epoch: 0, Train Loss: 1.1110663414001465
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3610661029815674
Epoch: 0, Train Loss: 1.0969032049179077
Epoch: 0, Train Loss: 1.8055715560913086
Epoch: 0, Train Loss: 1.3450303077697754
Epoch: 0, Train Loss: 1.464268684387207
Epoch: 0, Train Loss: 1.3659545183181763
Epoch: 0, Train Loss: 1.6625113487243652
Epoch: 0, Train Loss: 1.4904457330703735
Epoch: 0, Train Loss: 1.6982039213180542
Epoch: 0, Train Loss: 1.6937673091888428
Epoch: 0, Train Loss: 1.4585870504379272
Epoch: 0, Train Loss: 1.4266377687454224
Epoch: 0, Train Loss: 1.0263023376464844
Epoch: 0, Train Loss: 1.2099635601043701
Epoch: 0, Train Loss: 1.6085820198059082
Epoch: 0, Train Loss: 1.4049909114837646
Epoch: 0, Train Loss: 1.2844740152359009
Epoch: 0, Train Loss: 1.349192500114441
Epoch: 0, Train Loss: 1.6085344552993774
Epoch: 0, Train Loss: 1.4785959720611572
Epoch: 0, Train Loss: 1.5157480239868164
Epoch: 0, Train Loss: 1.671535611152649
Epoch: 0, Train Loss: 1.5230233669281006
Epoch: 0, Train Loss: 1.3041932582855225
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4968570470809937
Epoch: 0, Train Loss: 0.8897050619125366
Epoch: 0, Train Loss: 1.1930149793624878
Epoch: 0, Train Loss: 1.5109809637069702
Epoch: 0, Train Loss: 1.5355782508850098
Epoch: 0, Train Loss: 1.3923135995864868
Epoch: 0, Train Loss: 1.5578080415725708
Epoch: 0, Train Loss: 1.4586673974990845
Epoch: 0, Train Loss: 1.4308127164840698
Epoch: 0, Train Loss: 1.5232183933258057
Epoch: 0, Train Loss: 1.3155614137649536
Epoch: 0, Train Loss: 1.8872300386428833
Epoch: 0, Train Loss: 1.2319053411483765
Epoch: 0, Train Loss: 1.6338032484054565
Epoch: 0, Train Loss: 1.1961606740951538
Epoch: 0, Train Loss: 1.5600972175598145
Epoch: 0, Train Loss: 1.181960940361023
Epoch: 0, Train Loss: 1.4599645137786865
Epoch: 0, Train Loss: 1.5189111232757568
Epoch: 0, Train Loss: 1.594745397567749
Epoch: 0, Train Loss: 1.8750430345535278
Epoch: 0, Train Loss: 1.5244841575622559
Epoch: 0, Train Loss: 1.5741029977798462
Epoch: 0, Train Loss: 1.4779645204544067
Epoch: 0, Train Lo

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4048081636428833
Epoch: 0, Train Loss: 1.4635062217712402
Epoch: 0, Train Loss: 1.498567819595337
Epoch: 0, Train Loss: 1.7770596742630005
Epoch: 0, Train Loss: 1.6115201711654663
Epoch: 0, Train Loss: 1.3916122913360596
Epoch: 0, Train Loss: 1.784353494644165
Epoch: 0, Train Loss: 1.5536315441131592
Epoch: 0, Train Loss: 1.6636388301849365
Epoch: 0, Train Loss: 1.6436026096343994
Epoch: 0, Train Loss: 1.8293384313583374
Epoch: 0, Train Loss: 1.7561231851577759
Epoch: 0, Train Loss: 1.3048540353775024
Epoch: 0, Train Loss: 1.768448829650879
Epoch: 0, Train Loss: 1.485426425933838
Epoch: 0, Train Loss: 1.4812966585159302
Epoch: 0, Train Loss: 1.780963659286499
Epoch: 0, Train Loss: 1.4453110694885254
Epoch: 0, Train Loss: 1.5406581163406372
Epoch: 0, Train Loss: 1.5290648937225342
Epoch: 0, Train Loss: 1.3750073909759521
Epoch: 0, Train Loss: 1.2058693170547485
Epoch: 0, Train Loss: 1.3368045091629028
Epoch: 0, Train Loss: 1.8594801425933838
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.661322832107544
Epoch: 0, Train Loss: 1.2779502868652344
Epoch: 0, Train Loss: 1.4024637937545776
Epoch: 0, Train Loss: 1.7324621677398682
Epoch: 0, Train Loss: 1.1690112352371216
Epoch: 0, Train Loss: 1.5868335962295532
Epoch: 0, Train Loss: 1.3757601976394653
Epoch: 0, Train Loss: 1.8139888048171997
Epoch: 0, Train Loss: 1.6254223585128784
Epoch: 0, Train Loss: 1.7543013095855713
Epoch: 0, Train Loss: 1.5614166259765625
Epoch: 0, Train Loss: 1.4962948560714722
Epoch: 0, Train Loss: 1.4672948122024536
Epoch: 0, Train Loss: 1.6589195728302002
Epoch: 0, Train Loss: 1.9695510864257812
Epoch: 0, Train Loss: 1.49317467212677
Epoch: 0, Train Loss: 1.2992594242095947
Epoch: 0, Train Loss: 1.489211082458496
Epoch: 0, Train Loss: 1.1923439502716064
Epoch: 0, Train Loss: 1.9341661930084229
Epoch: 0, Train Loss: 2.5980868339538574
Epoch: 0, Train Loss: 1.625950574874878
Epoch: 0, Train Loss: 1.752307415008545
Epoch: 0, Train Loss: 1.5838875770568848
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2727875709533691
Epoch: 0, Train Loss: 1.4906275272369385
Epoch: 0, Train Loss: 1.302091121673584
Epoch: 0, Train Loss: 1.369498372077942
Epoch: 0, Train Loss: 1.608707070350647
Epoch: 0, Train Loss: 1.8292522430419922
Epoch: 0, Train Loss: 1.8854247331619263
Epoch: 0, Train Loss: 1.657941460609436
Epoch: 0, Train Loss: 1.8781298398971558
Epoch: 0, Train Loss: 0.981887698173523
Epoch: 0, Train Loss: 1.767195463180542
Epoch: 0, Train Loss: 1.7397581338882446
Epoch: 0, Train Loss: 1.665831208229065
Epoch: 0, Train Loss: 1.5339953899383545
Epoch: 0, Train Loss: 1.0962514877319336
Epoch: 0, Train Loss: 1.327683925628662
Epoch: 0, Train Loss: 1.8810151815414429
Epoch: 0, Train Loss: 1.6089261770248413
Epoch: 0, Train Loss: 1.3364253044128418
Epoch: 0, Train Loss: 1.4059561491012573
Epoch: 0, Train Loss: 1.296532154083252
Epoch: 0, Train Loss: 1.2360981702804565
Epoch: 0, Train Loss: 1.387281894683838
Epoch: 0, Train Loss: 1.3866971731185913
Epoch: 0, Train Loss: 1.56

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5990136861801147
Epoch: 0, Train Loss: 1.7781068086624146
Epoch: 0, Train Loss: 1.4062508344650269
Epoch: 0, Train Loss: 1.3685334920883179
Epoch: 0, Train Loss: 1.3032078742980957
Epoch: 0, Train Loss: 1.4782130718231201
Epoch: 0, Train Loss: 1.6143101453781128
Epoch: 0, Train Loss: 1.1413438320159912
Epoch: 0, Train Loss: 1.4568976163864136
Epoch: 0, Train Loss: 1.8159475326538086
Epoch: 0, Train Loss: 1.262078046798706
Epoch: 0, Train Loss: 1.3432872295379639
Epoch: 0, Train Loss: 1.633487582206726
Epoch: 0, Train Loss: 1.6635758876800537
Epoch: 0, Train Loss: 1.356452465057373
Epoch: 0, Train Loss: 1.5750000476837158
Epoch: 0, Train Loss: 1.855627417564392
Epoch: 0, Train Loss: 1.5310516357421875
Epoch: 0, Train Loss: 2.0919268131256104
Epoch: 0, Train Loss: 1.301786184310913
Epoch: 0, Train Loss: 1.499733567237854
Epoch: 0, Train Loss: 1.319463849067688
Epoch: 0, Train Loss: 1.2984522581100464
Epoch: 0, Train Loss: 1.5630497932434082
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2700928449630737
Epoch: 0, Train Loss: 1.397888422012329
Epoch: 0, Train Loss: 1.8328887224197388
Epoch: 0, Train Loss: 1.7949833869934082
Epoch: 0, Train Loss: 1.4093221426010132
Epoch: 0, Train Loss: 2.0190954208374023
Epoch: 0, Train Loss: 1.2405025959014893
Epoch: 0, Train Loss: 1.4169018268585205
Epoch: 0, Train Loss: 2.0063843727111816
Epoch: 0, Train Loss: 1.245832920074463
Epoch: 0, Train Loss: 1.535241723060608
Epoch: 0, Train Loss: 1.4205728769302368
Epoch: 0, Train Loss: 2.1273348331451416
Epoch: 0, Train Loss: 1.5660380125045776
Epoch: 0, Train Loss: 1.715781807899475
Epoch: 0, Train Loss: 1.1998257637023926
Epoch: 0, Train Loss: 1.1974176168441772
Epoch: 0, Train Loss: 1.6225029230117798
Epoch: 0, Train Loss: 1.7481412887573242
Epoch: 0, Train Loss: 1.5187615156173706
Epoch: 0, Train Loss: 1.2970186471939087
Epoch: 0, Train Loss: 1.5089316368103027
Epoch: 0, Train Loss: 1.7676949501037598
Epoch: 0, Train Loss: 1.6939769983291626
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.370190978050232
Epoch: 0, Train Loss: 1.3863686323165894
Epoch: 0, Train Loss: 1.8348573446273804
Epoch: 0, Train Loss: 1.3697090148925781
Epoch: 0, Train Loss: 1.5239073038101196
Epoch: 0, Train Loss: 1.5152500867843628
Epoch: 0, Train Loss: 1.689260721206665
Epoch: 0, Train Loss: 1.9931458234786987
Epoch: 0, Train Loss: 1.9492653608322144
Epoch: 0, Train Loss: 1.4757963418960571
Epoch: 0, Train Loss: 1.719283938407898
Epoch: 0, Train Loss: 1.3985724449157715
Epoch: 0, Train Loss: 1.3139421939849854
Epoch: 0, Train Loss: 1.4092375040054321
Epoch: 0, Train Loss: 1.255770206451416
Epoch: 0, Train Loss: 1.8170033693313599
Epoch: 0, Train Loss: 2.015521764755249
Epoch: 0, Train Loss: 1.3128091096878052
Epoch: 0, Train Loss: 1.538997769355774
Epoch: 0, Train Loss: 1.5168410539627075
Epoch: 0, Train Loss: 1.6431201696395874
Epoch: 0, Train Loss: 1.2200740575790405
Epoch: 0, Train Loss: 1.7051564455032349
Epoch: 0, Train Loss: 1.8687888383865356
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.613844633102417
Epoch: 0, Train Loss: 1.2792468070983887
Epoch: 0, Train Loss: 1.4226888418197632
Epoch: 0, Train Loss: 1.644374132156372
Epoch: 0, Train Loss: 1.7889578342437744
Epoch: 0, Train Loss: 1.8149240016937256
Epoch: 0, Train Loss: 1.2809396982192993
Epoch: 0, Train Loss: 1.601735234260559
Epoch: 0, Train Loss: 1.5223963260650635
Epoch: 0, Train Loss: 1.5432040691375732
Epoch: 0, Train Loss: 1.644464373588562
Epoch: 0, Train Loss: 1.3761625289916992
Epoch: 0, Train Loss: 1.8828508853912354
Epoch: 0, Train Loss: 1.4658812284469604
Epoch: 0, Train Loss: 1.6881600618362427
Epoch: 0, Train Loss: 1.445915937423706
Epoch: 0, Train Loss: 1.4115941524505615
Epoch: 0, Train Loss: 1.7909350395202637
Epoch: 0, Train Loss: 1.632759928703308
Epoch: 0, Train Loss: 1.599892497062683
Epoch: 0, Train Loss: 1.6376979351043701
Epoch: 0, Train Loss: 1.4778623580932617
Epoch: 0, Train Loss: 1.3359359502792358
Epoch: 0, Train Loss: 1.5064456462860107
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3878792524337769
Epoch: 0, Train Loss: 1.4715298414230347
Epoch: 0, Train Loss: 1.2365379333496094
Epoch: 0, Train Loss: 1.5365787744522095
Epoch: 0, Train Loss: 1.3116650581359863
Epoch: 0, Train Loss: 1.6006484031677246
Epoch: 0, Train Loss: 1.368889570236206
Epoch: 0, Train Loss: 1.6769332885742188
Epoch: 0, Train Loss: 1.3218945264816284
Epoch: 0, Train Loss: 1.8964557647705078
Epoch: 0, Train Loss: 1.3780500888824463
Epoch: 0, Train Loss: 1.1331292390823364
Epoch: 0, Train Loss: 1.4803342819213867
Epoch: 0, Train Loss: 1.8758573532104492
Epoch: 0, Train Loss: 1.8775179386138916
Epoch: 0, Train Loss: 1.5161163806915283
Epoch: 0, Train Loss: 1.6838910579681396
Epoch: 0, Train Loss: 1.3141084909439087
Epoch: 0, Train Loss: 1.334625005722046
Epoch: 0, Train Loss: 1.4494266510009766
Epoch: 0, Train Loss: 1.6853944063186646
Epoch: 0, Train Loss: 1.7904704809188843
Epoch: 0, Train Loss: 1.4445199966430664
Epoch: 0, Train Loss: 1.7912176847457886
Epoch: 0, Train Lo

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4789197444915771
Epoch: 0, Train Loss: 1.877860188484192
Epoch: 0, Train Loss: 1.5890284776687622
Epoch: 0, Train Loss: 1.387674331665039
Epoch: 0, Train Loss: 1.3270738124847412
Epoch: 0, Train Loss: 1.5087409019470215
Epoch: 0, Train Loss: 1.2128596305847168
Epoch: 0, Train Loss: 1.5662939548492432
Epoch: 0, Train Loss: 1.5858839750289917
Epoch: 0, Train Loss: 1.7237777709960938
Epoch: 0, Train Loss: 1.3200896978378296
Epoch: 0, Train Loss: 1.6978875398635864
Epoch: 0, Train Loss: 1.3558615446090698
Epoch: 0, Train Loss: 1.4737666845321655
Epoch: 0, Train Loss: 1.6034536361694336
Epoch: 0, Train Loss: 1.6243072748184204
Epoch: 0, Train Loss: 1.7830727100372314
Epoch: 0, Train Loss: 1.2314975261688232
Epoch: 0, Train Loss: 1.6621088981628418
Epoch: 0, Train Loss: 1.8511651754379272
Epoch: 0, Train Loss: 1.5314242839813232
Epoch: 0, Train Loss: 1.563042163848877
Epoch: 0, Train Loss: 1.3123670816421509
Epoch: 0, Train Loss: 1.3570715188980103
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.304295301437378
Epoch: 0, Train Loss: 1.5395792722702026
Epoch: 0, Train Loss: 1.43166983127594
Epoch: 0, Train Loss: 1.514566421508789
Epoch: 0, Train Loss: 1.180196762084961
Epoch: 0, Train Loss: 2.3506205081939697
Epoch: 0, Train Loss: 1.36493718624115
Epoch: 0, Train Loss: 1.6250534057617188
Epoch: 0, Train Loss: 1.803086757659912
Epoch: 0, Train Loss: 1.7533423900604248
Epoch: 0, Train Loss: 1.1870769262313843
Epoch: 0, Train Loss: 1.729764461517334
Epoch: 0, Train Loss: 1.0777599811553955
Epoch: 0, Train Loss: 1.4342159032821655
Epoch: 0, Train Loss: 1.854570746421814
Epoch: 0, Train Loss: 1.5833971500396729
Epoch: 0, Train Loss: 1.5709916353225708
Epoch: 0, Train Loss: 1.5681827068328857
Epoch: 0, Train Loss: 1.1602458953857422
Epoch: 0, Train Loss: 1.4898948669433594
Epoch: 0, Train Loss: 1.5578550100326538
Epoch: 0, Train Loss: 1.5265394449234009
Epoch: 0, Train Loss: 1.889506459236145
Epoch: 0, Train Loss: 1.4188544750213623
Epoch: 0, Train Loss: 1.423

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6957865953445435
Epoch: 0, Train Loss: 1.3059958219528198
Epoch: 0, Train Loss: 1.3040745258331299
Epoch: 0, Train Loss: 1.2793093919754028
Epoch: 0, Train Loss: 1.251095175743103
Epoch: 0, Train Loss: 1.4459446668624878
Epoch: 0, Train Loss: 1.3175240755081177
Epoch: 0, Train Loss: 1.6216139793395996
Epoch: 0, Train Loss: 1.2341296672821045
Epoch: 0, Train Loss: 1.587554693222046
Epoch: 0, Train Loss: 1.1916240453720093
Epoch: 0, Train Loss: 1.22689688205719
Epoch: 0, Train Loss: 1.655924916267395
Epoch: 0, Train Loss: 1.6342741250991821
Epoch: 0, Train Loss: 1.2189784049987793
Epoch: 0, Train Loss: 1.661877155303955
Epoch: 0, Train Loss: 1.3839031457901
Epoch: 0, Train Loss: 1.2937042713165283
Epoch: 0, Train Loss: 1.6181061267852783
Epoch: 0, Train Loss: 1.2117801904678345
Epoch: 0, Train Loss: 1.7130205631256104
Epoch: 0, Train Loss: 1.5771074295043945
Epoch: 0, Train Loss: 1.5707134008407593
Epoch: 0, Train Loss: 1.4611375331878662
Epoch: 0, Train Loss: 1.7

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5642606019973755
Epoch: 0, Train Loss: 1.8126587867736816
Epoch: 0, Train Loss: 1.475274920463562
Epoch: 0, Train Loss: 1.61514151096344
Epoch: 0, Train Loss: 1.4295378923416138
Epoch: 0, Train Loss: 1.4178249835968018
Epoch: 0, Train Loss: 1.558661699295044
Epoch: 0, Train Loss: 1.4062845706939697
Epoch: 0, Train Loss: 1.6420106887817383
Epoch: 0, Train Loss: 1.6788156032562256
Epoch: 0, Train Loss: 1.8726274967193604
Epoch: 0, Train Loss: 1.6578317880630493
Epoch: 0, Train Loss: 1.139488935470581
Epoch: 0, Train Loss: 1.311950922012329
Epoch: 0, Train Loss: 1.3141145706176758
Epoch: 0, Train Loss: 1.6664843559265137
Epoch: 0, Train Loss: 1.8384870290756226
Epoch: 0, Train Loss: 1.8357855081558228
Epoch: 0, Train Loss: 1.2723751068115234
Epoch: 0, Train Loss: 1.4209331274032593
Epoch: 0, Train Loss: 1.440806269645691
Epoch: 0, Train Loss: 1.5570175647735596
Epoch: 0, Train Loss: 1.2533220052719116
Epoch: 0, Train Loss: 1.7406684160232544
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7860829830169678
Epoch: 0, Train Loss: 0.9129948019981384
Epoch: 0, Train Loss: 1.497947096824646
Epoch: 0, Train Loss: 1.4382827281951904
Epoch: 0, Train Loss: 1.340714454650879
Epoch: 0, Train Loss: 1.4287890195846558
Epoch: 0, Train Loss: 1.9059604406356812
Epoch: 0, Train Loss: 1.819132924079895
Epoch: 0, Train Loss: 0.768371045589447
Epoch: 0, Train Loss: 1.415350317955017
Epoch: 0, Train Loss: 1.7318514585494995
Epoch: 0, Train Loss: 1.654481291770935
Epoch: 0, Train Loss: 1.362731695175171
Epoch: 0, Train Loss: 1.5993049144744873
Epoch: 0, Train Loss: 1.512296199798584
Epoch: 0, Train Loss: 1.3908952474594116
Epoch: 0, Train Loss: 1.9855303764343262
Epoch: 0, Train Loss: 1.282446265220642
Epoch: 0, Train Loss: 1.2847058773040771
Epoch: 0, Train Loss: 1.6713080406188965
Epoch: 0, Train Loss: 1.4445183277130127
Epoch: 0, Train Loss: 1.4268074035644531
Epoch: 0, Train Loss: 1.888749361038208
Epoch: 0, Train Loss: 1.082800269126892
Epoch: 0, Train Loss: 1.467

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 0.9715362787246704
Epoch: 0, Train Loss: 1.0910208225250244
Epoch: 0, Train Loss: 1.2429423332214355
Epoch: 0, Train Loss: 1.5771204233169556
Epoch: 0, Train Loss: 1.3232448101043701
Epoch: 0, Train Loss: 1.4321939945220947
Epoch: 0, Train Loss: 1.5515986680984497
Epoch: 0, Train Loss: 1.3622838258743286
Epoch: 0, Train Loss: 1.5514092445373535
Epoch: 0, Train Loss: 1.0937539339065552
Epoch: 0, Train Loss: 1.3667504787445068
Epoch: 0, Train Loss: 1.3366572856903076
Epoch: 0, Train Loss: 1.3318136930465698
Epoch: 0, Train Loss: 1.795237421989441
Epoch: 0, Train Loss: 1.486037015914917
Epoch: 0, Train Loss: 1.3783866167068481
Epoch: 0, Train Loss: 1.4076471328735352
Epoch: 0, Train Loss: 1.5317018032073975
Epoch: 0, Train Loss: 1.8118969202041626
Epoch: 0, Train Loss: 1.4380470514297485
Epoch: 0, Train Loss: 1.7892154455184937
Epoch: 0, Train Loss: 1.971527338027954
Epoch: 0, Train Loss: 1.3027079105377197
Epoch: 0, Train Loss: 1.3942124843597412
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3171623945236206
Epoch: 0, Train Loss: 1.3594331741333008
Epoch: 0, Train Loss: 1.5189541578292847
Epoch: 0, Train Loss: 1.9453506469726562
Epoch: 0, Train Loss: 1.3706060647964478
Epoch: 0, Train Loss: 1.517334222793579
Epoch: 0, Train Loss: 0.9825317859649658
Epoch: 0, Train Loss: 1.6671174764633179
Epoch: 0, Train Loss: 1.3555490970611572
Epoch: 0, Train Loss: 1.3149046897888184
Epoch: 0, Train Loss: 1.4697973728179932
Epoch: 0, Train Loss: 1.3568581342697144
Epoch: 0, Train Loss: 1.6834074258804321
Epoch: 0, Train Loss: 1.5868301391601562
Epoch: 0, Train Loss: 1.6493024826049805
Epoch: 0, Train Loss: 1.5667052268981934
Epoch: 0, Train Loss: 1.462579369544983
Epoch: 0, Train Loss: 1.3260923624038696
Epoch: 0, Train Loss: 1.609170913696289
Epoch: 0, Train Loss: 1.38313627243042
Epoch: 0, Train Loss: 1.5025715827941895
Epoch: 0, Train Loss: 1.5136761665344238
Epoch: 0, Train Loss: 1.3370314836502075
Epoch: 0, Train Loss: 1.6539674997329712
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6606018543243408
Epoch: 0, Train Loss: 1.6626249551773071
Epoch: 0, Train Loss: 1.4898072481155396
Epoch: 0, Train Loss: 1.889297604560852
Epoch: 0, Train Loss: 1.525470495223999
Epoch: 0, Train Loss: 2.0205283164978027
Epoch: 0, Train Loss: 1.4443621635437012
Epoch: 0, Train Loss: 1.6980050802230835
Epoch: 0, Train Loss: 1.3698023557662964
Epoch: 0, Train Loss: 1.732145071029663
Epoch: 0, Train Loss: 1.4954488277435303
Epoch: 0, Train Loss: 1.569582462310791
Epoch: 0, Train Loss: 1.600110411643982
Epoch: 0, Train Loss: 1.5935580730438232
Epoch: 0, Train Loss: 1.3614965677261353
Epoch: 0, Train Loss: 1.6010375022888184
Epoch: 0, Train Loss: 1.8140971660614014
Epoch: 0, Train Loss: 1.5812933444976807
Epoch: 0, Train Loss: 1.6062990427017212
Epoch: 0, Train Loss: 1.5567458868026733
Epoch: 0, Train Loss: 1.2869360446929932
Epoch: 0, Train Loss: 1.7334650754928589
Epoch: 0, Train Loss: 1.433875560760498
Epoch: 0, Train Loss: 1.156917691230774
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6090197563171387
Epoch: 0, Train Loss: 1.0127735137939453
Epoch: 0, Train Loss: 1.3593493700027466
Epoch: 0, Train Loss: 1.2176001071929932
Epoch: 0, Train Loss: 1.2011266946792603
Epoch: 0, Train Loss: 1.6186579465866089
Epoch: 0, Train Loss: 1.3866280317306519
Epoch: 0, Train Loss: 1.231482982635498
Epoch: 0, Train Loss: 1.65569007396698
Epoch: 0, Train Loss: 1.5336389541625977
Epoch: 0, Train Loss: 1.4795734882354736
Epoch: 0, Train Loss: 1.6772700548171997
Epoch: 0, Train Loss: 1.2005112171173096
Epoch: 0, Train Loss: 1.4144186973571777
Epoch: 0, Train Loss: 1.5008909702301025
Epoch: 0, Train Loss: 1.5367772579193115
Epoch: 0, Train Loss: 1.5019947290420532
Epoch: 0, Train Loss: 1.1611058712005615
Epoch: 0, Train Loss: 1.6817669868469238
Epoch: 0, Train Loss: 1.5044618844985962
Epoch: 0, Train Loss: 1.6726162433624268
Epoch: 0, Train Loss: 1.7738333940505981
Epoch: 0, Train Loss: 1.5286457538604736
Epoch: 0, Train Loss: 1.7195382118225098
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2202978134155273
Epoch: 0, Train Loss: 1.2668191194534302
Epoch: 0, Train Loss: 1.2637629508972168
Epoch: 0, Train Loss: 1.6306343078613281
Epoch: 0, Train Loss: 1.7352428436279297
Epoch: 0, Train Loss: 1.7486538887023926
Epoch: 0, Train Loss: 1.6370359659194946
Epoch: 0, Train Loss: 1.2871609926223755
Epoch: 0, Train Loss: 1.5381642580032349
Epoch: 0, Train Loss: 1.6937083005905151
Epoch: 0, Train Loss: 1.6257514953613281
Epoch: 0, Train Loss: 1.532541036605835
Epoch: 0, Train Loss: 1.742754340171814
Epoch: 0, Train Loss: 1.539588451385498
Epoch: 0, Train Loss: 1.3136540651321411
Epoch: 0, Train Loss: 0.9306476712226868
Epoch: 0, Train Loss: 1.041806936264038
Epoch: 0, Train Loss: 1.385533094406128
Epoch: 0, Train Loss: 1.5828758478164673
Epoch: 0, Train Loss: 1.3433064222335815
Epoch: 0, Train Loss: 1.635005235671997
Epoch: 0, Train Loss: 1.3637878894805908
Epoch: 0, Train Loss: 1.4499481916427612
Epoch: 0, Train Loss: 1.8910123109817505
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7807011604309082
Epoch: 0, Train Loss: 1.7911232709884644
Epoch: 0, Train Loss: 1.8690475225448608
Epoch: 0, Train Loss: 2.0218019485473633
Epoch: 0, Train Loss: 1.382132649421692
Epoch: 0, Train Loss: 1.641517162322998
Epoch: 0, Train Loss: 1.6195347309112549
Epoch: 0, Train Loss: 1.2953345775604248
Epoch: 0, Train Loss: 1.3494223356246948
Epoch: 0, Train Loss: 1.2225635051727295
Epoch: 0, Train Loss: 1.2080090045928955
Epoch: 0, Train Loss: 1.3910279273986816
Epoch: 0, Train Loss: 1.591639518737793
Epoch: 0, Train Loss: 1.4674054384231567
Epoch: 0, Train Loss: 1.4511830806732178
Epoch: 0, Train Loss: 1.3214032649993896
Epoch: 0, Train Loss: 1.5224602222442627
Epoch: 0, Train Loss: 1.9442247152328491
Epoch: 0, Train Loss: 1.360883355140686
Epoch: 0, Train Loss: 1.6526808738708496
Epoch: 0, Train Loss: 1.5030678510665894
Epoch: 0, Train Loss: 1.1595678329467773
Epoch: 0, Train Loss: 2.5935823917388916
Epoch: 0, Train Loss: 1.1792724132537842
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3394566774368286
Epoch: 0, Train Loss: 1.0159307718276978
Epoch: 0, Train Loss: 1.7322092056274414
Epoch: 0, Train Loss: 1.4480100870132446
Epoch: 0, Train Loss: 1.452791690826416
Epoch: 0, Train Loss: 1.3130288124084473
Epoch: 0, Train Loss: 1.6198294162750244
Epoch: 0, Train Loss: 1.8612861633300781
Epoch: 0, Train Loss: 1.4400993585586548
Epoch: 0, Train Loss: 1.6266515254974365
Epoch: 0, Train Loss: 1.5374410152435303
Epoch: 0, Train Loss: 1.3399362564086914
Epoch: 0, Train Loss: 1.2578531503677368
Epoch: 0, Train Loss: 1.3522850275039673
Epoch: 0, Train Loss: 1.7044678926467896
Epoch: 0, Train Loss: 1.461959958076477
Epoch: 0, Train Loss: 1.5853043794631958
Epoch: 0, Train Loss: 1.11515212059021
Epoch: 0, Train Loss: 1.3352363109588623
Epoch: 0, Train Loss: 1.3810932636260986
Epoch: 0, Train Loss: 1.347414493560791
Epoch: 0, Train Loss: 1.500157356262207
Epoch: 0, Train Loss: 1.385118007659912
Epoch: 0, Train Loss: 1.387587070465088
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5231773853302002
Epoch: 0, Train Loss: 1.1410666704177856
Epoch: 0, Train Loss: 1.2949954271316528
Epoch: 0, Train Loss: 1.3159281015396118
Epoch: 0, Train Loss: 1.6295818090438843
Epoch: 0, Train Loss: 1.5379176139831543
Epoch: 0, Train Loss: 1.1928001642227173
Epoch: 0, Train Loss: 1.6686005592346191
Epoch: 0, Train Loss: 1.5330487489700317
Epoch: 0, Train Loss: 1.4766898155212402
Epoch: 0, Train Loss: 1.4596858024597168
Epoch: 0, Train Loss: 1.8028401136398315
Epoch: 0, Train Loss: 1.7623471021652222
Epoch: 0, Train Loss: 1.1199274063110352
Epoch: 0, Train Loss: 1.4579750299453735
Epoch: 0, Train Loss: 1.3848251104354858
Epoch: 0, Train Loss: 1.4993600845336914
Epoch: 0, Train Loss: 1.4971063137054443
Epoch: 0, Train Loss: 1.4376145601272583
Epoch: 0, Train Loss: 1.8437387943267822
Epoch: 0, Train Loss: 1.4192609786987305
Epoch: 0, Train Loss: 1.6173772811889648
Epoch: 0, Train Loss: 1.0657944679260254
Epoch: 0, Train Loss: 1.8317476511001587
Epoch: 0, Train 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3038766384124756
Epoch: 0, Train Loss: 1.104318618774414
Epoch: 0, Train Loss: 1.4718025922775269
Epoch: 0, Train Loss: 1.2829869985580444
Epoch: 0, Train Loss: 1.626638650894165
Epoch: 0, Train Loss: 1.616770625114441
Epoch: 0, Train Loss: 1.5155209302902222
Epoch: 0, Train Loss: 1.6318079233169556
Epoch: 0, Train Loss: 1.556768536567688
Epoch: 0, Train Loss: 1.5151530504226685
Epoch: 0, Train Loss: 1.5734095573425293
Epoch: 0, Train Loss: 1.5690189599990845
Epoch: 0, Train Loss: 1.20156729221344
Epoch: 0, Train Loss: 1.3727517127990723
Epoch: 0, Train Loss: 1.3865911960601807
Epoch: 0, Train Loss: 1.404751181602478
Epoch: 0, Train Loss: 1.5381783246994019
Epoch: 0, Train Loss: 1.47126305103302
Epoch: 0, Train Loss: 1.5584548711776733
Epoch: 0, Train Loss: 1.6447718143463135
Epoch: 0, Train Loss: 1.366485357284546
Epoch: 0, Train Loss: 1.6476058959960938
Epoch: 0, Train Loss: 1.1939970254898071
Epoch: 0, Train Loss: 1.4054521322250366
Epoch: 0, Train Loss: 1.77

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7786729335784912
Epoch: 0, Train Loss: 1.2844531536102295
Epoch: 0, Train Loss: 1.4847846031188965
Epoch: 0, Train Loss: 1.4949434995651245
Epoch: 0, Train Loss: 1.4580001831054688
Epoch: 0, Train Loss: 1.372507095336914
Epoch: 0, Train Loss: 1.1692888736724854
Epoch: 0, Train Loss: 1.4540786743164062
Epoch: 0, Train Loss: 1.7093490362167358
Epoch: 0, Train Loss: 1.1429479122161865
Epoch: 0, Train Loss: 1.6886773109436035
Epoch: 0, Train Loss: 1.6901391744613647
Epoch: 0, Train Loss: 1.678328514099121
Epoch: 0, Train Loss: 1.7509599924087524
Epoch: 0, Train Loss: 1.661273717880249
Epoch: 0, Train Loss: 1.4536688327789307
Epoch: 0, Train Loss: 1.3504995107650757
Epoch: 0, Train Loss: 1.5266003608703613
Epoch: 0, Train Loss: 1.6624188423156738
Epoch: 0, Train Loss: 1.5812349319458008
Epoch: 0, Train Loss: 1.1621346473693848
Epoch: 0, Train Loss: 1.4717745780944824
Epoch: 0, Train Loss: 1.9256467819213867
Epoch: 0, Train Loss: 1.341652512550354
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7864235639572144
Epoch: 0, Train Loss: 1.1481080055236816
Epoch: 0, Train Loss: 1.5006482601165771
Epoch: 0, Train Loss: 1.9446697235107422
Epoch: 0, Train Loss: 1.2496960163116455
Epoch: 0, Train Loss: 1.3030632734298706
Epoch: 0, Train Loss: 1.4330084323883057
Epoch: 0, Train Loss: 1.2938061952590942
Epoch: 0, Train Loss: 1.322561502456665
Epoch: 0, Train Loss: 1.552391529083252
Epoch: 0, Train Loss: 1.4569776058197021
Epoch: 0, Train Loss: 1.4693751335144043
Epoch: 0, Train Loss: 1.4325616359710693
Epoch: 0, Train Loss: 1.3117564916610718
Epoch: 0, Train Loss: 1.194506287574768
Epoch: 0, Train Loss: 1.4868181943893433
Epoch: 0, Train Loss: 1.6267000436782837
Epoch: 0, Train Loss: 1.5552724599838257
Epoch: 0, Train Loss: 1.2369438409805298
Epoch: 0, Train Loss: 1.5474883317947388
Epoch: 0, Train Loss: 1.2925341129302979
Epoch: 0, Train Loss: 1.4737813472747803
Epoch: 0, Train Loss: 1.9148125648498535
Epoch: 0, Train Loss: 1.875617265701294
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2999157905578613
Epoch: 0, Train Loss: 1.6166926622390747
Epoch: 0, Train Loss: 1.2031193971633911
Epoch: 0, Train Loss: 1.4131228923797607
Epoch: 0, Train Loss: 1.3804384469985962
Epoch: 0, Train Loss: 1.9868519306182861
Epoch: 0, Train Loss: 1.5567998886108398
Epoch: 0, Train Loss: 1.6726397275924683
Epoch: 0, Train Loss: 1.4837110042572021
Epoch: 0, Train Loss: 1.51961088180542
Epoch: 0, Train Loss: 1.5430768728256226
Epoch: 0, Train Loss: 1.4264198541641235
Epoch: 0, Train Loss: 1.57369065284729
Epoch: 0, Train Loss: 1.8872379064559937
Epoch: 0, Train Loss: 2.1972148418426514
Epoch: 0, Train Loss: 1.7941977977752686
Epoch: 0, Train Loss: 1.3578317165374756
Epoch: 0, Train Loss: 1.8578740358352661
Epoch: 0, Train Loss: 1.361390233039856
Epoch: 0, Train Loss: 1.4446009397506714
Epoch: 0, Train Loss: 1.4107035398483276
Epoch: 0, Train Loss: 1.477822184562683
Epoch: 0, Train Loss: 1.1736139059066772
Epoch: 0, Train Loss: 1.7001532316207886
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3931605815887451
Epoch: 0, Train Loss: 1.3929675817489624
Epoch: 0, Train Loss: 1.3805698156356812
Epoch: 0, Train Loss: 1.2636572122573853
Epoch: 0, Train Loss: 1.3836292028427124
Epoch: 0, Train Loss: 1.8349753618240356
Epoch: 0, Train Loss: 1.4574347734451294
Epoch: 0, Train Loss: 1.2681103944778442
Epoch: 0, Train Loss: 1.3787312507629395
Epoch: 0, Train Loss: 1.268600344657898
Epoch: 0, Train Loss: 1.4212377071380615
Epoch: 0, Train Loss: 1.5503532886505127
Epoch: 0, Train Loss: 1.853079915046692
Epoch: 0, Train Loss: 1.5020419359207153
Epoch: 0, Train Loss: 1.3779122829437256
Epoch: 0, Train Loss: 1.466402530670166
Epoch: 0, Train Loss: 1.6389390230178833
Epoch: 0, Train Loss: 1.294224739074707
Epoch: 0, Train Loss: 1.0567034482955933
Epoch: 0, Train Loss: 1.156108021736145
Epoch: 0, Train Loss: 1.143579363822937
Epoch: 0, Train Loss: 1.9091483354568481
Epoch: 0, Train Loss: 1.4155793190002441
Epoch: 0, Train Loss: 1.381641149520874
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.8111591339111328
Epoch: 0, Train Loss: 1.4835901260375977
Epoch: 0, Train Loss: 1.3574519157409668
Epoch: 0, Train Loss: 1.2478508949279785
Epoch: 0, Train Loss: 1.7348549365997314
Epoch: 0, Train Loss: 1.5749999284744263
Epoch: 0, Train Loss: 1.6635282039642334
Epoch: 0, Train Loss: 1.3262666463851929
Epoch: 0, Train Loss: 1.6910640001296997
Epoch: 0, Train Loss: 1.5297659635543823
Epoch: 0, Train Loss: 1.39786958694458
Epoch: 0, Train Loss: 1.4346826076507568
Epoch: 0, Train Loss: 1.4813051223754883
Epoch: 0, Train Loss: 1.3575797080993652
Epoch: 0, Train Loss: 1.437958002090454
Epoch: 0, Train Loss: 1.6493959426879883
Epoch: 0, Train Loss: 1.1007243394851685
Epoch: 0, Train Loss: 1.7208155393600464
Epoch: 0, Train Loss: 1.691754937171936
Epoch: 0, Train Loss: 1.8627740144729614
Epoch: 0, Train Loss: 1.3741165399551392
Epoch: 0, Train Loss: 1.4072386026382446
Epoch: 0, Train Loss: 1.385132074356079
Epoch: 0, Train Loss: 1.6120781898498535
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.052632212638855
Epoch: 0, Train Loss: 1.137781023979187
Epoch: 0, Train Loss: 1.4957859516143799
Epoch: 0, Train Loss: 1.367501974105835
Epoch: 0, Train Loss: 1.6311875581741333
Epoch: 0, Train Loss: 1.3957422971725464
Epoch: 0, Train Loss: 1.2844115495681763
Epoch: 0, Train Loss: 2.130495309829712
Epoch: 0, Train Loss: 1.301468014717102
Epoch: 0, Train Loss: 1.3100377321243286
Epoch: 0, Train Loss: 1.8704864978790283
Epoch: 0, Train Loss: 1.3625385761260986
Epoch: 0, Train Loss: 1.4797804355621338
Epoch: 0, Train Loss: 1.371679425239563
Epoch: 0, Train Loss: 1.3201146125793457
Epoch: 0, Train Loss: 1.641204833984375
Epoch: 0, Train Loss: 1.230523705482483
Epoch: 0, Train Loss: 1.4827712774276733
Epoch: 0, Train Loss: 1.36613929271698
Epoch: 0, Train Loss: 1.573123812675476
Epoch: 0, Train Loss: 1.3231703042984009
Epoch: 0, Train Loss: 1.2110737562179565
Epoch: 0, Train Loss: 1.302200436592102
Epoch: 0, Train Loss: 1.278780221939087
Epoch: 0, Train Loss: 1.47049

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6040129661560059
Epoch: 0, Train Loss: 1.1424739360809326
Epoch: 0, Train Loss: 1.5160950422286987
Epoch: 0, Train Loss: 1.7638335227966309
Epoch: 0, Train Loss: 1.3596231937408447
Epoch: 0, Train Loss: 1.5331355333328247
Epoch: 0, Train Loss: 1.62286376953125
Epoch: 0, Train Loss: 1.475574254989624
Epoch: 0, Train Loss: 1.6465086936950684
Epoch: 0, Train Loss: 1.4271245002746582
Epoch: 0, Train Loss: 1.424575924873352
Epoch: 0, Train Loss: 1.4260528087615967
Epoch: 0, Train Loss: 1.601336121559143
Epoch: 0, Train Loss: 1.5720375776290894
Epoch: 0, Train Loss: 1.420749306678772
Epoch: 0, Train Loss: 1.5641438961029053
Epoch: 0, Train Loss: 1.7192885875701904
Epoch: 0, Train Loss: 1.553423285484314
Epoch: 0, Train Loss: 1.330329179763794
Epoch: 0, Train Loss: 1.546808123588562
Epoch: 0, Train Loss: 1.7176569700241089
Epoch: 0, Train Loss: 1.5572569370269775
Epoch: 0, Train Loss: 1.2118374109268188
Epoch: 0, Train Loss: 1.5810983180999756
Epoch: 0, Train Loss: 1.5

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5217032432556152
Epoch: 0, Train Loss: 1.3279391527175903
Epoch: 0, Train Loss: 1.705390214920044
Epoch: 0, Train Loss: 1.6615674495697021
Epoch: 0, Train Loss: 1.4725990295410156
Epoch: 0, Train Loss: 1.4056346416473389
Epoch: 0, Train Loss: 1.4672253131866455
Epoch: 0, Train Loss: 1.942568302154541
Epoch: 0, Train Loss: 1.227028489112854
Epoch: 0, Train Loss: 1.2462161779403687
Epoch: 0, Train Loss: 1.2321910858154297
Epoch: 0, Train Loss: 1.5732202529907227
Epoch: 0, Train Loss: 1.5891900062561035
Epoch: 0, Train Loss: 1.5851505994796753
Epoch: 0, Train Loss: 1.171034812927246
Epoch: 0, Train Loss: 1.3103653192520142
Epoch: 0, Train Loss: 1.5988515615463257
Epoch: 0, Train Loss: 1.8231700658798218
Epoch: 0, Train Loss: 1.298534870147705
Epoch: 0, Train Loss: 1.3792966604232788
Epoch: 0, Train Loss: 1.3394348621368408
Epoch: 0, Train Loss: 1.4812947511672974
Epoch: 0, Train Loss: 1.6046959161758423
Epoch: 0, Train Loss: 1.1650584936141968
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7651004791259766
Epoch: 0, Train Loss: 1.2387477159500122
Epoch: 0, Train Loss: 1.3610508441925049
Epoch: 0, Train Loss: 1.576032042503357
Epoch: 0, Train Loss: 1.3171570301055908
Epoch: 0, Train Loss: 1.2530187368392944
Epoch: 0, Train Loss: 1.6732687950134277
Epoch: 0, Train Loss: 1.857954502105713
Epoch: 0, Train Loss: 1.494208574295044
Epoch: 0, Train Loss: 1.3997581005096436
Epoch: 0, Train Loss: 1.9491772651672363
Epoch: 0, Train Loss: 1.313887596130371
Epoch: 0, Train Loss: 1.1961545944213867
Epoch: 0, Train Loss: 1.6207287311553955
Epoch: 0, Train Loss: 1.749837875366211
Epoch: 0, Train Loss: 1.4410094022750854
Epoch: 0, Train Loss: 1.4700678586959839
Epoch: 0, Train Loss: 1.5840084552764893
Epoch: 0, Train Loss: 1.8819090127944946
Epoch: 0, Train Loss: 1.7254853248596191
Epoch: 0, Train Loss: 1.6755298376083374
Epoch: 0, Train Loss: 1.1978195905685425
Epoch: 0, Train Loss: 1.6606746912002563
Epoch: 0, Train Loss: 1.4459162950515747
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7418442964553833
Epoch: 0, Train Loss: 1.3180749416351318
Epoch: 0, Train Loss: 0.9283230304718018
Epoch: 0, Train Loss: 1.7387683391571045
Epoch: 0, Train Loss: 1.5383620262145996
Epoch: 0, Train Loss: 1.2156829833984375
Epoch: 0, Train Loss: 1.4485524892807007
Epoch: 0, Train Loss: 1.7645801305770874
Epoch: 0, Train Loss: 1.3517038822174072
Epoch: 0, Train Loss: 1.659987211227417
Epoch: 0, Train Loss: 1.5049738883972168
Epoch: 0, Train Loss: 1.5522079467773438
Epoch: 0, Train Loss: 1.2812832593917847
Epoch: 0, Train Loss: 1.5530296564102173
Epoch: 0, Train Loss: 1.642438530921936
Epoch: 0, Train Loss: 1.4208157062530518
Epoch: 0, Train Loss: 1.2784109115600586
Epoch: 0, Train Loss: 1.236690878868103
Epoch: 0, Train Loss: 1.5470287799835205
Epoch: 0, Train Loss: 1.711441993713379
Epoch: 0, Train Loss: 1.1309889554977417
Epoch: 0, Train Loss: 1.3888245820999146
Epoch: 0, Train Loss: 1.3305743932724
Epoch: 0, Train Loss: 1.6376146078109741
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.538114070892334
Epoch: 0, Train Loss: 1.6099841594696045
Epoch: 0, Train Loss: 1.7616291046142578
Epoch: 0, Train Loss: 1.4566887617111206
Epoch: 0, Train Loss: 1.5924142599105835
Epoch: 0, Train Loss: 1.5371516942977905
Epoch: 0, Train Loss: 1.3980603218078613
Epoch: 0, Train Loss: 1.5731019973754883
Epoch: 0, Train Loss: 1.336594820022583
Epoch: 0, Train Loss: 1.5552005767822266
Epoch: 0, Train Loss: 1.4205025434494019
Epoch: 0, Train Loss: 2.108443260192871
Epoch: 0, Train Loss: 1.606105089187622
Epoch: 0, Train Loss: 1.771410346031189
Epoch: 0, Train Loss: 1.593785285949707
Epoch: 0, Train Loss: 1.7923544645309448
Epoch: 0, Train Loss: 1.4289138317108154
Epoch: 0, Train Loss: 1.1687040328979492
Epoch: 0, Train Loss: 1.3246101140975952
Epoch: 0, Train Loss: 1.5146533250808716
Epoch: 0, Train Loss: 1.3169636726379395
Epoch: 0, Train Loss: 1.2602665424346924
Epoch: 0, Train Loss: 1.422830581665039
Epoch: 0, Train Loss: 1.6711472272872925
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4508484601974487
Epoch: 0, Train Loss: 1.1377097368240356
Epoch: 0, Train Loss: 1.7455755472183228
Epoch: 0, Train Loss: 1.6351168155670166
Epoch: 0, Train Loss: 1.6340500116348267
Epoch: 0, Train Loss: 1.6535148620605469
Epoch: 0, Train Loss: 1.5803401470184326
Epoch: 0, Train Loss: 1.5545145273208618
Epoch: 0, Train Loss: 1.353868842124939
Epoch: 0, Train Loss: 1.3163797855377197
Epoch: 0, Train Loss: 1.519710659980774
Epoch: 0, Train Loss: 1.271714687347412
Epoch: 0, Train Loss: 1.2476190328598022
Epoch: 0, Train Loss: 1.546725869178772
Epoch: 0, Train Loss: 1.9085068702697754
Epoch: 0, Train Loss: 2.1718780994415283
Epoch: 0, Train Loss: 1.6868654489517212
Epoch: 0, Train Loss: 2.0185329914093018
Epoch: 0, Train Loss: 1.316537618637085
Epoch: 0, Train Loss: 1.3040310144424438
Epoch: 0, Train Loss: 1.4490991830825806
Epoch: 0, Train Loss: 1.5754919052124023
Epoch: 0, Train Loss: 1.8309352397918701
Epoch: 0, Train Loss: 1.5620267391204834
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.9321184158325195
Epoch: 0, Train Loss: 0.8861964344978333
Epoch: 0, Train Loss: 1.825039267539978
Epoch: 0, Train Loss: 1.5225435495376587
Epoch: 0, Train Loss: 1.5357754230499268
Epoch: 0, Train Loss: 1.2067286968231201
Epoch: 0, Train Loss: 1.443200707435608
Epoch: 0, Train Loss: 1.5006663799285889
Epoch: 0, Train Loss: 1.198630452156067
Epoch: 0, Train Loss: 1.6604400873184204
Epoch: 0, Train Loss: 1.5986965894699097
Epoch: 0, Train Loss: 1.8021882772445679
Epoch: 0, Train Loss: 1.6254277229309082
Epoch: 0, Train Loss: 1.2430839538574219
Epoch: 0, Train Loss: 1.5634844303131104
Epoch: 0, Train Loss: 1.417893886566162
Epoch: 0, Train Loss: 1.334552526473999
Epoch: 0, Train Loss: 1.7475093603134155
Epoch: 0, Train Loss: 1.206015706062317
Epoch: 0, Train Loss: 1.1452441215515137
Epoch: 0, Train Loss: 1.298311710357666
Epoch: 0, Train Loss: 1.1498537063598633
Epoch: 0, Train Loss: 1.3965425491333008
Epoch: 0, Train Loss: 1.6060067415237427
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5405573844909668
Epoch: 0, Train Loss: 1.3002386093139648
Epoch: 0, Train Loss: 1.4213414192199707
Epoch: 0, Train Loss: 1.2330344915390015
Epoch: 0, Train Loss: 1.4356980323791504
Epoch: 0, Train Loss: 1.1334396600723267
Epoch: 0, Train Loss: 1.3684542179107666
Epoch: 0, Train Loss: 1.5723774433135986
Epoch: 0, Train Loss: 1.51456618309021
Epoch: 0, Train Loss: 1.6448866128921509
Epoch: 0, Train Loss: 1.7339154481887817
Epoch: 0, Train Loss: 1.3438081741333008
Epoch: 0, Train Loss: 1.7171541452407837
Epoch: 0, Train Loss: 1.2869737148284912
Epoch: 0, Train Loss: 1.3265360593795776
Epoch: 0, Train Loss: 1.312913417816162
Epoch: 0, Train Loss: 1.6833945512771606
Epoch: 0, Train Loss: 1.353551983833313
Epoch: 0, Train Loss: 1.6056621074676514
Epoch: 0, Train Loss: 1.3593952655792236
Epoch: 0, Train Loss: 1.1852288246154785
Epoch: 0, Train Loss: 1.772740364074707
Epoch: 0, Train Loss: 1.9408100843429565
Epoch: 0, Train Loss: 1.2948634624481201
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 2.018014669418335
Epoch: 0, Train Loss: 1.0346390008926392
Epoch: 0, Train Loss: 1.0835492610931396
Epoch: 0, Train Loss: 1.4758148193359375
Epoch: 0, Train Loss: 1.1987894773483276
Epoch: 0, Train Loss: 1.5899254083633423
Epoch: 0, Train Loss: 1.4881099462509155
Epoch: 0, Train Loss: 1.2427618503570557
Epoch: 0, Train Loss: 1.6085636615753174
Epoch: 0, Train Loss: 1.2352240085601807
Epoch: 0, Train Loss: 1.4592018127441406
Epoch: 0, Train Loss: 1.465531349182129
Epoch: 0, Train Loss: 1.2679717540740967
Epoch: 0, Train Loss: 1.8668493032455444
Epoch: 0, Train Loss: 1.229585886001587
Epoch: 0, Train Loss: 1.2580574750900269
Epoch: 0, Train Loss: 1.3900439739227295
Epoch: 0, Train Loss: 1.709628701210022
Epoch: 0, Train Loss: 1.4745559692382812
Epoch: 0, Train Loss: 1.7256801128387451
Epoch: 0, Train Loss: 1.7065221071243286
Epoch: 0, Train Loss: 1.47797691822052
Epoch: 0, Train Loss: 1.7547377347946167
Epoch: 0, Train Loss: 1.371711254119873
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3194174766540527
Epoch: 0, Train Loss: 1.2227438688278198
Epoch: 0, Train Loss: 1.3022862672805786
Epoch: 0, Train Loss: 1.2797574996948242
Epoch: 0, Train Loss: 1.4784036874771118
Epoch: 0, Train Loss: 1.780403733253479
Epoch: 0, Train Loss: 1.393613338470459
Epoch: 0, Train Loss: 1.4472967386245728
Epoch: 0, Train Loss: 1.5051392316818237
Epoch: 0, Train Loss: 1.4489156007766724
Epoch: 0, Train Loss: 1.6444183588027954
Epoch: 0, Train Loss: 1.4788285493850708
Epoch: 0, Train Loss: 1.5397053956985474
Epoch: 0, Train Loss: 1.3142856359481812
Epoch: 0, Train Loss: 1.7154006958007812
Epoch: 0, Train Loss: 1.3161011934280396
Epoch: 0, Train Loss: 1.6336348056793213
Epoch: 0, Train Loss: 1.1978681087493896
Epoch: 0, Train Loss: 1.537703514099121
Epoch: 0, Train Loss: 1.4271042346954346
Epoch: 0, Train Loss: 1.4047572612762451
Epoch: 0, Train Loss: 1.2860842943191528
Epoch: 0, Train Loss: 1.4349339008331299
Epoch: 0, Train Loss: 1.2910387516021729
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.481774091720581
Epoch: 0, Train Loss: 1.0613662004470825
Epoch: 0, Train Loss: 1.5110158920288086
Epoch: 0, Train Loss: 1.6350802183151245
Epoch: 0, Train Loss: 1.3788217306137085
Epoch: 0, Train Loss: 1.3359073400497437
Epoch: 0, Train Loss: 1.4342477321624756
Epoch: 0, Train Loss: 1.2543668746948242
Epoch: 0, Train Loss: 1.4724699258804321
Epoch: 0, Train Loss: 1.3074079751968384
Epoch: 0, Train Loss: 1.3147653341293335
Epoch: 0, Train Loss: 1.1995413303375244
Epoch: 0, Train Loss: 1.6288268566131592
Epoch: 0, Train Loss: 1.451990008354187
Epoch: 0, Train Loss: 1.4635881185531616
Epoch: 0, Train Loss: 1.4409037828445435
Epoch: 0, Train Loss: 1.7295981645584106
Epoch: 0, Train Loss: 1.9201369285583496
Epoch: 0, Train Loss: 1.834984540939331
Epoch: 0, Train Loss: 1.771300196647644
Epoch: 0, Train Loss: 1.3660680055618286
Epoch: 0, Train Loss: 1.6554055213928223
Epoch: 0, Train Loss: 1.6688991785049438
Epoch: 0, Train Loss: 1.3885618448257446
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.179527997970581
Epoch: 0, Train Loss: 1.5907652378082275
Epoch: 0, Train Loss: 1.2346220016479492
Epoch: 0, Train Loss: 1.2993830442428589
Epoch: 0, Train Loss: 1.4435726404190063
Epoch: 0, Train Loss: 1.4046746492385864
Epoch: 0, Train Loss: 1.4083101749420166
Epoch: 0, Train Loss: 1.5479209423065186
Epoch: 0, Train Loss: 1.241588830947876
Epoch: 0, Train Loss: 1.4633177518844604
Epoch: 0, Train Loss: 1.6337363719940186
Epoch: 0, Train Loss: 1.9002971649169922
Epoch: 0, Train Loss: 1.1753299236297607
Epoch: 0, Train Loss: 1.5957696437835693
Epoch: 0, Train Loss: 1.920652151107788
Epoch: 0, Train Loss: 1.69463050365448
Epoch: 0, Train Loss: 1.226894497871399
Epoch: 0, Train Loss: 1.3702932596206665
Epoch: 0, Train Loss: 0.9605621099472046
Epoch: 0, Train Loss: 2.0653769969940186
Epoch: 0, Train Loss: 1.5121922492980957
Epoch: 0, Train Loss: 1.527611255645752
Epoch: 0, Train Loss: 1.5443806648254395
Epoch: 0, Train Loss: 1.19442880153656
Epoch: 0, Train Loss: 1.2

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4660735130310059
Epoch: 0, Train Loss: 1.2555322647094727
Epoch: 0, Train Loss: 1.147089958190918
Epoch: 0, Train Loss: 1.2672784328460693
Epoch: 0, Train Loss: 1.64154052734375
Epoch: 0, Train Loss: 1.6065478324890137
Epoch: 0, Train Loss: 1.4541504383087158
Epoch: 0, Train Loss: 1.469825029373169
Epoch: 0, Train Loss: 1.3657642602920532
Epoch: 0, Train Loss: 1.955325961112976
Epoch: 0, Train Loss: 1.505200743675232
Epoch: 0, Train Loss: 1.271651029586792
Epoch: 0, Train Loss: 1.6987673044204712
Epoch: 0, Train Loss: 1.6188920736312866
Epoch: 0, Train Loss: 1.360520839691162
Epoch: 0, Train Loss: 1.4929111003875732
Epoch: 0, Train Loss: 1.5541000366210938
Epoch: 0, Train Loss: 1.5225670337677002
Epoch: 0, Train Loss: 1.1664093732833862
Epoch: 0, Train Loss: 1.452061653137207
Epoch: 0, Train Loss: 1.649443507194519
Epoch: 0, Train Loss: 1.1433656215667725
Epoch: 0, Train Loss: 1.392390489578247
Epoch: 0, Train Loss: 1.2646245956420898
Epoch: 0, Train Loss: 1.456

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5081169605255127
Epoch: 0, Train Loss: 2.246753215789795
Epoch: 0, Train Loss: 1.1704537868499756
Epoch: 0, Train Loss: 1.750234603881836
Epoch: 0, Train Loss: 1.7465959787368774
Epoch: 0, Train Loss: 1.4008910655975342
Epoch: 0, Train Loss: 1.5021599531173706
Epoch: 0, Train Loss: 2.294689655303955
Epoch: 0, Train Loss: 1.61448073387146
Epoch: 0, Train Loss: 1.2667726278305054
Epoch: 0, Train Loss: 1.7074486017227173
Epoch: 0, Train Loss: 1.7038949728012085
Epoch: 0, Train Loss: 1.1452287435531616
Epoch: 0, Train Loss: 1.34806489944458
Epoch: 0, Train Loss: 1.2648570537567139
Epoch: 0, Train Loss: 1.702802300453186
Epoch: 0, Train Loss: 1.7728718519210815
Epoch: 0, Train Loss: 1.3232940435409546
Epoch: 0, Train Loss: 1.4151151180267334
Epoch: 0, Train Loss: 1.5534303188323975
Epoch: 0, Train Loss: 1.3410155773162842
Epoch: 0, Train Loss: 1.4289017915725708
Epoch: 0, Train Loss: 1.6450623273849487
Epoch: 0, Train Loss: 1.6420056819915771
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3350352048873901
Epoch: 0, Train Loss: 1.2101900577545166
Epoch: 0, Train Loss: 1.5889137983322144
Epoch: 0, Train Loss: 1.22675359249115
Epoch: 0, Train Loss: 1.680619239807129
Epoch: 0, Train Loss: 1.134507656097412
Epoch: 0, Train Loss: 1.9094430208206177
Epoch: 0, Train Loss: 1.6130588054656982
Epoch: 0, Train Loss: 1.670190691947937
Epoch: 0, Train Loss: 1.4829413890838623
Epoch: 0, Train Loss: 1.295574426651001
Epoch: 0, Train Loss: 1.4213086366653442
Epoch: 0, Train Loss: 1.579591155052185
Epoch: 0, Train Loss: 1.7651846408843994
Epoch: 0, Train Loss: 1.415920376777649
Epoch: 0, Train Loss: 1.8587496280670166
Epoch: 0, Train Loss: 1.0493431091308594
Epoch: 0, Train Loss: 1.2693191766738892
Epoch: 0, Train Loss: 1.3356623649597168
Epoch: 0, Train Loss: 1.5768033266067505
Epoch: 0, Train Loss: 1.0722277164459229
Epoch: 0, Train Loss: 1.2933429479599
Epoch: 0, Train Loss: 1.376899242401123
Epoch: 0, Train Loss: 1.5717588663101196
Epoch: 0, Train Loss: 1.2739

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.516433835029602
Epoch: 0, Train Loss: 1.7738419771194458
Epoch: 0, Train Loss: 1.3119993209838867
Epoch: 0, Train Loss: 1.5193326473236084
Epoch: 0, Train Loss: 1.3864943981170654
Epoch: 0, Train Loss: 1.3915479183197021
Epoch: 0, Train Loss: 1.2154489755630493
Epoch: 0, Train Loss: 1.6366524696350098
Epoch: 0, Train Loss: 1.3894119262695312
Epoch: 0, Train Loss: 1.8657150268554688
Epoch: 0, Train Loss: 1.1726733446121216
Epoch: 0, Train Loss: 1.2848091125488281
Epoch: 0, Train Loss: 1.4402638673782349
Epoch: 0, Train Loss: 1.4023385047912598
Epoch: 0, Train Loss: 1.4523218870162964
Epoch: 0, Train Loss: 1.4538999795913696
Epoch: 0, Train Loss: 1.5347100496292114
Epoch: 0, Train Loss: 1.4561448097229004
Epoch: 0, Train Loss: 1.561181664466858
Epoch: 0, Train Loss: 1.4136062860488892
Epoch: 0, Train Loss: 1.4740521907806396
Epoch: 0, Train Loss: 1.4465863704681396
Epoch: 0, Train Loss: 1.322090983390808
Epoch: 0, Train Loss: 1.4632174968719482
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.0840623378753662
Epoch: 0, Train Loss: 1.4163578748703003
Epoch: 0, Train Loss: 1.4714114665985107
Epoch: 0, Train Loss: 1.2892472743988037
Epoch: 0, Train Loss: 1.6278247833251953
Epoch: 0, Train Loss: 1.8187874555587769
Epoch: 0, Train Loss: 1.2401069402694702
Epoch: 0, Train Loss: 1.4698370695114136
Epoch: 0, Train Loss: 1.7413941621780396
Epoch: 0, Train Loss: 1.4792617559432983
Epoch: 0, Train Loss: 1.1691162586212158
Epoch: 0, Train Loss: 1.4837195873260498
Epoch: 0, Train Loss: 1.3588347434997559
Epoch: 0, Train Loss: 1.1751861572265625
Epoch: 0, Train Loss: 1.5410377979278564
Epoch: 0, Train Loss: 1.3823591470718384
Epoch: 0, Train Loss: 1.0942726135253906
Epoch: 0, Train Loss: 1.565574049949646
Epoch: 0, Train Loss: 1.4565675258636475
Epoch: 0, Train Loss: 1.5239663124084473
Epoch: 0, Train Loss: 1.523829698562622
Epoch: 0, Train Loss: 1.4819517135620117
Epoch: 0, Train Loss: 1.563149333000183
Epoch: 0, Train Loss: 1.7791136503219604
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3776557445526123
Epoch: 0, Train Loss: 1.051486611366272
Epoch: 0, Train Loss: 1.4272958040237427
Epoch: 0, Train Loss: 1.6094086170196533
Epoch: 0, Train Loss: 1.6818679571151733
Epoch: 0, Train Loss: 1.46061110496521
Epoch: 0, Train Loss: 1.4177048206329346
Epoch: 0, Train Loss: 1.2453924417495728
Epoch: 0, Train Loss: 1.956983208656311
Epoch: 0, Train Loss: 1.595838189125061
Epoch: 0, Train Loss: 2.0787253379821777
Epoch: 0, Train Loss: 1.3290927410125732
Epoch: 0, Train Loss: 0.9992460012435913
Epoch: 0, Train Loss: 1.6744718551635742
Epoch: 0, Train Loss: 1.5181907415390015
Epoch: 0, Train Loss: 1.4935156106948853
Epoch: 0, Train Loss: 1.6564514636993408
Epoch: 0, Train Loss: 1.3123986721038818
Epoch: 0, Train Loss: 1.3552385568618774
Epoch: 0, Train Loss: 1.385336995124817
Epoch: 0, Train Loss: 1.4721462726593018
Epoch: 0, Train Loss: 1.2947183847427368
Epoch: 0, Train Loss: 1.5820457935333252
Epoch: 0, Train Loss: 1.0304781198501587
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.304056167602539
Epoch: 0, Train Loss: 1.35660719871521
Epoch: 0, Train Loss: 1.3878315687179565
Epoch: 0, Train Loss: 1.0180437564849854
Epoch: 0, Train Loss: 1.5118868350982666
Epoch: 0, Train Loss: 1.8861087560653687
Epoch: 0, Train Loss: 1.2866344451904297
Epoch: 0, Train Loss: 1.289454698562622
Epoch: 0, Train Loss: 1.3168882131576538
Epoch: 0, Train Loss: 1.3036530017852783
Epoch: 0, Train Loss: 1.6383589506149292
Epoch: 0, Train Loss: 1.5079386234283447
Epoch: 0, Train Loss: 1.2629673480987549
Epoch: 0, Train Loss: 1.1923311948776245
Epoch: 0, Train Loss: 1.3724039793014526
Epoch: 0, Train Loss: 1.9609352350234985
Epoch: 0, Train Loss: 1.5357327461242676
Epoch: 0, Train Loss: 1.339063286781311
Epoch: 0, Train Loss: 1.3558779954910278
Epoch: 0, Train Loss: 1.558302640914917
Epoch: 0, Train Loss: 1.2949918508529663
Epoch: 0, Train Loss: 1.5645537376403809
Epoch: 0, Train Loss: 1.4084744453430176
Epoch: 0, Train Loss: 1.1038610935211182
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5943230390548706
Epoch: 0, Train Loss: 1.2908209562301636
Epoch: 0, Train Loss: 1.5154067277908325
Epoch: 0, Train Loss: 1.3346061706542969
Epoch: 0, Train Loss: 1.0722342729568481
Epoch: 0, Train Loss: 0.9794151782989502
Epoch: 0, Train Loss: 1.6447510719299316
Epoch: 0, Train Loss: 1.5063226222991943
Epoch: 0, Train Loss: 1.0356228351593018
Epoch: 0, Train Loss: 1.6855103969573975
Epoch: 0, Train Loss: 1.2293773889541626
Epoch: 0, Train Loss: 1.3562103509902954
Epoch: 0, Train Loss: 1.075432300567627
Epoch: 0, Train Loss: 1.5059174299240112
Epoch: 0, Train Loss: 1.2768874168395996
Epoch: 0, Train Loss: 1.43403160572052
Epoch: 0, Train Loss: 1.5707169771194458
Epoch: 0, Train Loss: 1.6088768243789673
Epoch: 0, Train Loss: 1.5469673871994019
Epoch: 0, Train Loss: 1.1905940771102905
Epoch: 0, Train Loss: 1.7586191892623901
Epoch: 0, Train Loss: 1.5282480716705322
Epoch: 0, Train Loss: 1.2684078216552734
Epoch: 0, Train Loss: 1.7530570030212402
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6724014282226562
Epoch: 0, Train Loss: 1.8188748359680176
Epoch: 0, Train Loss: 1.5669896602630615
Epoch: 0, Train Loss: 1.458204746246338
Epoch: 0, Train Loss: 1.786551594734192
Epoch: 0, Train Loss: 2.1774981021881104
Epoch: 0, Train Loss: 1.2748900651931763
Epoch: 0, Train Loss: 1.5559667348861694
Epoch: 0, Train Loss: 1.6527161598205566
Epoch: 0, Train Loss: 1.0648514032363892
Epoch: 0, Train Loss: 1.4404562711715698
Epoch: 0, Train Loss: 2.030818462371826
Epoch: 0, Train Loss: 1.5542190074920654
Epoch: 0, Train Loss: 1.2077492475509644
Epoch: 0, Train Loss: 1.5043483972549438
Epoch: 0, Train Loss: 1.5115751028060913
Epoch: 0, Train Loss: 1.616668462753296
Epoch: 0, Train Loss: 1.3545626401901245
Epoch: 0, Train Loss: 1.5052769184112549
Epoch: 0, Train Loss: 0.9788475632667542
Epoch: 0, Train Loss: 1.3203721046447754
Epoch: 0, Train Loss: 1.2187941074371338
Epoch: 0, Train Loss: 1.3478283882141113
Epoch: 0, Train Loss: 1.4236167669296265
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5969175100326538
Epoch: 0, Train Loss: 1.0650964975357056
Epoch: 0, Train Loss: 1.171156883239746
Epoch: 0, Train Loss: 1.423112392425537
Epoch: 0, Train Loss: 1.2641414403915405
Epoch: 0, Train Loss: 1.3949317932128906
Epoch: 0, Train Loss: 1.306795597076416
Epoch: 0, Train Loss: 1.7610994577407837
Epoch: 0, Train Loss: 1.287690281867981
Epoch: 0, Train Loss: 1.363587498664856
Epoch: 0, Train Loss: 1.2505320310592651
Epoch: 0, Train Loss: 1.5749714374542236
Epoch: 0, Train Loss: 1.0275683403015137
Epoch: 0, Train Loss: 1.4787541627883911
Epoch: 0, Train Loss: 1.4241267442703247
Epoch: 0, Train Loss: 1.7626675367355347
Epoch: 0, Train Loss: 1.8456393480300903
Epoch: 0, Train Loss: 1.255339503288269
Epoch: 0, Train Loss: 1.443488597869873
Epoch: 0, Train Loss: 1.5237152576446533
Epoch: 0, Train Loss: 1.3383296728134155
Epoch: 0, Train Loss: 1.0687593221664429
Epoch: 0, Train Loss: 1.3467081785202026
Epoch: 0, Train Loss: 1.3054016828536987
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 2.4114415645599365
Epoch: 0, Train Loss: 1.20839262008667
Epoch: 0, Train Loss: 1.1273113489151
Epoch: 0, Train Loss: 1.3286701440811157
Epoch: 0, Train Loss: 1.7066030502319336
Epoch: 0, Train Loss: 1.5527645349502563
Epoch: 0, Train Loss: 1.5618135929107666
Epoch: 0, Train Loss: 1.0975892543792725
Epoch: 0, Train Loss: 1.398725152015686
Epoch: 0, Train Loss: 1.7138503789901733
Epoch: 0, Train Loss: 1.4511661529541016
Epoch: 0, Train Loss: 1.4308569431304932
Epoch: 0, Train Loss: 1.7147501707077026
Epoch: 0, Train Loss: 1.357008934020996
Epoch: 0, Train Loss: 1.644055724143982
Epoch: 0, Train Loss: 1.4099195003509521
Epoch: 0, Train Loss: 1.3548316955566406
Epoch: 0, Train Loss: 1.698057770729065
Epoch: 0, Train Loss: 1.3659117221832275
Epoch: 0, Train Loss: 1.3360886573791504
Epoch: 0, Train Loss: 1.381093978881836
Epoch: 0, Train Loss: 1.7294234037399292
Epoch: 0, Train Loss: 1.8542726039886475
Epoch: 0, Train Loss: 1.5773499011993408
Epoch: 0, Train Loss: 1.64

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3217241764068604
Epoch: 0, Train Loss: 1.736367106437683
Epoch: 0, Train Loss: 1.2789667844772339
Epoch: 0, Train Loss: 1.123698115348816
Epoch: 0, Train Loss: 1.5386089086532593
Epoch: 0, Train Loss: 1.5659830570220947
Epoch: 0, Train Loss: 1.2047934532165527
Epoch: 0, Train Loss: 1.4067243337631226
Epoch: 0, Train Loss: 1.2795345783233643
Epoch: 0, Train Loss: 1.4737682342529297
Epoch: 0, Train Loss: 1.5074985027313232
Epoch: 0, Train Loss: 1.8131386041641235
Epoch: 0, Train Loss: 1.6050875186920166
Epoch: 0, Train Loss: 1.8901280164718628
Epoch: 0, Train Loss: 1.4891923666000366
Epoch: 0, Train Loss: 1.5651748180389404
Epoch: 0, Train Loss: 1.2655819654464722
Epoch: 0, Train Loss: 1.3297866582870483
Epoch: 0, Train Loss: 1.7866623401641846
Epoch: 0, Train Loss: 1.8650575876235962
Epoch: 0, Train Loss: 1.4400023221969604
Epoch: 0, Train Loss: 1.8216180801391602
Epoch: 0, Train Loss: 1.2382869720458984
Epoch: 0, Train Loss: 1.153058648109436
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3821280002593994
Epoch: 0, Train Loss: 1.4892940521240234
Epoch: 0, Train Loss: 1.8807610273361206
Epoch: 0, Train Loss: 1.3652946949005127
Epoch: 0, Train Loss: 1.6351503133773804
Epoch: 0, Train Loss: 0.921543538570404
Epoch: 0, Train Loss: 1.6038795709609985
Epoch: 0, Train Loss: 0.9666834473609924
Epoch: 0, Train Loss: 1.549000859260559
Epoch: 0, Train Loss: 1.2770118713378906
Epoch: 0, Train Loss: 1.1551953554153442
Epoch: 0, Train Loss: 1.8175829648971558
Epoch: 0, Train Loss: 1.550300121307373
Epoch: 0, Train Loss: 1.1646573543548584
Epoch: 0, Train Loss: 1.2025516033172607
Epoch: 0, Train Loss: 1.3818145990371704
Epoch: 0, Train Loss: 1.3852503299713135
Epoch: 0, Train Loss: 1.2704615592956543
Epoch: 0, Train Loss: 1.2785953283309937
Epoch: 0, Train Loss: 1.594264268875122
Epoch: 0, Train Loss: 1.410007357597351
Epoch: 0, Train Loss: 1.5871254205703735
Epoch: 0, Train Loss: 1.0030429363250732
Epoch: 0, Train Loss: 1.6518127918243408
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5905660390853882
Epoch: 0, Train Loss: 1.9375346899032593
Epoch: 0, Train Loss: 1.4334732294082642
Epoch: 0, Train Loss: 1.4479838609695435
Epoch: 0, Train Loss: 1.6287347078323364
Epoch: 0, Train Loss: 0.9253300428390503
Epoch: 0, Train Loss: 1.0033900737762451
Epoch: 0, Train Loss: 1.4699264764785767
Epoch: 0, Train Loss: 1.3936342000961304
Epoch: 0, Train Loss: 1.4659591913223267
Epoch: 0, Train Loss: 1.6222261190414429
Epoch: 0, Train Loss: 1.344772219657898
Epoch: 0, Train Loss: 1.6760735511779785
Epoch: 0, Train Loss: 1.6893863677978516
Epoch: 0, Train Loss: 1.6147712469100952
Epoch: 0, Train Loss: 1.5813738107681274
Epoch: 0, Train Loss: 1.741695761680603
Epoch: 0, Train Loss: 1.3411591053009033
Epoch: 0, Train Loss: 1.3135417699813843
Epoch: 0, Train Loss: 1.6162339448928833
Epoch: 0, Train Loss: 1.5124589204788208
Epoch: 0, Train Loss: 1.4110182523727417
Epoch: 0, Train Loss: 1.243923544883728
Epoch: 0, Train Loss: 1.573755145072937
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5621140003204346
Epoch: 0, Train Loss: 1.5912930965423584
Epoch: 0, Train Loss: 1.6121442317962646
Epoch: 0, Train Loss: 2.0347821712493896
Epoch: 0, Train Loss: 1.7633553743362427
Epoch: 0, Train Loss: 0.9938694834709167
Epoch: 0, Train Loss: 1.136767864227295
Epoch: 0, Train Loss: 1.6096975803375244
Epoch: 0, Train Loss: 1.3881245851516724
Epoch: 0, Train Loss: 1.4914683103561401
Epoch: 0, Train Loss: 1.821097731590271
Epoch: 0, Train Loss: 1.4097938537597656
Epoch: 0, Train Loss: 1.5104490518569946
Epoch: 0, Train Loss: 1.6968145370483398
Epoch: 0, Train Loss: 1.7356919050216675
Epoch: 0, Train Loss: 1.7431377172470093
Epoch: 0, Train Loss: 1.7131602764129639
Epoch: 0, Train Loss: 1.4885624647140503
Epoch: 0, Train Loss: 1.4018052816390991
Epoch: 0, Train Loss: 1.651457667350769
Epoch: 0, Train Loss: 1.7092976570129395
Epoch: 0, Train Loss: 1.7547821998596191
Epoch: 0, Train Loss: 1.4358007907867432
Epoch: 0, Train Loss: 1.9659953117370605
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6672402620315552
Epoch: 0, Train Loss: 1.3645904064178467
Epoch: 0, Train Loss: 1.206939458847046
Epoch: 0, Train Loss: 1.4189698696136475
Epoch: 0, Train Loss: 1.483170747756958
Epoch: 0, Train Loss: 1.5041807889938354
Epoch: 0, Train Loss: 1.3658807277679443
Epoch: 0, Train Loss: 0.9258837103843689
Epoch: 0, Train Loss: 1.373093605041504
Epoch: 0, Train Loss: 1.5760934352874756
Epoch: 0, Train Loss: 1.56649911403656
Epoch: 0, Train Loss: 1.8298015594482422
Epoch: 0, Train Loss: 1.6552612781524658
Epoch: 0, Train Loss: 0.9500911235809326
Epoch: 0, Train Loss: 1.428054690361023
Epoch: 0, Train Loss: 0.8709469437599182
Epoch: 0, Train Loss: 1.2802817821502686
Epoch: 0, Train Loss: 1.3826292753219604
Epoch: 0, Train Loss: 1.844948410987854
Epoch: 0, Train Loss: 1.3928172588348389
Epoch: 0, Train Loss: 1.454334020614624
Epoch: 0, Train Loss: 1.526720404624939
Epoch: 0, Train Loss: 1.3995391130447388
Epoch: 0, Train Loss: 1.592651605606079
Epoch: 0, Train Loss: 1.66

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.548424243927002
Epoch: 0, Train Loss: 2.4872546195983887
Epoch: 0, Train Loss: 1.3742141723632812
Epoch: 0, Train Loss: 1.4675999879837036
Epoch: 0, Train Loss: 1.5736192464828491
Epoch: 0, Train Loss: 1.3041794300079346
Epoch: 0, Train Loss: 1.5909852981567383
Epoch: 0, Train Loss: 1.5875788927078247
Epoch: 0, Train Loss: 1.5000512599945068
Epoch: 0, Train Loss: 1.5622575283050537
Epoch: 0, Train Loss: 1.2965959310531616
Epoch: 0, Train Loss: 1.541111707687378
Epoch: 0, Train Loss: 1.8974497318267822
Epoch: 0, Train Loss: 1.5203640460968018
Epoch: 0, Train Loss: 1.825050711631775
Epoch: 0, Train Loss: 1.4498027563095093
Epoch: 0, Train Loss: 1.6756789684295654
Epoch: 0, Train Loss: 1.557428240776062
Epoch: 0, Train Loss: 1.4257861375808716
Epoch: 0, Train Loss: 1.6518489122390747
Epoch: 0, Train Loss: 1.4849636554718018
Epoch: 0, Train Loss: 1.1427608728408813
Epoch: 0, Train Loss: 1.3710389137268066
Epoch: 0, Train Loss: 1.5801408290863037
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.488903284072876
Epoch: 0, Train Loss: 2.435861349105835
Epoch: 0, Train Loss: 1.2482532262802124
Epoch: 0, Train Loss: 1.5063834190368652
Epoch: 0, Train Loss: 1.3531110286712646
Epoch: 0, Train Loss: 1.5214403867721558
Epoch: 0, Train Loss: 1.441693902015686
Epoch: 0, Train Loss: 1.535034418106079
Epoch: 0, Train Loss: 1.706514596939087
Epoch: 0, Train Loss: 1.5723060369491577
Epoch: 0, Train Loss: 1.4726992845535278
Epoch: 0, Train Loss: 1.4348821640014648
Epoch: 0, Train Loss: 1.0612528324127197
Epoch: 0, Train Loss: 1.3778084516525269
Epoch: 0, Train Loss: 1.395621657371521
Epoch: 0, Train Loss: 1.6770126819610596
Epoch: 0, Train Loss: 1.4405543804168701
Epoch: 0, Train Loss: 1.8283370733261108
Epoch: 0, Train Loss: 1.2172319889068604
Epoch: 0, Train Loss: 1.4212387800216675
Epoch: 0, Train Loss: 1.3361010551452637
Epoch: 0, Train Loss: 1.3576151132583618
Epoch: 0, Train Loss: 1.4837101697921753
Epoch: 0, Train Loss: 1.3128304481506348
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.8355274200439453
Epoch: 0, Train Loss: 1.83698570728302
Epoch: 0, Train Loss: 1.3633477687835693
Epoch: 0, Train Loss: 1.1779013872146606
Epoch: 0, Train Loss: 1.65040922164917
Epoch: 0, Train Loss: 1.6596717834472656
Epoch: 0, Train Loss: 1.5726979970932007
Epoch: 0, Train Loss: 1.0350345373153687
Epoch: 0, Train Loss: 1.2792202234268188
Epoch: 0, Train Loss: 1.9046170711517334
Epoch: 0, Train Loss: 1.3234777450561523
Epoch: 0, Train Loss: 1.6066571474075317
Epoch: 0, Train Loss: 1.5755633115768433
Epoch: 0, Train Loss: 1.3030842542648315
Epoch: 0, Train Loss: 1.7213590145111084
Epoch: 0, Train Loss: 1.3722518682479858
Epoch: 0, Train Loss: 1.4468210935592651
Epoch: 0, Train Loss: 1.5172817707061768
Epoch: 0, Train Loss: 1.2353477478027344
Epoch: 0, Train Loss: 0.8264064192771912
Epoch: 0, Train Loss: 1.613061547279358
Epoch: 0, Train Loss: 1.6318273544311523
Epoch: 0, Train Loss: 1.2612338066101074
Epoch: 0, Train Loss: 1.2969549894332886
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.1936817169189453
Epoch: 0, Train Loss: 1.175711750984192
Epoch: 0, Train Loss: 1.4829449653625488
Epoch: 0, Train Loss: 1.2913181781768799
Epoch: 0, Train Loss: 1.6858786344528198
Epoch: 0, Train Loss: 1.9774373769760132
Epoch: 0, Train Loss: 1.5033178329467773
Epoch: 0, Train Loss: 1.3639063835144043
Epoch: 0, Train Loss: 0.9930316805839539
Epoch: 0, Train Loss: 1.8070069551467896
Epoch: 0, Train Loss: 1.4543354511260986
Epoch: 0, Train Loss: 1.420597791671753
Epoch: 0, Train Loss: 1.1293206214904785
Epoch: 0, Train Loss: 1.68525230884552
Epoch: 0, Train Loss: 1.7245670557022095
Epoch: 0, Train Loss: 1.2353366613388062
Epoch: 0, Train Loss: 1.667909026145935
Epoch: 0, Train Loss: 1.5817058086395264
Epoch: 0, Train Loss: 1.63036048412323
Epoch: 0, Train Loss: 1.533638596534729
Epoch: 0, Train Loss: 1.3473519086837769
Epoch: 0, Train Loss: 2.0216000080108643
Epoch: 0, Train Loss: 1.4041070938110352
Epoch: 0, Train Loss: 1.8391162157058716
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5807850360870361
Epoch: 0, Train Loss: 1.1995670795440674
Epoch: 0, Train Loss: 1.242461919784546
Epoch: 0, Train Loss: 1.2651033401489258
Epoch: 0, Train Loss: 1.392555832862854
Epoch: 0, Train Loss: 1.233419418334961
Epoch: 0, Train Loss: 1.1762946844100952
Epoch: 0, Train Loss: 1.8023269176483154
Epoch: 0, Train Loss: 1.0940536260604858
Epoch: 0, Train Loss: 1.4689176082611084
Epoch: 0, Train Loss: 1.5779272317886353
Epoch: 0, Train Loss: 1.791995644569397
Epoch: 0, Train Loss: 1.5164834260940552
Epoch: 0, Train Loss: 1.4699738025665283
Epoch: 0, Train Loss: 1.611235499382019
Epoch: 0, Train Loss: 1.606314778327942
Epoch: 0, Train Loss: 1.491483449935913
Epoch: 0, Train Loss: 1.2389522790908813
Epoch: 0, Train Loss: 1.6356154680252075
Epoch: 0, Train Loss: 1.2973204851150513
Epoch: 0, Train Loss: 1.2635606527328491
Epoch: 0, Train Loss: 1.4377233982086182
Epoch: 0, Train Loss: 1.3843320608139038
Epoch: 0, Train Loss: 1.872666597366333
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6682220697402954
Epoch: 0, Train Loss: 1.336239218711853
Epoch: 0, Train Loss: 1.1103721857070923
Epoch: 0, Train Loss: 1.0948764085769653
Epoch: 0, Train Loss: 1.6601766347885132
Epoch: 0, Train Loss: 1.4976757764816284
Epoch: 0, Train Loss: 1.0986733436584473
Epoch: 0, Train Loss: 1.5475387573242188
Epoch: 0, Train Loss: 2.1317126750946045
Epoch: 0, Train Loss: 1.580171823501587
Epoch: 0, Train Loss: 1.2486358880996704
Epoch: 0, Train Loss: 1.909498691558838
Epoch: 0, Train Loss: 1.0843013525009155
Epoch: 0, Train Loss: 1.2406644821166992
Epoch: 0, Train Loss: 1.4313429594039917
Epoch: 0, Train Loss: 1.0142916440963745
Epoch: 0, Train Loss: 1.526644229888916
Epoch: 0, Train Loss: 1.3223618268966675
Epoch: 0, Train Loss: 1.3152680397033691
Epoch: 0, Train Loss: 1.5131211280822754
Epoch: 0, Train Loss: 1.430722713470459
Epoch: 0, Train Loss: 1.962593674659729
Epoch: 0, Train Loss: 1.4134020805358887
Epoch: 0, Train Loss: 1.4074242115020752
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3662325143814087
Epoch: 0, Train Loss: 1.1455001831054688
Epoch: 0, Train Loss: 2.207995653152466
Epoch: 0, Train Loss: 1.862378716468811
Epoch: 0, Train Loss: 1.1274604797363281
Epoch: 0, Train Loss: 1.6078695058822632
Epoch: 0, Train Loss: 1.6803101301193237
Epoch: 0, Train Loss: 1.4369953870773315
Epoch: 0, Train Loss: 1.1761826276779175
Epoch: 0, Train Loss: 1.6685329675674438
Epoch: 0, Train Loss: 1.4088314771652222
Epoch: 0, Train Loss: 1.3213084936141968
Epoch: 0, Train Loss: 1.3308955430984497
Epoch: 0, Train Loss: 1.4731780290603638
Epoch: 0, Train Loss: 1.277241587638855
Epoch: 0, Train Loss: 1.2871246337890625
Epoch: 0, Train Loss: 1.3477486371994019
Epoch: 0, Train Loss: 1.4217796325683594
Epoch: 0, Train Loss: 1.3436697721481323
Epoch: 0, Train Loss: 1.5158071517944336
Epoch: 0, Train Loss: 1.6352115869522095
Epoch: 0, Train Loss: 1.2672667503356934
Epoch: 0, Train Loss: 1.2982853651046753
Epoch: 0, Train Loss: 1.3213870525360107
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.1814446449279785
Epoch: 0, Train Loss: 1.1817234754562378
Epoch: 0, Train Loss: 1.5102883577346802
Epoch: 0, Train Loss: 1.1496248245239258
Epoch: 0, Train Loss: 1.2180097103118896
Epoch: 0, Train Loss: 1.466323971748352
Epoch: 0, Train Loss: 1.471781849861145
Epoch: 0, Train Loss: 1.1231802701950073
Epoch: 0, Train Loss: 1.1429145336151123
Epoch: 0, Train Loss: 1.2532424926757812
Epoch: 0, Train Loss: 1.1372756958007812
Epoch: 0, Train Loss: 1.5147210359573364
Epoch: 0, Train Loss: 1.1171168088912964
Epoch: 0, Train Loss: 1.9540855884552002
Epoch: 0, Train Loss: 1.5675005912780762
Epoch: 0, Train Loss: 1.563397765159607
Epoch: 0, Train Loss: 1.242335319519043
Epoch: 0, Train Loss: 1.7234171628952026
Epoch: 0, Train Loss: 1.254036784172058
Epoch: 0, Train Loss: 1.5635017156600952
Epoch: 0, Train Loss: 1.6770374774932861
Epoch: 0, Train Loss: 1.3506735563278198
Epoch: 0, Train Loss: 1.9394551515579224
Epoch: 0, Train Loss: 1.4472047090530396
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.575537085533142
Epoch: 0, Train Loss: 1.2209478616714478
Epoch: 0, Train Loss: 1.3746013641357422
Epoch: 0, Train Loss: 1.4976857900619507
Epoch: 0, Train Loss: 1.4025381803512573
Epoch: 0, Train Loss: 1.5043294429779053
Epoch: 0, Train Loss: 1.1245062351226807
Epoch: 0, Train Loss: 1.534485936164856
Epoch: 0, Train Loss: 1.8697842359542847
Epoch: 0, Train Loss: 1.088627576828003
Epoch: 0, Train Loss: 1.20278000831604
Epoch: 0, Train Loss: 1.6094850301742554
Epoch: 0, Train Loss: 1.3362807035446167
Epoch: 0, Train Loss: 1.4641271829605103
Epoch: 0, Train Loss: 1.226771593093872
Epoch: 0, Train Loss: 1.838089942932129
Epoch: 0, Train Loss: 1.488128662109375
Epoch: 0, Train Loss: 2.0029819011688232
Epoch: 0, Train Loss: 1.4872241020202637
Epoch: 0, Train Loss: 1.1356323957443237
Epoch: 0, Train Loss: 1.7426151037216187
Epoch: 0, Train Loss: 1.4454330205917358
Epoch: 0, Train Loss: 1.582145094871521
Epoch: 0, Train Loss: 1.8329308032989502
Epoch: 0, Train Loss: 1.4

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.8232334852218628
Epoch: 0, Train Loss: 1.3333041667938232
Epoch: 0, Train Loss: 1.5515944957733154
Epoch: 0, Train Loss: 1.0500727891921997
Epoch: 0, Train Loss: 1.381097435951233
Epoch: 0, Train Loss: 1.7916498184204102
Epoch: 0, Train Loss: 1.3074434995651245
Epoch: 0, Train Loss: 1.1921030282974243
Epoch: 0, Train Loss: 1.705901026725769
Epoch: 0, Train Loss: 1.2929587364196777
Epoch: 0, Train Loss: 1.5744906663894653
Epoch: 0, Train Loss: 1.1778489351272583
Epoch: 0, Train Loss: 1.5910972356796265
Epoch: 0, Train Loss: 1.3296465873718262
Epoch: 0, Train Loss: 1.3692388534545898
Epoch: 0, Train Loss: 1.8950477838516235
Epoch: 0, Train Loss: 1.422284483909607
Epoch: 0, Train Loss: 1.628250241279602
Epoch: 0, Train Loss: 1.4046295881271362
Epoch: 0, Train Loss: 2.085373878479004
Epoch: 0, Train Loss: 1.1441770792007446
Epoch: 0, Train Loss: 1.7469969987869263
Epoch: 0, Train Loss: 1.336168885231018
Epoch: 0, Train Loss: 1.7038477659225464
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2974289655685425
Epoch: 0, Train Loss: 1.1787058115005493
Epoch: 0, Train Loss: 1.4452848434448242
Epoch: 0, Train Loss: 1.6042227745056152
Epoch: 0, Train Loss: 1.709869384765625
Epoch: 0, Train Loss: 1.4444979429244995
Epoch: 0, Train Loss: 1.044244647026062
Epoch: 0, Train Loss: 1.3772382736206055
Epoch: 0, Train Loss: 1.2673486471176147
Epoch: 0, Train Loss: 1.3616045713424683
Epoch: 0, Train Loss: 1.4791537523269653
Epoch: 0, Train Loss: 1.193764567375183
Epoch: 0, Train Loss: 1.4624024629592896
Epoch: 0, Train Loss: 1.438790202140808
Epoch: 0, Train Loss: 1.2442125082015991
Epoch: 0, Train Loss: 1.2762882709503174
Epoch: 0, Train Loss: 1.1830976009368896
Epoch: 0, Train Loss: 1.4390747547149658
Epoch: 0, Train Loss: 1.3982155323028564
Epoch: 0, Train Loss: 1.1078492403030396
Epoch: 0, Train Loss: 1.7115826606750488
Epoch: 0, Train Loss: 1.563048243522644
Epoch: 0, Train Loss: 1.621272087097168
Epoch: 0, Train Loss: 1.7801339626312256
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6254451274871826
Epoch: 0, Train Loss: 1.1942704916000366
Epoch: 0, Train Loss: 1.6983283758163452
Epoch: 0, Train Loss: 1.7304556369781494
Epoch: 0, Train Loss: 1.5404026508331299
Epoch: 0, Train Loss: 1.3024102449417114
Epoch: 0, Train Loss: 1.5395970344543457
Epoch: 0, Train Loss: 1.626197338104248
Epoch: 0, Train Loss: 1.4213842153549194
Epoch: 0, Train Loss: 1.2170907258987427
Epoch: 0, Train Loss: 1.5032697916030884
Epoch: 0, Train Loss: 1.5098624229431152
Epoch: 0, Train Loss: 1.2626700401306152
Epoch: 0, Train Loss: 1.3669190406799316
Epoch: 0, Train Loss: 1.3007807731628418
Epoch: 0, Train Loss: 1.2718398571014404
Epoch: 0, Train Loss: 1.2597754001617432
Epoch: 0, Train Loss: 1.4000486135482788
Epoch: 0, Train Loss: 1.7584171295166016
Epoch: 0, Train Loss: 1.9791511297225952
Epoch: 0, Train Loss: 1.086935043334961
Epoch: 0, Train Loss: 1.6949305534362793
Epoch: 0, Train Loss: 1.3634517192840576
Epoch: 0, Train Loss: 1.2503292560577393
Epoch: 0, Train Lo

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.310631513595581
Epoch: 0, Train Loss: 1.2857813835144043
Epoch: 0, Train Loss: 1.487464427947998
Epoch: 0, Train Loss: 1.3628628253936768
Epoch: 0, Train Loss: 1.4604007005691528
Epoch: 0, Train Loss: 1.6821120977401733
Epoch: 0, Train Loss: 1.168765902519226
Epoch: 0, Train Loss: 1.4129680395126343
Epoch: 0, Train Loss: 1.4081366062164307
Epoch: 0, Train Loss: 1.5901297330856323
Epoch: 0, Train Loss: 1.1471112966537476
Epoch: 0, Train Loss: 1.5110349655151367
Epoch: 0, Train Loss: 1.163809895515442
Epoch: 0, Train Loss: 1.255997657775879
Epoch: 0, Train Loss: 1.575573444366455
Epoch: 0, Train Loss: 1.6110749244689941
Epoch: 0, Train Loss: 1.3675256967544556
Epoch: 0, Train Loss: 1.539835810661316
Epoch: 0, Train Loss: 1.2003782987594604
Epoch: 0, Train Loss: 1.4067978858947754
Epoch: 0, Train Loss: 1.5461722612380981
Epoch: 0, Train Loss: 1.6161350011825562
Epoch: 0, Train Loss: 1.3550430536270142
Epoch: 0, Train Loss: 1.3203059434890747
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.737046718597412
Epoch: 0, Train Loss: 1.1492531299591064
Epoch: 0, Train Loss: 1.5396651029586792
Epoch: 0, Train Loss: 1.4897609949111938
Epoch: 0, Train Loss: 1.4125310182571411
Epoch: 0, Train Loss: 1.143105149269104
Epoch: 0, Train Loss: 1.6234545707702637
Epoch: 0, Train Loss: 1.4442355632781982
Epoch: 0, Train Loss: 1.3112951517105103
Epoch: 0, Train Loss: 1.5565441846847534
Epoch: 0, Train Loss: 1.3501917123794556
Epoch: 0, Train Loss: 2.0898849964141846
Epoch: 0, Train Loss: 1.2935069799423218
Epoch: 0, Train Loss: 1.298390507698059
Epoch: 0, Train Loss: 1.347843050956726
Epoch: 0, Train Loss: 1.5656778812408447
Epoch: 0, Train Loss: 1.6768912076950073
Epoch: 0, Train Loss: 1.3839844465255737
Epoch: 0, Train Loss: 1.5420007705688477
Epoch: 0, Train Loss: 1.487282633781433
Epoch: 0, Train Loss: 1.6547967195510864
Epoch: 0, Train Loss: 1.5031601190567017
Epoch: 0, Train Loss: 1.3973418474197388
Epoch: 0, Train Loss: 1.6674127578735352
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.1896950006484985
Epoch: 0, Train Loss: 1.5027955770492554
Epoch: 0, Train Loss: 1.659189224243164
Epoch: 0, Train Loss: 1.415122628211975
Epoch: 0, Train Loss: 1.4677826166152954
Epoch: 0, Train Loss: 1.5840561389923096
Epoch: 0, Train Loss: 1.4279175996780396
Epoch: 0, Train Loss: 1.7089492082595825
Epoch: 0, Train Loss: 1.773118495941162
Epoch: 0, Train Loss: 1.7139257192611694
Epoch: 0, Train Loss: 1.4788658618927002
Epoch: 0, Train Loss: 1.0266374349594116
Epoch: 0, Train Loss: 1.3455603122711182
Epoch: 0, Train Loss: 1.422167181968689
Epoch: 0, Train Loss: 1.4314324855804443
Epoch: 0, Train Loss: 1.6879265308380127
Epoch: 0, Train Loss: 1.6730906963348389
Epoch: 0, Train Loss: 1.3162845373153687
Epoch: 0, Train Loss: 1.1184237003326416
Epoch: 0, Train Loss: 1.204501986503601
Epoch: 0, Train Loss: 1.6435518264770508
Epoch: 0, Train Loss: 1.2968260049819946
Epoch: 0, Train Loss: 1.4812910556793213
Epoch: 0, Train Loss: 1.5186307430267334
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.595224142074585
Epoch: 0, Train Loss: 1.672165870666504
Epoch: 0, Train Loss: 0.9358368515968323
Epoch: 0, Train Loss: 1.32504403591156
Epoch: 0, Train Loss: 1.3889946937561035
Epoch: 0, Train Loss: 1.3267430067062378
Epoch: 0, Train Loss: 1.4923888444900513
Epoch: 0, Train Loss: 1.5136553049087524
Epoch: 0, Train Loss: 1.2705235481262207
Epoch: 0, Train Loss: 1.413920283317566
Epoch: 0, Train Loss: 1.620474934577942
Epoch: 0, Train Loss: 1.3528460264205933
Epoch: 0, Train Loss: 1.5823919773101807
Epoch: 0, Train Loss: 1.337187647819519
Epoch: 0, Train Loss: 1.5694619417190552
Epoch: 0, Train Loss: 1.3662447929382324
Epoch: 0, Train Loss: 1.6471173763275146
Epoch: 0, Train Loss: 1.474779486656189
Epoch: 0, Train Loss: 1.4277591705322266
Epoch: 0, Train Loss: 1.1757572889328003
Epoch: 0, Train Loss: 1.4437158107757568
Epoch: 0, Train Loss: 1.721886157989502
Epoch: 0, Train Loss: 1.3584259748458862
Epoch: 0, Train Loss: 1.3477070331573486
Epoch: 0, Train Loss: 1.2

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.1814154386520386
Epoch: 0, Train Loss: 1.0924687385559082
Epoch: 0, Train Loss: 1.5407426357269287
Epoch: 0, Train Loss: 1.1983975172042847
Epoch: 0, Train Loss: 1.3932676315307617
Epoch: 0, Train Loss: 1.7868199348449707
Epoch: 0, Train Loss: 1.7103687524795532
Epoch: 0, Train Loss: 1.6603882312774658
Epoch: 0, Train Loss: 1.226357102394104
Epoch: 0, Train Loss: 1.6158251762390137
Epoch: 0, Train Loss: 1.5442371368408203
Epoch: 0, Train Loss: 1.5367563962936401
Epoch: 0, Train Loss: 1.7000019550323486
Epoch: 0, Train Loss: 1.352028250694275
Epoch: 0, Train Loss: 1.377794623374939
Epoch: 0, Train Loss: 1.4426653385162354
Epoch: 0, Train Loss: 1.3538601398468018
Epoch: 0, Train Loss: 1.1856131553649902
Epoch: 0, Train Loss: 1.7346705198287964
Epoch: 0, Train Loss: 1.3914035558700562
Epoch: 0, Train Loss: 1.2258174419403076
Epoch: 0, Train Loss: 1.5603387355804443
Epoch: 0, Train Loss: 1.620257019996643
Epoch: 0, Train Loss: 1.3709676265716553
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.0811066627502441
Epoch: 0, Train Loss: 1.008384346961975
Epoch: 0, Train Loss: 1.4470293521881104
Epoch: 0, Train Loss: 1.6886563301086426
Epoch: 0, Train Loss: 1.3390506505966187
Epoch: 0, Train Loss: 1.5523154735565186
Epoch: 0, Train Loss: 1.647447943687439
Epoch: 0, Train Loss: 1.560552954673767
Epoch: 0, Train Loss: 1.2556606531143188
Epoch: 0, Train Loss: 1.494521975517273
Epoch: 0, Train Loss: 1.4213204383850098
Epoch: 0, Train Loss: 1.4859981536865234
Epoch: 0, Train Loss: 1.5185680389404297
Epoch: 0, Train Loss: 1.56191885471344
Epoch: 0, Train Loss: 1.636850118637085
Epoch: 0, Train Loss: 1.1733475923538208
Epoch: 0, Train Loss: 1.8068861961364746
Epoch: 0, Train Loss: 1.8334832191467285
Epoch: 0, Train Loss: 1.9304275512695312
Epoch: 0, Train Loss: 1.7620830535888672
Epoch: 0, Train Loss: 1.3015270233154297
Epoch: 0, Train Loss: 1.5681827068328857
Epoch: 0, Train Loss: 1.4198176860809326
Epoch: 0, Train Loss: 1.3609548807144165
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5126607418060303
Epoch: 0, Train Loss: 1.447584629058838
Epoch: 0, Train Loss: 1.4913239479064941
Epoch: 0, Train Loss: 1.4351016283035278
Epoch: 0, Train Loss: 1.4400185346603394
Epoch: 0, Train Loss: 1.4808323383331299
Epoch: 0, Train Loss: 1.4928162097930908
Epoch: 0, Train Loss: 1.3347781896591187
Epoch: 0, Train Loss: 1.2853978872299194
Epoch: 0, Train Loss: 1.2523950338363647
Epoch: 0, Train Loss: 1.765175700187683
Epoch: 0, Train Loss: 1.3629809617996216
Epoch: 0, Train Loss: 1.5601667165756226
Epoch: 0, Train Loss: 1.540095329284668
Epoch: 0, Train Loss: 1.2548656463623047
Epoch: 0, Train Loss: 1.2662427425384521
Epoch: 0, Train Loss: 1.3853718042373657
Epoch: 0, Train Loss: 1.8691498041152954
Epoch: 0, Train Loss: 1.5184792280197144
Epoch: 0, Train Loss: 1.4400393962860107
Epoch: 0, Train Loss: 1.5362437963485718
Epoch: 0, Train Loss: 1.2348564863204956
Epoch: 0, Train Loss: 1.4426285028457642
Epoch: 0, Train Loss: 1.3251348733901978
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6786165237426758
Epoch: 0, Train Loss: 1.4487788677215576
Epoch: 0, Train Loss: 1.5488489866256714
Epoch: 0, Train Loss: 1.8228228092193604
Epoch: 0, Train Loss: 1.4044468402862549
Epoch: 0, Train Loss: 1.604148030281067
Epoch: 0, Train Loss: 1.5760899782180786
Epoch: 0, Train Loss: 1.373409628868103
Epoch: 0, Train Loss: 1.718484878540039
Epoch: 0, Train Loss: 1.8382433652877808
Epoch: 0, Train Loss: 1.4188703298568726
Epoch: 0, Train Loss: 1.0587594509124756
Epoch: 0, Train Loss: 1.2920469045639038
Epoch: 0, Train Loss: 1.7154254913330078
Epoch: 0, Train Loss: 1.7258634567260742
Epoch: 0, Train Loss: 1.5996699333190918
Epoch: 0, Train Loss: 1.1547671556472778
Epoch: 0, Train Loss: 1.3781952857971191
Epoch: 0, Train Loss: 1.272208333015442
Epoch: 0, Train Loss: 1.1748592853546143
Epoch: 0, Train Loss: 1.4653877019882202
Epoch: 0, Train Loss: 1.4575979709625244
Epoch: 0, Train Loss: 1.3145033121109009
Epoch: 0, Train Loss: 1.40776789188385
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7160975933074951
Epoch: 0, Train Loss: 1.3201541900634766
Epoch: 0, Train Loss: 1.5089622735977173
Epoch: 0, Train Loss: 1.207085132598877
Epoch: 0, Train Loss: 1.489312767982483
Epoch: 0, Train Loss: 2.021134853363037
Epoch: 0, Train Loss: 2.0123445987701416
Epoch: 0, Train Loss: 1.3620537519454956
Epoch: 0, Train Loss: 2.0770421028137207
Epoch: 0, Train Loss: 1.374207854270935
Epoch: 0, Train Loss: 1.5041441917419434
Epoch: 0, Train Loss: 1.397669792175293
Epoch: 0, Train Loss: 1.4445874691009521
Epoch: 0, Train Loss: 1.6307140588760376
Epoch: 0, Train Loss: 1.3745782375335693
Epoch: 0, Train Loss: 1.5532854795455933
Epoch: 0, Train Loss: 1.3822003602981567
Epoch: 0, Train Loss: 1.3711851835250854
Epoch: 0, Train Loss: 1.6506956815719604
Epoch: 0, Train Loss: 1.8734073638916016
Epoch: 0, Train Loss: 1.2795175313949585
Epoch: 0, Train Loss: 1.3321409225463867
Epoch: 0, Train Loss: 1.838007926940918
Epoch: 0, Train Loss: 1.4463412761688232
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5747350454330444
Epoch: 0, Train Loss: 1.3791356086730957
Epoch: 0, Train Loss: 1.2050358057022095
Epoch: 0, Train Loss: 1.4220491647720337
Epoch: 0, Train Loss: 1.2143489122390747
Epoch: 0, Train Loss: 1.4032127857208252
Epoch: 0, Train Loss: 1.396604061126709
Epoch: 0, Train Loss: 1.729311227798462
Epoch: 0, Train Loss: 1.4284249544143677
Epoch: 0, Train Loss: 1.3430137634277344
Epoch: 0, Train Loss: 1.049959421157837
Epoch: 0, Train Loss: 1.865168571472168
Epoch: 0, Train Loss: 1.4656447172164917
Epoch: 0, Train Loss: 1.3462297916412354
Epoch: 0, Train Loss: 1.3535125255584717
Epoch: 0, Train Loss: 1.4065057039260864
Epoch: 0, Train Loss: 1.5602248907089233
Epoch: 0, Train Loss: 1.194691777229309
Epoch: 0, Train Loss: 1.335134744644165
Epoch: 0, Train Loss: 1.501717448234558
Epoch: 0, Train Loss: 1.7787134647369385


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5000431537628174
Epoch: 0, Train Loss: 0.8110422492027283
Epoch: 0, Train Loss: 1.0267378091812134
Epoch: 0, Train Loss: 1.4643300771713257
Epoch: 0, Train Loss: 1.7953156232833862
Epoch: 0, Train Loss: 1.359802007675171
Epoch: 0, Train Loss: 1.032338261604309
Epoch: 0, Train Loss: 1.2239623069763184
Epoch: 0, Train Loss: 1.676766276359558
Epoch: 0, Train Loss: 1.3748759031295776
Epoch: 0, Train Loss: 1.2952492237091064
Epoch: 0, Train Loss: 1.8695690631866455
Epoch: 0, Train Loss: 1.4907879829406738
Epoch: 0, Train Loss: 1.0783601999282837
Epoch: 0, Train Loss: 1.3004581928253174
Epoch: 0, Train Loss: 1.216789960861206
Epoch: 0, Train Loss: 1.458067536354065
Epoch: 0, Train Loss: 1.0286487340927124
Epoch: 0, Train Loss: 1.5191246271133423
Epoch: 0, Train Loss: 1.3278971910476685
Epoch: 0, Train Loss: 1.3496259450912476
Epoch: 0, Train Loss: 1.7920897006988525
Epoch: 0, Train Loss: 1.528719186782837
Epoch: 0, Train Loss: 1.497654914855957
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.369529128074646
Epoch: 0, Train Loss: 1.1817036867141724
Epoch: 0, Train Loss: 1.6234122514724731
Epoch: 0, Train Loss: 1.469232201576233
Epoch: 0, Train Loss: 1.6703802347183228
Epoch: 0, Train Loss: 1.5392473936080933
Epoch: 0, Train Loss: 1.1208438873291016
Epoch: 0, Train Loss: 1.4694725275039673
Epoch: 0, Train Loss: 1.4777283668518066
Epoch: 0, Train Loss: 1.6920256614685059
Epoch: 0, Train Loss: 1.4071015119552612
Epoch: 0, Train Loss: 1.1365290880203247
Epoch: 0, Train Loss: 1.3595186471939087
Epoch: 0, Train Loss: 1.6236863136291504
Epoch: 0, Train Loss: 1.4782533645629883
Epoch: 0, Train Loss: 1.282818078994751
Epoch: 0, Train Loss: 1.3853812217712402
Epoch: 0, Train Loss: 1.0623223781585693
Epoch: 0, Train Loss: 1.6083624362945557
Epoch: 0, Train Loss: 1.338446855545044
Epoch: 0, Train Loss: 1.5684340000152588
Epoch: 0, Train Loss: 1.9861576557159424
Epoch: 0, Train Loss: 1.6232484579086304
Epoch: 0, Train Loss: 1.2998071908950806
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6103699207305908
Epoch: 0, Train Loss: 0.9840517640113831
Epoch: 0, Train Loss: 1.1028704643249512
Epoch: 0, Train Loss: 1.4967437982559204
Epoch: 0, Train Loss: 1.5476471185684204
Epoch: 0, Train Loss: 1.347495198249817
Epoch: 0, Train Loss: 1.816278338432312
Epoch: 0, Train Loss: 1.291753888130188
Epoch: 0, Train Loss: 1.9574294090270996
Epoch: 0, Train Loss: 1.5775572061538696
Epoch: 0, Train Loss: 1.4996469020843506
Epoch: 0, Train Loss: 1.3470826148986816
Epoch: 0, Train Loss: 1.586357831954956
Epoch: 0, Train Loss: 1.2858564853668213
Epoch: 0, Train Loss: 1.453683614730835
Epoch: 0, Train Loss: 1.396980881690979
Epoch: 0, Train Loss: 1.4683250188827515
Epoch: 0, Train Loss: 1.281007170677185
Epoch: 0, Train Loss: 1.5972622632980347
Epoch: 0, Train Loss: 1.4770089387893677
Epoch: 0, Train Loss: 1.5161885023117065
Epoch: 0, Train Loss: 1.6244758367538452
Epoch: 0, Train Loss: 1.2470470666885376
Epoch: 0, Train Loss: 1.4345297813415527
Epoch: 0, Train Loss: 0

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3618931770324707
Epoch: 0, Train Loss: 1.2487822771072388
Epoch: 0, Train Loss: 1.354517936706543
Epoch: 0, Train Loss: 1.567659616470337
Epoch: 0, Train Loss: 1.4400256872177124
Epoch: 0, Train Loss: 1.8619707822799683
Epoch: 0, Train Loss: 1.2692317962646484
Epoch: 0, Train Loss: 1.6849092245101929
Epoch: 0, Train Loss: 1.5204275846481323
Epoch: 0, Train Loss: 1.3550384044647217
Epoch: 0, Train Loss: 1.5139724016189575
Epoch: 0, Train Loss: 1.4003509283065796
Epoch: 0, Train Loss: 1.3120546340942383
Epoch: 0, Train Loss: 1.4384785890579224
Epoch: 0, Train Loss: 1.403120517730713
Epoch: 0, Train Loss: 1.373632550239563
Epoch: 0, Train Loss: 1.5844378471374512
Epoch: 0, Train Loss: 1.7512236833572388
Epoch: 0, Train Loss: 1.3505799770355225
Epoch: 0, Train Loss: 1.3610247373580933
Epoch: 0, Train Loss: 1.7602120637893677
Epoch: 0, Train Loss: 1.4361412525177002
Epoch: 0, Train Loss: 1.0788369178771973
Epoch: 0, Train Loss: 1.7161585092544556
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3235431909561157
Epoch: 0, Train Loss: 1.1274160146713257
Epoch: 0, Train Loss: 1.348468542098999
Epoch: 0, Train Loss: 1.3872023820877075
Epoch: 0, Train Loss: 1.582680106163025
Epoch: 0, Train Loss: 1.6243069171905518
Epoch: 0, Train Loss: 1.0942227840423584
Epoch: 0, Train Loss: 1.603216528892517
Epoch: 0, Train Loss: 1.4506161212921143
Epoch: 0, Train Loss: 1.4465934038162231


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2287994623184204
Epoch: 0, Train Loss: 2.123051881790161
Epoch: 0, Train Loss: 1.1014015674591064
Epoch: 0, Train Loss: 1.2688076496124268
Epoch: 0, Train Loss: 1.4697134494781494
Epoch: 0, Train Loss: 1.4278018474578857
Epoch: 0, Train Loss: 1.656234860420227
Epoch: 0, Train Loss: 1.3788591623306274
Epoch: 0, Train Loss: 1.497089147567749
Epoch: 0, Train Loss: 1.3305491209030151
Epoch: 0, Train Loss: 1.5578513145446777
Epoch: 0, Train Loss: 1.4801052808761597
Epoch: 0, Train Loss: 1.32439386844635
Epoch: 0, Train Loss: 1.5128412246704102
Epoch: 0, Train Loss: 1.719348430633545
Epoch: 0, Train Loss: 1.5880110263824463
Epoch: 0, Train Loss: 1.7067272663116455
Epoch: 0, Train Loss: 1.7473024129867554
Epoch: 0, Train Loss: 1.4647700786590576
Epoch: 0, Train Loss: 1.4743931293487549
Epoch: 0, Train Loss: 1.2812696695327759
Epoch: 0, Train Loss: 1.3406014442443848
Epoch: 0, Train Loss: 1.3284811973571777
Epoch: 0, Train Loss: 1.4806108474731445
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3037585020065308
Epoch: 0, Train Loss: 1.2161931991577148
Epoch: 0, Train Loss: 1.35124933719635
Epoch: 0, Train Loss: 1.6609458923339844
Epoch: 0, Train Loss: 1.5134143829345703
Epoch: 0, Train Loss: 1.4703619480133057
Epoch: 0, Train Loss: 1.288733959197998
Epoch: 0, Train Loss: 1.5252352952957153
Epoch: 0, Train Loss: 1.3670547008514404
Epoch: 0, Train Loss: 1.3225934505462646
Epoch: 0, Train Loss: 1.7965317964553833
Epoch: 0, Train Loss: 1.6213860511779785
Epoch: 0, Train Loss: 1.6038545370101929
Epoch: 0, Train Loss: 1.2208045721054077
Epoch: 0, Train Loss: 1.4714264869689941
Epoch: 0, Train Loss: 1.9324145317077637
Epoch: 0, Train Loss: 1.703660488128662
Epoch: 0, Train Loss: 1.597252368927002
Epoch: 0, Train Loss: 1.6961299180984497
Epoch: 0, Train Loss: 1.7275092601776123
Epoch: 0, Train Loss: 1.046704649925232
Epoch: 0, Train Loss: 1.3704047203063965
Epoch: 0, Train Loss: 1.625118374824524
Epoch: 0, Train Loss: 1.350527048110962
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2431156635284424
Epoch: 0, Train Loss: 1.6275224685668945
Epoch: 0, Train Loss: 1.512717843055725
Epoch: 0, Train Loss: 1.365944504737854
Epoch: 0, Train Loss: 1.199167251586914
Epoch: 0, Train Loss: 1.2952033281326294
Epoch: 0, Train Loss: 1.8167628049850464
Epoch: 0, Train Loss: 1.240404486656189
Epoch: 0, Train Loss: 1.6646606922149658
Epoch: 0, Train Loss: 1.1072614192962646
Epoch: 0, Train Loss: 1.0991674661636353
Epoch: 0, Train Loss: 1.0251306295394897
Epoch: 0, Train Loss: 1.6328802108764648
Epoch: 0, Train Loss: 1.5003200769424438
Epoch: 0, Train Loss: 1.5610904693603516
Epoch: 0, Train Loss: 0.9075992107391357
Epoch: 0, Train Loss: 1.2761918306350708
Epoch: 0, Train Loss: 1.3575836420059204
Epoch: 0, Train Loss: 1.331423282623291
Epoch: 0, Train Loss: 1.3159735202789307
Epoch: 0, Train Loss: 1.442487359046936
Epoch: 0, Train Loss: 1.6705421209335327
Epoch: 0, Train Loss: 1.3605304956436157
Epoch: 0, Train Loss: 1.5042011737823486
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.67132568359375
Epoch: 0, Train Loss: 0.825347363948822
Epoch: 0, Train Loss: 1.3631715774536133
Epoch: 0, Train Loss: 1.5617477893829346
Epoch: 0, Train Loss: 1.4025812149047852
Epoch: 0, Train Loss: 1.6914846897125244
Epoch: 0, Train Loss: 1.612913966178894
Epoch: 0, Train Loss: 1.2414765357971191
Epoch: 0, Train Loss: 1.5888580083847046
Epoch: 0, Train Loss: 1.4248617887496948
Epoch: 0, Train Loss: 1.2371553182601929
Epoch: 0, Train Loss: 1.2808250188827515
Epoch: 0, Train Loss: 1.3396072387695312
Epoch: 0, Train Loss: 1.4127745628356934
Epoch: 0, Train Loss: 1.452623963356018
Epoch: 0, Train Loss: 1.4366645812988281
Epoch: 0, Train Loss: 1.3121095895767212
Epoch: 0, Train Loss: 1.4891984462738037
Epoch: 0, Train Loss: 1.3272161483764648
Epoch: 0, Train Loss: 1.329702377319336
Epoch: 0, Train Loss: 1.0476292371749878
Epoch: 0, Train Loss: 1.3014328479766846
Epoch: 0, Train Loss: 1.4404631853103638
Epoch: 0, Train Loss: 1.6518141031265259
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3657896518707275
Epoch: 0, Train Loss: 1.3293616771697998
Epoch: 0, Train Loss: 1.6521462202072144
Epoch: 0, Train Loss: 1.4278392791748047
Epoch: 0, Train Loss: 1.4011152982711792
Epoch: 0, Train Loss: 1.4482513666152954
Epoch: 0, Train Loss: 1.6637940406799316
Epoch: 0, Train Loss: 1.1620153188705444
Epoch: 0, Train Loss: 1.5090813636779785
Epoch: 0, Train Loss: 1.4977859258651733
Epoch: 0, Train Loss: 1.2374697923660278
Epoch: 0, Train Loss: 1.610103726387024
Epoch: 0, Train Loss: 1.9038270711898804
Epoch: 0, Train Loss: 1.0646649599075317
Epoch: 0, Train Loss: 1.7307569980621338
Epoch: 0, Train Loss: 1.473531723022461
Epoch: 0, Train Loss: 1.3685215711593628
Epoch: 0, Train Loss: 1.1835553646087646
Epoch: 0, Train Loss: 1.2868938446044922
Epoch: 0, Train Loss: 1.110253095626831
Epoch: 0, Train Loss: 1.2618287801742554
Epoch: 0, Train Loss: 1.46597421169281
Epoch: 0, Train Loss: 1.5282577276229858
Epoch: 0, Train Loss: 1.2113234996795654
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5350123643875122
Epoch: 0, Train Loss: 0.9704327583312988
Epoch: 0, Train Loss: 1.1224772930145264
Epoch: 0, Train Loss: 1.750991940498352
Epoch: 0, Train Loss: 1.1665359735488892
Epoch: 0, Train Loss: 1.3919905424118042
Epoch: 0, Train Loss: 1.2564793825149536
Epoch: 0, Train Loss: 1.1635147333145142
Epoch: 0, Train Loss: 1.2700552940368652
Epoch: 0, Train Loss: 1.7085684537887573
Epoch: 0, Train Loss: 1.3707109689712524
Epoch: 0, Train Loss: 1.268730878829956
Epoch: 0, Train Loss: 1.6524066925048828
Epoch: 0, Train Loss: 1.8671071529388428
Epoch: 0, Train Loss: 1.4405018091201782
Epoch: 0, Train Loss: 1.4103609323501587
Epoch: 0, Train Loss: 1.6266177892684937
Epoch: 0, Train Loss: 1.325379729270935
Epoch: 0, Train Loss: 1.7989387512207031
Epoch: 0, Train Loss: 1.3931723833084106
Epoch: 0, Train Loss: 1.299172043800354
Epoch: 0, Train Loss: 1.6261099576950073
Epoch: 0, Train Loss: 1.1635894775390625
Epoch: 0, Train Loss: 1.1267977952957153
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.344237208366394
Epoch: 0, Train Loss: 1.0754495859146118
Epoch: 0, Train Loss: 1.6470426321029663
Epoch: 0, Train Loss: 1.1388338804244995
Epoch: 0, Train Loss: 1.46403169631958
Epoch: 0, Train Loss: 1.6752079725265503
Epoch: 0, Train Loss: 1.397766351699829
Epoch: 0, Train Loss: 1.6887388229370117
Epoch: 0, Train Loss: 1.1583032608032227
Epoch: 0, Train Loss: 1.490147590637207
Epoch: 0, Train Loss: 0.99698406457901
Epoch: 0, Train Loss: 1.6839203834533691
Epoch: 0, Train Loss: 1.313733696937561
Epoch: 0, Train Loss: 1.7170175313949585
Epoch: 0, Train Loss: 1.561245322227478
Epoch: 0, Train Loss: 1.608842372894287
Epoch: 0, Train Loss: 1.588868498802185
Epoch: 0, Train Loss: 1.3963172435760498
Epoch: 0, Train Loss: 1.691319465637207
Epoch: 0, Train Loss: 1.7358578443527222
Epoch: 0, Train Loss: 1.3890999555587769
Epoch: 0, Train Loss: 1.5665384531021118
Epoch: 0, Train Loss: 1.5398929119110107
Epoch: 0, Train Loss: 1.2883495092391968
Epoch: 0, Train Loss: 1.8838

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4629918336868286
Epoch: 0, Train Loss: 1.4209272861480713
Epoch: 0, Train Loss: 1.5867127180099487
Epoch: 0, Train Loss: 1.7416207790374756
Epoch: 0, Train Loss: 1.0672141313552856
Epoch: 0, Train Loss: 1.4889137744903564
Epoch: 0, Train Loss: 1.577318787574768
Epoch: 0, Train Loss: 1.6426602602005005
Epoch: 0, Train Loss: 1.3718498945236206
Epoch: 0, Train Loss: 1.308571457862854
Epoch: 0, Train Loss: 1.5699714422225952
Epoch: 0, Train Loss: 1.523989200592041
Epoch: 0, Train Loss: 1.19217848777771
Epoch: 0, Train Loss: 1.455743670463562
Epoch: 0, Train Loss: 1.273964285850525
Epoch: 0, Train Loss: 1.580973744392395
Epoch: 0, Train Loss: 1.5502829551696777
Epoch: 0, Train Loss: 1.3324837684631348
Epoch: 0, Train Loss: 1.2537368535995483
Epoch: 0, Train Loss: 1.4201616048812866
Epoch: 0, Train Loss: 1.2838306427001953
Epoch: 0, Train Loss: 1.3297176361083984
Epoch: 0, Train Loss: 1.2707899808883667
Epoch: 0, Train Loss: 1.3026021718978882
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6617257595062256
Epoch: 0, Train Loss: 1.3976271152496338
Epoch: 0, Train Loss: 2.2206497192382812
Epoch: 0, Train Loss: 1.6019550561904907
Epoch: 0, Train Loss: 1.443428874015808
Epoch: 0, Train Loss: 1.6226081848144531
Epoch: 0, Train Loss: 1.5336369276046753
Epoch: 0, Train Loss: 1.2682764530181885
Epoch: 0, Train Loss: 1.4225434064865112
Epoch: 0, Train Loss: 1.1137841939926147
Epoch: 0, Train Loss: 1.4970234632492065
Epoch: 0, Train Loss: 1.3324315547943115
Epoch: 0, Train Loss: 1.1787561178207397
Epoch: 0, Train Loss: 1.214701533317566
Epoch: 0, Train Loss: 1.5412218570709229
Epoch: 0, Train Loss: 1.3241229057312012
Epoch: 0, Train Loss: 1.3600382804870605
Epoch: 0, Train Loss: 1.3677250146865845
Epoch: 0, Train Loss: 1.3689525127410889
Epoch: 0, Train Loss: 1.858530044555664
Epoch: 0, Train Loss: 1.4300658702850342
Epoch: 0, Train Loss: 1.5511633157730103
Epoch: 0, Train Loss: 1.3378303050994873
Epoch: 0, Train Loss: 1.5563654899597168
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6952476501464844
Epoch: 0, Train Loss: 1.0622618198394775
Epoch: 0, Train Loss: 1.5343976020812988
Epoch: 0, Train Loss: 1.5851978063583374
Epoch: 0, Train Loss: 1.258693814277649
Epoch: 0, Train Loss: 1.2537821531295776
Epoch: 0, Train Loss: 1.5952762365341187
Epoch: 0, Train Loss: 1.4391025304794312
Epoch: 0, Train Loss: 1.7003209590911865
Epoch: 0, Train Loss: 1.452008843421936
Epoch: 0, Train Loss: 1.4102948904037476
Epoch: 0, Train Loss: 1.3158773183822632
Epoch: 0, Train Loss: 1.516872525215149
Epoch: 0, Train Loss: 1.196189045906067
Epoch: 0, Train Loss: 1.0553785562515259
Epoch: 0, Train Loss: 1.637085199356079
Epoch: 0, Train Loss: 1.416859745979309
Epoch: 0, Train Loss: 1.0899423360824585
Epoch: 0, Train Loss: 1.3059802055358887
Epoch: 0, Train Loss: 1.6476737260818481
Epoch: 0, Train Loss: 1.2080203294754028
Epoch: 0, Train Loss: 1.429221749305725
Epoch: 0, Train Loss: 1.513384222984314
Epoch: 0, Train Loss: 1.3006854057312012
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5647679567337036
Epoch: 0, Train Loss: 1.1634305715560913


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7136949300765991
Epoch: 0, Train Loss: 0.8517724871635437
Epoch: 0, Train Loss: 1.9683640003204346
Epoch: 0, Train Loss: 1.260196566581726
Epoch: 0, Train Loss: 1.2376495599746704
Epoch: 0, Train Loss: 1.3769863843917847
Epoch: 0, Train Loss: 1.621470332145691
Epoch: 0, Train Loss: 1.6659945249557495
Epoch: 0, Train Loss: 1.2527191638946533
Epoch: 0, Train Loss: 1.464802861213684
Epoch: 0, Train Loss: 1.375204086303711
Epoch: 0, Train Loss: 0.948570191860199
Epoch: 0, Train Loss: 1.4125876426696777
Epoch: 0, Train Loss: 1.1982749700546265
Epoch: 0, Train Loss: 1.4167646169662476
Epoch: 0, Train Loss: 1.472110629081726
Epoch: 0, Train Loss: 1.5797598361968994
Epoch: 0, Train Loss: 1.923311710357666
Epoch: 0, Train Loss: 1.2085018157958984
Epoch: 0, Train Loss: 1.4458084106445312
Epoch: 0, Train Loss: 1.4258437156677246
Epoch: 0, Train Loss: 1.2765001058578491
Epoch: 0, Train Loss: 1.2997195720672607
Epoch: 0, Train Loss: 1.2926855087280273
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.9223734140396118
Epoch: 0, Train Loss: 1.9804116487503052
Epoch: 0, Train Loss: 1.4579432010650635
Epoch: 0, Train Loss: 1.5648072957992554
Epoch: 0, Train Loss: 1.3669384717941284
Epoch: 0, Train Loss: 1.281393051147461
Epoch: 0, Train Loss: 1.224956750869751
Epoch: 0, Train Loss: 1.2253586053848267
Epoch: 0, Train Loss: 1.2284460067749023
Epoch: 0, Train Loss: 1.3035657405853271
Epoch: 0, Train Loss: 1.8966648578643799
Epoch: 0, Train Loss: 1.0856289863586426
Epoch: 0, Train Loss: 1.3533769845962524
Epoch: 0, Train Loss: 1.2354215383529663
Epoch: 0, Train Loss: 1.1941365003585815
Epoch: 0, Train Loss: 1.5253887176513672
Epoch: 0, Train Loss: 1.6139538288116455
Epoch: 0, Train Loss: 1.2440675497055054
Epoch: 0, Train Loss: 1.6455734968185425
Epoch: 0, Train Loss: 1.55631685256958
Epoch: 0, Train Loss: 1.6865055561065674
Epoch: 0, Train Loss: 1.2897356748580933
Epoch: 0, Train Loss: 1.627117395401001
Epoch: 0, Train Loss: 1.391430377960205
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4790716171264648
Epoch: 0, Train Loss: 1.3852818012237549
Epoch: 0, Train Loss: 1.764316439628601
Epoch: 0, Train Loss: 1.834151029586792
Epoch: 0, Train Loss: 1.4482402801513672
Epoch: 0, Train Loss: 1.0737600326538086
Epoch: 0, Train Loss: 1.1380095481872559
Epoch: 0, Train Loss: 1.2437993288040161
Epoch: 0, Train Loss: 1.3563026189804077
Epoch: 0, Train Loss: 1.6165885925292969
Epoch: 0, Train Loss: 1.1551076173782349
Epoch: 0, Train Loss: 1.4422886371612549
Epoch: 0, Train Loss: 1.407042145729065
Epoch: 0, Train Loss: 1.4359687566757202
Epoch: 0, Train Loss: 1.439606785774231
Epoch: 0, Train Loss: 1.6719229221343994
Epoch: 0, Train Loss: 1.4068394899368286
Epoch: 0, Train Loss: 1.3544691801071167
Epoch: 0, Train Loss: 1.5116668939590454
Epoch: 0, Train Loss: 1.5814396142959595
Epoch: 0, Train Loss: 1.1472492218017578
Epoch: 0, Train Loss: 1.4975471496582031
Epoch: 0, Train Loss: 0.9648005962371826
Epoch: 0, Train Loss: 1.5030573606491089
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.9978175163269043
Epoch: 0, Train Loss: 1.0166358947753906
Epoch: 0, Train Loss: 1.8692569732666016
Epoch: 0, Train Loss: 1.1228995323181152
Epoch: 0, Train Loss: 1.8930788040161133
Epoch: 0, Train Loss: 1.2139804363250732
Epoch: 0, Train Loss: 1.4034336805343628
Epoch: 0, Train Loss: 1.4663645029067993
Epoch: 0, Train Loss: 1.4142049551010132
Epoch: 0, Train Loss: 1.3211231231689453
Epoch: 0, Train Loss: 1.0325391292572021
Epoch: 0, Train Loss: 1.420543909072876
Epoch: 0, Train Loss: 1.235931396484375
Epoch: 0, Train Loss: 1.448196530342102
Epoch: 0, Train Loss: 1.7338999509811401
Epoch: 0, Train Loss: 1.2844021320343018
Epoch: 0, Train Loss: 1.3262840509414673
Epoch: 0, Train Loss: 1.3016921281814575
Epoch: 0, Train Loss: 1.158054232597351
Epoch: 0, Train Loss: 1.8236881494522095
Epoch: 0, Train Loss: 1.6329944133758545
Epoch: 0, Train Loss: 1.4865771532058716
Epoch: 0, Train Loss: 1.1702754497528076
Epoch: 0, Train Loss: 1.427956223487854
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.123366355895996
Epoch: 0, Train Loss: 1.2262495756149292
Epoch: 0, Train Loss: 1.1455754041671753
Epoch: 0, Train Loss: 1.2447535991668701
Epoch: 0, Train Loss: 1.1927950382232666
Epoch: 0, Train Loss: 1.1253668069839478
Epoch: 0, Train Loss: 1.5218682289123535
Epoch: 0, Train Loss: 1.3172619342803955
Epoch: 0, Train Loss: 1.337854027748108
Epoch: 0, Train Loss: 1.2222039699554443
Epoch: 0, Train Loss: 1.3791477680206299
Epoch: 0, Train Loss: 1.3187414407730103
Epoch: 0, Train Loss: 1.6284265518188477
Epoch: 0, Train Loss: 1.733253002166748
Epoch: 0, Train Loss: 1.4020220041275024
Epoch: 0, Train Loss: 1.7070168256759644
Epoch: 0, Train Loss: 1.4946664571762085
Epoch: 0, Train Loss: 1.296371579170227
Epoch: 0, Train Loss: 1.4200477600097656
Epoch: 0, Train Loss: 1.409956455230713
Epoch: 0, Train Loss: 1.208237886428833
Epoch: 0, Train Loss: 1.175588607788086
Epoch: 0, Train Loss: 1.7090837955474854
Epoch: 0, Train Loss: 1.400227665901184
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5823498964309692
Epoch: 0, Train Loss: 1.1237949132919312
Epoch: 0, Train Loss: 1.4810962677001953
Epoch: 0, Train Loss: 1.1003719568252563
Epoch: 0, Train Loss: 1.3451591730117798
Epoch: 0, Train Loss: 0.9557815790176392
Epoch: 0, Train Loss: 1.3748018741607666
Epoch: 0, Train Loss: 1.5046815872192383
Epoch: 0, Train Loss: 1.2143123149871826
Epoch: 0, Train Loss: 1.4219831228256226
Epoch: 0, Train Loss: 1.2864006757736206
Epoch: 0, Train Loss: 1.6461185216903687
Epoch: 0, Train Loss: 1.6577467918395996
Epoch: 0, Train Loss: 1.9185022115707397
Epoch: 0, Train Loss: 1.2701815366744995
Epoch: 0, Train Loss: 1.2759020328521729
Epoch: 0, Train Loss: 1.6537327766418457
Epoch: 0, Train Loss: 1.5464149713516235
Epoch: 0, Train Loss: 1.0985993146896362
Epoch: 0, Train Loss: 1.4337114095687866
Epoch: 0, Train Loss: 1.382049798965454
Epoch: 0, Train Loss: 1.5004568099975586
Epoch: 0, Train Loss: 1.706709384918213
Epoch: 0, Train Loss: 1.6876039505004883
Epoch: 0, Train Lo

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2441140413284302
Epoch: 0, Train Loss: 1.6111010313034058
Epoch: 0, Train Loss: 1.5533241033554077
Epoch: 0, Train Loss: 1.4412888288497925
Epoch: 0, Train Loss: 1.2214668989181519
Epoch: 0, Train Loss: 1.4256900548934937
Epoch: 0, Train Loss: 1.3443694114685059
Epoch: 0, Train Loss: 1.4107264280319214
Epoch: 0, Train Loss: 1.7363407611846924
Epoch: 0, Train Loss: 1.3356364965438843
Epoch: 0, Train Loss: 1.2166388034820557
Epoch: 0, Train Loss: 1.3742674589157104
Epoch: 0, Train Loss: 1.4887126684188843
Epoch: 0, Train Loss: 1.3151694536209106
Epoch: 0, Train Loss: 1.105218768119812
Epoch: 0, Train Loss: 1.4306919574737549
Epoch: 0, Train Loss: 1.8678369522094727
Epoch: 0, Train Loss: 1.2776556015014648
Epoch: 0, Train Loss: 1.5697994232177734
Epoch: 0, Train Loss: 1.2073947191238403
Epoch: 0, Train Loss: 1.5259805917739868
Epoch: 0, Train Loss: 1.20688796043396
Epoch: 0, Train Loss: 1.2087079286575317
Epoch: 0, Train Loss: 1.0750616788864136
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.030434012413025
Epoch: 0, Train Loss: 2.0189056396484375
Epoch: 0, Train Loss: 1.2453715801239014
Epoch: 0, Train Loss: 2.016411066055298
Epoch: 0, Train Loss: 1.4611040353775024
Epoch: 0, Train Loss: 1.4730606079101562
Epoch: 0, Train Loss: 1.4113099575042725
Epoch: 0, Train Loss: 1.4746971130371094
Epoch: 0, Train Loss: 1.4182133674621582
Epoch: 0, Train Loss: 1.4305871725082397
Epoch: 0, Train Loss: 1.9004241228103638
Epoch: 0, Train Loss: 1.3845853805541992
Epoch: 0, Train Loss: 1.492311716079712
Epoch: 0, Train Loss: 1.389628529548645
Epoch: 0, Train Loss: 1.4452351331710815
Epoch: 0, Train Loss: 1.522674798965454
Epoch: 0, Train Loss: 1.7885160446166992
Epoch: 0, Train Loss: 1.744827389717102
Epoch: 0, Train Loss: 1.4862092733383179
Epoch: 0, Train Loss: 1.5444626808166504
Epoch: 0, Train Loss: 1.1155531406402588
Epoch: 0, Train Loss: 1.3908706903457642
Epoch: 0, Train Loss: 1.2904242277145386
Epoch: 0, Train Loss: 1.4483458995819092
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.405761957168579
Epoch: 0, Train Loss: 1.3461084365844727
Epoch: 0, Train Loss: 1.7403016090393066
Epoch: 0, Train Loss: 1.5206290483474731
Epoch: 0, Train Loss: 1.6585180759429932
Epoch: 0, Train Loss: 1.595594882965088
Epoch: 0, Train Loss: 1.2040632963180542
Epoch: 0, Train Loss: 1.555213451385498
Epoch: 0, Train Loss: 1.1281999349594116
Epoch: 0, Train Loss: 1.2884936332702637
Epoch: 0, Train Loss: 1.0702956914901733
Epoch: 0, Train Loss: 1.340428113937378
Epoch: 0, Train Loss: 1.3552724123001099
Epoch: 0, Train Loss: 1.4543477296829224
Epoch: 0, Train Loss: 1.2798820734024048
Epoch: 0, Train Loss: 1.2811362743377686
Epoch: 0, Train Loss: 1.5452475547790527
Epoch: 0, Train Loss: 1.3726760149002075
Epoch: 0, Train Loss: 1.1880370378494263
Epoch: 0, Train Loss: 1.5186975002288818
Epoch: 0, Train Loss: 1.6110910177230835
Epoch: 0, Train Loss: 1.2795971632003784
Epoch: 0, Train Loss: 1.536428689956665
Epoch: 0, Train Loss: 1.12735116481781
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.1114400625228882
Epoch: 0, Train Loss: 2.0342063903808594
Epoch: 0, Train Loss: 1.2899380922317505
Epoch: 0, Train Loss: 1.493385672569275
Epoch: 0, Train Loss: 1.192097544670105
Epoch: 0, Train Loss: 1.530079960823059
Epoch: 0, Train Loss: 1.8648220300674438
Epoch: 0, Train Loss: 1.189555287361145
Epoch: 0, Train Loss: 1.3483631610870361
Epoch: 0, Train Loss: 1.5027601718902588
Epoch: 0, Train Loss: 1.5512447357177734
Epoch: 0, Train Loss: 1.298674464225769
Epoch: 0, Train Loss: 1.768872618675232
Epoch: 0, Train Loss: 1.4715287685394287
Epoch: 0, Train Loss: 1.6133990287780762
Epoch: 0, Train Loss: 2.1042845249176025
Epoch: 0, Train Loss: 1.1797345876693726
Epoch: 0, Train Loss: 1.1449545621871948
Epoch: 0, Train Loss: 1.394011378288269
Epoch: 0, Train Loss: 1.3752849102020264
Epoch: 0, Train Loss: 1.277103304862976
Epoch: 0, Train Loss: 1.6269724369049072
Epoch: 0, Train Loss: 1.7096092700958252
Epoch: 0, Train Loss: 1.1469281911849976
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2641003131866455
Epoch: 0, Train Loss: 1.1080238819122314
Epoch: 0, Train Loss: 1.21033775806427
Epoch: 0, Train Loss: 1.3976298570632935
Epoch: 0, Train Loss: 1.3982173204421997
Epoch: 0, Train Loss: 1.2638065814971924
Epoch: 0, Train Loss: 1.5855686664581299
Epoch: 0, Train Loss: 1.576850414276123
Epoch: 0, Train Loss: 1.1168004274368286
Epoch: 0, Train Loss: 1.555377721786499
Epoch: 0, Train Loss: 1.1407643556594849
Epoch: 0, Train Loss: 1.2867686748504639
Epoch: 0, Train Loss: 1.7290968894958496
Epoch: 0, Train Loss: 1.4573818445205688
Epoch: 0, Train Loss: 1.195204734802246
Epoch: 0, Train Loss: 1.5281840562820435
Epoch: 0, Train Loss: 1.1883738040924072
Epoch: 0, Train Loss: 1.3650380373001099
Epoch: 0, Train Loss: 1.4432777166366577
Epoch: 0, Train Loss: 1.6040723323822021
Epoch: 0, Train Loss: 1.3200533390045166
Epoch: 0, Train Loss: 1.3698738813400269
Epoch: 0, Train Loss: 1.7519919872283936
Epoch: 0, Train Loss: 1.1879907846450806
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2813048362731934
Epoch: 0, Train Loss: 1.2825922966003418
Epoch: 0, Train Loss: 1.7764207124710083
Epoch: 0, Train Loss: 1.5005297660827637
Epoch: 0, Train Loss: 1.5094634294509888
Epoch: 0, Train Loss: 1.9510117769241333
Epoch: 0, Train Loss: 1.254704475402832
Epoch: 0, Train Loss: 1.5080740451812744
Epoch: 0, Train Loss: 1.3804072141647339
Epoch: 0, Train Loss: 1.288320779800415
Epoch: 0, Train Loss: 1.3759477138519287
Epoch: 0, Train Loss: 1.5275245904922485
Epoch: 0, Train Loss: 1.412814736366272
Epoch: 0, Train Loss: 1.6867687702178955
Epoch: 0, Train Loss: 1.1200023889541626
Epoch: 0, Train Loss: 1.6107805967330933
Epoch: 0, Train Loss: 1.2931214570999146
Epoch: 0, Train Loss: 1.2729169130325317
Epoch: 0, Train Loss: 1.6008968353271484
Epoch: 0, Train Loss: 1.4588689804077148
Epoch: 0, Train Loss: 1.1676392555236816
Epoch: 0, Train Loss: 1.3135902881622314
Epoch: 0, Train Loss: 1.6731760501861572
Epoch: 0, Train Loss: 1.3513312339782715
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3592203855514526
Epoch: 0, Train Loss: 1.0627442598342896
Epoch: 0, Train Loss: 1.3470021486282349
Epoch: 0, Train Loss: 1.539401650428772
Epoch: 0, Train Loss: 1.4190999269485474
Epoch: 0, Train Loss: 1.2758042812347412
Epoch: 0, Train Loss: 1.3418172597885132
Epoch: 0, Train Loss: 1.5146936178207397
Epoch: 0, Train Loss: 1.4829139709472656
Epoch: 0, Train Loss: 1.434563159942627
Epoch: 0, Train Loss: 1.4108041524887085
Epoch: 0, Train Loss: 1.2396469116210938
Epoch: 0, Train Loss: 1.619306206703186
Epoch: 0, Train Loss: 1.4019455909729004
Epoch: 0, Train Loss: 1.5724408626556396
Epoch: 0, Train Loss: 1.5694231986999512
Epoch: 0, Train Loss: 1.187532901763916
Epoch: 0, Train Loss: 1.194091558456421
Epoch: 0, Train Loss: 1.4942117929458618
Epoch: 0, Train Loss: 1.1471577882766724
Epoch: 0, Train Loss: 1.3217532634735107
Epoch: 0, Train Loss: 1.675183653831482
Epoch: 0, Train Loss: 1.5901621580123901
Epoch: 0, Train Loss: 1.6149568557739258
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.1660940647125244
Epoch: 0, Train Loss: 1.7799324989318848
Epoch: 0, Train Loss: 1.3972618579864502
Epoch: 0, Train Loss: 1.5175495147705078
Epoch: 0, Train Loss: 1.5529167652130127
Epoch: 0, Train Loss: 1.7926117181777954
Epoch: 0, Train Loss: 1.5737937688827515
Epoch: 0, Train Loss: 1.4754215478897095
Epoch: 0, Train Loss: 1.4077374935150146
Epoch: 0, Train Loss: 1.397694706916809
Epoch: 0, Train Loss: 1.4129637479782104
Epoch: 0, Train Loss: 1.6651307344436646
Epoch: 0, Train Loss: 1.494914174079895
Epoch: 0, Train Loss: 1.6969170570373535
Epoch: 0, Train Loss: 1.4110394716262817
Epoch: 0, Train Loss: 1.1930676698684692
Epoch: 0, Train Loss: 1.4824167490005493
Epoch: 0, Train Loss: 1.511730670928955
Epoch: 0, Train Loss: 1.2773640155792236
Epoch: 0, Train Loss: 1.5645350217819214
Epoch: 0, Train Loss: 1.245684266090393
Epoch: 0, Train Loss: 1.5076377391815186
Epoch: 0, Train Loss: 1.0876336097717285
Epoch: 0, Train Loss: 1.2241225242614746
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4198323488235474
Epoch: 0, Train Loss: 1.6862472295761108
Epoch: 0, Train Loss: 1.5298439264297485
Epoch: 0, Train Loss: 1.3268548250198364
Epoch: 0, Train Loss: 1.5412722826004028
Epoch: 0, Train Loss: 1.4203896522521973
Epoch: 0, Train Loss: 1.4865230321884155
Epoch: 0, Train Loss: 1.2077085971832275
Epoch: 0, Train Loss: 1.27583909034729
Epoch: 0, Train Loss: 1.6833717823028564
Epoch: 0, Train Loss: 1.3706438541412354
Epoch: 0, Train Loss: 1.6394469738006592
Epoch: 0, Train Loss: 1.2153252363204956
Epoch: 0, Train Loss: 1.3376314640045166
Epoch: 0, Train Loss: 1.3194313049316406
Epoch: 0, Train Loss: 1.1074819564819336
Epoch: 0, Train Loss: 1.2549843788146973
Epoch: 0, Train Loss: 1.2501494884490967
Epoch: 0, Train Loss: 1.3058456182479858
Epoch: 0, Train Loss: 1.188841462135315
Epoch: 0, Train Loss: 1.2801867723464966
Epoch: 0, Train Loss: 1.641939401626587
Epoch: 0, Train Loss: 1.5025850534439087
Epoch: 0, Train Loss: 1.0436410903930664
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.069699764251709
Epoch: 0, Train Loss: 1.164716362953186
Epoch: 0, Train Loss: 1.1679667234420776
Epoch: 0, Train Loss: 1.7370407581329346
Epoch: 0, Train Loss: 1.305963158607483
Epoch: 0, Train Loss: 1.4681402444839478
Epoch: 0, Train Loss: 1.3155183792114258
Epoch: 0, Train Loss: 1.617172360420227
Epoch: 0, Train Loss: 1.3986725807189941
Epoch: 0, Train Loss: 1.3537850379943848
Epoch: 0, Train Loss: 1.3831373453140259
Epoch: 0, Train Loss: 1.9165700674057007
Epoch: 0, Train Loss: 1.1885050535202026
Epoch: 0, Train Loss: 1.4657543897628784
Epoch: 0, Train Loss: 1.1300870180130005
Epoch: 0, Train Loss: 1.3646483421325684
Epoch: 0, Train Loss: 1.184044599533081
Epoch: 0, Train Loss: 1.2016961574554443
Epoch: 0, Train Loss: 1.4281752109527588
Epoch: 0, Train Loss: 0.9495043754577637
Epoch: 0, Train Loss: 1.6110434532165527
Epoch: 0, Train Loss: 1.2876267433166504
Epoch: 0, Train Loss: 1.4072133302688599
Epoch: 0, Train Loss: 1.6556988954544067
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.501223087310791
Epoch: 0, Train Loss: 0.8621087670326233
Epoch: 0, Train Loss: 1.2185146808624268
Epoch: 0, Train Loss: 1.3109291791915894
Epoch: 0, Train Loss: 1.5381449460983276
Epoch: 0, Train Loss: 1.5530030727386475
Epoch: 0, Train Loss: 1.5904300212860107
Epoch: 0, Train Loss: 1.31938898563385
Epoch: 0, Train Loss: 1.884633183479309
Epoch: 0, Train Loss: 0.8804950714111328
Epoch: 0, Train Loss: 1.2233597040176392
Epoch: 0, Train Loss: 1.2592949867248535
Epoch: 0, Train Loss: 1.4940274953842163
Epoch: 0, Train Loss: 1.4467648267745972
Epoch: 0, Train Loss: 1.6389031410217285
Epoch: 0, Train Loss: 1.3869891166687012
Epoch: 0, Train Loss: 1.6635915040969849
Epoch: 0, Train Loss: 1.146003246307373
Epoch: 0, Train Loss: 1.5171796083450317
Epoch: 0, Train Loss: 1.4822485446929932
Epoch: 0, Train Loss: 1.774584412574768
Epoch: 0, Train Loss: 1.018351435661316
Epoch: 0, Train Loss: 1.4967150688171387
Epoch: 0, Train Loss: 1.2807797193527222
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5729049444198608
Epoch: 0, Train Loss: 1.018758773803711
Epoch: 0, Train Loss: 1.169294834136963
Epoch: 0, Train Loss: 1.3469552993774414
Epoch: 0, Train Loss: 1.5243752002716064
Epoch: 0, Train Loss: 1.3447728157043457
Epoch: 0, Train Loss: 1.7756634950637817
Epoch: 0, Train Loss: 1.4456121921539307
Epoch: 0, Train Loss: 1.515048623085022
Epoch: 0, Train Loss: 1.5939289331436157
Epoch: 0, Train Loss: 1.364336609840393
Epoch: 0, Train Loss: 1.4779566526412964
Epoch: 0, Train Loss: 1.4825336933135986
Epoch: 0, Train Loss: 1.3769856691360474
Epoch: 0, Train Loss: 1.3008934259414673
Epoch: 0, Train Loss: 1.3920707702636719
Epoch: 0, Train Loss: 1.1774959564208984
Epoch: 0, Train Loss: 1.1284130811691284
Epoch: 0, Train Loss: 1.2253296375274658
Epoch: 0, Train Loss: 1.2520859241485596
Epoch: 0, Train Loss: 1.4310152530670166
Epoch: 0, Train Loss: 1.7589625120162964
Epoch: 0, Train Loss: 1.4460194110870361
Epoch: 0, Train Loss: 1.4015008211135864
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4436060190200806
Epoch: 0, Train Loss: 0.9949524998664856
Epoch: 0, Train Loss: 1.4149141311645508
Epoch: 0, Train Loss: 0.9316625595092773
Epoch: 0, Train Loss: 1.3525346517562866
Epoch: 0, Train Loss: 1.5813745260238647
Epoch: 0, Train Loss: 1.1921415328979492
Epoch: 0, Train Loss: 1.5117218494415283
Epoch: 0, Train Loss: 1.3154231309890747


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.1897881031036377
Epoch: 0, Train Loss: 1.1929845809936523
Epoch: 0, Train Loss: 1.3134634494781494
Epoch: 0, Train Loss: 1.778125524520874
Epoch: 0, Train Loss: 1.198500633239746
Epoch: 0, Train Loss: 0.9742251634597778
Epoch: 0, Train Loss: 1.4994392395019531
Epoch: 0, Train Loss: 1.6187748908996582
Epoch: 0, Train Loss: 1.4569631814956665
Epoch: 0, Train Loss: 1.4003379344940186
Epoch: 0, Train Loss: 1.365430474281311
Epoch: 0, Train Loss: 1.211855411529541
Epoch: 0, Train Loss: 1.0236341953277588
Epoch: 0, Train Loss: 1.2624965906143188
Epoch: 0, Train Loss: 1.062018871307373
Epoch: 0, Train Loss: 1.276733636856079
Epoch: 0, Train Loss: 1.1448519229888916
Epoch: 0, Train Loss: 1.1193180084228516
Epoch: 0, Train Loss: 1.5394418239593506
Epoch: 0, Train Loss: 1.3950997591018677
Epoch: 0, Train Loss: 1.4141579866409302
Epoch: 0, Train Loss: 1.6348929405212402
Epoch: 0, Train Loss: 1.258718490600586
Epoch: 0, Train Loss: 1.5172690153121948
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5694217681884766
Epoch: 0, Train Loss: 1.4462521076202393
Epoch: 0, Train Loss: 1.6881802082061768
Epoch: 0, Train Loss: 1.534151554107666
Epoch: 0, Train Loss: 1.3281357288360596
Epoch: 0, Train Loss: 1.3286802768707275
Epoch: 0, Train Loss: 1.610859751701355
Epoch: 0, Train Loss: 1.5423498153686523
Epoch: 0, Train Loss: 1.5057493448257446
Epoch: 0, Train Loss: 1.269339919090271
Epoch: 0, Train Loss: 1.1754909753799438
Epoch: 0, Train Loss: 1.6326936483383179
Epoch: 0, Train Loss: 1.056807518005371
Epoch: 0, Train Loss: 1.5556777715682983
Epoch: 0, Train Loss: 1.3446818590164185
Epoch: 0, Train Loss: 1.4292927980422974
Epoch: 0, Train Loss: 1.5251541137695312
Epoch: 0, Train Loss: 1.5142863988876343
Epoch: 0, Train Loss: 1.76752507686615
Epoch: 0, Train Loss: 1.8463983535766602
Epoch: 0, Train Loss: 1.7047626972198486
Epoch: 0, Train Loss: 1.1281527280807495
Epoch: 0, Train Loss: 1.3621249198913574
Epoch: 0, Train Loss: 1.497642993927002
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.0721800327301025
Epoch: 0, Train Loss: 1.4842849969863892
Epoch: 0, Train Loss: 1.1016860008239746
Epoch: 0, Train Loss: 1.5254429578781128
Epoch: 0, Train Loss: 1.5058164596557617
Epoch: 0, Train Loss: 1.9643762111663818
Epoch: 0, Train Loss: 1.1445554494857788
Epoch: 0, Train Loss: 1.4540773630142212
Epoch: 0, Train Loss: 1.8401154279708862
Epoch: 0, Train Loss: 1.746656060218811
Epoch: 0, Train Loss: 1.299576997756958
Epoch: 0, Train Loss: 1.5337984561920166
Epoch: 0, Train Loss: 1.5299643278121948
Epoch: 0, Train Loss: 1.7737360000610352
Epoch: 0, Train Loss: 2.0740725994110107
Epoch: 0, Train Loss: 1.3519302606582642
Epoch: 0, Train Loss: 1.3135688304901123
Epoch: 0, Train Loss: 1.1017097234725952
Epoch: 0, Train Loss: 1.5364631414413452
Epoch: 0, Train Loss: 1.6072216033935547


Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3176032304763794
Epoch: 0, Train Loss: 0.8271034359931946
Epoch: 0, Train Loss: 1.8305968046188354
Epoch: 0, Train Loss: 1.264994502067566
Epoch: 0, Train Loss: 1.1947885751724243
Epoch: 0, Train Loss: 1.1729686260223389
Epoch: 0, Train Loss: 1.5707494020462036
Epoch: 0, Train Loss: 1.8662277460098267
Epoch: 0, Train Loss: 1.1074564456939697
Epoch: 0, Train Loss: 1.4820096492767334
Epoch: 0, Train Loss: 1.1338821649551392
Epoch: 0, Train Loss: 1.141086220741272
Epoch: 0, Train Loss: 1.4531519412994385
Epoch: 0, Train Loss: 1.4351177215576172
Epoch: 0, Train Loss: 1.6345685720443726
Epoch: 0, Train Loss: 1.775197148323059
Epoch: 0, Train Loss: 1.7098060846328735
Epoch: 0, Train Loss: 1.6075845956802368
Epoch: 0, Train Loss: 1.641596794128418
Epoch: 0, Train Loss: 1.3343229293823242
Epoch: 0, Train Loss: 1.4660935401916504
Epoch: 0, Train Loss: 1.4115383625030518
Epoch: 0, Train Loss: 1.5408390760421753
Epoch: 0, Train Loss: 1.5604788064956665
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6453739404678345
Epoch: 0, Train Loss: 1.2713209390640259
Epoch: 0, Train Loss: 1.3236701488494873
Epoch: 0, Train Loss: 1.3396903276443481
Epoch: 0, Train Loss: 1.3083494901657104
Epoch: 0, Train Loss: 1.1054452657699585
Epoch: 0, Train Loss: 1.7510111331939697
Epoch: 0, Train Loss: 1.8917511701583862
Epoch: 0, Train Loss: 1.4503282308578491
Epoch: 0, Train Loss: 1.4918954372406006
Epoch: 0, Train Loss: 1.6604770421981812
Epoch: 0, Train Loss: 1.4356050491333008
Epoch: 0, Train Loss: 1.5364490747451782
Epoch: 0, Train Loss: 1.3597888946533203
Epoch: 0, Train Loss: 1.1435518264770508
Epoch: 0, Train Loss: 1.4223309755325317
Epoch: 0, Train Loss: 0.7762455940246582
Epoch: 0, Train Loss: 1.4572757482528687
Epoch: 0, Train Loss: 1.3597667217254639
Epoch: 0, Train Loss: 1.6888525485992432
Epoch: 0, Train Loss: 1.6018381118774414
Epoch: 0, Train Loss: 1.2455166578292847
Epoch: 0, Train Loss: 1.1811683177947998
Epoch: 0, Train Loss: 1.5505712032318115
Epoch: 0, Train 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5319428443908691
Epoch: 0, Train Loss: 1.3220995664596558
Epoch: 0, Train Loss: 1.5484676361083984
Epoch: 0, Train Loss: 1.277588129043579
Epoch: 0, Train Loss: 1.2612895965576172
Epoch: 0, Train Loss: 1.0258095264434814
Epoch: 0, Train Loss: 1.6031146049499512
Epoch: 0, Train Loss: 1.6999461650848389
Epoch: 0, Train Loss: 1.340609073638916
Epoch: 0, Train Loss: 1.5860809087753296
Epoch: 0, Train Loss: 1.3515691757202148
Epoch: 0, Train Loss: 1.1910792589187622
Epoch: 0, Train Loss: 1.4339755773544312
Epoch: 0, Train Loss: 1.3154070377349854
Epoch: 0, Train Loss: 1.1837748289108276
Epoch: 0, Train Loss: 1.298919677734375
Epoch: 0, Train Loss: 1.7022370100021362
Epoch: 0, Train Loss: 1.4909268617630005
Epoch: 0, Train Loss: 1.7157686948776245
Epoch: 0, Train Loss: 1.53857421875
Epoch: 0, Train Loss: 1.398956298828125
Epoch: 0, Train Loss: 1.4977264404296875
Epoch: 0, Train Loss: 1.7347346544265747
Epoch: 0, Train Loss: 0.9748594760894775
Epoch: 0, Train Loss: 1.3

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3115428686141968
Epoch: 0, Train Loss: 1.5734440088272095
Epoch: 0, Train Loss: 1.661812663078308
Epoch: 0, Train Loss: 1.6058229207992554
Epoch: 0, Train Loss: 1.4944299459457397
Epoch: 0, Train Loss: 1.4223238229751587
Epoch: 0, Train Loss: 1.4958854913711548
Epoch: 0, Train Loss: 1.4135316610336304
Epoch: 0, Train Loss: 1.486360788345337
Epoch: 0, Train Loss: 1.5344791412353516
Epoch: 0, Train Loss: 1.7274913787841797
Epoch: 0, Train Loss: 1.5647330284118652
Epoch: 0, Train Loss: 1.643331527709961
Epoch: 0, Train Loss: 1.3941999673843384
Epoch: 0, Train Loss: 1.5604312419891357
Epoch: 0, Train Loss: 1.2484952211380005
Epoch: 0, Train Loss: 1.4956587553024292
Epoch: 0, Train Loss: 1.2328143119812012
Epoch: 0, Train Loss: 1.423845887184143
Epoch: 0, Train Loss: 1.235531210899353
Epoch: 0, Train Loss: 0.9825200438499451
Epoch: 0, Train Loss: 1.1959911584854126
Epoch: 0, Train Loss: 1.2015159130096436
Epoch: 0, Train Loss: 1.0400691032409668
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.207299828529358
Epoch: 0, Train Loss: 1.5243268013000488
Epoch: 0, Train Loss: 1.2639232873916626
Epoch: 0, Train Loss: 1.5650196075439453
Epoch: 0, Train Loss: 1.5046627521514893
Epoch: 0, Train Loss: 1.2085455656051636
Epoch: 0, Train Loss: 1.8399603366851807
Epoch: 0, Train Loss: 1.9456270933151245
Epoch: 0, Train Loss: 1.446273922920227
Epoch: 0, Train Loss: 1.0794306993484497
Epoch: 0, Train Loss: 1.4721577167510986
Epoch: 0, Train Loss: 1.5183972120285034
Epoch: 0, Train Loss: 1.4111762046813965
Epoch: 0, Train Loss: 1.6147401332855225
Epoch: 0, Train Loss: 1.530213475227356
Epoch: 0, Train Loss: 1.4640295505523682
Epoch: 0, Train Loss: 1.1408535242080688
Epoch: 0, Train Loss: 1.2223623991012573
Epoch: 0, Train Loss: 1.3941935300827026
Epoch: 0, Train Loss: 1.4343757629394531
Epoch: 0, Train Loss: 1.1223493814468384
Epoch: 0, Train Loss: 1.3868789672851562
Epoch: 0, Train Loss: 1.3264756202697754
Epoch: 0, Train Loss: 1.419680118560791
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.592818021774292
Epoch: 0, Train Loss: 1.5091825723648071
Epoch: 0, Train Loss: 1.8017151355743408
Epoch: 0, Train Loss: 1.493737816810608
Epoch: 0, Train Loss: 1.4384634494781494
Epoch: 0, Train Loss: 1.3073899745941162
Epoch: 0, Train Loss: 1.5519219636917114
Epoch: 0, Train Loss: 1.7878124713897705
Epoch: 0, Train Loss: 1.4418597221374512
Epoch: 0, Train Loss: 1.3292032480239868
Epoch: 0, Train Loss: 1.1948169469833374
Epoch: 0, Train Loss: 1.4846558570861816
Epoch: 0, Train Loss: 1.577651023864746
Epoch: 0, Train Loss: 1.2387398481369019
Epoch: 0, Train Loss: 1.436639666557312
Epoch: 0, Train Loss: 1.2384507656097412
Epoch: 0, Train Loss: 1.5194694995880127
Epoch: 0, Train Loss: 1.216872215270996
Epoch: 0, Train Loss: 1.0510591268539429
Epoch: 0, Train Loss: 1.3988667726516724
Epoch: 0, Train Loss: 1.4252502918243408
Epoch: 0, Train Loss: 1.4751349687576294
Epoch: 0, Train Loss: 1.4802407026290894
Epoch: 0, Train Loss: 1.3650232553482056
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3394101858139038
Epoch: 0, Train Loss: 1.4347467422485352
Epoch: 0, Train Loss: 1.5612707138061523
Epoch: 0, Train Loss: 1.346360206604004
Epoch: 0, Train Loss: 1.413563847541809
Epoch: 0, Train Loss: 1.7334182262420654
Epoch: 0, Train Loss: 1.2732845544815063
Epoch: 0, Train Loss: 1.470524787902832
Epoch: 0, Train Loss: 1.3476550579071045
Epoch: 0, Train Loss: 1.5367401838302612
Epoch: 0, Train Loss: 1.2465804815292358
Epoch: 0, Train Loss: 1.3637856245040894
Epoch: 0, Train Loss: 1.1355676651000977
Epoch: 0, Train Loss: 1.2727779150009155
Epoch: 0, Train Loss: 1.7519547939300537
Epoch: 0, Train Loss: 1.0479090213775635
Epoch: 0, Train Loss: 1.0516494512557983
Epoch: 0, Train Loss: 1.2373043298721313
Epoch: 0, Train Loss: 1.603663444519043
Epoch: 0, Train Loss: 1.058205008506775
Epoch: 0, Train Loss: 1.3594398498535156
Epoch: 0, Train Loss: 1.2023921012878418
Epoch: 0, Train Loss: 1.6535272598266602
Epoch: 0, Train Loss: 1.3697681427001953
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4494318962097168
Epoch: 0, Train Loss: 1.136474370956421
Epoch: 0, Train Loss: 1.4981718063354492
Epoch: 0, Train Loss: 1.6015279293060303
Epoch: 0, Train Loss: 1.4233955144882202
Epoch: 0, Train Loss: 1.7520631551742554
Epoch: 0, Train Loss: 1.2980138063430786
Epoch: 0, Train Loss: 1.0357136726379395
Epoch: 0, Train Loss: 1.6085574626922607
Epoch: 0, Train Loss: 1.268902063369751
Epoch: 0, Train Loss: 1.837803602218628
Epoch: 0, Train Loss: 1.243294358253479
Epoch: 0, Train Loss: 1.0673320293426514
Epoch: 0, Train Loss: 1.2590677738189697
Epoch: 0, Train Loss: 1.293882131576538
Epoch: 0, Train Loss: 1.3616492748260498
Epoch: 0, Train Loss: 1.2117130756378174
Epoch: 0, Train Loss: 1.4259299039840698
Epoch: 0, Train Loss: 1.5138347148895264
Epoch: 0, Train Loss: 1.2652190923690796
Epoch: 0, Train Loss: 1.2641189098358154
Epoch: 0, Train Loss: 1.6594626903533936
Epoch: 0, Train Loss: 2.0360774993896484
Epoch: 0, Train Loss: 1.5818519592285156
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3646931648254395
Epoch: 0, Train Loss: 1.1603058576583862
Epoch: 0, Train Loss: 1.7489792108535767
Epoch: 0, Train Loss: 1.6489521265029907
Epoch: 0, Train Loss: 1.18982994556427
Epoch: 0, Train Loss: 1.4072073698043823
Epoch: 0, Train Loss: 1.3173128366470337
Epoch: 0, Train Loss: 1.3778009414672852
Epoch: 0, Train Loss: 1.5919405221939087
Epoch: 0, Train Loss: 1.0647703409194946
Epoch: 0, Train Loss: 1.5837396383285522
Epoch: 0, Train Loss: 1.4227994680404663
Epoch: 0, Train Loss: 1.4745672941207886
Epoch: 0, Train Loss: 1.3613274097442627
Epoch: 0, Train Loss: 1.0562328100204468
Epoch: 0, Train Loss: 1.3304598331451416
Epoch: 0, Train Loss: 1.9560415744781494
Epoch: 0, Train Loss: 1.5960718393325806
Epoch: 0, Train Loss: 1.3026984930038452
Epoch: 0, Train Loss: 1.6794570684432983
Epoch: 0, Train Loss: 1.0867336988449097
Epoch: 0, Train Loss: 1.3041826486587524
Epoch: 0, Train Loss: 1.6138986349105835
Epoch: 0, Train Loss: 1.6913455724716187
Epoch: 0, Train Lo

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5533429384231567
Epoch: 0, Train Loss: 1.2584418058395386
Epoch: 0, Train Loss: 1.202406406402588
Epoch: 0, Train Loss: 1.4852827787399292
Epoch: 0, Train Loss: 1.4046332836151123
Epoch: 0, Train Loss: 1.6058557033538818
Epoch: 0, Train Loss: 1.004340410232544
Epoch: 0, Train Loss: 1.7417447566986084
Epoch: 0, Train Loss: 1.2839127779006958
Epoch: 0, Train Loss: 1.0506951808929443
Epoch: 0, Train Loss: 1.255653977394104
Epoch: 0, Train Loss: 1.5623514652252197
Epoch: 0, Train Loss: 1.7429733276367188
Epoch: 0, Train Loss: 1.4659268856048584
Epoch: 0, Train Loss: 1.5526413917541504
Epoch: 0, Train Loss: 1.2843854427337646
Epoch: 0, Train Loss: 1.2824116945266724
Epoch: 0, Train Loss: 0.991380512714386
Epoch: 0, Train Loss: 1.3485175371170044
Epoch: 0, Train Loss: 1.0465465784072876
Epoch: 0, Train Loss: 1.5805643796920776
Epoch: 0, Train Loss: 1.3249139785766602
Epoch: 0, Train Loss: 1.6351033449172974
Epoch: 0, Train Loss: 1.3983136415481567
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3567415475845337
Epoch: 0, Train Loss: 1.2334622144699097
Epoch: 0, Train Loss: 1.4139254093170166
Epoch: 0, Train Loss: 1.2836940288543701
Epoch: 0, Train Loss: 1.1643856763839722
Epoch: 0, Train Loss: 1.0221279859542847
Epoch: 0, Train Loss: 1.3358906507492065
Epoch: 0, Train Loss: 1.3003339767456055
Epoch: 0, Train Loss: 1.526010274887085
Epoch: 0, Train Loss: 1.3874690532684326
Epoch: 0, Train Loss: 1.4693522453308105
Epoch: 0, Train Loss: 1.4989551305770874
Epoch: 0, Train Loss: 1.2837235927581787
Epoch: 0, Train Loss: 1.4489246606826782
Epoch: 0, Train Loss: 1.739385962486267
Epoch: 0, Train Loss: 1.449784278869629
Epoch: 0, Train Loss: 1.3175504207611084
Epoch: 0, Train Loss: 1.3405120372772217
Epoch: 0, Train Loss: 1.3744220733642578
Epoch: 0, Train Loss: 1.3551139831542969
Epoch: 0, Train Loss: 1.3243980407714844
Epoch: 0, Train Loss: 1.1643905639648438
Epoch: 0, Train Loss: 1.1703433990478516
Epoch: 0, Train Loss: 1.7093603610992432
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.1394144296646118
Epoch: 0, Train Loss: 0.906576931476593
Epoch: 0, Train Loss: 1.3605855703353882
Epoch: 0, Train Loss: 1.357376217842102
Epoch: 0, Train Loss: 1.185856819152832
Epoch: 0, Train Loss: 1.2785437107086182
Epoch: 0, Train Loss: 1.3555256128311157
Epoch: 0, Train Loss: 1.2006210088729858
Epoch: 0, Train Loss: 1.3935489654541016
Epoch: 0, Train Loss: 1.1317753791809082
Epoch: 0, Train Loss: 1.435086727142334
Epoch: 0, Train Loss: 1.6212191581726074
Epoch: 0, Train Loss: 1.3574764728546143
Epoch: 0, Train Loss: 1.767008662223816
Epoch: 0, Train Loss: 1.5762614011764526
Epoch: 0, Train Loss: 1.2769436836242676
Epoch: 0, Train Loss: 1.3478784561157227
Epoch: 0, Train Loss: 1.2859798669815063
Epoch: 0, Train Loss: 1.369516134262085
Epoch: 0, Train Loss: 1.4174132347106934
Epoch: 0, Train Loss: 1.45188570022583
Epoch: 0, Train Loss: 1.4858827590942383
Epoch: 0, Train Loss: 1.2650169134140015
Epoch: 0, Train Loss: 1.1729580163955688
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.0086301565170288
Epoch: 0, Train Loss: 1.5246610641479492
Epoch: 0, Train Loss: 1.1313457489013672
Epoch: 0, Train Loss: 1.1993416547775269
Epoch: 0, Train Loss: 1.533018946647644
Epoch: 0, Train Loss: 1.9463015794754028
Epoch: 0, Train Loss: 1.6797091960906982
Epoch: 0, Train Loss: 1.4767996072769165
Epoch: 0, Train Loss: 1.5760146379470825
Epoch: 0, Train Loss: 1.5572984218597412
Epoch: 0, Train Loss: 1.5688036680221558
Epoch: 0, Train Loss: 1.271573781967163
Epoch: 0, Train Loss: 1.03998601436615
Epoch: 0, Train Loss: 1.1501517295837402
Epoch: 0, Train Loss: 1.6412969827651978
Epoch: 0, Train Loss: 1.5169070959091187
Epoch: 0, Train Loss: 1.6422475576400757
Epoch: 0, Train Loss: 1.7077511548995972
Epoch: 0, Train Loss: 1.353614330291748
Epoch: 0, Train Loss: 1.135843276977539
Epoch: 0, Train Loss: 1.5403172969818115
Epoch: 0, Train Loss: 1.4328792095184326
Epoch: 0, Train Loss: 0.988106369972229
Epoch: 0, Train Loss: 1.5215474367141724
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5940330028533936
Epoch: 0, Train Loss: 1.619059681892395
Epoch: 0, Train Loss: 1.6515750885009766
Epoch: 0, Train Loss: 1.2349944114685059
Epoch: 0, Train Loss: 1.3333429098129272
Epoch: 0, Train Loss: 1.5639111995697021
Epoch: 0, Train Loss: 1.3527014255523682
Epoch: 0, Train Loss: 1.734848976135254
Epoch: 0, Train Loss: 1.2057348489761353
Epoch: 0, Train Loss: 1.589359164237976
Epoch: 0, Train Loss: 1.1511434316635132
Epoch: 0, Train Loss: 1.2809804677963257
Epoch: 0, Train Loss: 1.4106422662734985
Epoch: 0, Train Loss: 1.4544929265975952
Epoch: 0, Train Loss: 1.32957923412323
Epoch: 0, Train Loss: 1.3811198472976685
Epoch: 0, Train Loss: 1.425003170967102
Epoch: 0, Train Loss: 1.3952022790908813
Epoch: 0, Train Loss: 1.463660478591919
Epoch: 0, Train Loss: 1.3847049474716187
Epoch: 0, Train Loss: 1.1614634990692139
Epoch: 0, Train Loss: 1.6214789152145386
Epoch: 0, Train Loss: 1.6395448446273804
Epoch: 0, Train Loss: 1.5880874395370483
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.0849900245666504
Epoch: 0, Train Loss: 0.8763011693954468
Epoch: 0, Train Loss: 1.2028239965438843
Epoch: 0, Train Loss: 1.1467827558517456
Epoch: 0, Train Loss: 1.4570626020431519
Epoch: 0, Train Loss: 1.5898211002349854
Epoch: 0, Train Loss: 1.2301340103149414
Epoch: 0, Train Loss: 1.4028102159500122
Epoch: 0, Train Loss: 1.148584246635437
Epoch: 0, Train Loss: 1.4568581581115723
Epoch: 0, Train Loss: 1.2604070901870728
Epoch: 0, Train Loss: 1.0885145664215088
Epoch: 0, Train Loss: 1.5404422283172607
Epoch: 0, Train Loss: 1.424620270729065
Epoch: 0, Train Loss: 1.49752938747406
Epoch: 0, Train Loss: 1.1235265731811523
Epoch: 0, Train Loss: 1.3788368701934814
Epoch: 0, Train Loss: 1.5301625728607178
Epoch: 0, Train Loss: 1.4339916706085205
Epoch: 0, Train Loss: 1.297619342803955
Epoch: 0, Train Loss: 1.3072102069854736
Epoch: 0, Train Loss: 1.5558617115020752
Epoch: 0, Train Loss: 1.587670087814331
Epoch: 0, Train Loss: 1.441122055053711
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3983056545257568
Epoch: 0, Train Loss: 2.1659841537475586
Epoch: 0, Train Loss: 1.5485972166061401
Epoch: 0, Train Loss: 1.8188116550445557
Epoch: 0, Train Loss: 1.812262773513794
Epoch: 0, Train Loss: 1.637105941772461
Epoch: 0, Train Loss: 1.3352916240692139
Epoch: 0, Train Loss: 1.3356941938400269
Epoch: 0, Train Loss: 1.1390917301177979
Epoch: 0, Train Loss: 1.1526387929916382
Epoch: 0, Train Loss: 1.7102718353271484
Epoch: 0, Train Loss: 1.51190984249115
Epoch: 0, Train Loss: 1.4330543279647827
Epoch: 0, Train Loss: 1.2291724681854248
Epoch: 0, Train Loss: 1.7241977453231812
Epoch: 0, Train Loss: 1.422489881515503
Epoch: 0, Train Loss: 1.3228949308395386
Epoch: 0, Train Loss: 1.2585630416870117
Epoch: 0, Train Loss: 1.3679908514022827
Epoch: 0, Train Loss: 1.6454955339431763
Epoch: 0, Train Loss: 1.2677271366119385
Epoch: 0, Train Loss: 1.2442429065704346
Epoch: 0, Train Loss: 1.6946719884872437
Epoch: 0, Train Loss: 1.2688941955566406
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3145931959152222
Epoch: 0, Train Loss: 2.0177204608917236
Epoch: 0, Train Loss: 1.3496071100234985
Epoch: 0, Train Loss: 1.2296841144561768
Epoch: 0, Train Loss: 1.250868558883667
Epoch: 0, Train Loss: 1.2453922033309937
Epoch: 0, Train Loss: 1.6002694368362427
Epoch: 0, Train Loss: 1.5521124601364136
Epoch: 0, Train Loss: 1.0121183395385742
Epoch: 0, Train Loss: 1.2490369081497192
Epoch: 0, Train Loss: 1.2762084007263184
Epoch: 0, Train Loss: 1.4709047079086304
Epoch: 0, Train Loss: 1.2527661323547363
Epoch: 0, Train Loss: 1.3097591400146484
Epoch: 0, Train Loss: 1.5476425886154175
Epoch: 0, Train Loss: 1.7048470973968506
Epoch: 0, Train Loss: 1.2899417877197266
Epoch: 0, Train Loss: 1.25111722946167
Epoch: 0, Train Loss: 1.5008844137191772
Epoch: 0, Train Loss: 1.3472626209259033
Epoch: 0, Train Loss: 1.6110533475875854
Epoch: 0, Train Loss: 1.6584153175354004
Epoch: 0, Train Loss: 1.4909565448760986
Epoch: 0, Train Loss: 1.0552208423614502
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.436220645904541
Epoch: 0, Train Loss: 1.0160194635391235
Epoch: 0, Train Loss: 1.3802522420883179
Epoch: 0, Train Loss: 1.8584678173065186
Epoch: 0, Train Loss: 1.0777099132537842
Epoch: 0, Train Loss: 1.4041603803634644
Epoch: 0, Train Loss: 1.3352309465408325
Epoch: 0, Train Loss: 1.118159294128418
Epoch: 0, Train Loss: 1.6698708534240723
Epoch: 0, Train Loss: 1.589877724647522
Epoch: 0, Train Loss: 1.3878740072250366
Epoch: 0, Train Loss: 1.2804549932479858
Epoch: 0, Train Loss: 1.6011584997177124
Epoch: 0, Train Loss: 1.3565335273742676
Epoch: 0, Train Loss: 1.458691120147705
Epoch: 0, Train Loss: 1.2225795984268188
Epoch: 0, Train Loss: 1.3047407865524292
Epoch: 0, Train Loss: 1.3828234672546387
Epoch: 0, Train Loss: 1.3746789693832397
Epoch: 0, Train Loss: 1.3812819719314575
Epoch: 0, Train Loss: 1.6940096616744995
Epoch: 0, Train Loss: 1.2005308866500854
Epoch: 0, Train Loss: 1.3611942529678345
Epoch: 0, Train Loss: 1.3829721212387085
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2339575290679932
Epoch: 0, Train Loss: 0.987193763256073
Epoch: 0, Train Loss: 1.5755904912948608
Epoch: 0, Train Loss: 1.2594094276428223
Epoch: 0, Train Loss: 1.610600233078003
Epoch: 0, Train Loss: 1.4945178031921387
Epoch: 0, Train Loss: 1.2701761722564697
Epoch: 0, Train Loss: 1.3406267166137695
Epoch: 0, Train Loss: 1.305549144744873
Epoch: 0, Train Loss: 0.9785422086715698
Epoch: 0, Train Loss: 1.36234450340271
Epoch: 0, Train Loss: 1.4101794958114624
Epoch: 0, Train Loss: 1.3743420839309692
Epoch: 0, Train Loss: 1.2837634086608887
Epoch: 0, Train Loss: 1.0660009384155273
Epoch: 0, Train Loss: 1.3867591619491577
Epoch: 0, Train Loss: 1.0322964191436768
Epoch: 0, Train Loss: 1.3454852104187012
Epoch: 0, Train Loss: 1.1809982061386108
Epoch: 0, Train Loss: 1.4342443943023682
Epoch: 0, Train Loss: 1.2443716526031494
Epoch: 0, Train Loss: 1.64261794090271
Epoch: 0, Train Loss: 1.4163790941238403
Epoch: 0, Train Loss: 1.4880470037460327
Epoch: 0, Train Loss: 0

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5695602893829346
Epoch: 0, Train Loss: 1.3829724788665771
Epoch: 0, Train Loss: 1.149910569190979
Epoch: 0, Train Loss: 1.2248438596725464
Epoch: 0, Train Loss: 1.1600233316421509
Epoch: 0, Train Loss: 1.5422661304473877
Epoch: 0, Train Loss: 1.5587788820266724
Epoch: 0, Train Loss: 1.3756436109542847
Epoch: 0, Train Loss: 1.2824296951293945
Epoch: 0, Train Loss: 1.3881800174713135
Epoch: 0, Train Loss: 1.5189392566680908
Epoch: 0, Train Loss: 1.466870665550232
Epoch: 0, Train Loss: 1.895015001296997
Epoch: 0, Train Loss: 1.6614619493484497
Epoch: 0, Train Loss: 1.296480655670166
Epoch: 0, Train Loss: 1.43414306640625
Epoch: 0, Train Loss: 1.2771116495132446
Epoch: 0, Train Loss: 1.9237443208694458
Epoch: 0, Train Loss: 1.1424421072006226
Epoch: 0, Train Loss: 1.3164995908737183
Epoch: 0, Train Loss: 1.3417367935180664
Epoch: 0, Train Loss: 1.7803422212600708
Epoch: 0, Train Loss: 1.430230736732483
Epoch: 0, Train Loss: 1.5289692878723145
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3765172958374023
Epoch: 0, Train Loss: 1.4011129140853882
Epoch: 0, Train Loss: 1.0742559432983398
Epoch: 0, Train Loss: 1.353996992111206
Epoch: 0, Train Loss: 1.1943351030349731
Epoch: 0, Train Loss: 1.3943647146224976
Epoch: 0, Train Loss: 1.1524168252944946
Epoch: 0, Train Loss: 1.8823983669281006
Epoch: 0, Train Loss: 1.7008754014968872
Epoch: 0, Train Loss: 1.5058753490447998
Epoch: 0, Train Loss: 1.4811854362487793
Epoch: 0, Train Loss: 1.5151417255401611
Epoch: 0, Train Loss: 1.0732113122940063
Epoch: 0, Train Loss: 1.3744186162948608
Epoch: 0, Train Loss: 1.2918239831924438
Epoch: 0, Train Loss: 1.4262564182281494
Epoch: 0, Train Loss: 1.4085588455200195
Epoch: 0, Train Loss: 1.22769296169281
Epoch: 0, Train Loss: 1.5959084033966064
Epoch: 0, Train Loss: 1.5758246183395386
Epoch: 0, Train Loss: 1.3262914419174194
Epoch: 0, Train Loss: 1.0452136993408203
Epoch: 0, Train Loss: 1.3552714586257935
Epoch: 0, Train Loss: 1.5644571781158447
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6200064420700073
Epoch: 0, Train Loss: 0.9611678123474121
Epoch: 0, Train Loss: 1.3071565628051758
Epoch: 0, Train Loss: 1.2777223587036133
Epoch: 0, Train Loss: 1.953023076057434
Epoch: 0, Train Loss: 1.1140553951263428
Epoch: 0, Train Loss: 1.6252615451812744
Epoch: 0, Train Loss: 1.5954123735427856
Epoch: 0, Train Loss: 1.125011920928955
Epoch: 0, Train Loss: 1.2644861936569214
Epoch: 0, Train Loss: 1.2984834909439087
Epoch: 0, Train Loss: 1.602837085723877
Epoch: 0, Train Loss: 1.237850546836853
Epoch: 0, Train Loss: 1.3352831602096558
Epoch: 0, Train Loss: 1.0871471166610718
Epoch: 0, Train Loss: 1.3285449743270874
Epoch: 0, Train Loss: 1.2543262243270874
Epoch: 0, Train Loss: 1.3683855533599854
Epoch: 0, Train Loss: 1.5582375526428223
Epoch: 0, Train Loss: 1.313197135925293
Epoch: 0, Train Loss: 1.5709058046340942
Epoch: 0, Train Loss: 1.486190915107727
Epoch: 0, Train Loss: 1.3852227926254272
Epoch: 0, Train Loss: 1.4832781553268433
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6195732355117798
Epoch: 0, Train Loss: 1.1254329681396484
Epoch: 0, Train Loss: 1.2276853322982788
Epoch: 0, Train Loss: 0.9552250504493713
Epoch: 0, Train Loss: 1.3225762844085693
Epoch: 0, Train Loss: 1.487770676612854
Epoch: 0, Train Loss: 1.325075387954712
Epoch: 0, Train Loss: 1.3227624893188477
Epoch: 0, Train Loss: 1.3254224061965942
Epoch: 0, Train Loss: 1.8093681335449219
Epoch: 0, Train Loss: 1.5467185974121094
Epoch: 0, Train Loss: 1.21535325050354
Epoch: 0, Train Loss: 1.2260273694992065
Epoch: 0, Train Loss: 1.2392315864562988
Epoch: 0, Train Loss: 1.6028380393981934
Epoch: 0, Train Loss: 1.5561572313308716
Epoch: 0, Train Loss: 1.1213563680648804
Epoch: 0, Train Loss: 1.8605937957763672
Epoch: 0, Train Loss: 1.7131658792495728
Epoch: 0, Train Loss: 1.9557620286941528
Epoch: 0, Train Loss: 1.1468632221221924
Epoch: 0, Train Loss: 0.8703926205635071
Epoch: 0, Train Loss: 1.3354977369308472
Epoch: 0, Train Loss: 1.0824328660964966
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6618552207946777
Epoch: 0, Train Loss: 1.5800832509994507
Epoch: 0, Train Loss: 1.724165916442871
Epoch: 0, Train Loss: 1.0423072576522827
Epoch: 0, Train Loss: 1.1812585592269897
Epoch: 0, Train Loss: 2.0647969245910645
Epoch: 0, Train Loss: 1.3420252799987793
Epoch: 0, Train Loss: 1.5228291749954224
Epoch: 0, Train Loss: 1.5928841829299927
Epoch: 0, Train Loss: 1.7557622194290161
Epoch: 0, Train Loss: 1.344295859336853
Epoch: 0, Train Loss: 1.217836618423462
Epoch: 0, Train Loss: 1.2837436199188232
Epoch: 0, Train Loss: 1.4318714141845703
Epoch: 0, Train Loss: 1.2912436723709106
Epoch: 0, Train Loss: 1.7538172006607056
Epoch: 0, Train Loss: 1.2437925338745117
Epoch: 0, Train Loss: 1.1199687719345093
Epoch: 0, Train Loss: 1.312487244606018
Epoch: 0, Train Loss: 1.1793698072433472
Epoch: 0, Train Loss: 1.1082360744476318
Epoch: 0, Train Loss: 1.343895673751831
Epoch: 0, Train Loss: 1.225325107574463
Epoch: 0, Train Loss: 1.3962219953536987
Epoch: 0, Train Loss: 

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5975345373153687
Epoch: 0, Train Loss: 1.1871243715286255
Epoch: 0, Train Loss: 1.5720398426055908
Epoch: 0, Train Loss: 1.1180179119110107
Epoch: 0, Train Loss: 1.6769872903823853
Epoch: 0, Train Loss: 1.448983907699585
Epoch: 0, Train Loss: 1.288630723953247
Epoch: 0, Train Loss: 1.3443622589111328
Epoch: 0, Train Loss: 1.2879798412322998
Epoch: 0, Train Loss: 1.5464500188827515
Epoch: 0, Train Loss: 1.5583093166351318
Epoch: 0, Train Loss: 1.6227610111236572
Epoch: 0, Train Loss: 1.7182382345199585
Epoch: 0, Train Loss: 1.4414833784103394
Epoch: 0, Train Loss: 1.474959135055542
Epoch: 0, Train Loss: 1.0671162605285645
Epoch: 0, Train Loss: 1.4837536811828613
Epoch: 0, Train Loss: 1.7129288911819458
Epoch: 0, Train Loss: 1.647890567779541
Epoch: 0, Train Loss: 1.3871456384658813
Epoch: 0, Train Loss: 0.9067357182502747
Epoch: 0, Train Loss: 0.910731315612793
Epoch: 0, Train Loss: 1.3302901983261108
Epoch: 0, Train Loss: 1.7948797941207886
Epoch: 0, Train Loss:

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.353047251701355
Epoch: 0, Train Loss: 0.7830298542976379
Epoch: 0, Train Loss: 1.2053769826889038
Epoch: 0, Train Loss: 1.604975700378418
Epoch: 0, Train Loss: 1.3313937187194824
Epoch: 0, Train Loss: 1.6506096124649048
Epoch: 0, Train Loss: 1.550812840461731
Epoch: 0, Train Loss: 1.501070499420166
Epoch: 0, Train Loss: 1.1132268905639648
Epoch: 0, Train Loss: 1.439327597618103
Epoch: 0, Train Loss: 1.314255714416504
Epoch: 0, Train Loss: 1.2736830711364746
Epoch: 0, Train Loss: 1.7274516820907593
Epoch: 0, Train Loss: 1.5751757621765137
Epoch: 0, Train Loss: 0.898536741733551
Epoch: 0, Train Loss: 1.4324686527252197
Epoch: 0, Train Loss: 1.386579990386963
Epoch: 0, Train Loss: 1.3981014490127563
Epoch: 0, Train Loss: 1.3685076236724854
Epoch: 0, Train Loss: 1.2599859237670898
Epoch: 0, Train Loss: 1.2994425296783447
Epoch: 0, Train Loss: 1.3720840215682983
Epoch: 0, Train Loss: 1.1153463125228882
Epoch: 0, Train Loss: 1.3697608709335327
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2472254037857056
Epoch: 0, Train Loss: 1.4925875663757324
Epoch: 0, Train Loss: 1.637154459953308
Epoch: 0, Train Loss: 1.2699965238571167
Epoch: 0, Train Loss: 1.2104783058166504
Epoch: 0, Train Loss: 1.7828905582427979
Epoch: 0, Train Loss: 1.4772074222564697
Epoch: 0, Train Loss: 1.330186367034912
Epoch: 0, Train Loss: 1.293658971786499
Epoch: 0, Train Loss: 1.347090721130371
Epoch: 0, Train Loss: 1.4996588230133057
Epoch: 0, Train Loss: 1.140373706817627
Epoch: 0, Train Loss: 1.228011965751648
Epoch: 0, Train Loss: 1.0960294008255005
Epoch: 0, Train Loss: 1.201366901397705
Epoch: 0, Train Loss: 1.6693475246429443
Epoch: 0, Train Loss: 1.8677287101745605
Epoch: 0, Train Loss: 1.4028722047805786
Epoch: 0, Train Loss: 2.3123250007629395
Epoch: 0, Train Loss: 1.5603617429733276
Epoch: 0, Train Loss: 1.1883081197738647
Epoch: 0, Train Loss: 1.565065622329712
Epoch: 0, Train Loss: 1.6697161197662354
Epoch: 0, Train Loss: 1.4681437015533447
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5004243850708008
Epoch: 0, Train Loss: 1.0416829586029053
Epoch: 0, Train Loss: 1.6947519779205322
Epoch: 0, Train Loss: 1.1967628002166748
Epoch: 0, Train Loss: 1.2555969953536987
Epoch: 0, Train Loss: 1.514685869216919
Epoch: 0, Train Loss: 1.540687084197998
Epoch: 0, Train Loss: 1.3660255670547485
Epoch: 0, Train Loss: 1.008696436882019
Epoch: 0, Train Loss: 1.7420142889022827
Epoch: 0, Train Loss: 1.5371155738830566
Epoch: 0, Train Loss: 0.8823094367980957
Epoch: 0, Train Loss: 1.5860856771469116
Epoch: 0, Train Loss: 1.7654789686203003
Epoch: 0, Train Loss: 1.7152576446533203
Epoch: 0, Train Loss: 1.3340123891830444
Epoch: 0, Train Loss: 1.4631775617599487
Epoch: 0, Train Loss: 1.1673953533172607
Epoch: 0, Train Loss: 1.4204270839691162
Epoch: 0, Train Loss: 1.1286256313323975
Epoch: 0, Train Loss: 1.2675039768218994
Epoch: 0, Train Loss: 1.3230911493301392
Epoch: 0, Train Loss: 1.4688419103622437
Epoch: 0, Train Loss: 1.0902711153030396
Epoch: 0, Train Los

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4514946937561035
Epoch: 0, Train Loss: 1.143685221672058
Epoch: 0, Train Loss: 1.134515643119812
Epoch: 0, Train Loss: 1.5164570808410645
Epoch: 0, Train Loss: 1.5354777574539185
Epoch: 0, Train Loss: 1.6199349164962769
Epoch: 0, Train Loss: 1.38800048828125
Epoch: 0, Train Loss: 1.1037334203720093
Epoch: 0, Train Loss: 1.1021615266799927
Epoch: 0, Train Loss: 1.0958856344223022
Epoch: 0, Train Loss: 1.3621100187301636
Epoch: 0, Train Loss: 1.309319019317627
Epoch: 0, Train Loss: 1.7729617357254028
Epoch: 0, Train Loss: 1.2665055990219116
Epoch: 0, Train Loss: 1.7221630811691284
Epoch: 0, Train Loss: 1.5264315605163574
Epoch: 0, Train Loss: 1.4188846349716187
Epoch: 0, Train Loss: 1.3051317930221558
Epoch: 0, Train Loss: 1.39247465133667
Epoch: 0, Train Loss: 1.282020092010498
Epoch: 0, Train Loss: 1.3069480657577515
Epoch: 0, Train Loss: 1.56632661819458
Epoch: 0, Train Loss: 1.4628841876983643
Epoch: 0, Train Loss: 1.410211443901062
Epoch: 0, Train Loss: 1.559

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.4854439496994019
Epoch: 0, Train Loss: 1.0757861137390137
Epoch: 0, Train Loss: 1.020034670829773
Epoch: 0, Train Loss: 1.6971409320831299
Epoch: 0, Train Loss: 1.7082302570343018
Epoch: 0, Train Loss: 1.210082769393921
Epoch: 0, Train Loss: 1.3445206880569458
Epoch: 0, Train Loss: 1.24459707736969
Epoch: 0, Train Loss: 1.593929409980774
Epoch: 0, Train Loss: 1.2682411670684814
Epoch: 0, Train Loss: 1.4848577976226807
Epoch: 0, Train Loss: 1.2869001626968384
Epoch: 0, Train Loss: 1.4820853471755981
Epoch: 0, Train Loss: 1.1786304712295532
Epoch: 0, Train Loss: 2.078512191772461
Epoch: 0, Train Loss: 1.4865022897720337
Epoch: 0, Train Loss: 1.6764674186706543
Epoch: 0, Train Loss: 1.2428239583969116
Epoch: 0, Train Loss: 1.491760015487671
Epoch: 0, Train Loss: 1.2471349239349365
Epoch: 0, Train Loss: 1.5794535875320435
Epoch: 0, Train Loss: 1.3466382026672363
Epoch: 0, Train Loss: 1.2271432876586914
Epoch: 0, Train Loss: 1.3563883304595947
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.6639821529388428
Epoch: 0, Train Loss: 1.8896623849868774
Epoch: 0, Train Loss: 1.1585910320281982
Epoch: 0, Train Loss: 1.3583327531814575
Epoch: 0, Train Loss: 1.4060161113739014
Epoch: 0, Train Loss: 1.3789396286010742
Epoch: 0, Train Loss: 1.6687204837799072
Epoch: 0, Train Loss: 1.616806983947754
Epoch: 0, Train Loss: 1.0266385078430176
Epoch: 0, Train Loss: 1.447513222694397
Epoch: 0, Train Loss: 1.267339825630188
Epoch: 0, Train Loss: 1.7310047149658203
Epoch: 0, Train Loss: 1.4040745496749878
Epoch: 0, Train Loss: 1.3822797536849976
Epoch: 0, Train Loss: 1.3661370277404785
Epoch: 0, Train Loss: 1.5848753452301025
Epoch: 0, Train Loss: 1.4014428853988647
Epoch: 0, Train Loss: 1.3610622882843018
Epoch: 0, Train Loss: 1.539934515953064
Epoch: 0, Train Loss: 1.6268069744110107
Epoch: 0, Train Loss: 1.1223682165145874
Epoch: 0, Train Loss: 1.4155669212341309
Epoch: 0, Train Loss: 1.3382817506790161
Epoch: 0, Train Loss: 1.3808608055114746
Epoch: 0, Train Loss

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.2095160484313965
Epoch: 0, Train Loss: 1.6495356559753418
Epoch: 0, Train Loss: 1.3956495523452759
Epoch: 0, Train Loss: 1.4962838888168335
Epoch: 0, Train Loss: 1.4771705865859985
Epoch: 0, Train Loss: 1.4623711109161377
Epoch: 0, Train Loss: 1.4187688827514648
Epoch: 0, Train Loss: 1.48908269405365
Epoch: 0, Train Loss: 1.6158572435379028
Epoch: 0, Train Loss: 1.456550121307373
Epoch: 0, Train Loss: 1.5553171634674072
Epoch: 0, Train Loss: 1.5471913814544678
Epoch: 0, Train Loss: 1.2442331314086914
Epoch: 0, Train Loss: 1.3974008560180664
Epoch: 0, Train Loss: 1.857560157775879
Epoch: 0, Train Loss: 1.5629799365997314
Epoch: 0, Train Loss: 1.6334829330444336
Epoch: 0, Train Loss: 1.3902857303619385
Epoch: 0, Train Loss: 1.3692848682403564
Epoch: 0, Train Loss: 1.578193187713623
Epoch: 0, Train Loss: 1.374967336654663
Epoch: 0, Train Loss: 1.5484079122543335
Epoch: 0, Train Loss: 1.106180191040039
Epoch: 0, Train Loss: 1.1327730417251587
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.234067678451538
Epoch: 0, Train Loss: 1.1844699382781982
Epoch: 0, Train Loss: 1.717635154724121
Epoch: 0, Train Loss: 1.2681641578674316
Epoch: 0, Train Loss: 0.9802936911582947
Epoch: 0, Train Loss: 1.8775430917739868
Epoch: 0, Train Loss: 1.4716373682022095
Epoch: 0, Train Loss: 1.402819275856018
Epoch: 0, Train Loss: 1.2819890975952148
Epoch: 0, Train Loss: 1.7709087133407593
Epoch: 0, Train Loss: 1.1044780015945435
Epoch: 0, Train Loss: 1.0938206911087036
Epoch: 0, Train Loss: 1.5148166418075562
Epoch: 0, Train Loss: 1.4876313209533691
Epoch: 0, Train Loss: 1.7187201976776123
Epoch: 0, Train Loss: 1.702866792678833
Epoch: 0, Train Loss: 1.322001338005066
Epoch: 0, Train Loss: 1.2982659339904785
Epoch: 0, Train Loss: 1.1159979104995728
Epoch: 0, Train Loss: 1.0411192178726196
Epoch: 0, Train Loss: 1.9179885387420654
Epoch: 0, Train Loss: 1.6182664632797241
Epoch: 0, Train Loss: 1.6819745302200317
Epoch: 0, Train Loss: 1.53585684299469
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.3828623294830322
Epoch: 0, Train Loss: 1.0745517015457153
Epoch: 0, Train Loss: 1.3565053939819336
Epoch: 0, Train Loss: 1.640404462814331
Epoch: 0, Train Loss: 1.1485131978988647
Epoch: 0, Train Loss: 1.298769235610962
Epoch: 0, Train Loss: 1.1778862476348877
Epoch: 0, Train Loss: 1.3506921529769897
Epoch: 0, Train Loss: 1.3350974321365356
Epoch: 0, Train Loss: 1.585748314857483
Epoch: 0, Train Loss: 1.6063846349716187
Epoch: 0, Train Loss: 1.8712791204452515
Epoch: 0, Train Loss: 1.5939632654190063
Epoch: 0, Train Loss: 1.6164805889129639
Epoch: 0, Train Loss: 1.19608473777771
Epoch: 0, Train Loss: 1.6490812301635742
Epoch: 0, Train Loss: 1.5959904193878174
Epoch: 0, Train Loss: 1.595412015914917
Epoch: 0, Train Loss: 1.367844820022583
Epoch: 0, Train Loss: 1.2893067598342896
Epoch: 0, Train Loss: 1.3699766397476196
Epoch: 0, Train Loss: 1.3697774410247803
Epoch: 0, Train Loss: 1.6754372119903564
Epoch: 0, Train Loss: 1.549013376235962
Epoch: 0, Train Loss: 1.

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.7826588153839111
Epoch: 0, Train Loss: 1.024047613143921
Epoch: 0, Train Loss: 1.5369915962219238
Epoch: 0, Train Loss: 1.287501573562622
Epoch: 0, Train Loss: 1.1502270698547363
Epoch: 0, Train Loss: 1.3752952814102173
Epoch: 0, Train Loss: 1.742854118347168
Epoch: 0, Train Loss: 1.2368805408477783
Epoch: 0, Train Loss: 1.4211987257003784
Epoch: 0, Train Loss: 1.6494190692901611
Epoch: 0, Train Loss: 1.614782691001892
Epoch: 0, Train Loss: 1.3348054885864258
Epoch: 0, Train Loss: 1.2465412616729736
Epoch: 0, Train Loss: 1.6766160726547241
Epoch: 0, Train Loss: 1.353066325187683
Epoch: 0, Train Loss: 2.044750690460205
Epoch: 0, Train Loss: 1.3343712091445923
Epoch: 0, Train Loss: 1.3607419729232788
Epoch: 0, Train Loss: 1.8236569166183472
Epoch: 0, Train Loss: 1.1932406425476074
Epoch: 0, Train Loss: 1.475119709968567
Epoch: 0, Train Loss: 1.1044996976852417
Epoch: 0, Train Loss: 1.6463422775268555
Epoch: 0, Train Loss: 1.2726815938949585
Epoch: 0, Train Loss: 1

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.5177628993988037
Epoch: 0, Train Loss: 1.4520583152770996
Epoch: 0, Train Loss: 1.0733637809753418
Epoch: 0, Train Loss: 1.133051872253418
Epoch: 0, Train Loss: 1.5532888174057007
Epoch: 0, Train Loss: 1.7439603805541992
Epoch: 0, Train Loss: 1.336763620376587
Epoch: 0, Train Loss: 1.32039475440979
Epoch: 0, Train Loss: 1.4925915002822876
Epoch: 0, Train Loss: 1.4396336078643799
Epoch: 0, Train Loss: 1.557090401649475
Epoch: 0, Train Loss: 1.135324239730835
Epoch: 0, Train Loss: 1.306374430656433
Epoch: 0, Train Loss: 1.62260901927948
Epoch: 0, Train Loss: 1.4811931848526
Epoch: 0, Train Loss: 1.5952240228652954
Epoch: 0, Train Loss: 1.0396860837936401
Epoch: 0, Train Loss: 1.575697898864746
Epoch: 0, Train Loss: 1.353003978729248
Epoch: 0, Train Loss: 1.407646894454956
Epoch: 0, Train Loss: 1.4906340837478638
Epoch: 0, Train Loss: 1.320704698562622
Epoch: 0, Train Loss: 1.7726103067398071
Epoch: 0, Train Loss: 1.4980875253677368
Epoch: 0, Train Loss: 0.94101071

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.0037522315979004
Epoch: 0, Train Loss: 1.1667513847351074
Epoch: 0, Train Loss: 1.2893879413604736
Epoch: 0, Train Loss: 1.4405922889709473
Epoch: 0, Train Loss: 1.6974766254425049
Epoch: 0, Train Loss: 1.3640657663345337
Epoch: 0, Train Loss: 1.4603956937789917
Epoch: 0, Train Loss: 1.4032021760940552
Epoch: 0, Train Loss: 1.5702691078186035
Epoch: 0, Train Loss: 1.6248087882995605
Epoch: 0, Train Loss: 1.945824384689331
Epoch: 0, Train Loss: 1.2825100421905518
Epoch: 0, Train Loss: 1.4793452024459839
Epoch: 0, Train Loss: 1.3476347923278809
Epoch: 0, Train Loss: 1.3323384523391724
Epoch: 0, Train Loss: 1.105773687362671
Epoch: 0, Train Loss: 1.2185465097427368
Epoch: 0, Train Loss: 1.0557578802108765
Epoch: 0, Train Loss: 1.6629867553710938
Epoch: 0, Train Loss: 1.6658215522766113
Epoch: 0, Train Loss: 1.3676106929779053
Epoch: 0, Train Loss: 1.4506596326828003
Epoch: 0, Train Loss: 1.5443657636642456
Epoch: 0, Train Loss: 1.4844647645950317
Epoch: 0, Train Lo

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.


Epoch: 0, Train Loss: 1.445310115814209
Epoch: 0, Train Loss: 1.3765790462493896
Epoch: 0, Train Loss: 1.6581090688705444
Epoch: 0, Train Loss: 1.1082159280776978
Epoch: 0, Train Loss: 1.34280264377594
Epoch: 0, Train Loss: 1.514636754989624
Epoch: 0, Train Loss: 1.382891058921814
Epoch: 0, Train Loss: 1.0917034149169922
Epoch: 0, Train Loss: 1.5560190677642822
Epoch: 0, Train Loss: 1.4129784107208252
Epoch: 0, Train Loss: 1.2490909099578857
Epoch: 0, Train Loss: 1.5315498113632202
Epoch: 0, Train Loss: 1.242071509361267
Epoch: 0, Train Loss: 1.1823973655700684
Epoch: 0, Train Loss: 1.401228427886963
Epoch: 0, Train Loss: 1.656576156616211
Epoch: 0, Train Loss: 1.3525364398956299
Epoch: 0, Train Loss: 1.492348074913025
Epoch: 0, Train Loss: 1.4186252355575562
Epoch: 0, Train Loss: 1.7778105735778809
Epoch: 0, Train Loss: 1.2360248565673828
Epoch: 0, Train Loss: 1.5811011791229248
Epoch: 0, Train Loss: 1.2510806322097778
Epoch: 0, Train Loss: 1.235640048980713
Epoch: 0, Train Loss: 1.30

KeyboardInterrupt: 

### 850000件学習済み

In [None]:
# Authenticate this session with the Hugging Face Hub via the CLI's interactive
# token prompt. The model upload that follows needs a token with write access.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# Upload the trained model to the Hugging Face Hub under this repository id.
# The call is the cell's last expression, so the returned commit info is displayed.
model.push_to_hub(
    repo_id="SHONOSUKE/Addtional_Trained_BERT_For_Legal_Domain_v1",
)

model.safetensors:   0%|          | 0.00/443M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/SHONOSUKE/Addtional_Trained_BERT_For_Legal_Domain_v1/commit/7219a4575f688c4d771699b138ed8b5e969631b6', commit_message='Upload BertForMaskedLM', commit_description='', oid='7219a4575f688c4d771699b138ed8b5e969631b6', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Keep a copy of the trained model on the mounted Google Drive.
# NOTE(review): only the model is written here; the tokenizer is not saved by this cell.
model.save_pretrained(
    save_directory="/content/drive/MyDrive/Addtional_Trained_BERT_For_Legal_Domain_v1",
)

In [None]:
# Persist the dataset used for this run next to the saved model.
# ensure_ascii=False writes non-ASCII (e.g. Japanese) text as-is instead of
# \uXXXX escapes, which keeps the file smaller and human-readable; the explicit
# UTF-8 encoding makes the output independent of the platform's default encoding.
# The JSON content round-trips to the same Python objects as before.
with open(
    "/content/drive/MyDrive/Addtional_Trained_BERT_For_Legal_Domain_v1/dataset.json",
    "w",
    encoding="utf-8",
) as f:
    json.dump(data_preprocessor.dataset, f, ensure_ascii=False)


In [None]:
# Attach the fine-tuned weights to the active W&B run.
# wandb.save() expects a local file path or glob. The previous argument was the
# Hub repository id with ".safetensors" appended, which matches no local file,
# so the call returned an empty list and nothing was uploaded.
# Assumes save_pretrained() wrote its default single-file safetensors output
# (model.safetensors) into the Drive directory used above -- TODO confirm.
# base_path keeps the model folder name in the run's file listing.
wandb.save(
    "/content/drive/MyDrive/Addtional_Trained_BERT_For_Legal_Domain_v1/model.safetensors",
    base_path="/content/drive/MyDrive",
)

[]

In [None]:
# Close the active W&B run; this is where remaining uploads are flushed and the
# run history/summary for train_loss is reported.
wandb.finish()

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,▅▆▂█▄▄█▄▃▄▃▂▆▅▃▃▄▄▄▇▃▃▄█▇▇▂▄▁▇▅▆▆▄▃▄▃▃▃▃

0,1
train_loss,1.06017


AttributeError: 'BERTDataset' object has no attribute 'tensors'