## Function

In [1]:
import os
import sys
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


In [2]:
import datasets
import random
from contextlib import nullcontext
import numpy as np
import pandas as pd
from sklearn.metrics import average_precision_score
from torch import nn
from transformers import default_data_collator, Trainer, TrainingArguments

import itertools
from tqdm.auto import tqdm

import torch


sys.path.append("../src")

from utils import number_split, create_mix
from process_SHAC import load_process_SHAC

In [3]:

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType,
    prepare_model_for_int8_training,
)


In [4]:

##### Dataset Loader and Tokenizer
def preprocess_function(examples):
    # tokenize
    ret = tokenizer(examples['text'], return_tensors='pt', max_length=globalconfig.max_seq_length, padding='max_length', truncation=True).to(globalconfig.device)

    return  ret

def datasets_loader(df, txt_col):
    # from pandas df to Dataset & tokenize
    ret_datasets = datasets.Dataset.from_pandas(df[[txt_col,"label"]].reset_index(drop=True))
    ret_tokenized = ret_datasets.map(preprocess_function, batched=True)

    return ret_tokenized

def create_peft_config(model):

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=8,
        bias="none",
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules = ["query", "value"],
        modules_to_save=["classifier"],
    )

    # prepare int-8 model for training
    if globalconfig.quantization:
        model = prepare_model_for_int8_training(model)
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
    return model, peft_config

## Define metric
def compute_metrics_twoLevels(eval_pred):
    # compute AUPRC, based on only two levels of Y
    predictions, labels = eval_pred
    probabilities = nn.functional.softmax(torch.FloatTensor(predictions), dim=-1)[:,1]

    auprc = average_precision_score(y_true=labels, y_score=probabilities)

    return {"auprc":auprc}


# Load Data

In [5]:
df_shac = load_process_SHAC(replaceNA="all")

In [6]:
z_Categories = ["uw", "mimic"]  # the order here matters! Should match with df0, df1
label='Drug'
n_zCats = len(z_Categories)
txt_col="text"
domain_col = "location"

y_cat = [0, 1]

In [7]:
# Create binary version of "label"
assert "label" not in df_shac.columns

In [8]:
df_shac['label'] = df_shac[label].astype(int)

In [9]:
df_shac_uw = df_shac.query("location == 'uw'").reset_index(drop=True)
df_shac_mimic = df_shac.query("location == 'mimic'").reset_index(drop=True)


In [10]:
df0 = df_shac_uw
df1 = df_shac_mimic


In [11]:



label2id = {y:idx for idx,y in zip(range(len(y_cat)), y_cat)}
id2label = {idx:y for idx,y in zip(range(len(y_cat)), y_cat)}


In [12]:
label2id

{0: 0, 1: 1}

# Split

In [13]:
# ##### Split
# # SHAC-Drug - Balanced Alpha
# n_test = 200
# train_test_ratio = 4


# p_pos_train_z0_ls = np.arange(0, 1, 0.1) # probability of training set examples drawn from site/domain z0 being positive
# p_pos_train_z1_ls = np.arange(0, 1, 0.1) # probability of test set examples drawn from site/domain z1 being positive

# p_mix_z1_ls     = np.arange(0, 1, 0.05) 

# numvals = 1023
# base = 1.1


# alpha_test_ls = np.power(base, np.arange(numvals))/np.power(base,numvals//2)

# valid_full_settings = []
# for combination in itertools.product(p_pos_train_z0_ls, 
#                                      p_pos_train_z1_ls, 
#                                      p_mix_z1_ls,
#                                      alpha_test_ls
#                                     ):
    

#     number_setting = number_split(p_pos_train_z0=combination[0], 
#                            p_pos_train_z1 = combination[1], 
#                            p_mix_z1 = combination[2], alpha_test = combination[3],
#                            train_test_ratio = train_test_ratio, 
#                            n_test=n_test,
#                                   verbose=False
#                                  )

#     if (number_setting is not None):
#         if np.all([number_setting[k] >= 10 for k in list(number_setting.keys())[:-1]]):
#             valid_full_settings.append(number_setting)
    
    
    
    
# # run for check valid settings

# import warnings; warnings.simplefilter('ignore')

# # Validate settings

# df0 = df_shac_uw
# df1 = df_shac_mimic


# valid_n_full_settings = []

# for c in tqdm(valid_full_settings):
#     c = c.copy()
#     # create train/test split according to stats
#     dfs = create_mix(df0=df0, df1=df1, target=label, setting=c, sample=False, 
#                      seed=222
#                     )

#     if dfs is None:
#         continue
    
#     valid_n_full_settings.append(c)


In [14]:
##### Split
# SHAC-Drug - Balanced Alpha
## Only selecting C_y in [0.2, 0.48, 0.72]
n_test = 200
train_test_ratio = 4


p_pos_train_z0_ls = np.arange(0, 1, 0.1) # probability of training set examples drawn from site/domain z0 being positive
p_pos_train_z1_ls = np.arange(0, 1, 0.1) # probability of test set examples drawn from site/domain z1 being positive

p_mix_z1_ls     = np.arange(0, 1, 0.05) 

numvals = 129
base = 1.01

alpha_test_ls = np.power(base, np.arange(numvals))/np.power(base,numvals//2)

valid_full_settings = []
for combination in itertools.product(p_pos_train_z0_ls, 
                                     p_pos_train_z1_ls, 
                                     p_mix_z1_ls,
                                     alpha_test_ls
                                    ):
    

    number_setting = number_split(p_pos_train_z0=combination[0], 
                           p_pos_train_z1 = combination[1], 
                           p_mix_z1 = combination[2], alpha_test = combination[3],
                           train_test_ratio = train_test_ratio, 
                           n_test=n_test,
                                  verbose=False
                                 )

    if (number_setting is not None) and (number_setting['mix_param_dict']['C_y'] in [0.2, 0.48, 0.72]) and (number_setting['mix_param_dict']['alpha_train'] in [1., 3, 5, 1/3, 0.2]):
        if np.all([number_setting[k] >= 10 for k in list(number_setting.keys())[:-1]]):
            valid_full_settings.append(number_setting)
    
    
    
    
# run for check valid settings

import warnings; warnings.simplefilter('ignore')

# Validate settings

df0 = df_shac_uw
df1 = df_shac_mimic


valid_n_full_settings = []

for c in tqdm(valid_full_settings):
    c = c.copy()
    # create train/test split according to stats
    dfs = create_mix(df0=df0, df1=df1, target=label, setting=c, sample=False, 
                     seed=222
                    )

    if dfs is None:
        continue
    
    valid_n_full_settings.append(c)


  alpha_train = p_pos_train_z1 / p_pos_train_z0
  alpha_train = p_pos_train_z1 / p_pos_train_z0


  0%|          | 0/1428 [00:00<?, ?it/s]

In [15]:
len(valid_n_full_settings)

1428

In [16]:
tmp_df = pd.DataFrame([st['mix_param_dict'] for st in valid_n_full_settings])

In [17]:
tmp_df['C_y'].unique()

array([0.2])

In [18]:
tmp_df['alpha_train'].unique()

array([5.        , 1.        , 0.33333333, 0.2       ])

# Set Up For Multiple Runs

In [19]:
class train_config:
    def __init__(self):
        self.quantization: bool = False

    

In [20]:
globalconfig = train_config()

In [21]:
globalconfig.model_id="bert-base-uncased"

In [22]:
globalconfig.quantization = False

In [23]:
globalconfig.device = "cuda:0"

In [24]:
globalconfig.runs = 3

In [25]:
globalconfig.profiler = False

In [26]:
globalconfig.max_seq_length=512

In [27]:
globalconfig.num_train_epochs=3

In [28]:
globalconfig.lr = 1e-4
globalconfig.warmup_ratio = 0.1

In [29]:
rand_seed_np = 24
rand_seed_torch = 187

In [30]:
resume = True
ct_resume = 628 # resume ON this number

In [None]:
# run for check valid settings
random.seed(rand_seed_np)
np.random.seed(rand_seed_np)
torch.manual_seed(rand_seed_torch)
torch.cuda.manual_seed(rand_seed_torch)

# Validate settings

df0 = df_shac_uw
df1 = df_shac_mimic


valid_n_full_settings = []


for ct,c in enumerate(tqdm(valid_full_settings)):
    
    if resume and (ct < ct_resume):
    
        continue
        
    c = c.copy()
    # create train/test split according to stats
    dfs = create_mix(df0=df0, df1=df1, target=label, setting=c, sample=False, 
                     seed=222
                    )

    if dfs is None:
        continue
    
    valid_n_full_settings.append(c)
    
    
    for run_i in range(globalconfig.runs):
        dfs = create_mix(
                df0=df0,
                df1=df1,
                target=label,
                setting=c,
                sample=False,
                seed=random.randint(0, 1000),
            )

        assert dfs is not None
        
        # Init model
        tokenizer = AutoTokenizer.from_pretrained(globalconfig.model_id)
        model = AutoModelForSequenceClassification.from_pretrained(globalconfig.model_id)
        
        ## Peft Config
        model, lora_config = create_peft_config(model)
        
        ## Profiler

        
        globalconfig.output_dir = f"/bime-munin/xiruod/LoRA_BERT_SHAC/exp_{ct}_run_{run_i}"
        
        config = {
            'lora_config': lora_config,
            'learning_rate': globalconfig.lr,
            'num_train_epochs': globalconfig.num_train_epochs,
            'gradient_accumulation_steps': 2,
            'per_device_train_batch_size': 8,
            'per_device_eval_batch_size': 8,
            'gradient_checkpointing': False,
            'warmup_ratio':globalconfig.warmup_ratio,
        }

        # Set up profiler
        profiler = nullcontext()
        
        tokenized_train = datasets_loader(dfs['train'], txt_col=txt_col)
        tokenized_test = datasets_loader(dfs['test'], txt_col=txt_col)
        
        # Define training args
        training_args = TrainingArguments(
            output_dir=globalconfig.output_dir,
            overwrite_output_dir=True,
            bf16=globalconfig.quantization,  # Use BF16 if available
            # logging strategies
            logging_dir=f"{globalconfig.output_dir}/logs",
            logging_strategy="steps",
            logging_steps=10,
            save_strategy="no",
            optim="adamw_torch_fused" if globalconfig.quantization else "adamw_torch",
            max_steps=total_steps if globalconfig.profiler else -1,
            
            **{k:v for k,v in config.items() if k != 'lora_config'}
        )
        
        with profiler:
            # Create Trainer instance
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=tokenized_train,
                eval_dataset=tokenized_test,
                data_collator=default_data_collator,
                tokenizer=tokenizer,
                compute_metrics=compute_metrics_twoLevels,
                callbacks=[],
            )

            # Start training
            ret_train = trainer.train()
            ret_eval = trainer.evaluate()

        # save metrics
        ret = c
        ret.update(ret_eval)
        ret.update(ret_train.metrics)
        trainer.save_metrics(split="all", metrics=ret)

        ret_code = 1
        
        
        # model.save_pretrained(output_dir)

  0%|          | 0/1428 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6108
20,0.5765
30,0.4876
40,0.4444
50,0.4826
60,0.4391
70,0.5333
80,0.481
90,0.4965
100,0.4975


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6494
20,0.5958
30,0.4973
40,0.453
50,0.5002
60,0.4491
70,0.5442
80,0.497
90,0.501
100,0.525


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6484
20,0.5807
30,0.4879
40,0.5118
50,0.4838
60,0.5204
70,0.5235
80,0.5026
90,0.4424
100,0.5242


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.644
20,0.5836
30,0.4893
40,0.5103
50,0.4825
60,0.523
70,0.5292
80,0.5059
90,0.4462
100,0.5259


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6479
20,0.5842
30,0.4882
40,0.5023
50,0.4853
60,0.5223
70,0.5264
80,0.5077
90,0.4493
100,0.5285


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6488
20,0.5804
30,0.4857
40,0.5036
50,0.485
60,0.5312
70,0.5323
80,0.5015
90,0.4456
100,0.5186


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6401
20,0.5841
30,0.4888
40,0.5012
50,0.4829
60,0.5303
70,0.52
80,0.5021
90,0.4486
100,0.5156


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6419
20,0.5825
30,0.4847
40,0.5061
50,0.4893
60,0.5245
70,0.5212
80,0.5007
90,0.4443
100,0.5234


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.6445
20,0.5862
30,0.4859
40,0.504
50,0.4826
60,0.5279
70,0.5215
80,0.5117
90,0.4468
100,0.5168


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.653
20,0.5828
30,0.4863
40,0.505
50,0.4822
60,0.521
70,0.5199
80,0.5019
90,0.4453
100,0.5289


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.653
20,0.5828
30,0.4863
40,0.505
50,0.4822
60,0.521
70,0.5199
80,0.5019
90,0.4453
100,0.5289


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 297,988 || all params: 109,780,228 || trainable%: 0.27144050019644705


Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Step,Training Loss
10,0.653
20,0.5828
30,0.4863
40,0.505
50,0.4822
60,0.521


In [None]:
len(valid_n_full_settings)

In [None]:
len(valid_full_settings)