In [None]:
import random
random.seed(42)

from sklearn.metrics import cohen_kappa_score

import pandas as pd 
# Load the two annotators' label sheets; row i of each file is assumed to
# describe the same item -- TODO confirm against the annotation export.
df1 = pd.read_excel('/content/annotator1.xlsx')
df2 = pd.read_excel('/content/annotator2.xlsx')

# Agreement is only meaningful when both annotators labelled the same items.
assert len(df1) == len(df2), "annotator files must contain the same number of rows"

# Contingency table: rows are annotator 1's labels, columns are annotator 2's.
cont_table = pd.crosstab(df1['label'], df2['label'])

# A label used by only one annotator appears on just one axis of the crosstab,
# so looking up its diagonal cell directly raises KeyError. Square the table
# over the union of labels (missing cells count as 0) before reading the diagonal.
all_labels = cont_table.index.union(cont_table.columns)
square_table = cont_table.reindex(index=all_labels, columns=all_labels, fill_value=0)

# Calculate the percentage agreement.
total_instances = cont_table.values.sum()  # total number of jointly labelled items
agreement_instances = square_table.values.diagonal().sum()  # diagonal = both annotators chose the same label
raw_agreement = (agreement_instances / total_instances) * 100  # percentage agreement

print(f"Raw Percent Agreement: {raw_agreement:.2f}%")

print(f"Cohens Kappa Score: {cohen_kappa_score(df1['label'].values, df2['label'].values)}") #Using the sklearn implementation of Cohen's Kappa

Raw Percent Agreement: 73.33%
Cohens Kappa Score: 0.19463087248322153


Now we'll move on to fine-tuning pretrained language models specifically on your dataset. Since we're dealing with large models, the first step is to change to a GPU runtime.

## Adding a hardware accelerator

Please go to the menu and add a GPU as follows:

`Edit > Notebook Settings > Hardware accelerator > (GPU)`

Run the following cell to confirm that the GPU is detected.

In [None]:
import torch

# Everything below needs CUDA; fail right here if no GPU runtime is attached.
assert torch.cuda.is_available()

# Handle used by later cells to move tensors onto the GPU.
device = torch.device("cuda")

# Report how many GPUs are visible and the name of the current one.
n_gpu = torch.cuda.device_count()
device_name = torch.cuda.get_device_name()
print(f"Found device: {device_name}, n_gpu: {n_gpu}")

Found device: Tesla T4, n_gpu: 1


## Installing Hugging Face's Transformers library
We will use Hugging Face's Transformers (https://github.com/huggingface/transformers), an open-source library that provides general-purpose architectures for natural language understanding and generation with a collection of various pretrained models made by the NLP community. This library will allow us to easily use pretrained models like `BERT` and perform experiments on top of them. We can use these models to solve downstream target tasks, such as text classification, question answering, and sequence labeling.

Note that you will be asked to link with your Google Drive account to download some of these files. If you're concerned about security risks (there have not been any issues in previous semesters), feel free to make a new Google account and use it!

In [None]:
# Install the libraries this notebook needs into the Colab runtime.
# NOTE(review): neither install is version-pinned; the recorded output of this
# cell shows transformers 4.27.4 being installed.
!pip install transformers
!pip install -U -q PyDrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate the Colab user, then build a PyDrive client that uses the
# application-default Google credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
print('success!')

# NOTE(review): os and zipfile are not used by any code visible in this cell.
import os
import zipfile

# Fetch the course helper module from Drive by file id and save it locally as
# helpers.py so that later cells can `from helpers import ...`.
helper_file = drive.CreateFile({'id': '16HW-z9Y1tM3gZ_vFpJAuwUDohz91Aac-'})
helper_file.GetContentFile('helpers.py')
print('helper file downloaded! (helpers.py)')

# Fetch the sample tweets file from Drive by file id and save it locally as
# tweets.csv.
data_file = drive.CreateFile({'id': '1QcoAmjOYRtsMX7njjQTYooIbJHPc6Ese'})
data_file.GetContentFile('tweets.csv')
print('sample tweets downloaded! (tweets.csv)')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m90.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m105.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.3 tokenizers-0.13.2 transformers-4.27.4
success!
helper file downloaded! (helpers.py)
sa

In [None]:
from helpers import tokenize_and_format, flat_accuracy

In [None]:
from helpers import tokenize_and_format, flat_accuracy
import pandas as pd

# Load the final labelled dataset; the code below reads its `text` and `label` columns.
df = pd.read_excel('final_data.xlsx')

# Shuffle the rows once. The train/val/test split is taken positionally from
# this order, so a fixed random_state is required for the split (and therefore
# every reported score) to be reproducible across runs. random.seed() does not
# affect pandas, so the seed has to be passed here explicitly.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

texts = df.text.values
labels = df.label.values

### tokenize_and_format() is a helper function provided in helpers.py ###
input_ids, attention_masks = tokenize_and_format(texts)

# Concatenate the per-example tensors returned by the helper into single
# tensors, and turn the label array into a tensor as well.
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(labels)

# Print sentence 0, now as a list of IDs.
print('Original: ', texts[0])
print('Token IDs:', input_ids[0])

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Original:  if 2020 was a nose it’d be mine 🐦
Token IDs: tensor([  101,  2065, 12609,  2001,  1037,  4451,  2009,  1521,  1040,  2022,
         3067,   100,   102,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0])


## Create train/test/validation splits

Here we split your dataset into 3 parts: a training set, a validation set, and a testing set. Each item in your dataset will be a 3-tuple containing an input_id tensor, an attention_mask tensor, and a label tensor.



In [None]:

total = len(df)

# 80% train / 10% validation; the test split takes whatever is left over,
# so it absorbs the rounding remainder.
num_train = int(total * .8)
num_val = int(total * .1)
num_test = total - num_train - num_val

# Contiguous index ranges over the dataframe, which was already shuffled in
# the cell above.
train_idx = range(0, num_train)
val_idx = range(num_train, num_train + num_val)
test_idx = range(num_train + num_val, total)

# Each example is a 3-tuple: (input_ids row, attention_mask row, label).
train_set, val_set, test_set = (
    [(input_ids[i], attention_masks[i], labels[i]) for i in idx]
    for idx in (train_idx, val_idx, test_idx)
)

# Raw texts for the same index ranges, kept alongside the tensor splits.
train_text, val_text, test_text = (
    [texts[i] for i in idx]
    for idx in (train_idx, val_idx, test_idx)
)


In [None]:
from transformers import BertForSequenceClassification, AdamW, BertConfig

# Settings for the classification model built on top of pretrained BERT.
classifier_kwargs = dict(
    num_labels=2,                # The number of output labels.
    output_attentions=False,     # Do not return attention weights.
    output_hidden_states=False,  # Do not return all hidden states.
)

# "bert-base-uncased" is the 12-layer BERT model with an uncased vocab.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", **classifier_kwargs)

# Move the model to the GPU; as the cell's last expression this also displays
# the module structure.
model.cuda()


Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [None]:
# Hyperparameter search space: the training cell tries every combination of
# one value from each list.
batch_size = [
    8,
    16,
    32,
]
learning_rate = [
    2e-5,
    5e-5,
    2e-4,
    5e-4,
    5e-3,
]
weight_decay = [
    2e-3,
    5e-3,
    1e-2,
    0.1,
]
epochs = [
    5,
    10,
    15,
    20,
]

In [None]:
import numpy as np
# function to get validation accuracy
def get_validation_performance(val_set, b_size):
    """Return the accuracy of the notebook-level ``model`` on ``val_set``.

    Args:
        val_set: Sequence of ``(input_ids, attention_mask, label)`` tensor
            triples, one per example.
        b_size: Number of examples scored per forward pass; must be >= 1.

    Returns:
        Fraction of examples whose highest-scoring logit matches the label.

    Raises:
        ValueError: If ``val_set`` is empty (accuracy is undefined) or
            ``b_size`` is not positive.

    Note:
        Reads the globals ``model`` and ``device`` and leaves ``model`` in
        evaluation mode; a caller that continues training must call
        ``model.train()`` again.
    """
    # Validate up front so a bad split fails with a clear message instead of a
    # ZeroDivisionError after a full pass over the data.
    if len(val_set) == 0:
        raise ValueError("val_set is empty; validation accuracy is undefined")
    if b_size < 1:
        raise ValueError(f"b_size must be a positive integer, got {b_size}")

    # Put the model in evaluation mode (disables dropout).
    model.eval()

    total_correct = 0

    # Step through the data in b_size chunks; the last chunk may be shorter.
    for start in range(0, len(val_set), b_size):
        batch = val_set[start:start + b_size]

        input_id_tensors = torch.stack([data[0] for data in batch])
        input_mask_tensors = torch.stack([data[1] for data in batch])
        label_tensors = torch.stack([data[2] for data in batch])

        # Only the model inputs need to live on the GPU; labels are compared
        # on the CPU below.
        b_input_ids = input_id_tensors.to(device)
        b_input_mask = input_mask_tensors.to(device)

        # No gradients are needed for evaluation, so skip building the
        # compute graph during the forward pass.
        with torch.no_grad():
            # Labels are not passed: only the logits are used, so there is no
            # reason to compute a loss here.
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask)

        logits = outputs.logits.detach().cpu().numpy()
        label_ids = label_tensors.cpu().numpy()

        # Count the examples in this batch whose arg-max class equals the label.
        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()
        total_correct += np.sum(pred_flat == labels_flat)

    # Accuracy over the whole validation set.
    avg_val_accuracy = total_correct / len(val_set)
    return avg_val_accuracy



In [None]:
import random

# Grid search: fine-tune a fresh BERT classifier for every combination of
# (epochs, learning rate, batch size, weight decay) and measure validation
# accuracy after every training epoch.
#
# One row is appended to performance_track per completed epoch:
#   [epoch budget, learning rate, batch size, weight decay,
#    summed training loss for that epoch, validation accuracy]
# NOTE(review): the row stores the epoch budget, not the index of the epoch
# that produced it; rows of one configuration are in training order.
performance_track = []
for epoc in epochs:
    for learning_r in learning_rate:
        for b_size in batch_size:
            for weight_d in weight_decay:
                # Re-seed before building each model so every configuration
                # starts from the same randomly initialised classifier head;
                # otherwise differences between configurations are mixed
                # with initialisation noise and the search is not repeatable.
                torch.manual_seed(42)

                model = BertForSequenceClassification.from_pretrained(
                    "bert-base-uncased",  # Use the 12-layer BERT model, with an uncased vocab.
                    num_labels=2,  # The number of output labels.
                    output_attentions=False,  # Whether the model returns attentions weights.
                    output_hidden_states=False,  # Whether the model returns all hidden-states.
                )
                model.cuda()

                # transformers.AdamW is deprecated in favour of the PyTorch
                # implementation. eps=1e-6 keeps the default the transformers
                # version used, so results stay comparable.
                optimizer = torch.optim.AdamW(
                    model.parameters(),
                    lr=learning_r,
                    weight_decay=weight_d,
                    eps=1e-6,
                )

                # Training loop: one full pass over the training set per epoch.
                for epoch_i in range(epoc):
                    print("")
                    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epoc))
                    print('Training...')

                    # Summed (not averaged) batch loss for this epoch.
                    total_train_loss = 0

                    # Validation switches the model to eval mode, so switch
                    # back to training mode at the start of every epoch.
                    model.train()

                    # Step through the data in b_size chunks; the last chunk
                    # may be shorter.
                    for start in range(0, len(train_set), b_size):
                        batch = train_set[start:start + b_size]

                        input_id_tensors = torch.stack([data[0] for data in batch])
                        input_mask_tensors = torch.stack([data[1] for data in batch])
                        label_tensors = torch.stack([data[2] for data in batch])

                        # Move tensors to the GPU
                        b_input_ids = input_id_tensors.to(device)
                        b_input_mask = input_mask_tensors.to(device)
                        b_labels = label_tensors.to(device)

                        # Clear the previously calculated gradient
                        model.zero_grad()

                        # Forward pass; passing labels makes the model return the loss.
                        outputs = model(b_input_ids,
                                        token_type_ids=None,
                                        attention_mask=b_input_mask,
                                        labels=b_labels)
                        loss = outputs.loss

                        total_train_loss += loss.item()

                        # Backward pass to calculate the gradients, then update the parameters.
                        loss.backward()
                        optimizer.step()

                    # ========================================
                    #               Validation
                    # ========================================
                    # After each training epoch, measure performance on the validation set.
                    print("Parameters:")
                    print(f"Epochs: {epoc}, Learning rate: {learning_r}, Batch size: {b_size}, Weight Decay: {weight_d}")
                    print(f"Total loss: {total_train_loss}")
                    val_acc = get_validation_performance(val_set, b_size)
                    performance_track.append([epoc, learning_r, b_size, weight_d, total_train_loss, val_acc])
                    print(f"Validation accuracy: {val_acc}")

print("")
print("Training complete!")


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 7.041821181774139
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.127247512340546
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 5.397361189126968
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 4.143135875463486
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 2.8526184260845184
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.8850173354148865
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 5.998784631490707
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 5.367733359336853
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 3.3400527760386467
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 2.5254799723625183
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 7.918610751628876
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.403905093669891
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 5.266626074910164
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 4.325309380888939
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 2.509429767727852
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 7.010846048593521
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 6.013786017894745
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 5.205998778343201
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 3.4861432760953903
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 2.1676493510603905
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 4.142705202102661
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.446189761161804
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.2910279631614685
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.8532515466213226
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.5980715453624725
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 4.488847136497498
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.381258338689804
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.1023981869220734
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.6331768929958344
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.3256108164787292
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.489090472459793
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.1530544459819794
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.8978030681610107
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.3506387770175934
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 1.6564958691596985
Validation accuracy: 0.8333333333333334


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 4.466754913330078
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.459981083869934
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.2136799097061157
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.0022249817848206
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.802316665649414
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 2.294944405555725
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.927071213722229
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.669574499130249
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5552171170711517
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.4603026807308197
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.9313870072364807
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7650411128997803
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6078715324401855
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.487882912158966
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.4793311655521393
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7490251660346985
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.622476488351822
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5836879909038544
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.4278517663478851
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.312380164861679
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 2.3780784606933594
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.8605226874351501
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6372392177581787
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.5988945662975311
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.5249278247356415
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 7.800852477550507
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.476753205060959
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.078785985708237
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 3.916199803352356
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 1.7729222811758518
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.9570333659648895
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.027150541543961
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 3.544884368777275
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 1.6262604147195816
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 1.5475534461438656
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 7.160768896341324
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.425100028514862
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 5.895263850688934
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 3.2706214264035225
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 1.3358673248440027
Validation accuracy: 0.75


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 7.000029414892197
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 5.942992150783539
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 3.9701424837112427
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 2.170895531773567
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 0.9741423483937979
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.5643296241760254
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.26078000664711
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.13285955786705
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.8610228896141052
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.065007582306862
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.412827432155609
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.2871975898742676
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.952857941389084
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.2046258449554443
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 1.3315808176994324
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 4.149270951747894
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.2831933200359344
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.061253786087036
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.514203667640686
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 1.699069932103157
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.562899947166443
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.897915631532669
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.3048039972782135
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 1.3124100267887115
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 0.5181474611163139
Validation accuracy: 0.8333333333333334


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7668529152870178
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6424745619297028
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5853712260723114
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5644395649433136
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.378256767988205
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.8694462180137634
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7244004607200623
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6254655122756958
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.4468930661678314
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.3337328433990479
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.9932134747505188
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.670272171497345
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6469332873821259
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5684531927108765
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.4860398173332214
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 2.092168629169464
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7071359753608704
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6069477498531342
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.5025175511837006
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.4147336781024933
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 7.669571414589882
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.846815794706345
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.732427507638931
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.683155119419098
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.783340364694595
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 7.3286745846271515
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.902379155158997
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.945516020059586
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.86956724524498
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.639881908893585
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 7.28010156750679
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.665799170732498
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.361759006977081
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 7.101182632148266
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 5.933144390583038
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.830864280462265
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.948203772306442
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.80234757065773
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.867653787136078
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.897290855646133
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.665325343608856
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.26776060461998
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 2.6875034272670746
Validation accuracy: 1.0

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 1.8134075962007046
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 1.1117386110126972
Validation accuracy: 0.8333333333333334


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 4.074518978595734
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.3568661212921143
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.289413332939148
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.055858999490738
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.59284707903862
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.5740089118480682
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.5031842589378357
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 2.933026760816574
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 1.5026459321379662
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 1.2856908775866032
Validation accuracy: 0.8333333333333334


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.6067590713500977
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.2420178055763245
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.2990590929985046
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 2.7745940387248993
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.1734518259763718
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.9574260115623474
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6371262669563293
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6778931319713593
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.457462877035141
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6155556738376617
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7172307968139648
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.72860985994339
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6780816614627838
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.62197345495224
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.432862401008606
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7892413139343262
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7841501832008362
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6456876695156097
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5973597466945648
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.511708825826645
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.9122337102890015
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.5833742916584015
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.3008305728435516
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 0.6195624321699142
Validation accuracy: 0.25

Training...
Parameters:
Epochs: 5, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.1443399414420128
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 9.101510167121887
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.056186646223068
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.775037556886673
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.945331007242203
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.999821156263351
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.2992928102612495
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.052841126918793
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.925727009773254
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.910226613283157
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.7518350481987
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 7.49217876791954
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.597192168235779
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.966999441385269
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.669189959764481
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.819780349731445
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.212502807378769
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.332567170262337
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.86994206905365
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.070178717374802
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.6294476091861725
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.819072663784027
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 4.208418995141983
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.6638421714305878
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.775419056415558
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.2575377821922302
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 4.124851942062378
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 4.073368042707443
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.365941822528839
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.3611200153827667
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.3613454699516296
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 4.3813130259513855
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.4312921166419983
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.35782527923584
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.2952205538749695
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.4469525814056396
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 4.357815384864807
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.781996726989746
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.4833262860774994
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.412307560443878
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.4475077986717224
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 2.150247633457184
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.944358468055725
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6887404918670654
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.777910590171814
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.669643521308899
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 2.025204598903656
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6811297535896301
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6979849636554718
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6692236065864563
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7150757312774658
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.9298980236053467
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.76058429479599
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.2111939787864685
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6671571135520935
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6702442467212677
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 2.9063186049461365
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 2.275286018848419
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.67786306142807
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 2.1667544841766357
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6335218846797943
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 21.636146634817123
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 9.364514887332916
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.114140897989273
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.8025484085083
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.656534358859062
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 18.030657392548164
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 9.193889886140823
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 8.455847203731537
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 8.718083381652832
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.589120030403137
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 25.720197428017855
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 15.849840611219406
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 11.456053018569946
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 10.537883967161179
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 10.447069853544235
Validation accuracy: 0.08333333333333333


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 23.603376775979996
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 9.524464420974255
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.812279492616653
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.664070099592209
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.987537294626236
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 14.469756245613098
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 7.24253585934639
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 4.423380434513092
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 4.378253817558289
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.4191459864377975
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 10.394383192062378
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 4.315777063369751
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 4.115977585315704
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 4.23440220952034
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.6796616315841675
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 18.249892711639404
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.7409695386886597
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.4772334694862366
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.758665829896927
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.7301589250564575
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 17.273602426052094
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 7.9402613043785095
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 4.363415539264679
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.4927491396665573
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.6461872458457947
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 8.808984696865082
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 6.292712688446045
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 2.095134735107422
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7304418683052063
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7316526472568512
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 12.56324303150177
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 16.06938338279724
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 5.144066095352173
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 6.668284058570862
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 3.9118226766586304
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 5.924816846847534
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 11.703470528125763
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 4.010778725147247
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.0859678387641907
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.8758997321128845
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 10.934768855571747
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 2.923213005065918
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 2.9145480394363403
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7198777198791504
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 5, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.8805627226829529
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 10.211076259613037
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.685832232236862
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 5.1153744161129
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 4.486054688692093
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 3.784572534263134
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 2.8589475601911545
Validation accuracy: 0.9166666666666666

Training...
Par

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 8.481525957584381
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.64906844496727
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.209931701421738
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 5.600508436560631
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 3.5549078434705734
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 1.835004486143589
Validation accuracy: 0.8333333333333334

Training...
Par

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 8.344689846038818
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.513972699642181
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 5.684675216674805
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 4.127270594239235
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 2.2203069627285004
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 1.6301886662840843
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 8.438757240772247
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 7.098213315010071
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 6.393258899450302
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 6.310639917850494
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 5.213709443807602
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 3.242944397032261
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epo

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.611757755279541
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.269814193248749
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.9861954748630524
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.4666512310504913
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 1.9265545010566711
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 1.2159086167812347
Validation accuracy: 0.8333333333333334

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.593001961708069
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.284434825181961
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.006230741739273
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.6538152992725372
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.0390821397304535
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 1.1564435511827469
Validation accuracy: 0.8333333333333334

Traini

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 4.172731995582581
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.4318555295467377
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.3075538873672485
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.2958559095859528
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.0439635813236237
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.916111797094345
Validation accuracy: 0.9166666666666666

Training...

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.5624953508377075
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.333886206150055
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.0816763043403625
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.551370292901993
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.015836924314499
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 1.4637876898050308
Validation accuracy: 0.8333333333333334

Training...
Parame

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 2.6589768528938293
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.8930322527885437
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7543319463729858
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7184350490570068
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7166082859039307
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6686628460884094
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 2.326114296913147
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 2.033876597881317
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.8328206539154053
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6534621715545654
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.5347800254821777
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.4417261183261871
Validation accuracy: 0.9166666666666666

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 2.3288321495056152
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7416641116142273
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6250216364860535
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5285144448280334
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.4770441353321075
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.3499974012374878
Validation accuracy: 0.9166666666666666

Training.

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7485800385475159
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.638618528842926
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6241865456104279
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.5152596533298492
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.4510195851325989
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.317597895860672
Validation accuracy: 0.9166666666666666

Training...
Param

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 7.920523941516876
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.276930153369904
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 4.349533721804619
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 2.1216916516423225
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 1.199120830744505
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 0.37260296382009983
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 7.176649898290634
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.486792236566544
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 5.462561950087547
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 3.102491781115532
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 2.566882496699691
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 1.6282149655744433
Validation accuracy: 0.9166666666666666

Training...
Pa

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 7.182018041610718
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.01510451734066
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 3.6242626011371613
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 1.7257716953754425
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 1.899028953164816
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 0.9242566246539354
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 7.208665460348129
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 6.535653620958328
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 6.033380299806595
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 3.4405660033226013
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 2.0402057580649853
Validation accuracy: 0.6666666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 1.102982135489583
Validation accuracy: 0.9166666666666666

Training...
Parameters:
E

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 4.089035451412201
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.1027161478996277
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.8881508111953735
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.3983785212039948
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 1.465796321630478
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 0.8384812548756599
Validation accuracy: 0.9166666666666666

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.5453073978424072
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.216141164302826
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.979086697101593
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 1.8930898010730743
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 0.9955289140343666
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 0.3194758892059326
Validation accuracy: 0.75

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.964880406856537
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.22504323720932
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.140518307685852
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.6756186485290527
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 1.856217160820961
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 0.8516404032707214
Validation accuracy: 0.9166666666666666

Training...
Pa

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.9011752605438232
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.433863580226898
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.998362362384796
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.733207792043686
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.066727876663208
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 1.513573095202446
Validation accuracy: 0.9166666666666666

Training...
Paramete

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.8704206347465515
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6061273217201233
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.4779163897037506
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.285889357328415
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.0465631484985352
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 0.7622110396623611
Validation accuracy: 0.9166666666666666

Trai

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.928430438041687
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.634814977645874
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.5968652665615082
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.459166407585144
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.3640636503696442
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.1601039171218872
Validation accuracy: 0.9166666666666666

Traini

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.8378660082817078
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6355504989624023
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5849356055259705
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.4660106897354126
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.3507249653339386
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.0661604404449463
Validation accuracy: 0.9166666666666666

Training.

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 2.0337982773780823
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6441930532455444
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.621099203824997
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.5997804403305054
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.4967342615127563
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.3383759260177612
Validation accuracy: 0.9166666666666666

Training...
Para

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 8.259965606033802
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.843333721160889
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.887839496135712
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.7440275847911835
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.704671382904053
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.794461786746979
Validation accuracy: 0.9166666666666666

Training

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 7.300631269812584
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.710162565112114
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.829022377729416
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.768793880939484
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.762105286121368
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.750668883323669
Validation accuracy: 0.9166666666666666

Training.

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 7.266439452767372
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.756115674972534
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.555121153593063
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.493520349264145
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 7.175853073596954
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.844811052083969
Validation accuracy: 0.9166666666666666

Training...
Par

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 7.013276219367981
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.675640970468521
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.623731404542923
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 7.017665505409241
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 5.865223079919815
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 5.822831865400076
Validation accuracy: 0.9166666666666666

Training...
Parameter

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.7739613950252533
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.3083638548851013
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.243431568145752
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.3946078717708588
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.071769952774048
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 5.498688340187073
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.980928957462311
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 4.388097286224365
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.3276793360710144
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.2519160211086273
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.11468967795372
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 2.808476001024246
Validation accuracy: 0.9166666666666666

T

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.948420226573944
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 4.271932601928711
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.240975648164749
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.212445020675659
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 2.4336357712745667
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 0.8995329700410366
Validation accuracy: 0.9166666666666666

Trainin

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.8207935094833374
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.3490560054779053
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.342215985059738
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.3723847568035126
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.339176833629608
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.289312571287155
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 2.188668668270111
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7332011461257935
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6468137502670288
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.591234266757965
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.607687920331955
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.2560836970806122
Validation accuracy: 0.9166666666666666



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 2.2714595794677734
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6693448722362518
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6890408396720886
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6425098478794098
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7713842988014221
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6973176896572113
Validation accuracy: 0.916666666666666

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 2.206825017929077
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7099257111549377
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.662543535232544
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5898779034614563
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.243721604347229
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 0.7961411103606224
Validation accuracy: 0.75

Training...
Paramete

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.756473958492279
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7977977395057678
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.4124725461006165
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 0.8867463022470474
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 0.36458777636289597
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 0.31052448600530624
Validation accuracy: 0.6666666666666666

Training

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.864836394786835
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.839964032173157
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.894424110651016
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.810215771198273
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.874473512172699
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.606902629137039
Validation accuracy: 0.9166666666666666

Training.

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.5085548013448715
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.0096500515937805
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.124319821596146
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.759217530488968
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.775299966335297
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.792233884334564
Validation accuracy: 0.9166666666666666

Trainin

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 7.379841297864914
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 7.006503343582153
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.94773867726326
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.827063649892807
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.842168718576431
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.728285014629364
Validation accuracy: 0.9166666666666666

Training...
Para

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 8.719556525349617
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.284376621246338
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.7401604652404785
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.789975523948669
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.791238039731979
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.908023178577423
Validation accuracy: 0.9166666666666666

Training...
Paramete

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 4.180243611335754
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.769523322582245
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.9161829948425293
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.4823323488235474
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.3423230946063995
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.3969741463661194
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.8703825771808624
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.5869342386722565
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.354801744222641
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.368585616350174
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.335149794816971
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.332819253206253
Validation accuracy: 0.9166666666666666

T

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 4.5315001010894775
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 4.052716881036758
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.351985663175583
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.270106017589569
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.2779179513454437
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.3119453489780426
Validation accuracy: 0.9166666666666666

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 4.398559391498566
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.6284290850162506
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.282747894525528
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.48545241355896
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.3271945416927338
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.371975392103195
Validation accuracy: 0.9166666666666666

Training...
Pa

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 2.1354294419288635
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6181643307209015
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7198365330696106
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6681718826293945
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7061918377876282
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.656455636024475
Validation accuracy: 0.9166666666666666

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 2.076584577560425
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.682447910308838
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7367035150527954
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6612552106380463
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6354227662086487
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6845220029354095
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.3816259503364563
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7121648788452148
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.7757633328437805
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.8186053931713104
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.82865709066391
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6207966208457947
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 2.074721544981003
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.659224808216095
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.8701589703559875
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7284084260463715
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.658735990524292
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7026976346969604
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 13.731626741588116
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.531499922275543
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.58275380730629
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.709522694349289
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.238297402858734
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.720125466585159
Validation accuracy: 0.9166666666666666

Training...
Par

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 17.677637726068497
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 9.359837487339973
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.833427339792252
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.9606762528419495
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.658876806497574
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.77125671505928
Validation accuracy: 0.9166666666666666

Training...
Pa

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 18.27194035053253
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 10.002483367919922
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 7.972487151622772
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 8.551608622074127
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 8.060024365782738
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 7.776620119810104
Validation accuracy: 0.9166666666666666

Training...
Parame

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 25.249260306358337
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 13.505210816860199
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 11.074227780103683
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 10.736042350530624
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 10.888032525777817
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 9.736530393362045
Validation accuracy: 0.9166666666666666

Training...
Param

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 12.482288718223572
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 5.690835118293762
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.9403981268405914
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 4.052280008792877
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.7357839047908783
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.5421785414218903
Validation accuracy: 0.9166666666666666

Trai

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 25.841553211212158
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 5.809400767087936
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.597419559955597
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.572112023830414
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.407656669616699
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.412066698074341
Validation accuracy: 0.9166666666666666

Training

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 14.602625131607056
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 4.298023909330368
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.539191961288452
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.4889078736305237
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.440699726343155
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.527983784675598
Validation accuracy: 0.9166666666666666

Training...
P

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 25.074158370494843
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 10.866572678089142
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 6.4407923221588135
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 4.294872343540192
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.4178194999694824
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.2870644629001617
Validation accuracy: 0.9166666666666666

Training...
P

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 4.931169807910919
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 5.844923973083496
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 3.9852002263069153
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 2.339184820652008
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 2.350590765476227
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7138546109199524
Validation accuracy: 0.9166666666666666

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 6.383369028568268
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 16.92167842388153
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 2.766236424446106
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 2.3996846675872803
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7726260423660278
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.810046672821045
Validation accuracy: 0.9166666666666666

Traini

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 12.241083979606628
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 3.7691197395324707
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 6.315470278263092
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.7913269996643066
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7181479930877686
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.9664297699928284
Validation accuracy: 0.9166666666666666

Trainin

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 11.360302805900574
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 9.612849056720734
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.962032973766327
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.758586049079895
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.8160374164581299
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 10, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.72026526927948
Validation accuracy: 0.9166666666666666

Training...
Paramete

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 7.362564504146576
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.375759482383728
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 5.903387784957886
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 5.133096665143967
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 3.98849655687809
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 2.809693396091461
Validation accuracy: 0.9166666666666666

Training...
Para

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 7.664048552513123
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.502729594707489
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 5.804100126028061
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 4.8843501806259155
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 3.6645592525601387
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 2.3970084339380264
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.999897748231888
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 5.798189803957939
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 3.854538843035698
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 2.363919995725155
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 0.9771462753415108
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 0.5998383648693562
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 7.415424406528473
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 6.473075181245804
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 5.595423117280006
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 4.864944949746132
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 3.962684601545334
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 2.7711478173732758
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Ep

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.806454598903656
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.293457269668579
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.1411455273628235
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.015804260969162
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.7096931636333466
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.247380018234253
Validation accuracy: 0.9166666666666666

Trainin

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 4.465716183185577
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.7104191184043884
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.4215802252292633
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.2212148904800415
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.9325186908245087
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.759973794221878
Validation accuracy: 0.9166666666666666

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.848015248775482
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.378284513950348
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.1045385897159576
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.882562607526779
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.539648950099945
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.2853581607341766
Validation accuracy: 0.9166666666666666

Training...
P

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 4.325249135494232
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.38684418797493
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.096065431833267
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.8281957805156708
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.5880680978298187
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.2464660555124283
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.8121683597564697
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7086027264595032
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6822020411491394
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6337802410125732
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5378226041793823
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5297359228134155
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 2.2504408955574036
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7954787015914917
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.589685171842575
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.4747689366340637
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.4488057792186737
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.2499833405017853
Validation accuracy: 0.9166666666666666

Trai

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 2.1041406393051147
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.808245301246643
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6465255916118622
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.579104721546173
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.4642390310764313
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.3610251247882843
Validation accuracy: 0.9166666666666666

Training...

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7905471920967102
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.663720428943634
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6067734062671661
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.5446573793888092
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.4971367120742798
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.2743963599205017
Validation accuracy: 0.9166666666666666

Training...
Para

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 7.32408681511879
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.517194181680679
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 5.8879566341638565
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 3.2630874291062355
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 0.732360728085041
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 0.13823768589645624
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 7.081350952386856
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.211536198854446
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 3.858428508043289
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 1.5049968883395195
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 1.1927865268662572
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 2.5953017622232437
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 7.413504481315613
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.657281547784805
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.584295153617859
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 5.377744644880295
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 3.1860283613204956
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 1.5129950493574142
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 7.335552990436554
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 5.722377061843872
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 3.0471255406737328
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 1.119733065366745
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 1.672103081829846
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 1.0330143086612225
Validation accuracy: 0.8333333333333334

Training...
Parameters:
E

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.488824427127838
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.1982513666152954
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.127765864133835
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.5268361568450928
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 1.6923405528068542
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 0.814669668674469
Validation accuracy: 0.9166666666666666

Traini

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.6056439876556396
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.1390931606292725
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.4448072612285614
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 1.3595168739557266
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 0.5030987374484539
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 0.22873572818934917
Validation accuracy: 0.9166666666666666

Tr

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.960505783557892
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.1078887581825256
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.5825984179973602
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.0704107880592346
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 1.3342213183641434
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 0.6497003063559532
Validation accuracy: 0.9166666666666666

Training...
Parameters:


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.8554323315620422
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.2304939329624176
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.9084406793117523
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.3970965147018433
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 1.7394254356622696
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 1.0887825638055801
Validation accuracy: 0.9166666666666666

Training...
Par

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 2.1817551851272583
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6135286390781403
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.4357044100761414
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.3074198365211487
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.1248652935028076
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 0.8589477241039276
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.775510311126709
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6486515700817108
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.4546004235744476
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.0937905013561249
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 0.7126619219779968
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 0.43998026102781296
Validation accuracy: 0.8333333333333334

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 2.188830018043518
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5823842883110046
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5873679220676422
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5265378654003143
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.47185480594635
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.3706128895282745
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7983140349388123
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.479280412197113
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.37767893075943
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.0809522867202759
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 0.9100697934627533
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 0.6833929866552353
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.882241681218147
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 7.117975562810898
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.6631432473659515
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.819493770599365
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.681192100048065
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.756862789392471
Validation accuracy: 0.9166666666666666

Training

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 7.343723684549332
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.717388600111008
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.867552936077118
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.806061387062073
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.570671647787094
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.58263486623764
Validation accuracy: 0.9166666666666666

Training..

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 8.145332336425781
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 7.001768559217453
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.853101491928101
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.6696992218494415
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.8988639414310455
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.791530430316925
Validation accuracy: 0.9166666666666666

Training...
P

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 7.107881620526314
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.839482337236404
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.631554305553436
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.744572162628174
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.641987442970276
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.416716128587723
Validation accuracy: 0.9166666666666666

Training...
Parameter

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.6453645825386047
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.38315212726593
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 2.36799556016922
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.589992642402649
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.7897197008132935
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 1.7225850224494934
Validation accuracy: 0.9166666666666666

T

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.5508543848991394
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.3220008313655853
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 2.4832348227500916
Validation accuracy: 0.4166666666666667

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 1.7373702600598335
Validation accuracy: 0.5833333333333334

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 1.5131670143455267
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 1.166632317006588
Validation accuracy: 0.9166666666666666

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 4.767283916473389
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.3461000621318817
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.3421880304813385
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 2.7488309144973755
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 2.226277008652687
Validation accuracy: 0.5

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 2.3794146180152893
Validation accuracy: 0.6666666666666666

Training...
Paramete

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.587508738040924
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.2221078872680664
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 2.8641929626464844
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 1.6063946075737476
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 0.39296562038362026
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.8023476116359234
Validation accuracy: 0.8333333333333334

Training...
Parameters:

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 2.0942184925079346
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.623323917388916
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5926227867603302
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.1274687945842743
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 0.5101291760802269
Validation accuracy: 0.5

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 0.25829898566007614
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.8567986488342285
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7206098437309265
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.5452672839164734
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.2628440856933594
Validation accuracy: 1.0

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.1324290186166763
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 0.4553122781217098
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.9097723960876465
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.648646354675293
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5270716845989227
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.4005452990531921
Validation accuracy: 1.0

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.618355929851532
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 0.6563370823860168
Validation accuracy: 0.8333333333333334

Training...
Paramete

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 2.24606055021286
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6855371594429016
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6735103726387024
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.781428188085556
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7757272124290466
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.8020588159561157
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.838566958904266
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.037475883960724
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.887930065393448
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.696638584136963
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.87184876203537
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.840013265609741
Validation accuracy: 0.9166666666666666

Training..

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.233051210641861
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.098863333463669
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.279266446828842
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.769655495882034
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.935803860425949
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.725042968988419
Validation accuracy: 0.9166666666666666

Training.

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 7.445242017507553
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 7.058429300785065
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.933604389429092
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.784408301115036
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.960904747247696
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.868216693401337
Validation accuracy: 0.9166666666666666

Training...
Par

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.3301016092300415
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.814711719751358
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.728703796863556
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.9092526733875275
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.818820893764496
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.759936451911926
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 4.069140553474426
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.494218736886978
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.4068275690078735
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.3658881783485413
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.275751143693924
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.3034229576587677
Validation accuracy: 0.9166666666666666



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.9946374893188477
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.4251647293567657
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.263782113790512
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.4593869745731354
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.466148555278778
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.3489095866680145
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 4.541456699371338
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.4976061582565308
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.332874685525894
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.648830473423004
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.330060303211212
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.295461982488632
Validation accuracy: 0.9166666666666666

Training

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 4.010403692722321
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.449118584394455
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.43331378698349
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.3485783338546753
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.374662846326828
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.3752599954605103
Validation accuracy: 0.9166666666666666

Training...
Pa

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 2.0360499024391174
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6672198474407196
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6992669701576233
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6725678443908691
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6804762184619904
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7169576287269592
Validation accuracy: 0.916666666666666

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 2.6366571187973022
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 2.4450336694717407
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.9714512825012207
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.701289713382721
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6640616953372955
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6553455591201782
Validation accuracy: 0.91666666666666

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.8876492977142334
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.9563369154930115
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6648088693618774
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.8671444654464722
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6412273347377777
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7103663682937622
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 2.3107438683509827
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6840710639953613
Validation accuracy: 0.6666666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.9963605403900146
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.804778814315796
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6154155135154724
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6599987149238586
Validation accuracy: 0.9166666666666666

Training..

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 15.340568877756596
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.930922791361809
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.8613366186618805
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.017496705055237
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.900995999574661
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.096941232681274
Validation accuracy: 0.9166666666666666

Training...
P

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 34.16965937614441
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 9.524738729000092
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.666788190603256
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.841821193695068
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.836267322301865
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.7446998953819275
Validation accuracy: 0.9166666666666666

Training...
P

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 24.74494707584381
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 11.900972977280617
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 9.151067100465298
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 8.418411448597908
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 7.389093339443207
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 8.066364526748657
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 26.53777176141739
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 9.929807417094707
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 8.697386011481285
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.6199837028980255
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.866377770900726
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.6502083241939545
Validation accuracy: 0.9166666666666666

Training...
Parameters:
E

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 13.536385238170624
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 7.12954968214035
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 5.612507581710815
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.6175698339939117
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.465412884950638
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.760052889585495
Validation accuracy: 0.9166666666666666

Trainin

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 15.86775815486908
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.5146121978759766
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.6810213327407837
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.5632369220256805
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.4269781708717346
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.630923807621002
Validation accuracy: 0.9166666666666666

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 18.58099788427353
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.7842881083488464
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.6445196866989136
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.8380843698978424
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.78420227766037
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.6025278568267822
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 20.16493332386017
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 4.7598876953125
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 5.042217791080475
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.280995100736618
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.427547365427017
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.802220046520233
Validation accuracy: 0.9166666666666666

Training...
Parameters

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 2.2582631707191467
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 10.736836671829224
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7858637571334839
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 2.074539363384247
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7701444029808044
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.736523598432541
Validation accuracy: 0.9166666666666666

Trai

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 4.113375186920166
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 21.113412618637085
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 4.645611822605133
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 6.72418749332428
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 6.870680093765259
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 2.017881453037262
Validation accuracy: 0.9166666666666666

Trainin

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 6.832680106163025
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 6.644332647323608
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 3.718435764312744
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.4730640053749084
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.1613003313541412
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.9844532012939453
Validation accuracy: 0.9166666666666666

Training...

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 6.187294781208038
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 18.527069568634033
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6653329730033875
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 2.0318695306777954
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.9869906902313232
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 15, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7147667706012726
Validation accuracy: 0.9166666666666666

Training...
Pa

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 7.978500008583069
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.246550738811493
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 5.467593505978584
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 3.9265971183776855
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 2.209050290286541
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 1.1666543744504452
Validation accuracy: 0.9166666666666666

Training...
P

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.888822585344315
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.554911255836487
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 5.920699864625931
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 4.943386048078537
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 3.1805006861686707
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 1.720228910446167
Validation accuracy: 0.9166666666666666

Training...
Pa

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.927022755146027
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.538516491651535
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 5.776988595724106
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 4.628877192735672
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 3.33152075111866
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 1.7052621617913246
Validation accuracy: 0.9166666666666666

Training...
Parameter

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 8.001035749912262
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 6.804540157318115
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 5.911000072956085
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 5.267570614814758
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 4.456482097506523
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 3.383500263094902
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epo

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.606405556201935
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.1169395744800568
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.934892922639847
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.242890387773514
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 1.4816377460956573
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 0.9301579743623734
Validation accuracy: 0.9166666666666666

Traini

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.7936500906944275
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.29254749417305
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.2543003857135773
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.9855120182037354
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.8074787259101868
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.2355214059352875
Validation accuracy: 0.9166666666666666

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 4.100775837898254
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.2235942482948303
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.955058366060257
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.680706262588501
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.3838396966457367
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.1212007105350494
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.6991939544677734
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.2722312808036804
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.107132166624069
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.826846122741699
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.6097839176654816
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.2313065379858017
Validation accuracy: 0.9166666666666666

Training...
Param

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.8801643252372742
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6566298007965088
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5978153944015503
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6026515364646912
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5817893743515015
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5200940668582916
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.8356244564056396
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7186804413795471
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6425020098686218
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.5828311443328857
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.506591260433197
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.3915674090385437
Validation accuracy: 0.9166666666666666

Trai

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.9421226382255554
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7172004580497742
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.689562976360321
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5918107628822327
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6288599371910095
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5627644658088684
Validation accuracy: 0.9166666666666666

Training..

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7604283094406128
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6637329459190369
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.671137273311615
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.5526903867721558
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.4659606516361237
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 2e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.3528507053852081
Validation accuracy: 0.9166666666666666

Training...
Para

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.772575467824936
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 6.365324884653091
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 4.691273480653763
Validation accuracy: 1.0

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 1.4011413045227528
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 1.3098928891122341
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.002
Total loss: 1.6366648180410266
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 20, Learning rate: 5e-0

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 7.4007628262043
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 6.11849108338356
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 3.5890847593545914
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 1.4696389958262444
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 2.2259201519191265
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.005
Total loss: 2.02008441882208
Validation accuracy: 0.9166666666666666

Training...
Para

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.6234085857868195
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 6.324461102485657
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 3.8814009577035904
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 1.5427168868482113
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 2.5810469863936305
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.01
Total loss: 2.3489551916718483
Validation accuracy: 0.9166666666666666

Training...
Para

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 7.044384926557541
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 5.882004618644714
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 2.661160223186016
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 2.031252708286047
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 0.9543705452233553
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 8, Weight Decay: 0.1
Total loss: 1.4587854593992233
Validation accuracy: 0.8333333333333334

Training...
Parameters:
E

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.7380706667900085
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 3.0542505979537964
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 2.530057519674301
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 1.8235357999801636
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 1.049988031387329
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.002
Total loss: 0.3502963185310364
Validation accuracy: 0.8333333333333334

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.4263104796409607
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 3.022410660982132
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 2.280215859413147
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 1.578866884112358
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 0.7541668638586998
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.005
Total loss: 0.27017971128225327
Validation accuracy: 0.9166666666666666

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.8643699884414673
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 3.13474303483963
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 2.566564291715622
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 1.8968487679958344
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 0.9930144026875496
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.01
Total loss: 0.3488006070256233
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.680288791656494
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.25166392326355
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.2187929451465607
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 3.050081044435501
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 2.6280319690704346
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 16, Weight Decay: 0.1
Total loss: 1.8042092323303223
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.9725887775421143
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6407973170280457
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.4829767048358917
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.3276762962341309
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 1.1188583374023438
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.002
Total loss: 0.8863796591758728
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.9151906967163086
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6709522008895874
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.5120790302753448
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.3585091531276703
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.2492686212062836
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.005
Total loss: 1.0294203460216522
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.8912153840065002
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6669419407844543
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.4096911549568176
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.2854414582252502
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 1.0896069705486298
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.01
Total loss: 0.8990833461284637
Validation accuracy: 0.9166666666666666

Training.

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.7785266041755676
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.64568492770195
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.5323878228664398
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.4104278683662415
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 1.1466721594333649
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 5e-05, Batch size: 32, Weight Decay: 0.1
Total loss: 0.8456845283508301
Validation accuracy: 1.0

Training...
Parameters:
Epochs: 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 7.776828944683075
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.449459671974182
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.652325481176376
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.638223588466644
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 6.499643385410309
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.002
Total loss: 5.349720053374767
Validation accuracy: 1.0

Training...
Parameters:


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 7.702277839183807
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.730900913476944
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.714589089155197
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.78220534324646
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 6.48215851187706
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.005
Total loss: 7.1824846267700195
Validation accuracy: 0.9166666666666666

Training..

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 7.3323635160923
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.955686390399933
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 7.289746046066284
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.717028915882111
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.780761122703552
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.01
Total loss: 6.890278667211533
Validation accuracy: 0.9166666666666666

Training...
Param

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 7.5366769433021545
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 7.1903271079063416
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.888040482997894
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.854388952255249
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.6256119310855865
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 8, Weight Decay: 0.1
Total loss: 6.76293158531189
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.6297889947891235
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.422736793756485
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.348818838596344
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.4080119729042053
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 3.073944091796875
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.002
Total loss: 4.543990254402161
Validation accuracy: 0.9166666666666666



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.82620370388031
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 3.17138808965683
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 2.294385999441147
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 0.8812435269355774
Validation accuracy: 0.5

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 1.4312838856130838
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.005
Total loss: 1.2453252361156046
Validation accuracy: 0.9166666666666666

Training...
Param

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.440759688615799
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.0680583119392395
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 1.1971472725272179
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 0.24027376621961594
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.969208016525954
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.01
Total loss: 3.3330560326576233
Validation accuracy: 0.9166666666666666

Training...
Parame

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.7621671557426453
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.3300419747829437
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.0222074687480927
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 3.0819129943847656
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 1.21034075319767
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 16, Weight Decay: 0.1
Total loss: 0.10239108419045806
Validation accuracy: 0.9166666666666666

Training..

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.9644243717193604
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.8974878787994385
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6922567486763
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.5802248418331146
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.626011997461319
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7065391540527344
Validation accuracy: 0.9166666666666666

T

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7440783381462097
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7219170928001404
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 1.3217798173427582
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 0.7107264772057533
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 0.19958598166704178
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.005
Total loss: 0.07618100009858608
Validation accuracy: 0.9166666666666666

Training.

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7898241877555847
Validation accuracy: 0.5

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.966640055179596
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 2.001295566558838
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 2.2117762565612793
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.5895072221755981
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.01
Total loss: 1.4654586911201477
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.844559907913208
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6258562207221985
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 1.2964681088924408
Validation accuracy: 0.75

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 0.6408548355102539
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 0.3728506714105606
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0002, Batch size: 32, Weight Decay: 0.1
Total loss: 0.5843378826975822
Validation accuracy: 0.9166666666666666

Training...
Parameters:


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.16679897904396
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.2266353368759155
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.951248049736023
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.101246893405914
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.997777909040451
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.002
Total loss: 6.853575527667999
Validation accuracy: 0.9166666666666666

Training.

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.735130995512009
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.809909909963608
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.837293475866318
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.921887964010239
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.888384252786636
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.005
Total loss: 6.885950893163681
Validation accuracy: 0.9166666666666666

Training.

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 8.876012444496155
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 7.027336463332176
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.911076784133911
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.56924644112587
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.850429087877274
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.881725996732712
Validation accuracy: 0.9166666666666666

Training...
Para

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.860262349247932
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.950555711984634
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.900690734386444
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.721824377775192
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.9826385378837585
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 8, Weight Decay: 0.1
Total loss: 6.763279467821121
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.733476370573044
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 4.469538301229477
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.324956864118576
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.3970870971679688
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.3566898703575134
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.2693422734737396
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.6133024394512177
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.4233237206935883
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.379433512687683
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.4329459071159363
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.312164396047592
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.3635248839855194
Validation accuracy: 0.9166666666666666


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.97466242313385
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.5814604461193085
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.4607780277729034
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.3802219927310944
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.448821187019348
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.2672528624534607
Validation accuracy: 0.9166666666666666

Traini

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 4.054043412208557
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 4.359672397375107
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.493323266506195
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.300932854413986
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.4315360486507416
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.340793162584305
Validation accuracy: 0.9166666666666666

Training...
P

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.8960778713226318
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.7021187543869019
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.8587129712104797
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6253417432308197
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.8937776684761047
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.6919915676116943
Validation accuracy: 0.916666666666666

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 2.1390963196754456
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6412855982780457
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.768713355064392
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6950191259384155
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7129612565040588
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.6404173970222473
Validation accuracy: 0.9166666666666666

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.455158531665802
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.112726926803589
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.3375699520111084
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7001442909240723
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.7994232177734375
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.6850696504116058
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.8850399851799011
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.8313478827476501
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6392182409763336
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.723772943019867
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6446446180343628
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.0005, Batch size: 32, Weight Decay: 0.1
Total loss: 1.6698523461818695
Validation accuracy: 0.9166666666666666

Training..

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 30.162403345108032
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.197828866541386
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.977590471506119
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 8.378496631979942
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.723226815462112
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.002
Total loss: 7.773632377386093
Validation accuracy: 0.9166666666666666

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 19.17734071612358
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 9.277500629425049
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.910982117056847
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 8.001510202884674
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 7.894018709659576
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.005
Total loss: 8.000921308994293
Validation accuracy: 0.9166666666666666

Training...
Par

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 25.9164360165596
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 12.014239579439163
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 10.069948300719261
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 9.151891395449638
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 8.06135007739067
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.01
Total loss: 6.735739916563034
Validation accuracy: 0.9166666666666666

Training...
Paramete

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 24.50799250602722
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 10.520269185304642
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.8477378487586975
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.992059081792831
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.9356962740421295
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 8, Weight Decay: 0.1
Total loss: 7.636663496494293
Validation accuracy: 0.9166666666666666

Training...
Parameters:

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 11.13521933555603
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 5.9996980130672455
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 4.1331468522548676
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.8729476630687714
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 4.348424732685089
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.002
Total loss: 3.666343241930008
Validation accuracy: 0.9166666666666666

Train

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 10.349695920944214
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 6.106435716152191
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.5846579670906067
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.6253268718719482
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.7133335173130035
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.005
Total loss: 3.6607333421707153
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 21.359567523002625
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 4.245263457298279
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.387542188167572
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.4137156009674072
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.5998713970184326
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.01
Total loss: 3.5706712007522583
Validation accuracy: 0.9166666666666666

Training...

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 9.554680585861206
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 19.512747183442116
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 8.601797461509705
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 4.258604019880295
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.4543356597423553
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 16, Weight Decay: 0.1
Total loss: 3.377283364534378
Validation accuracy: 0.9166666666666666

Training...
Paramet

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 7.729346334934235
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 4.148017108440399
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 4.430699348449707
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 2.697658061981201
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.83938729763031
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.002
Total loss: 1.9040226340293884
Validation accuracy: 0.9166666666666666

Training

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 2.8338509798049927
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 12.796870470046997
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 4.339087337255478
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 7.569774270057678
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 8.925135493278503
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.005
Total loss: 1.7976329326629639
Validation accuracy: 0.9166666666666666

Tra

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 6.578895032405853
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 15.172421216964722
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 4.96329402923584
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 5.168459415435791
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 2.0042052268981934
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.01
Total loss: 1.911657691001892
Validation accuracy: 0.08333333333333333

Training...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 4.652902722358704
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 12.145523071289062
Validation accuracy: 0.08333333333333333

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 8.029756605625153
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 7.27954375743866
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 7.106004476547241
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Epochs: 20, Learning rate: 0.005, Batch size: 32, Weight Decay: 0.1
Total loss: 3.076737642288208
Validation accuracy: 0.08333333333333333

Training...
Parame

In [None]:
# Retrain from the pretrained checkpoint using the chosen ("optimal") hyperparameters.
epochs = 10
b_size = 32
weight_decay = 0.1
learning_rate = 5e-5

model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",          # 12-layer BERT model with an uncased vocab
    num_labels=2,                 # number of output labels
    output_attentions=False,      # do not return attention weights
    output_hidden_states=False,   # do not return all hidden states
)
model.cuda()
optimizer = AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

for epoch_i in range(epochs):
    # Announce the epoch, then make one full pass over the training set.
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    total_train_loss = 0  # running sum of the per-batch losses for this epoch
    model.train()

    # Walk the training set in consecutive slices of at most b_size examples;
    # striding over start offsets never produces an empty slice.
    for batch_start in range(0, len(train_set), b_size):
        batch = train_set[batch_start:batch_start + b_size]

        # Stack field 0 (input ids), field 1 (attention mask) and field 2
        # (label) of every example into one tensor each, then move to `device`.
        b_input_ids, b_input_mask, b_labels = (
            torch.stack([example[field] for example in batch]).to(device)
            for field in range(3)
        )

        # Gradients accumulate by default, so clear them before this step.
        model.zero_grad()

        # Forward pass; passing labels makes the model return the loss too.
        outputs = model(
            b_input_ids,
            token_type_ids=None,
            attention_mask=b_input_mask,
            labels=b_labels,
        )
        loss, logits = outputs.loss, outputs.logits

        total_train_loss += loss.item()

        # Backward pass followed by the parameter update.
        loss.backward()
        optimizer.step()

    # ----------------------------------------
    # End of epoch: report the summed training loss and the accuracy on the
    # validation set (computed by get_validation_performance, defined earlier).
    # ----------------------------------------
    print(f"Parameters:")
    print(f"Total loss: {total_train_loss}")
    val_acc = get_validation_performance(val_set, b_size)
    print(f"Validation accuracy: {val_acc}")

print("")
print("Training complete!")


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at


Training...
Parameters:
Total loss: 2.269478678703308
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Total loss: 1.7473209500312805
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Total loss: 1.5795238614082336
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Total loss: 1.4203079044818878
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Total loss: 1.2445179224014282
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Total loss: 0.9792076945304871
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Total loss: 0.7272263169288635
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Total loss: 0.4920831471681595
Validation accuracy: 0.8333333333333334

Training...
Parameters:
Total loss: 0.3618459478020668
Validation accuracy: 0.9166666666666666

Training...
Parameters:
Total loss: 0.26559726893901825
Validation accuracy: 0.8333333333333334

Training complete!


# Evaluation on the test set


In [None]:
# Score the held-out test set with the same helper and batch size used for the
# per-epoch validation check; as the cell's last expression, the returned value
# is displayed as the cell output.
get_validation_performance(test_set, b_size)

0.9166666666666666

In [None]:
##ERROR ANALYSIS CODE
def get_sentence_idx(val_set, b_size):
    """Return a boolean mask flagging the examples the model gets wrong.

    The global ``model`` is switched to evaluation mode and run over
    ``val_set`` in consecutive batches of at most ``b_size`` examples. For
    every example the arg-max of the logits is compared with the gold label.

    Args:
        val_set: Sequence of ``(input_ids, attention_mask, label)`` tensor
            triples, indexable by slice.
        b_size: Maximum number of examples per forward pass (must be > 0).

    Returns:
        A 1-D NumPy boolean array with one entry per example of ``val_set``,
        in the same order; ``True`` marks a prediction that differs from the
        label. An empty array is returned when ``val_set`` is empty.
    """
    model.eval()

    # One mask per batch; joined at the end so the result covers the WHOLE
    # set rather than only the final batch.
    batch_masks = []

    # Striding over start offsets never yields an empty slice.
    for start in range(0, len(val_set), b_size):
        batch = val_set[start:start + b_size]

        b_input_ids = torch.stack([data[0] for data in batch]).to(device)
        b_input_mask = torch.stack([data[1] for data in batch]).to(device)
        b_labels = torch.stack([data[2] for data in batch]).to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask,
                            labels=b_labels)

        logits = outputs.logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()

        # True where the predicted class differs from the gold label.
        batch_masks.append(pred_flat != labels_flat)

    if not batch_masks:
        # Empty input: return an empty mask instead of failing on an
        # unassigned local.
        return np.zeros(0, dtype=bool)
    return np.concatenate(batch_masks)

# Texts (and their gold labels) that go into the error-analysis set.
incorrect_text, ea_labels = [], []

# One 0/1 flag per test example as reported by get_sentence_idx
# (1 = prediction differed from the label).
idx = [int(flag) for flag in np.array(get_sentence_idx(test_set, b_size))]

# Keep only the (example, flag) pairs whose flag is set.
filtered_values = [pair for pair in zip(test_set, idx) if pair[1]]

# Recover the raw text and label of each flagged example by matching its
# token-id tensor against the token ids of the full dataset.
for position, candidate_ids in enumerate(input_ids):
    for wrong_example, _flag in filtered_values:
        if torch.equal(candidate_ids, wrong_example[0]):
            incorrect_text.append(texts[position])
            ea_labels.append(labels[position])

# Hand-written extra examples, each appended with label 0.
extra_texts = [
    "Wow, thanks for the feedback. I had no idea that my presentation skills were so terrible.#notreally",
    "Oh great, another meeting. I can't wait to sit through hours of mind-numbing presentations.",
    "I love sitting in traffic for hours. It gives me so much time to contemplate life's mysteries.",
    "Oh, I just love filling out paperwork. It's my favorite thing to do on a Friday afternoon.",
]
incorrect_text.extend(extra_texts)
ea_labels.extend([0] * len(extra_texts))

# Tokenize the collected texts and join the per-sentence pieces along dim 0.
ea_input_ids, ea_attention_masks = (
    torch.cat(pieces, dim=0) for pieces in tokenize_and_format(incorrect_text)
)
ea_labels = torch.tensor(ea_labels)

# Same (input_ids, attention_mask, label) layout as the other data splits,
# scored as a single batch.
analysis_set = list(zip(ea_input_ids, ea_attention_masks, ea_labels))
print(f"Error Analysis performace: {get_validation_performance(analysis_set,len(analysis_set))}")

# Show every collected example together with its gold label.
for i in range(len(incorrect_text)):
  print(f"Incorrect text: {incorrect_text[i]}, Actual label: {ea_labels[i]}")

Error Analysis performace: 0.0
Incorrect text: Trying to know all this history tonight is gonna kill me, Actual label: 0
Incorrect text: Wow, thanks for the feedback. I had no idea that my presentation skills were so terrible.#notreally, Actual label: 0
Incorrect text: Oh great, another meeting. I can't wait to sit through hours of mind-numbing presentations., Actual label: 0
Incorrect text: I love sitting in traffic for hours. It gives me so much time to contemplate life's mysteries., Actual label: 0
Incorrect text: Oh, I just love filling out paperwork. It's my favorite thing to do on a Friday afternoon., Actual label: 0
