# PURPOSE OF THIS NOTEBOOK 

This notebook takes the data needed for the individual 22 models and trains all the models to predict which job the titles belong to once the first model predicts the job family they belong to. There will be specific challenges I will need to look through, one of these specifically being that I need to figure how to benchmark these models. 

In [1]:
import pandas as pd
import numpy as np
from tqdm.autonotebook import tqdm
from transformers import BertTokenizer, BertModel, BertConfig
from pathlib import Path
from torch import cuda
import torch

from CommonFunctions import CustomDataset, BERTClass, loss_fn
from sklearn import metrics 
import transformers
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

  from tqdm.autonotebook import tqdm


In [2]:
# Setting up for GPU 

device = 'cuda' if cuda.is_available() else 'cpu'

In [3]:
device

'cuda'

In [4]:
# Load in the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')

In [5]:
# Set the parmeters of the models 
MAX_LEN = 175
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
EPOCHS = 30
LEARNING_RATE = 2e-5

# Training the models 

Just like generating the data, I would need to create a for loop that will call and train the models based on what I need to do. 

In [6]:
# import the training/testing data
test_df = pd.read_csv("../Data/MajorGroupTrainTestSplit/ONET_11_test_df.csv")
train_df = pd.read_csv("../Data/MajorGroupTrainTestSplit/ONET_11_train_df.csv")
label_df = pd.read_csv("../Data/label_df.csv")
onet_group_df = pd.read_csv("../Data/ONET_Group_list.csv")

In [7]:
onet_group_list = onet_group_df['# ONET_Group'].to_list()

In [8]:
# Making sure that the label column is actual lists instead of strings of lists
train_df['Label'] = train_df['Label'].apply(lambda s: [float(x.strip(' []')) for x in s.split(',')])
test_df['Label'] = test_df['Label'].apply(lambda s: [float(x.strip(' []')) for x in s.split(',')])

In [9]:
# Define the parameters of the data
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0}

test_params = {'batch_size': VALID_BATCH_SIZE,
               'shuffle': True,
               'num_workers': 0}

## Create the loop to train the data in bulk 

In [10]:
def train(epoch, model, training_loader, optimizer):
    model.train()
    for _,data in tqdm(enumerate(training_loader, 0)):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss= loss_fn(outputs, targets)
        if _%100==0:
            print(f'Epoch : {epoch}, Loss: {loss.item()}')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    return outputs

In [11]:
def validation(epoch, model, testing_loader):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _,data in tqdm(enumerate(testing_loader, 0)):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            
            outputs = model(ids, mask, token_type_ids)

            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets

In [12]:
# Start the for loop
for group in onet_group_list:
    # Import the data
    train_df = pd.read_csv(f"../Data/MajorGroupTrainTestSplit/ONET_{group}_train_df.csv")
    test_df = pd.read_csv(f"../Data/MajorGroupTrainTestSplit/ONET_{group}_test_df.csv")

    # Ensuring that the label columns contain lists not strings of lists
    train_df['Label'] = train_df['Label'].apply(lambda s: [float(x.strip(' []')) for x in s.split(',')])
    test_df['Label'] = test_df['Label'].apply(lambda s: [float(x.strip(' []')) for x in s.split(',')])

    # transform the training and testing data into the datasets needed for training 
    train_set = CustomDataset(train_df, tokenizer, MAX_LEN)
    test_set = CustomDataset(test_df, tokenizer, MAX_LEN)

    # Run the DataLoader on the train/test set pair
    training_loader = DataLoader(train_set, **train_params)
    testing_loader = DataLoader(test_set, **test_params)

    # load in the model
    model = BERTClass(len=train_df.Label.str.len()[0])
    model.to(device)

    # load in the optimizer
    optimizer = torch.optim.Adam(params = model.parameters(), lr=LEARNING_RATE)
    
    for epoch in range(EPOCHS):
        output = train(epoch, model, training_loader, optimizer)

    for epoch in range(EPOCHS):
        outputs, targets = validation(epoch, model, testing_loader)
        outputs = np.array(outputs) >= 0.5
        accuracy = metrics.accuracy_score(targets, outputs)
        f1_score_micro = metrics.f1_score(targets, outputs, average='micro')
        f1_score_macro = metrics.f1_score(targets, outputs, average='macro')
        print(f"Accuracy Score = {accuracy}")
        print(f"F1 Score (Micro) = {f1_score_micro}")
        print(f"F1 Score (Macro) = {f1_score_macro}")

    # Lastly save the model for later testing. 
    torch.save(model, f'../Data/Models/ONET_Group_{group}_Model')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulti

Epoch : 0, Loss: 0.7089774012565613


102it [00:29,  5.11it/s]

Epoch : 0, Loss: 0.21244443953037262


202it [00:49,  5.07it/s]

Epoch : 0, Loss: 0.12061012536287308


230it [00:55,  4.16it/s]
2it [00:00,  5.51it/s]

Epoch : 1, Loss: 0.11081593483686447


102it [00:21,  5.10it/s]

Epoch : 1, Loss: 0.0971708819270134


201it [00:42,  4.19it/s]

Epoch : 1, Loss: 0.08710701018571854


230it [00:48,  4.72it/s]
2it [00:00,  5.45it/s]

Epoch : 2, Loss: 0.09074664860963821


102it [00:21,  5.06it/s]

Epoch : 2, Loss: 0.08125755190849304


202it [00:42,  5.08it/s]

Epoch : 2, Loss: 0.08645759522914886


230it [00:47,  4.80it/s]
2it [00:00,  5.66it/s]

Epoch : 3, Loss: 0.07941947877407074


102it [00:20,  5.10it/s]

Epoch : 3, Loss: 0.07824013382196426


202it [00:41,  5.11it/s]

Epoch : 3, Loss: 0.09038057178258896


230it [00:47,  4.84it/s]
2it [00:00,  5.66it/s]

Epoch : 4, Loss: 0.08809497952461243


102it [00:21,  5.09it/s]

Epoch : 4, Loss: 0.0883762538433075


202it [00:41,  5.10it/s]

Epoch : 4, Loss: 0.0791606456041336


230it [00:47,  4.82it/s]
2it [00:00,  5.44it/s]

Epoch : 5, Loss: 0.0804608017206192


102it [00:21,  5.08it/s]

Epoch : 5, Loss: 0.08978083729743958


202it [00:41,  5.08it/s]

Epoch : 5, Loss: 0.08232443779706955


230it [00:47,  4.83it/s]
2it [00:00,  5.65it/s]

Epoch : 6, Loss: 0.07994910329580307


102it [00:21,  5.10it/s]

Epoch : 6, Loss: 0.08735538274049759


202it [00:42,  5.13it/s]

Epoch : 6, Loss: 0.08265131711959839


230it [00:47,  4.80it/s]
2it [00:00,  5.63it/s]

Epoch : 7, Loss: 0.08578933775424957


102it [00:21,  5.11it/s]

Epoch : 7, Loss: 0.0772174820303917


202it [00:41,  5.10it/s]

Epoch : 7, Loss: 0.0784277692437172


230it [00:47,  4.83it/s]
2it [00:00,  5.65it/s]

Epoch : 8, Loss: 0.07637666910886765


102it [00:21,  5.10it/s]

Epoch : 8, Loss: 0.0765056163072586


202it [00:42,  5.07it/s]

Epoch : 8, Loss: 0.07567501068115234


230it [00:47,  4.80it/s]
2it [00:00,  5.64it/s]

Epoch : 9, Loss: 0.07147356122732162


102it [00:21,  5.08it/s]

Epoch : 9, Loss: 0.0762755498290062


202it [00:41,  5.12it/s]

Epoch : 9, Loss: 0.05854521691799164


230it [00:47,  4.84it/s]
2it [00:00,  5.63it/s]

Epoch : 10, Loss: 0.06375117599964142


102it [00:20,  5.08it/s]

Epoch : 10, Loss: 0.06589062511920929


202it [00:41,  5.08it/s]

Epoch : 10, Loss: 0.06707984209060669


230it [00:47,  4.87it/s]
2it [00:00,  5.65it/s]

Epoch : 11, Loss: 0.06204074248671532


102it [00:20,  5.10it/s]

Epoch : 11, Loss: 0.04746146872639656


202it [00:41,  5.10it/s]

Epoch : 11, Loss: 0.048096779733896255


230it [00:47,  4.83it/s]
2it [00:00,  5.52it/s]

Epoch : 12, Loss: 0.048892367631196976


102it [00:21,  5.09it/s]

Epoch : 12, Loss: 0.04076846316456795


202it [00:41,  5.11it/s]

Epoch : 12, Loss: 0.03195078670978546


230it [00:47,  4.83it/s]
2it [00:00,  5.63it/s]

Epoch : 13, Loss: 0.04645548760890961


102it [00:21,  5.09it/s]

Epoch : 13, Loss: 0.03134867548942566


202it [00:41,  5.11it/s]

Epoch : 13, Loss: 0.04022226855158806


230it [00:47,  4.84it/s]
2it [00:00,  5.64it/s]

Epoch : 14, Loss: 0.02788889966905117


102it [00:21,  5.11it/s]

Epoch : 14, Loss: 0.027083687484264374


202it [00:41,  5.10it/s]

Epoch : 14, Loss: 0.045603323727846146


230it [00:47,  4.82it/s]
2it [00:00,  5.47it/s]

Epoch : 15, Loss: 0.041489552706480026


102it [00:20,  5.11it/s]

Epoch : 15, Loss: 0.02501952461898327


202it [00:41,  5.09it/s]

Epoch : 15, Loss: 0.020912162959575653


230it [00:47,  4.87it/s]
2it [00:00,  5.67it/s]

Epoch : 16, Loss: 0.031225046142935753


102it [00:20,  5.09it/s]

Epoch : 16, Loss: 0.03429204225540161


202it [00:41,  5.13it/s]

Epoch : 16, Loss: 0.022879932075738907


230it [00:47,  4.83it/s]
2it [00:00,  5.64it/s]

Epoch : 17, Loss: 0.03127896785736084


102it [00:20,  5.09it/s]

Epoch : 17, Loss: 0.018745800480246544


202it [00:41,  5.11it/s]

Epoch : 17, Loss: 0.017451196908950806


230it [00:47,  4.87it/s]
2it [00:00,  5.66it/s]

Epoch : 18, Loss: 0.012537647038698196


102it [00:21,  5.11it/s]

Epoch : 18, Loss: 0.0237045306712389


202it [00:41,  5.10it/s]

Epoch : 18, Loss: 0.014518214389681816


230it [00:47,  4.83it/s]
2it [00:00,  5.61it/s]

Epoch : 19, Loss: 0.014491011388599873


102it [00:20,  5.10it/s]

Epoch : 19, Loss: 0.02149171009659767


202it [00:41,  5.12it/s]

Epoch : 19, Loss: 0.023206239566206932


230it [00:47,  4.82it/s]
197it [00:07, 26.92it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 27.44it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 27.41it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.71it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.71it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.75it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.68it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 27.43it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.61it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.68it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.71it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.66it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.70it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.70it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.73it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.69it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.72it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 27.46it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 26.68it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


197it [00:07, 27.44it/s]


Accuracy Score = 0.6053299492385786
F1 Score (Micro) = 0.6790035587188611
F1 Score (Macro) = 0.6413612465959175


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
1it [00:00,  8.85it/s]

Epoch : 0, Loss: 0.6807284355163574


102it [00:20,  5.09it/s]

Epoch : 0, Loss: 0.20092272758483887


202it [00:41,  5.10it/s]

Epoch : 0, Loss: 0.12673570215702057


203it [00:41,  4.87it/s]
2it [00:00,  5.41it/s]

Epoch : 1, Loss: 0.1317046731710434


102it [00:20,  5.12it/s]

Epoch : 1, Loss: 0.10715879499912262


202it [00:41,  5.12it/s]

Epoch : 1, Loss: 0.09770117700099945


203it [00:41,  4.88it/s]
2it [00:00,  5.40it/s]

Epoch : 2, Loss: 0.09997352212667465


102it [00:20,  5.09it/s]

Epoch : 2, Loss: 0.10758304595947266


202it [00:41,  5.09it/s]

Epoch : 2, Loss: 0.09638700634241104


203it [00:41,  4.84it/s]
2it [00:00,  5.48it/s]

Epoch : 3, Loss: 0.09646473079919815


102it [00:20,  5.08it/s]

Epoch : 3, Loss: 0.09483487904071808


202it [00:41,  5.09it/s]

Epoch : 3, Loss: 0.09059467166662216


203it [00:41,  4.87it/s]
2it [00:00,  5.41it/s]

Epoch : 4, Loss: 0.09150274842977524


102it [00:20,  5.13it/s]

Epoch : 4, Loss: 0.09848722070455551


202it [00:41,  5.08it/s]

Epoch : 4, Loss: 0.09782367199659348


203it [00:41,  4.88it/s]
2it [00:00,  5.47it/s]

Epoch : 5, Loss: 0.09143129736185074


102it [00:20,  5.07it/s]

Epoch : 5, Loss: 0.08973901718854904


202it [00:41,  5.10it/s]

Epoch : 5, Loss: 0.08911239355802536


203it [00:41,  4.88it/s]
2it [00:00,  5.43it/s]

Epoch : 6, Loss: 0.08295263350009918


102it [00:20,  5.09it/s]

Epoch : 6, Loss: 0.07314392179250717


202it [00:41,  5.08it/s]

Epoch : 6, Loss: 0.08480656147003174


203it [00:41,  4.84it/s]
2it [00:00,  5.26it/s]

Epoch : 7, Loss: 0.08366027474403381


102it [00:21,  5.11it/s]

Epoch : 7, Loss: 0.0692862719297409


202it [00:41,  5.10it/s]

Epoch : 7, Loss: 0.061139531433582306


203it [00:41,  4.84it/s]
2it [00:00,  5.44it/s]

Epoch : 8, Loss: 0.059680741280317307


102it [00:20,  5.10it/s]

Epoch : 8, Loss: 0.08051834255456924


202it [00:41,  5.11it/s]

Epoch : 8, Loss: 0.05198710784316063


203it [00:41,  4.88it/s]
2it [00:00,  5.25it/s]

Epoch : 9, Loss: 0.053707242012023926


102it [00:21,  5.09it/s]

Epoch : 9, Loss: 0.0624593049287796


202it [00:42,  5.09it/s]

Epoch : 9, Loss: 0.05530943721532822


203it [00:42,  4.80it/s]
2it [00:00,  5.48it/s]

Epoch : 10, Loss: 0.046262674033641815


102it [00:21,  5.06it/s]

Epoch : 10, Loss: 0.046489372849464417


202it [00:41,  5.11it/s]

Epoch : 10, Loss: 0.05426524206995964


203it [00:41,  4.84it/s]
2it [00:00,  5.26it/s]

Epoch : 11, Loss: 0.045711059123277664


102it [00:20,  5.09it/s]

Epoch : 11, Loss: 0.03985753655433655


202it [00:41,  5.07it/s]

Epoch : 11, Loss: 0.04190864786505699


203it [00:41,  4.85it/s]
2it [00:00,  5.46it/s]

Epoch : 12, Loss: 0.035700444132089615


102it [00:21,  5.09it/s]

Epoch : 12, Loss: 0.04996400326490402


202it [00:41,  5.09it/s]

Epoch : 12, Loss: 0.042544081807136536


203it [00:41,  4.85it/s]
2it [00:00,  5.38it/s]

Epoch : 13, Loss: 0.03661729767918587


102it [00:21,  5.04it/s]

Epoch : 13, Loss: 0.02497880719602108


202it [00:42,  5.09it/s]

Epoch : 13, Loss: 0.027479538694024086


203it [00:42,  4.80it/s]
2it [00:00,  5.28it/s]

Epoch : 14, Loss: 0.029693638905882835


102it [00:20,  5.10it/s]

Epoch : 14, Loss: 0.02227739989757538


202it [00:41,  5.08it/s]

Epoch : 14, Loss: 0.03192726522684097


203it [00:41,  4.83it/s]
2it [00:00,  5.34it/s]

Epoch : 15, Loss: 0.026634760200977325


102it [00:21,  5.10it/s]

Epoch : 15, Loss: 0.032573867589235306


202it [00:41,  5.11it/s]

Epoch : 15, Loss: 0.021867146715521812


203it [00:42,  4.83it/s]
2it [00:00,  5.24it/s]

Epoch : 16, Loss: 0.022942842915654182


102it [00:20,  5.08it/s]

Epoch : 16, Loss: 0.017732303589582443


202it [00:41,  5.10it/s]

Epoch : 16, Loss: 0.029616454616189003


203it [00:41,  4.83it/s]
2it [00:00,  5.41it/s]

Epoch : 17, Loss: 0.019045403227210045


102it [00:20,  5.07it/s]

Epoch : 17, Loss: 0.03357759490609169


202it [00:41,  5.11it/s]

Epoch : 17, Loss: 0.025743432343006134


203it [00:41,  4.88it/s]
2it [00:00,  5.45it/s]

Epoch : 18, Loss: 0.04286637157201767


102it [00:21,  5.09it/s]

Epoch : 18, Loss: 0.01752801053225994


202it [00:41,  5.11it/s]

Epoch : 18, Loss: 0.022705277428030968


203it [00:42,  4.81it/s]
2it [00:00,  5.41it/s]

Epoch : 19, Loss: 0.02391432411968708


102it [00:21,  5.12it/s]

Epoch : 19, Loss: 0.028371863067150116


202it [00:41,  5.11it/s]

Epoch : 19, Loss: 0.024308379739522934


203it [00:42,  4.83it/s]
176it [00:06, 26.84it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.82it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 27.48it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 27.22it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 27.51it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.83it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.76it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 27.52it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.73it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.83it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.72it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 27.50it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 27.21it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.42it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.77it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.76it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.79it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 27.48it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 27.48it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


176it [00:06, 26.79it/s]


Accuracy Score = 0.6562054208273894
F1 Score (Micro) = 0.703902065799541
F1 Score (Macro) = 0.691842605523307


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.20it/s]

Epoch : 0, Loss: 0.6777293682098389


102it [00:21,  5.09it/s]

Epoch : 0, Loss: 0.19743195176124573


154it [00:32,  4.77it/s]
2it [00:00,  7.19it/s]

Epoch : 1, Loss: 0.1530732363462448


102it [00:20,  5.08it/s]

Epoch : 1, Loss: 0.13178850710391998


154it [00:31,  4.86it/s]
2it [00:00,  7.24it/s]

Epoch : 2, Loss: 0.12027265876531601


102it [00:21,  5.09it/s]

Epoch : 2, Loss: 0.12285108119249344


154it [00:32,  4.77it/s]
2it [00:00,  7.16it/s]

Epoch : 3, Loss: 0.12049208581447601


102it [00:21,  5.12it/s]

Epoch : 3, Loss: 0.12707215547561646


154it [00:32,  4.79it/s]
2it [00:00,  7.17it/s]

Epoch : 4, Loss: 0.11041544377803802


102it [00:21,  5.11it/s]

Epoch : 4, Loss: 0.1185028925538063


154it [00:31,  4.82it/s]
2it [00:00,  7.19it/s]

Epoch : 5, Loss: 0.12234324216842651


102it [00:21,  5.12it/s]

Epoch : 5, Loss: 0.12021443992853165


154it [00:31,  4.82it/s]
2it [00:00,  7.25it/s]

Epoch : 6, Loss: 0.10913839936256409


102it [00:21,  5.09it/s]

Epoch : 6, Loss: 0.11077883094549179


154it [00:32,  4.77it/s]
2it [00:00,  7.15it/s]

Epoch : 7, Loss: 0.10600187629461288


102it [00:21,  5.10it/s]

Epoch : 7, Loss: 0.0901934951543808


154it [00:32,  4.77it/s]
2it [00:00,  7.15it/s]

Epoch : 8, Loss: 0.08222265541553497


102it [00:20,  5.10it/s]

Epoch : 8, Loss: 0.0957736149430275


154it [00:31,  4.82it/s]
2it [00:00,  7.14it/s]

Epoch : 9, Loss: 0.08104060590267181


102it [00:21,  5.11it/s]

Epoch : 9, Loss: 0.10449535399675369


154it [00:31,  4.82it/s]
2it [00:00,  7.30it/s]

Epoch : 10, Loss: 0.07059600949287415


102it [00:21,  5.07it/s]

Epoch : 10, Loss: 0.05971401557326317


154it [00:32,  4.77it/s]
2it [00:00,  7.25it/s]

Epoch : 11, Loss: 0.08732432872056961


102it [00:20,  5.08it/s]

Epoch : 11, Loss: 0.062107715755701065


154it [00:31,  4.82it/s]
2it [00:00,  7.29it/s]

Epoch : 12, Loss: 0.07458017766475677


102it [00:20,  5.09it/s]

Epoch : 12, Loss: 0.06449198722839355


154it [00:32,  4.79it/s]
2it [00:00,  7.14it/s]

Epoch : 13, Loss: 0.05585296079516411


102it [00:21,  5.12it/s]

Epoch : 13, Loss: 0.05434083938598633


154it [00:32,  4.77it/s]
2it [00:00,  7.22it/s]

Epoch : 14, Loss: 0.07259812206029892


102it [00:21,  5.12it/s]

Epoch : 14, Loss: 0.05626187101006508


154it [00:31,  4.82it/s]
2it [00:00,  7.28it/s]

Epoch : 15, Loss: 0.03950312361121178


102it [00:21,  5.09it/s]

Epoch : 15, Loss: 0.045843787491321564


154it [00:31,  4.82it/s]
2it [00:00,  7.19it/s]

Epoch : 16, Loss: 0.05907105281949043


102it [00:20,  5.09it/s]

Epoch : 16, Loss: 0.05014267563819885


154it [00:31,  4.81it/s]
2it [00:00,  7.17it/s]

Epoch : 17, Loss: 0.03971930965781212


102it [00:21,  5.09it/s]

Epoch : 17, Loss: 0.034058261662721634


154it [00:32,  4.79it/s]
2it [00:00,  7.19it/s]

Epoch : 18, Loss: 0.03861552104353905


102it [00:21,  5.11it/s]

Epoch : 18, Loss: 0.026221109554171562


154it [00:31,  4.82it/s]
2it [00:00,  7.26it/s]

Epoch : 19, Loss: 0.04279148951172829


102it [00:21,  5.08it/s]

Epoch : 19, Loss: 0.04909730330109596


154it [00:32,  4.77it/s]
132it [00:04, 27.91it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 26.80it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 26.84it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 26.86it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 26.81it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 26.79it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 26.77it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 26.79it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.40it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 26.87it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.47it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.48it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.50it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.48it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.50it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.46it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.49it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.47it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 27.02it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


132it [00:04, 26.84it/s]


Accuracy Score = 0.44866920152091255
F1 Score (Micro) = 0.5375854214123006
F1 Score (Macro) = 0.46875016930986513


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.44it/s]

Epoch : 0, Loss: 0.7083056569099426


102it [00:21,  5.09it/s]

Epoch : 0, Loss: 0.21238656342029572


202it [00:41,  5.11it/s]

Epoch : 0, Loss: 0.12279976159334183


227it [00:46,  4.85it/s]
2it [00:00,  6.14it/s]

Epoch : 1, Loss: 0.1128784567117691


102it [00:21,  5.08it/s]

Epoch : 1, Loss: 0.09335911273956299


202it [00:41,  5.09it/s]

Epoch : 1, Loss: 0.09291267395019531


227it [00:47,  4.81it/s]
2it [00:00,  5.97it/s]

Epoch : 2, Loss: 0.09241210669279099


102it [00:20,  5.10it/s]

Epoch : 2, Loss: 0.08499246090650558


202it [00:41,  5.08it/s]

Epoch : 2, Loss: 0.08842676132917404


227it [00:46,  4.85it/s]
2it [00:00,  5.91it/s]

Epoch : 3, Loss: 0.08672945201396942


102it [00:20,  5.11it/s]

Epoch : 3, Loss: 0.0843815878033638


202it [00:41,  5.11it/s]

Epoch : 3, Loss: 0.08347276598215103


227it [00:46,  4.85it/s]
2it [00:00,  5.88it/s]

Epoch : 4, Loss: 0.08611779659986496


102it [00:21,  5.11it/s]

Epoch : 4, Loss: 0.08451106399297714


202it [00:41,  5.13it/s]

Epoch : 4, Loss: 0.08788115531206131


227it [00:46,  4.84it/s]
2it [00:00,  6.11it/s]

Epoch : 5, Loss: 0.08420203626155853


102it [00:20,  5.07it/s]

Epoch : 5, Loss: 0.0873989388346672


202it [00:41,  5.06it/s]

Epoch : 5, Loss: 0.08213884383440018


227it [00:46,  4.85it/s]
2it [00:00,  5.96it/s]

Epoch : 6, Loss: 0.08243168890476227


102it [00:20,  5.06it/s]

Epoch : 6, Loss: 0.08172561973333359


202it [00:41,  5.10it/s]

Epoch : 6, Loss: 0.0816645473241806


227it [00:46,  4.84it/s]
2it [00:00,  5.92it/s]

Epoch : 7, Loss: 0.07504741102457047


102it [00:21,  5.11it/s]

Epoch : 7, Loss: 0.07246093451976776


202it [00:41,  5.11it/s]

Epoch : 7, Loss: 0.07658813148736954


227it [00:46,  4.84it/s]
2it [00:00,  6.06it/s]

Epoch : 8, Loss: 0.0763954147696495


102it [00:20,  5.11it/s]

Epoch : 8, Loss: 0.07381010800600052


202it [00:41,  5.12it/s]

Epoch : 8, Loss: 0.06938344240188599


227it [00:47,  4.82it/s]
2it [00:00,  6.11it/s]

Epoch : 9, Loss: 0.0725753903388977


102it [00:20,  5.10it/s]

Epoch : 9, Loss: 0.06846287846565247


202it [00:41,  5.11it/s]

Epoch : 9, Loss: 0.060113754123449326


227it [00:46,  4.86it/s]
2it [00:00,  6.12it/s]

Epoch : 10, Loss: 0.06102978065609932


102it [00:20,  5.08it/s]

Epoch : 10, Loss: 0.06336382776498795


202it [00:40,  5.07it/s]

Epoch : 10, Loss: 0.06415512412786484


227it [00:45,  4.95it/s]
2it [00:00,  6.35it/s]

Epoch : 11, Loss: 0.05735030397772789


102it [00:20,  5.11it/s]

Epoch : 11, Loss: 0.053221993148326874


202it [00:40,  5.10it/s]

Epoch : 11, Loss: 0.05822620540857315


227it [00:45,  4.98it/s]
2it [00:00,  6.39it/s]

Epoch : 12, Loss: 0.061624348163604736


102it [00:20,  5.08it/s]

Epoch : 12, Loss: 0.05264665186405182


202it [00:40,  5.11it/s]

Epoch : 12, Loss: 0.0602053627371788


227it [00:45,  4.98it/s]
2it [00:00,  6.33it/s]

Epoch : 13, Loss: 0.0392608679831028


102it [00:20,  5.11it/s]

Epoch : 13, Loss: 0.05857383832335472


202it [00:40,  5.07it/s]

Epoch : 13, Loss: 0.055255286395549774


227it [00:45,  4.98it/s]
2it [00:00,  6.32it/s]

Epoch : 14, Loss: 0.05370219424366951


102it [00:20,  5.11it/s]

Epoch : 14, Loss: 0.06314250081777573


202it [00:40,  5.09it/s]

Epoch : 14, Loss: 0.04880797490477562


227it [00:45,  4.98it/s]
2it [00:00,  6.39it/s]

Epoch : 15, Loss: 0.04536422714591026


102it [00:20,  5.10it/s]

Epoch : 15, Loss: 0.043260879814624786


202it [00:40,  5.12it/s]

Epoch : 15, Loss: 0.036496199667453766


227it [00:45,  4.99it/s]
2it [00:00,  6.33it/s]

Epoch : 16, Loss: 0.03535193204879761


102it [00:20,  5.08it/s]

Epoch : 16, Loss: 0.041548389941453934


202it [00:40,  5.08it/s]

Epoch : 16, Loss: 0.03630903363227844


227it [00:45,  4.98it/s]
2it [00:00,  6.41it/s]

Epoch : 17, Loss: 0.03486117720603943


102it [00:20,  5.09it/s]

Epoch : 17, Loss: 0.04220319911837578


202it [00:40,  5.08it/s]

Epoch : 17, Loss: 0.03357976675033569


227it [00:45,  4.98it/s]
2it [00:00,  6.33it/s]

Epoch : 18, Loss: 0.02765124849975109


102it [00:20,  5.08it/s]

Epoch : 18, Loss: 0.03365191072225571


202it [00:40,  5.08it/s]

Epoch : 18, Loss: 0.03230234980583191


227it [00:45,  4.98it/s]
2it [00:00,  6.38it/s]

Epoch : 19, Loss: 0.020419461652636528


102it [00:20,  5.10it/s]

Epoch : 19, Loss: 0.028181198984384537


202it [00:40,  5.11it/s]

Epoch : 19, Loss: 0.04820151999592781


227it [00:45,  4.99it/s]
195it [00:06, 28.15it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.39it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.47it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.45it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.26it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.44it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.51it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.48it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.49it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.50it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.47it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.48it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.48it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.50it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.52it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.47it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.46it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.45it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.46it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


195it [00:06, 28.46it/s]


Accuracy Score = 0.44787644787644787
F1 Score (Micro) = 0.5515055467511885
F1 Score (Macro) = 0.4655267578067646


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.06it/s]

Epoch : 0, Loss: 0.7193882465362549


102it [00:20,  5.11it/s]

Epoch : 0, Loss: 0.21643760800361633


202it [00:40,  5.12it/s]

Epoch : 0, Loss: 0.11570293456315994


225it [00:45,  4.99it/s]
2it [00:00,  6.40it/s]

Epoch : 1, Loss: 0.11053453385829926


102it [00:20,  5.06it/s]

Epoch : 1, Loss: 0.08711454272270203


202it [00:40,  5.11it/s]

Epoch : 1, Loss: 0.08018036931753159


225it [00:45,  4.95it/s]
2it [00:00,  6.40it/s]

Epoch : 2, Loss: 0.08168156445026398


102it [00:20,  5.09it/s]

Epoch : 2, Loss: 0.08127397298812866


202it [00:40,  5.11it/s]

Epoch : 2, Loss: 0.07893472909927368


225it [00:45,  4.98it/s]
2it [00:00,  6.37it/s]

Epoch : 3, Loss: 0.08136798441410065


102it [00:20,  5.07it/s]

Epoch : 3, Loss: 0.07745444774627686


202it [00:40,  5.09it/s]

Epoch : 3, Loss: 0.0807909145951271


225it [00:45,  4.97it/s]
2it [00:00,  6.37it/s]

Epoch : 4, Loss: 0.07717615365982056


102it [00:20,  5.12it/s]

Epoch : 4, Loss: 0.07732251286506653


202it [00:40,  5.10it/s]

Epoch : 4, Loss: 0.08034443110227585


225it [00:45,  4.98it/s]
2it [00:00,  6.39it/s]

Epoch : 5, Loss: 0.07421204447746277


102it [00:20,  5.10it/s]

Epoch : 5, Loss: 0.07845637202262878


202it [00:40,  5.12it/s]

Epoch : 5, Loss: 0.07798336446285248


225it [00:45,  4.98it/s]
2it [00:00,  6.39it/s]

Epoch : 6, Loss: 0.06844336539506912


102it [00:20,  5.10it/s]

Epoch : 6, Loss: 0.0776432529091835


202it [00:40,  5.11it/s]

Epoch : 6, Loss: 0.06878889352083206


225it [00:45,  4.98it/s]
2it [00:00,  6.36it/s]

Epoch : 7, Loss: 0.07223907113075256


102it [00:20,  5.10it/s]

Epoch : 7, Loss: 0.0719829797744751


202it [00:40,  5.10it/s]

Epoch : 7, Loss: 0.06304700672626495


225it [00:45,  4.99it/s]
2it [00:00,  6.18it/s]

Epoch : 8, Loss: 0.06311707943677902


102it [00:20,  5.13it/s]

Epoch : 8, Loss: 0.06800312548875809


202it [00:40,  5.11it/s]

Epoch : 8, Loss: 0.05752251669764519


225it [00:45,  4.98it/s]
2it [00:00,  6.43it/s]

Epoch : 9, Loss: 0.06894027441740036


102it [00:20,  5.09it/s]

Epoch : 9, Loss: 0.05810938775539398


202it [00:40,  5.09it/s]

Epoch : 9, Loss: 0.05596497654914856


225it [00:45,  4.98it/s]
2it [00:00,  6.36it/s]

Epoch : 10, Loss: 0.060507770627737045


102it [00:20,  5.11it/s]

Epoch : 10, Loss: 0.0574471540749073


202it [00:40,  5.08it/s]

Epoch : 10, Loss: 0.06421320885419846


225it [00:45,  4.98it/s]
2it [00:00,  6.36it/s]

Epoch : 11, Loss: 0.047401417046785355


102it [00:20,  5.09it/s]

Epoch : 11, Loss: 0.047870934009552


202it [00:40,  5.08it/s]

Epoch : 11, Loss: 0.04915299639105797


225it [00:45,  4.98it/s]
2it [00:00,  6.37it/s]

Epoch : 12, Loss: 0.048120222985744476


102it [00:20,  5.11it/s]

Epoch : 12, Loss: 0.04478936642408371


202it [00:40,  5.08it/s]

Epoch : 12, Loss: 0.04341857135295868


225it [00:45,  4.98it/s]
2it [00:00,  6.36it/s]

Epoch : 13, Loss: 0.04580353945493698


102it [00:20,  5.12it/s]

Epoch : 13, Loss: 0.03722377493977547


202it [00:40,  5.07it/s]

Epoch : 13, Loss: 0.04451756924390793


225it [00:45,  4.98it/s]
2it [00:00,  6.38it/s]

Epoch : 14, Loss: 0.03929435834288597


102it [00:20,  5.10it/s]

Epoch : 14, Loss: 0.0461902841925621


202it [00:40,  5.09it/s]

Epoch : 14, Loss: 0.032761555165052414


225it [00:45,  4.98it/s]
2it [00:00,  6.48it/s]

Epoch : 15, Loss: 0.024379359558224678


102it [00:20,  5.09it/s]

Epoch : 15, Loss: 0.03373829647898674


202it [00:40,  5.08it/s]

Epoch : 15, Loss: 0.02624685689806938


225it [00:45,  4.98it/s]
2it [00:00,  6.36it/s]

Epoch : 16, Loss: 0.041231442242860794


102it [00:20,  5.11it/s]

Epoch : 16, Loss: 0.017171667888760567


202it [00:40,  5.09it/s]

Epoch : 16, Loss: 0.02348259650170803


225it [00:45,  4.98it/s]
2it [00:00,  6.36it/s]

Epoch : 17, Loss: 0.028445685282349586


102it [00:20,  5.12it/s]

Epoch : 17, Loss: 0.03281375393271446


202it [00:40,  5.07it/s]

Epoch : 17, Loss: 0.03549984097480774


225it [00:45,  4.97it/s]
2it [00:00,  6.35it/s]

Epoch : 18, Loss: 0.018195178359746933


102it [00:20,  5.07it/s]

Epoch : 18, Loss: 0.02032177522778511


202it [00:40,  5.11it/s]

Epoch : 18, Loss: 0.031659893691539764


225it [00:45,  4.98it/s]
2it [00:00,  6.38it/s]

Epoch : 19, Loss: 0.025956979021430016


102it [00:20,  5.09it/s]

Epoch : 19, Loss: 0.02655024453997612


202it [00:40,  5.09it/s]

Epoch : 19, Loss: 0.036567334085702896


225it [00:45,  4.98it/s]
194it [00:06, 27.91it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.38it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.41it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.39it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.40it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.31it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.40it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.43it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.38it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.40it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.40it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.38it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.41it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.44it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.34it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.34it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.38it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.29it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.38it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


194it [00:06, 28.40it/s]


Accuracy Score = 0.5129198966408268
F1 Score (Micro) = 0.6266771902131018
F1 Score (Macro) = 0.5185183928323335


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.85it/s]

Epoch : 0, Loss: 0.7374112606048584


97it [00:19,  5.02it/s]
2it [00:00,  5.91it/s]

Epoch : 1, Loss: 0.2346244603395462


97it [00:19,  4.99it/s]
2it [00:00,  5.90it/s]

Epoch : 2, Loss: 0.21170258522033691


97it [00:19,  4.99it/s]
2it [00:00,  5.90it/s]

Epoch : 3, Loss: 0.20777948200702667


97it [00:19,  4.99it/s]
2it [00:00,  5.82it/s]

Epoch : 4, Loss: 0.19366326928138733


97it [00:19,  4.98it/s]
2it [00:00,  5.83it/s]

Epoch : 5, Loss: 0.18268612027168274


97it [00:19,  4.98it/s]
2it [00:00,  5.90it/s]

Epoch : 6, Loss: 0.14445184171199799


97it [00:19,  4.97it/s]
2it [00:00,  5.93it/s]

Epoch : 7, Loss: 0.1284777820110321


97it [00:19,  4.98it/s]
2it [00:00,  5.95it/s]

Epoch : 8, Loss: 0.12220446020364761


97it [00:19,  4.99it/s]
2it [00:00,  5.90it/s]

Epoch : 9, Loss: 0.10459387302398682


97it [00:19,  4.99it/s]
2it [00:00,  5.92it/s]

Epoch : 10, Loss: 0.08862876147031784


97it [00:19,  4.99it/s]
2it [00:00,  5.91it/s]

Epoch : 11, Loss: 0.068991519510746


97it [00:19,  4.99it/s]
2it [00:00,  5.95it/s]

Epoch : 12, Loss: 0.08162499219179153


97it [00:19,  4.98it/s]
2it [00:00,  5.90it/s]

Epoch : 13, Loss: 0.06413653492927551


97it [00:19,  4.98it/s]
2it [00:00,  5.97it/s]

Epoch : 14, Loss: 0.07155834138393402


97it [00:19,  4.98it/s]
2it [00:00,  5.92it/s]

Epoch : 15, Loss: 0.06883704662322998


97it [00:19,  4.99it/s]
2it [00:00,  5.90it/s]

Epoch : 16, Loss: 0.0813136100769043


97it [00:19,  4.99it/s]
2it [00:00,  5.91it/s]

Epoch : 17, Loss: 0.06890501081943512


97it [00:19,  4.99it/s]
2it [00:00,  5.90it/s]

Epoch : 18, Loss: 0.07982329279184341


97it [00:19,  4.99it/s]
2it [00:00,  5.83it/s]

Epoch : 19, Loss: 0.06008445471525192


97it [00:19,  4.99it/s]
83it [00:03, 27.29it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.50it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.54it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.55it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.51it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.55it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.55it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.50it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.54it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.53it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.48it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.42it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.46it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.50it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.46it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.35it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.46it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.50it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.49it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


83it [00:02, 28.26it/s]


Accuracy Score = 0.5015105740181269
F1 Score (Micro) = 0.5704467353951891
F1 Score (Macro) = 0.5661822597269457


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.89it/s]

Epoch : 0, Loss: 0.7342578768730164


32it [00:06,  5.11it/s]
2it [00:00,  5.49it/s]

Epoch : 1, Loss: 0.3781522512435913


32it [00:06,  4.98it/s]
2it [00:00,  5.54it/s]

Epoch : 2, Loss: 0.3303341269493103


32it [00:06,  4.98it/s]
2it [00:00,  5.51it/s]

Epoch : 3, Loss: 0.3257991671562195


32it [00:06,  4.98it/s]
2it [00:00,  5.53it/s]

Epoch : 4, Loss: 0.2645248770713806


32it [00:06,  4.98it/s]
2it [00:00,  5.53it/s]

Epoch : 5, Loss: 0.23276613652706146


32it [00:06,  4.98it/s]
2it [00:00,  5.35it/s]

Epoch : 6, Loss: 0.21878185868263245


32it [00:06,  4.98it/s]
2it [00:00,  5.50it/s]

Epoch : 7, Loss: 0.1407853662967682


32it [00:06,  4.99it/s]
2it [00:00,  5.48it/s]

Epoch : 8, Loss: 0.13906943798065186


32it [00:06,  4.98it/s]
2it [00:00,  5.51it/s]

Epoch : 9, Loss: 0.1226966381072998


32it [00:06,  4.99it/s]
2it [00:00,  5.49it/s]

Epoch : 10, Loss: 0.09706176817417145


32it [00:06,  4.99it/s]
2it [00:00,  5.52it/s]

Epoch : 11, Loss: 0.09641768038272858


32it [00:06,  4.99it/s]
2it [00:00,  5.51it/s]

Epoch : 12, Loss: 0.05895031988620758


32it [00:06,  5.00it/s]
2it [00:00,  5.46it/s]

Epoch : 13, Loss: 0.07122386991977692


32it [00:06,  4.98it/s]
2it [00:00,  5.48it/s]

Epoch : 14, Loss: 0.06693226844072342


32it [00:06,  4.99it/s]
2it [00:00,  5.46it/s]

Epoch : 15, Loss: 0.04202864319086075


32it [00:06,  4.98it/s]
2it [00:00,  5.54it/s]

Epoch : 16, Loss: 0.06352046132087708


32it [00:06,  4.99it/s]
2it [00:00,  5.53it/s]

Epoch : 17, Loss: 0.048295311629772186


32it [00:06,  4.99it/s]
2it [00:00,  5.52it/s]

Epoch : 18, Loss: 0.1255388855934143


32it [00:06,  4.99it/s]
2it [00:00,  5.47it/s]

Epoch : 19, Loss: 0.032511286437511444


32it [00:06,  4.98it/s]
28it [00:01, 24.39it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.57it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.48it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.43it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.28it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.43it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.57it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.40it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.31it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.40it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.37it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.37it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.40it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.37it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.46it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.37it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.31it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:01, 27.83it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.40it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


28it [00:00, 28.25it/s]


Accuracy Score = 0.8571428571428571
F1 Score (Micro) = 0.8687782805429863
F1 Score (Macro) = 0.8252230965054839


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.81it/s]

Epoch : 0, Loss: 0.7421707510948181


102it [00:20,  5.10it/s]

Epoch : 0, Loss: 0.20682190358638763


202it [00:40,  5.08it/s]

Epoch : 0, Loss: 0.11057090014219284


295it [00:59,  4.99it/s]
2it [00:00,  5.66it/s]

Epoch : 1, Loss: 0.09027126431465149


102it [00:20,  5.12it/s]

Epoch : 1, Loss: 0.08350856602191925


202it [00:40,  5.10it/s]

Epoch : 1, Loss: 0.0761725977063179


295it [00:59,  4.98it/s]
2it [00:00,  5.66it/s]

Epoch : 2, Loss: 0.08183745294809341


102it [00:20,  5.08it/s]

Epoch : 2, Loss: 0.07465408742427826


202it [00:40,  5.09it/s]

Epoch : 2, Loss: 0.0761958584189415


295it [00:59,  4.98it/s]
2it [00:00,  5.64it/s]

Epoch : 3, Loss: 0.07293931394815445


102it [00:20,  5.10it/s]

Epoch : 3, Loss: 0.07353004068136215


202it [00:40,  5.09it/s]

Epoch : 3, Loss: 0.0769413560628891


295it [00:59,  4.97it/s]
2it [00:00,  5.69it/s]

Epoch : 4, Loss: 0.0775367021560669


102it [00:20,  5.12it/s]

Epoch : 4, Loss: 0.07515837997198105


202it [00:40,  5.12it/s]

Epoch : 4, Loss: 0.07445880770683289


295it [00:59,  4.98it/s]
2it [00:00,  5.63it/s]

Epoch : 5, Loss: 0.07447832077741623


102it [00:20,  5.12it/s]

Epoch : 5, Loss: 0.0743083506822586


202it [00:40,  5.07it/s]

Epoch : 5, Loss: 0.07160616666078568


295it [00:59,  4.95it/s]
2it [00:00,  5.62it/s]

Epoch : 6, Loss: 0.07309405505657196


102it [00:20,  5.11it/s]

Epoch : 6, Loss: 0.073999784886837


202it [00:40,  5.06it/s]

Epoch : 6, Loss: 0.06869203597307205


295it [00:59,  4.97it/s]
2it [00:00,  5.68it/s]

Epoch : 7, Loss: 0.06911608576774597


102it [00:20,  5.10it/s]

Epoch : 7, Loss: 0.06820708513259888


202it [00:40,  5.10it/s]

Epoch : 7, Loss: 0.06737951934337616


295it [00:59,  4.98it/s]
2it [00:00,  5.65it/s]

Epoch : 8, Loss: 0.07171665877103806


102it [00:20,  5.09it/s]

Epoch : 8, Loss: 0.06426219642162323


202it [00:40,  5.09it/s]

Epoch : 8, Loss: 0.05862685665488243


295it [00:59,  4.97it/s]
2it [00:00,  5.64it/s]

Epoch : 9, Loss: 0.052219171077013016


102it [00:20,  5.07it/s]

Epoch : 9, Loss: 0.05013250187039375


202it [00:40,  5.07it/s]

Epoch : 9, Loss: 0.050296202301979065


295it [00:59,  4.97it/s]
2it [00:00,  5.66it/s]

Epoch : 10, Loss: 0.058730948716402054


102it [00:20,  5.11it/s]

Epoch : 10, Loss: 0.05980445444583893


202it [00:40,  5.11it/s]

Epoch : 10, Loss: 0.05619730055332184


295it [00:59,  4.97it/s]
2it [00:00,  5.68it/s]

Epoch : 11, Loss: 0.05172451213002205


102it [00:20,  5.10it/s]

Epoch : 11, Loss: 0.04881852865219116


202it [00:40,  5.11it/s]

Epoch : 11, Loss: 0.04016197472810745


295it [00:59,  4.97it/s]
2it [00:00,  5.67it/s]

Epoch : 12, Loss: 0.05930387228727341


102it [00:20,  5.11it/s]

Epoch : 12, Loss: 0.042211420834064484


202it [00:40,  5.11it/s]

Epoch : 12, Loss: 0.039417020976543427


295it [00:59,  4.98it/s]
2it [00:00,  5.65it/s]

Epoch : 13, Loss: 0.051238492131233215


102it [00:20,  5.09it/s]

Epoch : 13, Loss: 0.04989607259631157


202it [00:40,  5.08it/s]

Epoch : 13, Loss: 0.0453525073826313


295it [00:59,  4.98it/s]
2it [00:00,  5.65it/s]

Epoch : 14, Loss: 0.03490583971142769


102it [00:20,  5.12it/s]

Epoch : 14, Loss: 0.042782485485076904


202it [00:40,  5.11it/s]

Epoch : 14, Loss: 0.043933622539043427


295it [00:59,  4.97it/s]
2it [00:00,  5.67it/s]

Epoch : 15, Loss: 0.027992647141218185


102it [00:20,  5.09it/s]

Epoch : 15, Loss: 0.02679448015987873


202it [00:40,  5.11it/s]

Epoch : 15, Loss: 0.03949114307761192


295it [00:59,  4.97it/s]
2it [00:00,  5.63it/s]

Epoch : 16, Loss: 0.02884676307439804


102it [00:20,  5.10it/s]

Epoch : 16, Loss: 0.034853745251894


202it [00:40,  5.08it/s]

Epoch : 16, Loss: 0.021707966923713684


295it [00:59,  4.97it/s]
2it [00:00,  5.67it/s]

Epoch : 17, Loss: 0.028302796185016632


102it [00:20,  5.07it/s]

Epoch : 17, Loss: 0.019083721563220024


202it [00:40,  5.10it/s]

Epoch : 17, Loss: 0.02284192107617855


295it [00:59,  4.97it/s]
2it [00:00,  5.67it/s]

Epoch : 18, Loss: 0.028561560437083244


102it [00:20,  5.09it/s]

Epoch : 18, Loss: 0.05128897354006767


202it [00:40,  5.11it/s]

Epoch : 18, Loss: 0.03002219647169113


295it [00:59,  4.97it/s]
2it [00:00,  5.62it/s]

Epoch : 19, Loss: 0.036485232412815094


102it [00:20,  5.09it/s]

Epoch : 19, Loss: 0.022449493408203125


202it [00:40,  5.12it/s]

Epoch : 19, Loss: 0.019948814064264297


295it [00:59,  4.97it/s]
253it [00:09, 27.86it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.24it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.27it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.33it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.29it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.31it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.33it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.32it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.36it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.36it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.32it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.29it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.34it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.36it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.32it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.32it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.35it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.31it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.29it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


253it [00:08, 28.33it/s]


Accuracy Score = 0.437190900098912
F1 Score (Micro) = 0.5536159600997506
F1 Score (Macro) = 0.47891703258128


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  7.33it/s]

Epoch : 0, Loss: 0.7160239219665527


102it [00:20,  5.09it/s]

Epoch : 0, Loss: 0.20348411798477173


198it [00:39,  4.98it/s]
2it [00:00,  5.64it/s]

Epoch : 1, Loss: 0.13035465776920319


102it [00:20,  5.07it/s]

Epoch : 1, Loss: 0.110659159719944


198it [00:39,  4.98it/s]
2it [00:00,  5.70it/s]

Epoch : 2, Loss: 0.11013451218605042


102it [00:20,  5.10it/s]

Epoch : 2, Loss: 0.09732816368341446


198it [00:39,  4.98it/s]
2it [00:00,  5.69it/s]

Epoch : 3, Loss: 0.10146883130073547


102it [00:20,  5.10it/s]

Epoch : 3, Loss: 0.1074308529496193


198it [00:39,  4.98it/s]
2it [00:00,  5.69it/s]

Epoch : 4, Loss: 0.11451052874326706


102it [00:20,  5.07it/s]

Epoch : 4, Loss: 0.09279558807611465


198it [00:39,  4.97it/s]
2it [00:00,  5.62it/s]

Epoch : 5, Loss: 0.08443359285593033


102it [00:20,  5.08it/s]

Epoch : 5, Loss: 0.08886578679084778


198it [00:39,  4.97it/s]
2it [00:00,  5.67it/s]

Epoch : 6, Loss: 0.07856016606092453


102it [00:20,  5.08it/s]

Epoch : 6, Loss: 0.07433491945266724


198it [00:39,  4.98it/s]
2it [00:00,  5.71it/s]

Epoch : 7, Loss: 0.07756400853395462


102it [00:20,  5.08it/s]

Epoch : 7, Loss: 0.07227198779582977


198it [00:39,  4.98it/s]
2it [00:00,  5.64it/s]

Epoch : 8, Loss: 0.055024806410074234


102it [00:20,  5.10it/s]

Epoch : 8, Loss: 0.05982959643006325


198it [00:39,  4.97it/s]
2it [00:00,  5.60it/s]

Epoch : 9, Loss: 0.04472094401717186


102it [00:20,  5.10it/s]

Epoch : 9, Loss: 0.06441406905651093


198it [00:39,  4.98it/s]
2it [00:00,  5.65it/s]

Epoch : 10, Loss: 0.045030441135168076


102it [00:20,  5.10it/s]

Epoch : 10, Loss: 0.05014808475971222


198it [00:39,  4.98it/s]
2it [00:00,  5.65it/s]

Epoch : 11, Loss: 0.04000604897737503


102it [00:20,  5.11it/s]

Epoch : 11, Loss: 0.04614511877298355


198it [00:39,  4.98it/s]
2it [00:00,  5.69it/s]

Epoch : 12, Loss: 0.038624588400125504


102it [00:20,  5.09it/s]

Epoch : 12, Loss: 0.052068520337343216


198it [00:39,  4.97it/s]
2it [00:00,  5.66it/s]

Epoch : 13, Loss: 0.03508816286921501


102it [00:20,  5.10it/s]

Epoch : 13, Loss: 0.023964449763298035


198it [00:39,  4.97it/s]
2it [00:00,  5.66it/s]

Epoch : 14, Loss: 0.027792293578386307


102it [00:20,  5.11it/s]

Epoch : 14, Loss: 0.018817830830812454


198it [00:39,  4.98it/s]
2it [00:00,  5.64it/s]

Epoch : 15, Loss: 0.019469067454338074


102it [00:20,  5.10it/s]

Epoch : 15, Loss: 0.032385826110839844


198it [00:39,  4.98it/s]
2it [00:00,  5.64it/s]

Epoch : 16, Loss: 0.027773747220635414


102it [00:20,  5.09it/s]

Epoch : 16, Loss: 0.03931668773293495


198it [00:39,  4.97it/s]
2it [00:00,  5.66it/s]

Epoch : 17, Loss: 0.01650114171206951


102it [00:20,  5.07it/s]

Epoch : 17, Loss: 0.0344090461730957


198it [00:39,  4.97it/s]
2it [00:00,  5.72it/s]

Epoch : 18, Loss: 0.02104172296822071


102it [00:20,  5.10it/s]

Epoch : 18, Loss: 0.01593271642923355


198it [00:39,  4.98it/s]
2it [00:00,  5.68it/s]

Epoch : 19, Loss: 0.028399523347616196


102it [00:20,  5.10it/s]

Epoch : 19, Loss: 0.022249937057495117


198it [00:39,  4.98it/s]
170it [00:06, 27.48it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.50it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.53it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.52it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.52it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.51it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.44it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.49it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.38it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.47it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.49it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.48it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.45it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.46it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.48it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:06, 27.48it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.45it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.49it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.41it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


170it [00:05, 28.47it/s]


Accuracy Score = 0.6533923303834809
F1 Score (Micro) = 0.7203252032520326
F1 Score (Macro) = 0.6300377318546069


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.85it/s]

Epoch : 0, Loss: 0.7145983576774597


102it [00:20,  5.13it/s]

Epoch : 0, Loss: 0.22267809510231018


202it [00:40,  5.08it/s]

Epoch : 0, Loss: 0.09956308454275131


243it [00:48,  4.99it/s]
2it [00:00,  5.96it/s]

Epoch : 1, Loss: 0.08709769695997238


102it [00:20,  5.10it/s]

Epoch : 1, Loss: 0.07102403044700623


202it [00:40,  5.09it/s]

Epoch : 1, Loss: 0.06400562822818756


243it [00:48,  4.97it/s]
2it [00:00,  5.85it/s]

Epoch : 2, Loss: 0.06319493055343628


102it [00:20,  5.07it/s]

Epoch : 2, Loss: 0.061915040016174316


202it [00:40,  5.07it/s]

Epoch : 2, Loss: 0.058732666075229645


243it [00:48,  4.98it/s]
2it [00:00,  5.86it/s]

Epoch : 3, Loss: 0.06036604195833206


102it [00:20,  5.11it/s]

Epoch : 3, Loss: 0.05739263445138931


202it [00:40,  5.10it/s]

Epoch : 3, Loss: 0.06085386499762535


243it [00:48,  4.98it/s]
2it [00:00,  5.91it/s]

Epoch : 4, Loss: 0.056608837097883224


102it [00:20,  5.10it/s]

Epoch : 4, Loss: 0.059272997081279755


202it [00:40,  5.11it/s]

Epoch : 4, Loss: 0.05831734091043472


243it [00:48,  4.98it/s]
2it [00:00,  6.00it/s]

Epoch : 5, Loss: 0.056567948311567307


102it [00:20,  5.11it/s]

Epoch : 5, Loss: 0.058667730540037155


202it [00:40,  5.08it/s]

Epoch : 5, Loss: 0.05460029095411301


243it [00:48,  4.98it/s]
2it [00:00,  5.89it/s]

Epoch : 6, Loss: 0.05257910117506981


102it [00:20,  5.08it/s]

Epoch : 6, Loss: 0.05681322142481804


202it [00:40,  5.08it/s]

Epoch : 6, Loss: 0.05323413014411926


243it [00:48,  4.97it/s]
2it [00:00,  5.98it/s]

Epoch : 7, Loss: 0.05377698689699173


102it [00:20,  5.10it/s]

Epoch : 7, Loss: 0.05852301046252251


202it [00:40,  5.08it/s]

Epoch : 7, Loss: 0.05280910059809685


243it [00:48,  4.98it/s]
2it [00:00,  5.94it/s]

Epoch : 8, Loss: 0.055320702493190765


102it [00:20,  5.10it/s]

Epoch : 8, Loss: 0.05092267692089081


202it [00:40,  5.10it/s]

Epoch : 8, Loss: 0.05281074345111847


243it [00:48,  4.98it/s]
2it [00:00,  5.89it/s]

Epoch : 9, Loss: 0.05467047542333603


102it [00:20,  5.09it/s]

Epoch : 9, Loss: 0.05560830235481262


202it [00:40,  5.12it/s]

Epoch : 9, Loss: 0.05858875438570976


243it [00:48,  4.98it/s]
2it [00:00,  5.92it/s]

Epoch : 10, Loss: 0.05181782692670822


102it [00:20,  5.12it/s]

Epoch : 10, Loss: 0.0527750663459301


202it [00:40,  5.07it/s]

Epoch : 10, Loss: 0.0481623038649559


243it [00:48,  4.98it/s]
2it [00:00,  5.86it/s]

Epoch : 11, Loss: 0.05087921768426895


102it [00:20,  5.10it/s]

Epoch : 11, Loss: 0.0487406961619854


202it [00:40,  5.07it/s]

Epoch : 11, Loss: 0.04635573923587799


243it [00:48,  4.99it/s]
2it [00:00,  5.95it/s]

Epoch : 12, Loss: 0.045358285307884216


102it [00:20,  5.09it/s]

Epoch : 12, Loss: 0.04520414397120476


202it [00:40,  5.09it/s]

Epoch : 12, Loss: 0.03871766850352287


243it [00:48,  4.98it/s]
2it [00:00,  5.95it/s]

Epoch : 13, Loss: 0.04436538740992546


102it [00:20,  5.06it/s]

Epoch : 13, Loss: 0.041425131261348724


202it [00:40,  5.11it/s]

Epoch : 13, Loss: 0.042303234338760376


243it [00:48,  4.98it/s]
2it [00:00,  5.85it/s]

Epoch : 14, Loss: 0.047778673470020294


102it [00:20,  5.10it/s]

Epoch : 14, Loss: 0.03787357360124588


202it [00:40,  5.07it/s]

Epoch : 14, Loss: 0.03269452601671219


243it [00:48,  4.98it/s]
2it [00:00,  5.85it/s]

Epoch : 15, Loss: 0.04213199019432068


102it [00:20,  5.09it/s]

Epoch : 15, Loss: 0.0357077531516552


202it [00:40,  5.09it/s]

Epoch : 15, Loss: 0.03814791142940521


243it [00:48,  4.97it/s]
2it [00:00,  5.93it/s]

Epoch : 16, Loss: 0.035639967769384384


102it [00:20,  5.11it/s]

Epoch : 16, Loss: 0.031126491725444794


202it [00:40,  5.12it/s]

Epoch : 16, Loss: 0.030986910685896873


243it [00:48,  4.98it/s]
2it [00:00,  5.87it/s]

Epoch : 17, Loss: 0.03135944530367851


102it [00:20,  5.10it/s]

Epoch : 17, Loss: 0.025740887969732285


202it [00:40,  5.11it/s]

Epoch : 17, Loss: 0.028004657477140427


243it [00:48,  4.98it/s]
2it [00:00,  5.91it/s]

Epoch : 18, Loss: 0.02662757784128189


102it [00:20,  5.06it/s]

Epoch : 18, Loss: 0.02554190717637539


202it [00:40,  5.12it/s]

Epoch : 18, Loss: 0.024974506348371506


243it [00:48,  4.99it/s]
2it [00:00,  5.81it/s]

Epoch : 19, Loss: 0.024994928389787674


102it [00:20,  5.10it/s]

Epoch : 19, Loss: 0.02112232707440853


202it [00:40,  5.10it/s]

Epoch : 19, Loss: 0.01811479777097702


243it [00:48,  4.97it/s]
210it [00:07, 27.94it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.40it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.42it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.39it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.42it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.40it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.47it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.44it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.42it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.42it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.45it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.44it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.38it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.36it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.37it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.33it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.44it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.42it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.42it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


210it [00:07, 28.41it/s]


Accuracy Score = 0.36873508353221957
F1 Score (Micro) = 0.51802179379715
F1 Score (Macro) = 0.2958570956664107


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.85it/s]

Epoch : 0, Loss: 0.7792951464653015


61it [00:12,  5.04it/s]
2it [00:00,  5.51it/s]

Epoch : 1, Loss: 0.28514763712882996


61it [00:12,  4.98it/s]
2it [00:00,  5.51it/s]

Epoch : 2, Loss: 0.22018566727638245


61it [00:12,  4.98it/s]
2it [00:00,  5.49it/s]

Epoch : 3, Loss: 0.2078043669462204


61it [00:12,  4.99it/s]
2it [00:00,  5.46it/s]

Epoch : 4, Loss: 0.19344685971736908


61it [00:12,  4.98it/s]
2it [00:00,  5.42it/s]

Epoch : 5, Loss: 0.18789899349212646


61it [00:12,  4.98it/s]
2it [00:00,  5.47it/s]

Epoch : 6, Loss: 0.17958879470825195


61it [00:12,  4.98it/s]
2it [00:00,  5.48it/s]

Epoch : 7, Loss: 0.17192469537258148


61it [00:12,  4.99it/s]
2it [00:00,  5.54it/s]

Epoch : 8, Loss: 0.15755735337734222


61it [00:12,  4.98it/s]
2it [00:00,  5.49it/s]

Epoch : 9, Loss: 0.14214952290058136


61it [00:12,  4.97it/s]
2it [00:00,  5.47it/s]

Epoch : 10, Loss: 0.1329347938299179


61it [00:12,  4.98it/s]
2it [00:00,  5.48it/s]

Epoch : 11, Loss: 0.10447978973388672


61it [00:12,  4.99it/s]
2it [00:00,  5.50it/s]

Epoch : 12, Loss: 0.0904400572180748


61it [00:12,  4.98it/s]
2it [00:00,  5.46it/s]

Epoch : 13, Loss: 0.08521787077188492


61it [00:12,  4.98it/s]
2it [00:00,  5.48it/s]

Epoch : 14, Loss: 0.08566270768642426


61it [00:12,  4.97it/s]
2it [00:00,  5.47it/s]

Epoch : 15, Loss: 0.06731970608234406


61it [00:12,  4.97it/s]
2it [00:00,  5.46it/s]

Epoch : 16, Loss: 0.06750044971704483


61it [00:12,  4.97it/s]
2it [00:00,  5.43it/s]

Epoch : 17, Loss: 0.051818132400512695


61it [00:12,  4.97it/s]
2it [00:00,  5.54it/s]

Epoch : 18, Loss: 0.051469601690769196


61it [00:12,  4.97it/s]
2it [00:00,  5.43it/s]

Epoch : 19, Loss: 0.06060903146862984


61it [00:12,  4.98it/s]
53it [00:02, 26.02it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.34it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.46it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.31it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.43it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.34it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.36it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.34it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.45it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.36it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.40it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.36it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.33it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.33it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.46it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.37it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.31it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.39it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.38it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


53it [00:01, 28.34it/s]


Accuracy Score = 0.6933962264150944
F1 Score (Micro) = 0.7616580310880828
F1 Score (Macro) = 0.7846776001548875


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.97it/s]

Epoch : 0, Loss: 0.7081350088119507


102it [00:20,  5.02it/s]


Epoch : 0, Loss: 0.2015475332736969


2it [00:00,  5.86it/s]

Epoch : 1, Loss: 0.19673436880111694


102it [00:20,  4.98it/s]


Epoch : 1, Loss: 0.16475817561149597


2it [00:00,  5.85it/s]

Epoch : 2, Loss: 0.1657751053571701


102it [00:20,  4.98it/s]


Epoch : 2, Loss: 0.1600761115550995


2it [00:00,  5.91it/s]

Epoch : 3, Loss: 0.14699232578277588


102it [00:20,  4.98it/s]


Epoch : 3, Loss: 0.14867419004440308


2it [00:00,  5.94it/s]

Epoch : 4, Loss: 0.1512478142976761


102it [00:20,  4.98it/s]


Epoch : 4, Loss: 0.14893054962158203


2it [00:00,  5.91it/s]

Epoch : 5, Loss: 0.12868648767471313


102it [00:20,  4.99it/s]


Epoch : 5, Loss: 0.12686145305633545


2it [00:00,  5.88it/s]

Epoch : 6, Loss: 0.12686830759048462


102it [00:20,  4.99it/s]


Epoch : 6, Loss: 0.12603633105754852


2it [00:00,  5.90it/s]

Epoch : 7, Loss: 0.1302935779094696


102it [00:20,  4.99it/s]


Epoch : 7, Loss: 0.11490543186664581


2it [00:00,  5.91it/s]

Epoch : 8, Loss: 0.10680822283029556


102it [00:20,  4.99it/s]


Epoch : 8, Loss: 0.09065887331962585


2it [00:00,  5.91it/s]

Epoch : 9, Loss: 0.09959463030099869


102it [00:20,  4.98it/s]


Epoch : 9, Loss: 0.07997357845306396


2it [00:00,  5.85it/s]

Epoch : 10, Loss: 0.10459588468074799


102it [00:20,  4.98it/s]


Epoch : 10, Loss: 0.05973061919212341


2it [00:00,  5.89it/s]

Epoch : 11, Loss: 0.07787453383207321


102it [00:20,  4.98it/s]


Epoch : 11, Loss: 0.06674236059188843


2it [00:00,  5.89it/s]

Epoch : 12, Loss: 0.08846309036016464


102it [00:20,  4.98it/s]


Epoch : 12, Loss: 0.06442582607269287


2it [00:00,  5.90it/s]

Epoch : 13, Loss: 0.06566964834928513


102it [00:20,  4.98it/s]


Epoch : 13, Loss: 0.0603686086833477


2it [00:00,  5.91it/s]

Epoch : 14, Loss: 0.05605841055512428


102it [00:20,  4.98it/s]


Epoch : 14, Loss: 0.08109348267316818


2it [00:00,  5.95it/s]

Epoch : 15, Loss: 0.05122460052371025


102it [00:20,  4.99it/s]


Epoch : 15, Loss: 0.05421329662203789


2it [00:00,  5.92it/s]

Epoch : 16, Loss: 0.04505765810608864


102it [00:20,  4.99it/s]


Epoch : 16, Loss: 0.0887208878993988


2it [00:00,  5.89it/s]

Epoch : 17, Loss: 0.06445791572332382


102it [00:20,  4.98it/s]


Epoch : 17, Loss: 0.04617045074701309


2it [00:00,  5.77it/s]

Epoch : 18, Loss: 0.030356116592884064


102it [00:20,  4.98it/s]


Epoch : 18, Loss: 0.032720860093832016


2it [00:00,  5.88it/s]

Epoch : 19, Loss: 0.04586528241634369


102it [00:20,  4.98it/s]


Epoch : 19, Loss: 0.04545144364237785


87it [00:03, 27.33it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.44it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.49it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.49it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.52it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.43it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.48it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.53it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.53it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.33it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.51it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.49it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.53it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.46it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.46it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.48it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.52it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.50it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.46it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


87it [00:03, 28.49it/s]


Accuracy Score = 0.5994236311239193
F1 Score (Micro) = 0.6613672496025437
F1 Score (Macro) = 0.5962133662385084


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.89it/s]

Epoch : 0, Loss: 0.6494648456573486


65it [00:13,  4.93it/s]
2it [00:00,  5.52it/s]

Epoch : 1, Loss: 0.269973486661911


65it [00:13,  4.99it/s]
2it [00:00,  5.48it/s]

Epoch : 2, Loss: 0.22409462928771973


65it [00:13,  4.98it/s]
2it [00:00,  5.49it/s]

Epoch : 3, Loss: 0.1980500966310501


65it [00:13,  4.98it/s]
2it [00:00,  5.44it/s]

Epoch : 4, Loss: 0.2032117396593094


65it [00:13,  4.98it/s]
2it [00:00,  5.55it/s]

Epoch : 5, Loss: 0.17424437403678894


65it [00:13,  4.98it/s]
2it [00:00,  5.50it/s]

Epoch : 6, Loss: 0.1937369555234909


65it [00:13,  4.98it/s]
2it [00:00,  5.52it/s]

Epoch : 7, Loss: 0.14692527055740356


65it [00:13,  4.97it/s]
2it [00:00,  5.54it/s]

Epoch : 8, Loss: 0.16042333841323853


65it [00:13,  4.97it/s]
2it [00:00,  5.49it/s]

Epoch : 9, Loss: 0.12327289581298828


65it [00:13,  4.97it/s]
2it [00:00,  5.48it/s]

Epoch : 10, Loss: 0.09967800229787827


65it [00:13,  4.97it/s]
2it [00:00,  5.52it/s]

Epoch : 11, Loss: 0.10901496559381485


65it [00:13,  4.97it/s]
2it [00:00,  5.52it/s]

Epoch : 12, Loss: 0.09265483915805817


65it [00:13,  4.98it/s]
2it [00:00,  5.49it/s]

Epoch : 13, Loss: 0.10579881072044373


65it [00:13,  4.98it/s]
2it [00:00,  5.54it/s]

Epoch : 14, Loss: 0.077049620449543


65it [00:13,  4.98it/s]
2it [00:00,  5.52it/s]

Epoch : 15, Loss: 0.0637747198343277


65it [00:13,  4.98it/s]
2it [00:00,  5.46it/s]

Epoch : 16, Loss: 0.06910312920808792


65it [00:13,  4.98it/s]
2it [00:00,  5.55it/s]

Epoch : 17, Loss: 0.07785569876432419


65it [00:13,  4.98it/s]
2it [00:00,  5.52it/s]

Epoch : 18, Loss: 0.0834755003452301


65it [00:13,  4.97it/s]
2it [00:00,  5.50it/s]

Epoch : 19, Loss: 0.0756642296910286


65it [00:13,  4.97it/s]
56it [00:02, 26.43it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.59it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.50it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.63it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.63it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.57it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.60it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.50it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.53it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.59it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.56it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.57it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.62it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.60it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.60it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.64it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.72it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.60it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.62it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


56it [00:01, 28.62it/s]


Accuracy Score = 0.481981981981982
F1 Score (Micro) = 0.5661375661375663
F1 Score (Macro) = 0.4634570162347941


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.85it/s]

Epoch : 0, Loss: 0.7669519186019897


50it [00:09,  5.06it/s]
2it [00:00,  5.27it/s]

Epoch : 1, Loss: 0.34484031796455383


50it [00:10,  4.97it/s]
2it [00:00,  5.25it/s]

Epoch : 2, Loss: 0.31828221678733826


50it [00:10,  4.98it/s]
2it [00:00,  5.29it/s]

Epoch : 3, Loss: 0.3073035776615143


50it [00:10,  4.98it/s]
2it [00:00,  5.20it/s]

Epoch : 4, Loss: 0.2643454968929291


50it [00:10,  4.98it/s]
2it [00:00,  5.25it/s]

Epoch : 5, Loss: 0.20542536675930023


50it [00:10,  4.98it/s]
2it [00:00,  5.27it/s]

Epoch : 6, Loss: 0.20420312881469727


50it [00:10,  4.98it/s]
2it [00:00,  5.24it/s]

Epoch : 7, Loss: 0.11877746880054474


50it [00:10,  4.97it/s]
2it [00:00,  5.28it/s]

Epoch : 8, Loss: 0.13567723333835602


50it [00:10,  4.97it/s]
2it [00:00,  5.27it/s]

Epoch : 9, Loss: 0.07887466251850128


50it [00:10,  4.98it/s]
2it [00:00,  5.23it/s]

Epoch : 10, Loss: 0.0712658166885376


50it [00:10,  4.98it/s]
2it [00:00,  5.32it/s]

Epoch : 11, Loss: 0.0657050609588623


50it [00:10,  4.97it/s]
2it [00:00,  5.23it/s]

Epoch : 12, Loss: 0.05050181224942207


50it [00:10,  4.96it/s]
2it [00:00,  5.22it/s]

Epoch : 13, Loss: 0.10318809002637863


50it [00:10,  4.97it/s]
2it [00:00,  5.23it/s]

Epoch : 14, Loss: 0.07174618542194366


50it [00:10,  4.96it/s]
2it [00:00,  5.22it/s]

Epoch : 15, Loss: 0.04738176614046097


50it [00:10,  4.97it/s]
2it [00:00,  5.23it/s]

Epoch : 16, Loss: 0.038313157856464386


50it [00:10,  4.96it/s]
2it [00:00,  5.23it/s]

Epoch : 17, Loss: 0.09691204875707626


50it [00:10,  4.96it/s]
2it [00:00,  5.24it/s]

Epoch : 18, Loss: 0.04923580214381218


50it [00:10,  4.97it/s]
2it [00:00,  5.24it/s]

Epoch : 19, Loss: 0.029120808467268944


50it [00:10,  4.97it/s]
44it [00:01, 25.70it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.65it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.56it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.76it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.61it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.65it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.70it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.60it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.61it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.78it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.61it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.76it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.70it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.61it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.74it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.63it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.63it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.65it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.70it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


44it [00:01, 28.66it/s]


Accuracy Score = 0.7471264367816092
F1 Score (Micro) = 0.7820895522388059
F1 Score (Macro) = 0.6869758491818365


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.89it/s]

Epoch : 0, Loss: 0.6711263656616211


102it [00:20,  5.08it/s]

Epoch : 0, Loss: 0.20124442875385284


127it [00:25,  5.01it/s]
2it [00:00,  6.41it/s]

Epoch : 1, Loss: 0.18439629673957825


102it [00:20,  5.09it/s]

Epoch : 1, Loss: 0.13935978710651398


127it [00:25,  4.98it/s]
2it [00:00,  6.40it/s]

Epoch : 2, Loss: 0.1470838338136673


102it [00:20,  5.09it/s]

Epoch : 2, Loss: 0.1395493894815445


127it [00:25,  4.98it/s]
2it [00:00,  6.34it/s]

Epoch : 3, Loss: 0.12408486753702164


102it [00:20,  5.07it/s]

Epoch : 3, Loss: 0.12789961695671082


127it [00:25,  4.99it/s]
2it [00:00,  6.36it/s]

Epoch : 4, Loss: 0.1284230500459671


102it [00:20,  5.13it/s]

Epoch : 4, Loss: 0.1356988251209259


127it [00:25,  4.99it/s]
2it [00:00,  6.46it/s]

Epoch : 5, Loss: 0.11066276580095291


102it [00:20,  5.10it/s]

Epoch : 5, Loss: 0.12302983552217484


127it [00:25,  4.99it/s]
2it [00:00,  6.37it/s]

Epoch : 6, Loss: 0.1214723140001297


102it [00:20,  5.10it/s]

Epoch : 6, Loss: 0.10287896543741226


127it [00:25,  4.99it/s]
2it [00:00,  6.26it/s]

Epoch : 7, Loss: 0.0851655974984169


102it [00:20,  5.12it/s]

Epoch : 7, Loss: 0.09183396399021149


127it [00:25,  4.99it/s]
2it [00:00,  6.35it/s]

Epoch : 8, Loss: 0.07971194386482239


102it [00:20,  5.07it/s]

Epoch : 8, Loss: 0.08447271585464478


127it [00:25,  4.99it/s]
2it [00:00,  6.35it/s]

Epoch : 9, Loss: 0.07310860604047775


102it [00:20,  5.08it/s]

Epoch : 9, Loss: 0.07241746783256531


127it [00:25,  4.98it/s]
2it [00:00,  6.37it/s]

Epoch : 10, Loss: 0.08620218932628632


102it [00:20,  5.08it/s]

Epoch : 10, Loss: 0.04436023160815239


127it [00:25,  4.98it/s]
2it [00:00,  6.31it/s]

Epoch : 11, Loss: 0.06626157462596893


102it [00:20,  5.11it/s]

Epoch : 11, Loss: 0.05140332505106926


127it [00:25,  4.99it/s]
2it [00:00,  6.40it/s]

Epoch : 12, Loss: 0.04967256635427475


102it [00:20,  5.09it/s]

Epoch : 12, Loss: 0.05825773999094963


127it [00:25,  4.99it/s]
2it [00:00,  6.36it/s]

Epoch : 13, Loss: 0.04787517338991165


102it [00:20,  5.11it/s]

Epoch : 13, Loss: 0.04620940610766411


127it [00:25,  4.99it/s]
2it [00:00,  6.39it/s]

Epoch : 14, Loss: 0.04155541583895683


102it [00:20,  5.09it/s]

Epoch : 14, Loss: 0.027377337217330933


127it [00:25,  4.98it/s]
2it [00:00,  6.38it/s]

Epoch : 15, Loss: 0.042940326035022736


102it [00:20,  5.11it/s]

Epoch : 15, Loss: 0.04073922708630562


127it [00:25,  4.99it/s]
2it [00:00,  6.35it/s]

Epoch : 16, Loss: 0.02748614363372326


102it [00:20,  5.10it/s]

Epoch : 16, Loss: 0.04549545422196388


127it [00:25,  4.98it/s]
2it [00:00,  6.39it/s]

Epoch : 17, Loss: 0.027959460392594337


102it [00:20,  5.11it/s]

Epoch : 17, Loss: 0.027285344898700714


127it [00:25,  5.00it/s]
2it [00:00,  6.40it/s]

Epoch : 18, Loss: 0.0311575997620821


102it [00:20,  5.11it/s]

Epoch : 18, Loss: 0.021853096783161163


127it [00:25,  4.99it/s]
2it [00:00,  6.31it/s]

Epoch : 19, Loss: 0.024360261857509613


102it [00:20,  5.09it/s]

Epoch : 19, Loss: 0.043900638818740845


127it [00:25,  4.99it/s]
109it [00:03, 27.86it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.65it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.60it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.58it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.62it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.58it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.59it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.59it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.62it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.68it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.64it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.43it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.54it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.56it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.60it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.60it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.62it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.60it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.56it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


109it [00:03, 28.41it/s]


Accuracy Score = 0.6859122401847575
F1 Score (Micro) = 0.7528517110266159
F1 Score (Macro) = 0.6586666515950315


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.89it/s]

Epoch : 0, Loss: 0.7162240147590637


102it [00:20,  5.06it/s]

Epoch : 0, Loss: 0.21036256849765778


127it [00:25,  5.00it/s]
2it [00:00,  5.90it/s]

Epoch : 1, Loss: 0.19211417436599731


102it [00:20,  5.09it/s]

Epoch : 1, Loss: 0.17861926555633545


127it [00:25,  4.98it/s]
2it [00:00,  5.90it/s]

Epoch : 2, Loss: 0.1803390085697174


102it [00:20,  5.10it/s]

Epoch : 2, Loss: 0.1558525413274765


127it [00:25,  4.98it/s]
2it [00:00,  5.90it/s]

Epoch : 3, Loss: 0.16745318472385406


102it [00:20,  5.09it/s]

Epoch : 3, Loss: 0.13914138078689575


127it [00:25,  4.98it/s]
2it [00:00,  5.90it/s]

Epoch : 4, Loss: 0.17736445367336273


102it [00:20,  5.10it/s]

Epoch : 4, Loss: 0.1284181773662567


127it [00:25,  4.98it/s]
2it [00:00,  5.88it/s]

Epoch : 5, Loss: 0.14481204748153687


102it [00:20,  5.08it/s]

Epoch : 5, Loss: 0.14305861294269562


127it [00:25,  4.98it/s]
2it [00:00,  5.89it/s]

Epoch : 6, Loss: 0.10152337700128555


102it [00:20,  5.11it/s]

Epoch : 6, Loss: 0.1096419245004654


127it [00:25,  4.98it/s]
2it [00:00,  5.89it/s]

Epoch : 7, Loss: 0.11594749987125397


102it [00:20,  5.10it/s]

Epoch : 7, Loss: 0.0956830382347107


127it [00:25,  4.99it/s]
2it [00:00,  5.88it/s]

Epoch : 8, Loss: 0.09217026084661484


102it [00:20,  5.09it/s]

Epoch : 8, Loss: 0.05282377079129219


127it [00:25,  4.99it/s]
2it [00:00,  5.94it/s]

Epoch : 9, Loss: 0.07902837544679642


102it [00:20,  5.08it/s]

Epoch : 9, Loss: 0.0699273869395256


127it [00:25,  4.99it/s]
2it [00:00,  5.90it/s]

Epoch : 10, Loss: 0.08884409070014954


102it [00:20,  5.10it/s]

Epoch : 10, Loss: 0.04240524768829346


127it [00:25,  4.98it/s]
2it [00:00,  5.95it/s]

Epoch : 11, Loss: 0.06432239711284637


102it [00:20,  5.08it/s]

Epoch : 11, Loss: 0.08822416514158249


127it [00:25,  4.98it/s]
2it [00:00,  5.89it/s]

Epoch : 12, Loss: 0.056847188621759415


102it [00:20,  5.08it/s]

Epoch : 12, Loss: 0.04403112456202507


127it [00:25,  4.98it/s]
2it [00:00,  5.81it/s]

Epoch : 13, Loss: 0.07651391625404358


102it [00:20,  5.09it/s]

Epoch : 13, Loss: 0.03354092687368393


127it [00:25,  4.98it/s]
2it [00:00,  5.93it/s]

Epoch : 14, Loss: 0.04740755632519722


102it [00:20,  5.10it/s]

Epoch : 14, Loss: 0.05448289215564728


127it [00:25,  4.98it/s]
2it [00:00,  6.00it/s]

Epoch : 15, Loss: 0.037706051021814346


102it [00:20,  5.10it/s]

Epoch : 15, Loss: 0.024574631825089455


127it [00:25,  4.99it/s]
2it [00:00,  5.95it/s]

Epoch : 16, Loss: 0.05546809360384941


102it [00:20,  5.10it/s]

Epoch : 16, Loss: 0.047430165112018585


127it [00:25,  4.99it/s]
2it [00:00,  5.99it/s]

Epoch : 17, Loss: 0.042821113020181656


102it [00:20,  5.09it/s]

Epoch : 17, Loss: 0.04721083864569664


127it [00:25,  4.98it/s]
2it [00:00,  5.90it/s]

Epoch : 18, Loss: 0.027021639049053192


102it [00:20,  5.10it/s]

Epoch : 18, Loss: 0.016326704993844032


127it [00:25,  4.98it/s]
2it [00:00,  5.95it/s]

Epoch : 19, Loss: 0.02234332449734211


102it [00:20,  5.07it/s]

Epoch : 19, Loss: 0.015761448070406914


127it [00:25,  4.98it/s]
109it [00:03, 27.56it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.45it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.39it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.47it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.39it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.45it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.40it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.46it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.36it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.47it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.44it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.41it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.41it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.33it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.42it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.42it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.38it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.44it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.46it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


109it [00:03, 28.42it/s]


Accuracy Score = 0.6689655172413793
F1 Score (Micro) = 0.7229813664596273
F1 Score (Macro) = 0.7279637558214512


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.70it/s]

Epoch : 0, Loss: 0.7382020354270935


102it [00:20,  5.09it/s]

Epoch : 0, Loss: 0.21420356631278992


202it [00:40,  5.10it/s]

Epoch : 0, Loss: 0.1240970566868782


302it [01:00,  5.09it/s]

Epoch : 0, Loss: 0.10381726175546646


313it [01:02,  4.99it/s]
2it [00:00,  5.23it/s]

Epoch : 1, Loss: 0.09885197877883911


102it [00:20,  5.08it/s]

Epoch : 1, Loss: 0.09565219283103943


202it [00:40,  5.10it/s]

Epoch : 1, Loss: 0.09391797333955765


302it [01:00,  5.10it/s]

Epoch : 1, Loss: 0.09105018526315689


313it [01:03,  4.96it/s]
2it [00:00,  5.22it/s]

Epoch : 2, Loss: 0.09182270616292953


102it [00:20,  5.12it/s]

Epoch : 2, Loss: 0.09316344559192657


202it [00:40,  5.11it/s]

Epoch : 2, Loss: 0.08671525865793228


302it [01:00,  5.10it/s]

Epoch : 2, Loss: 0.09066219627857208


313it [01:02,  4.97it/s]
2it [00:00,  5.21it/s]

Epoch : 3, Loss: 0.09066343307495117


102it [00:20,  5.09it/s]

Epoch : 3, Loss: 0.09504023939371109


202it [00:40,  5.10it/s]

Epoch : 3, Loss: 0.08791746944189072


302it [01:01,  5.08it/s]

Epoch : 3, Loss: 0.09133134037256241


313it [01:03,  4.95it/s]
2it [00:00,  5.24it/s]

Epoch : 4, Loss: 0.07827001065015793


102it [00:20,  5.11it/s]

Epoch : 4, Loss: 0.08336220681667328


202it [00:40,  5.12it/s]

Epoch : 4, Loss: 0.08613260090351105


302it [01:00,  5.11it/s]

Epoch : 4, Loss: 0.08798585087060928


313it [01:02,  4.97it/s]
2it [00:00,  5.25it/s]

Epoch : 5, Loss: 0.08293848484754562


102it [00:20,  5.13it/s]

Epoch : 5, Loss: 0.08054720610380173


202it [00:40,  5.13it/s]

Epoch : 5, Loss: 0.07628384232521057


302it [01:00,  5.11it/s]

Epoch : 5, Loss: 0.0766398161649704


313it [01:02,  4.97it/s]
2it [00:00,  5.26it/s]

Epoch : 6, Loss: 0.06446947157382965


102it [00:20,  5.08it/s]

Epoch : 6, Loss: 0.06552964448928833


202it [00:40,  5.10it/s]

Epoch : 6, Loss: 0.06539146602153778


302it [01:00,  5.09it/s]

Epoch : 6, Loss: 0.0708329901099205


313it [01:03,  4.97it/s]
2it [00:00,  5.29it/s]

Epoch : 7, Loss: 0.06685110926628113


102it [00:20,  5.08it/s]

Epoch : 7, Loss: 0.06568008661270142


202it [00:40,  5.10it/s]

Epoch : 7, Loss: 0.07371155917644501


302it [01:00,  5.11it/s]

Epoch : 7, Loss: 0.05379939079284668


313it [01:02,  4.97it/s]
2it [00:00,  5.25it/s]

Epoch : 8, Loss: 0.06255045533180237


102it [00:20,  5.11it/s]

Epoch : 8, Loss: 0.05221796780824661


202it [00:40,  5.11it/s]

Epoch : 8, Loss: 0.04899078607559204


302it [01:00,  5.13it/s]

Epoch : 8, Loss: 0.04429788514971733


313it [01:02,  4.97it/s]
2it [00:00,  5.22it/s]

Epoch : 9, Loss: 0.048638731241226196


102it [00:20,  5.11it/s]

Epoch : 9, Loss: 0.05077560618519783


202it [00:40,  5.11it/s]

Epoch : 9, Loss: 0.046215999871492386


302it [01:00,  5.10it/s]

Epoch : 9, Loss: 0.050632543861866


313it [01:02,  4.97it/s]
2it [00:00,  5.31it/s]

Epoch : 10, Loss: 0.05301342532038689


102it [00:20,  5.09it/s]

Epoch : 10, Loss: 0.05938325077295303


202it [00:40,  5.12it/s]

Epoch : 10, Loss: 0.04572829604148865


302it [01:00,  5.11it/s]

Epoch : 10, Loss: 0.042110625654459


313it [01:02,  4.98it/s]
2it [00:00,  5.24it/s]

Epoch : 11, Loss: 0.04019198566675186


102it [00:20,  5.11it/s]

Epoch : 11, Loss: 0.03982686996459961


202it [00:40,  5.10it/s]

Epoch : 11, Loss: 0.033823397010564804


302it [01:00,  5.11it/s]

Epoch : 11, Loss: 0.04706665128469467


313it [01:02,  4.97it/s]
2it [00:00,  5.25it/s]

Epoch : 12, Loss: 0.03152308240532875


102it [00:20,  5.08it/s]

Epoch : 12, Loss: 0.03803981840610504


202it [00:40,  5.10it/s]

Epoch : 12, Loss: 0.029831230640411377


302it [01:00,  5.11it/s]

Epoch : 12, Loss: 0.032695405185222626


313it [01:02,  4.97it/s]
2it [00:00,  5.20it/s]

Epoch : 13, Loss: 0.02286820486187935


102it [00:20,  5.09it/s]

Epoch : 13, Loss: 0.03396892547607422


202it [00:40,  5.08it/s]

Epoch : 13, Loss: 0.03376796469092369


302it [01:00,  5.06it/s]

Epoch : 13, Loss: 0.02714921347796917


313it [01:02,  4.97it/s]
2it [00:00,  5.28it/s]

Epoch : 14, Loss: 0.016672048717737198


102it [00:20,  5.08it/s]

Epoch : 14, Loss: 0.02586391568183899


202it [00:40,  5.10it/s]

Epoch : 14, Loss: 0.02075715735554695


302it [01:00,  5.09it/s]

Epoch : 14, Loss: 0.024765735492110252


313it [01:02,  4.97it/s]
2it [00:00,  5.27it/s]

Epoch : 15, Loss: 0.022536857053637505


102it [00:20,  5.06it/s]

Epoch : 15, Loss: 0.05418771132826805


202it [00:40,  5.12it/s]

Epoch : 15, Loss: 0.028732256963849068


302it [01:00,  5.10it/s]

Epoch : 15, Loss: 0.015365324914455414


313it [01:02,  4.97it/s]
2it [00:00,  5.27it/s]

Epoch : 16, Loss: 0.02689169906079769


102it [00:20,  5.08it/s]

Epoch : 16, Loss: 0.020219307392835617


202it [00:40,  5.10it/s]

Epoch : 16, Loss: 0.012325949035584927


302it [01:00,  5.08it/s]

Epoch : 16, Loss: 0.01633947715163231


313it [01:02,  4.97it/s]
2it [00:00,  5.24it/s]

Epoch : 17, Loss: 0.01948615349829197


102it [00:20,  5.07it/s]

Epoch : 17, Loss: 0.029061762616038322


202it [00:40,  5.10it/s]

Epoch : 17, Loss: 0.038913264870643616


302it [01:00,  5.11it/s]

Epoch : 17, Loss: 0.0262229572981596


313it [01:02,  4.97it/s]
2it [00:00,  5.27it/s]

Epoch : 18, Loss: 0.013823558576405048


102it [00:20,  5.11it/s]

Epoch : 18, Loss: 0.014910416677594185


202it [00:40,  5.08it/s]

Epoch : 18, Loss: 0.0106815742328763


302it [01:00,  5.10it/s]

Epoch : 18, Loss: 0.02658029831945896


313it [01:02,  4.97it/s]
2it [00:00,  5.22it/s]

Epoch : 19, Loss: 0.01426875963807106


102it [00:20,  5.11it/s]

Epoch : 19, Loss: 0.022771913558244705


202it [00:40,  5.09it/s]

Epoch : 19, Loss: 0.009511245414614677


302it [01:00,  5.10it/s]

Epoch : 19, Loss: 0.008529583923518658


313it [01:02,  4.98it/s]
269it [00:09, 27.97it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.51it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.46it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.44it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.49it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.47it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.48it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.47it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.50it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.19it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.48it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.48it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.43it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.45it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.45it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.45it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.47it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.38it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.35it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


269it [00:09, 28.40it/s]


Accuracy Score = 0.5731593662628145
F1 Score (Micro) = 0.6199596774193549
F1 Score (Macro) = 0.6084897936209943


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
2it [00:00,  8.77it/s]

Epoch : 0, Loss: 0.6774927377700806


102it [00:20,  5.07it/s]

Epoch : 0, Loss: 0.26055723428726196


149it [00:29,  4.99it/s]
2it [00:00,  5.25it/s]

Epoch : 1, Loss: 0.22914329171180725


102it [00:20,  5.08it/s]

Epoch : 1, Loss: 0.24127237498760223


149it [00:29,  4.97it/s]
2it [00:00,  5.29it/s]

Epoch : 2, Loss: 0.24605771899223328


102it [00:20,  5.08it/s]

Epoch : 2, Loss: 0.17286747694015503


149it [00:29,  4.97it/s]
2it [00:00,  5.26it/s]

Epoch : 3, Loss: 0.18886606395244598


102it [00:20,  5.10it/s]

Epoch : 3, Loss: 0.16342471539974213


149it [00:29,  4.97it/s]
2it [00:00,  5.26it/s]

Epoch : 4, Loss: 0.12532299757003784


102it [00:20,  5.09it/s]

Epoch : 4, Loss: 0.13361696898937225


149it [00:29,  4.97it/s]
2it [00:00,  5.23it/s]

Epoch : 5, Loss: 0.13965243101119995


102it [00:20,  5.09it/s]

Epoch : 5, Loss: 0.11898458003997803


149it [00:29,  4.97it/s]
2it [00:00,  5.27it/s]

Epoch : 6, Loss: 0.09584687650203705


102it [00:20,  5.10it/s]

Epoch : 6, Loss: 0.1087292805314064


149it [00:29,  4.98it/s]
2it [00:00,  5.27it/s]

Epoch : 7, Loss: 0.09185900539159775


102it [00:20,  5.09it/s]

Epoch : 7, Loss: 0.06165018677711487


149it [00:29,  4.98it/s]
2it [00:00,  5.23it/s]

Epoch : 8, Loss: 0.10683596879243851


102it [00:20,  5.09it/s]

Epoch : 8, Loss: 0.07451686263084412


149it [00:29,  4.98it/s]
2it [00:00,  5.26it/s]

Epoch : 9, Loss: 0.07605338096618652


102it [00:20,  5.10it/s]

Epoch : 9, Loss: 0.06082822009921074


149it [00:29,  4.98it/s]
2it [00:00,  5.28it/s]

Epoch : 10, Loss: 0.044452425092458725


102it [00:20,  5.05it/s]

Epoch : 10, Loss: 0.08391215652227402


149it [00:30,  4.96it/s]
2it [00:00,  5.26it/s]

Epoch : 11, Loss: 0.06440144777297974


102it [00:20,  5.06it/s]

Epoch : 11, Loss: 0.11539920419454575


149it [00:30,  4.97it/s]
2it [00:00,  5.26it/s]

Epoch : 12, Loss: 0.10624071955680847


102it [00:20,  5.07it/s]

Epoch : 12, Loss: 0.0660846084356308


149it [00:29,  4.97it/s]
2it [00:00,  5.27it/s]

Epoch : 13, Loss: 0.04664714261889458


102it [00:20,  5.08it/s]

Epoch : 13, Loss: 0.06964614242315292


149it [00:29,  4.97it/s]
2it [00:00,  5.29it/s]

Epoch : 14, Loss: 0.02435559593141079


102it [00:20,  5.09it/s]

Epoch : 14, Loss: 0.0335506908595562


149it [00:29,  4.97it/s]
2it [00:00,  5.23it/s]

Epoch : 15, Loss: 0.06460083276033401


102it [00:20,  5.10it/s]

Epoch : 15, Loss: 0.1022116020321846


149it [00:30,  4.96it/s]
2it [00:00,  5.22it/s]

Epoch : 16, Loss: 0.05752783268690109


102it [00:20,  5.09it/s]

Epoch : 16, Loss: 0.030940640717744827


149it [00:29,  4.97it/s]
2it [00:00,  5.29it/s]

Epoch : 17, Loss: 0.08814506232738495


102it [00:20,  5.08it/s]

Epoch : 17, Loss: 0.0751333087682724


149it [00:29,  4.98it/s]
2it [00:00,  5.17it/s]

Epoch : 18, Loss: 0.05898059159517288


102it [00:20,  4.92it/s]

Epoch : 18, Loss: 0.04638174921274185


149it [00:30,  4.84it/s]
2it [00:00,  4.93it/s]

Epoch : 19, Loss: 0.05774746462702751


102it [00:21,  4.76it/s]

Epoch : 19, Loss: 0.03697003051638603


146it [00:30,  5.02it/s]

In [None]:
def predict(input, tokenizer, model, device):
    ''' GOAL OF THIS FUNCTION: 
    This function takes in any given string and converts it into a tokenized version that can be run through the model. '''

    input = " ".join(input.split())
    inputs = tokenizer.encode_plus(
        input,
        None,
        add_special_tokens=True,
        max_length=175,
        pad_to_max_length=True,
        return_token_type_ids=True
    )
    
    ids = torch.tensor(inputs['input_ids'], dtype=torch.long).to(device, dtype = torch.long)
    mask = torch.tensor(inputs['attention_mask'], dtype=torch.long).to(device, dtype= torch.long)
    token_type_ids = torch.tensor(inputs['token_type_ids'], dtype=torch.long).to(device, dtype= torch.long)

    output = model(ids.unsqueeze(0), mask.unsqueeze(0), token_type_ids.unsqueeze(0))

    return output

In [None]:
input='Data Engineer'
output = predict(input, tokenizer, model, device)



In [None]:
output_df = pd.DataFrame(output.cpu().detach().numpy().tolist(), columns=label_df.columns)

ValueError: 22 columns passed, passed data had 57 columns

In [None]:
output_df

NameError: name 'output_df' is not defined