# One Shot Learning

Here we are checking the performance of the model trained on the English Dataset on other Datasets and their translated versions without any finetuning

## Imports

In [1]:
from bert import BERT
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm

from data_cleaning import Data_Preprocessing
from arabert.preprocess import ArabertPreprocessor

## Utility Functions

In [2]:
def preprocess(df,isArabic):
    
    X = df['Text']
    X_new=[]
    if(isArabic):
        prep = ArabertPreprocessor('bert-base-arabertv02')
        for text in tqdm(X):
            text = prep.preprocess(text)
            X_new.append(text)
    else:
        processer = Data_Preprocessing()
        for text in tqdm(X):
            text= processer.removeEmojis(text)
            text = processer.removeUrls(text)
            text=processer.removeSpecialChar(text)
            X_new.append(text)

    df['Text']=X_new
    return df 

In [3]:
def load_dataset(args,data_path,index):
    # read dataframes
    df_test = pd.read_csv(data_path+'test_'+str(index)+'.csv')

    # clean data
    df_test=preprocess(df_test,args['isArabic'])

    return df_test

In [4]:
def one_shot_output(model_path,data_path,obj,args):
    saved_model=obj.load_model(model_path,args)
    device = torch.device(args['device'])
    saved_model=saved_model.to(device)
    
    all_metrics={}
    
    # preprocessing
    for fold in [1,2,3,4,5]:
        df = load_dataset(args,data_path,fold)

        metrics = obj.run_test(saved_model,df,args)
        
        for key,value in metrics.items():
            if(key not in all_metrics):
                all_metrics[key]=value
            else:
                all_metrics[key]+=value
    
    for key,value in all_metrics.items():
        all_metrics[key]/=5
    
    return all_metrics

### Arabic

In [5]:
DATA_PATH = "Data_Processed/Let-Mi/"
MODEL_PATH = "Saved_Models/Let-Mi/all_but_one/best_bert_bert_3_all.pt"

args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'name': 'bert_one_shot',
        'isArabic': True,
    }

model = BERT(args)

metrics = one_shot_output(MODEL_PATH,DATA_PATH,model,args)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [6]:
metrics

{'Test_accuracy': 0.582421875,
 'Test_mF1Score': 0.5419605835011622,
 'Test_f1Score': 0.40600068873181633,
 'Test_auc': 0.5786451279075037,
 'Test_precision': 0.6827326020102771,
 'Test_recall': 0.2892143656246628,
 'Test_non_hatef1Score': 0.6779204782705082,
 'Test_non_recallScore': 0.8680758901903445,
 'Test_non_precisionScore': 0.5561452688242857,
 'Test_avg_loss': 1.535088037326932}

### Italian

In [8]:
DATA_PATH = "Data_Processed/AMI-2020/"
MODEL_PATH = "Saved_Models/AMI-2020/all_but_one/best_bert_bert_2_all.pt"

args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'name': 'bert_one_shot',
        'isArabic': False,
    }

model = BERT(args)

metrics = one_shot_output(MODEL_PATH,DATA_PATH,model,args)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [9]:
metrics

{'Test_accuracy': 0.48456504494976194,
 'Test_mF1Score': 0.4615747304513461,
 'Test_f1Score': 0.35039373161261916,
 'Test_auc': 0.4782228968700304,
 'Test_precision': 0.44805594360321443,
 'Test_recall': 0.2877535590445205,
 'Test_non_hatef1Score': 0.5727557292900729,
 'Test_non_recallScore': 0.6686922346955404,
 'Test_non_precisionScore': 0.5009207409952607,
 'Test_avg_loss': 1.2697309851142102}

### Hindi 

In [10]:
DATA_PATH = "Data_Processed/Shared_Task_hin/"
MODEL_PATH = "Saved_Models/Shared_Task_hin/all_but_one/best_bert_bert_1_all.pt"

args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 4.5],
        'save_model': False,
        'model_save_path': '',
        'name': 'bert_one_shot',
        'isArabic': False,
    }

model = BERT(args)

metrics = one_shot_output(MODEL_PATH,DATA_PATH,model,args)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [11]:
metrics

{'Test_accuracy': 0.8075657894736843,
 'Test_mF1Score': 0.6925479304464182,
 'Test_f1Score': 0.5045199089213626,
 'Test_auc': 0.6765116483215595,
 'Test_precision': 0.5934296330052435,
 'Test_recall': 0.43944878866091974,
 'Test_non_hatef1Score': 0.8805759519714739,
 'Test_non_recallScore': 0.9135745079821993,
 'Test_non_precisionScore': 0.849937755390578,
 'Test_avg_loss': 0.9881924117866315}

### Bengali 

In [12]:
DATA_PATH = "Data_Processed/Shared_Task_iben/"
MODEL_PATH = "Saved_Models/Shared_Task_iben/all_but_one/best_bert_bert_2_all.pt"

args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 6.0],
        'save_model': False,
        'model_save_path': '',
        'name': 'bert_one_shot',
        'isArabic': False,
    }

model = BERT(args)

metrics = one_shot_output(MODEL_PATH,DATA_PATH,model,args)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [13]:
metrics

{'Test_accuracy': 0.831418918918919,
 'Test_mF1Score': 0.6013067106508869,
 'Test_f1Score': 0.29842239545710997,
 'Test_auc': 0.5852102450277643,
 'Test_precision': 0.6443711725013197,
 'Test_recall': 0.1946606190089915,
 'Test_non_hatef1Score': 0.9041910258446638,
 'Test_non_recallScore': 0.9757598710465369,
 'Test_non_precisionScore': 0.8424198714551008,
 'Test_avg_loss': 0.8347795232727722}

### Spanish

In [14]:
DATA_PATH = "Data_Processed/AMI-Spanish/"
MODEL_PATH = "Saved_Models/AMI-Spanish/all_but_one/best_bert_bert_4_all.pt"

args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'name': 'bert_one_shot',
        'isArabic': False,
    }

model = BERT(args)

metrics = one_shot_output(MODEL_PATH,DATA_PATH,model,args)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [15]:
metrics

{'Test_accuracy': 0.5631250000000001,
 'Test_mF1Score': 0.5089978695689356,
 'Test_f1Score': 0.3460475547747173,
 'Test_auc': 0.5616713614753089,
 'Test_precision': 0.6797050391956052,
 'Test_recall': 0.23226659110634523,
 'Test_non_hatef1Score': 0.6719481843631536,
 'Test_non_recallScore': 0.8910761318442724,
 'Test_non_precisionScore': 0.5393421571499156,
 'Test_avg_loss': 1.243747164607048}

## English

In [16]:
DATA_PATH = "Data_Processed/Shared_Task_eng/"
MODEL_PATH = "Saved_Models/Shared_Task_eng/all_but_one/best_bert_bert_1_all.pt"

args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 8.0],
        'save_model': False,
        'model_save_path': '',
        'name': 'bert_one_shot',
        'isArabic': False,
    }

model = BERT(args)

metrics = one_shot_output(MODEL_PATH,DATA_PATH,model,args)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

In [17]:
metrics

{'Test_accuracy': 0.7884191176470589,
 'Test_mF1Score': 0.5624156594521211,
 'Test_f1Score': 0.24793997940400397,
 'Test_auc': 0.5563677411235332,
 'Test_precision': 0.32192214690827975,
 'Test_recall': 0.20163718032468267,
 'Test_non_hatef1Score': 0.8768913395002382,
 'Test_non_recallScore': 0.9110983019223834,
 'Test_non_precisionScore': 0.8451626277539559,
 'Test_avg_loss': 0.7881933271884918}

# Few Shot Learning

Here we are checking the performance of the model trained on the English Dataset on other Datasets and their translated versions with finetuning

## BERT Model

### Imports

In [4]:
# Core
import random

# Basics
import numpy as np
import pandas as pd
import torch

# Metrics
from sklearn.metrics import *

# Tokeniser
from transformers import BertTokenizer

# Utility
from tqdm import tqdm

# Dataloader
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Scheduler
from transformers import get_linear_schedule_with_warmup

# Optimiser
from transformers import AdamW

# Model
from transformers import BertForSequenceClassification
import torch.nn as nn

### Main Class

In [5]:
class BERT_FewShot:
    def __init__(self,args):
        # fix the random
        random.seed(args['seed_val'])
        np.random.seed(args['seed_val'])
        torch.manual_seed(args['seed_val'])
        torch.cuda.manual_seed_all(args['seed_val'])
        
        # set device
        self.device = torch.device(args['device'])

        self.weights=args['weights']
        
        # initiliase tokeniser
        self.tokenizer = BertTokenizer.from_pretrained(args['bert_model'])

        self.model_save_path = args['model_save_path']
        self.name = args['name']
        
    ##-----------------------------------------------------------##
    ##----------------- Utility Functions -----------------------##
    ##-----------------------------------------------------------##
    def encode(self,data,max_len):
        input_ids = []
        attention_masks = []
        for sent in tqdm(data):
            # use in-built tokeniser of Bert
            encoded_dict = self.tokenizer.encode_plus(
                            sent,
                            add_special_tokens =True, # for [CLS] and [SEP]
                            max_length = max_len,
                            truncation = True,
                            padding = 'max_length',
                            return_attention_mask = True,
                            return_tensors = 'pt', # return pytorch tensors
            )
            input_ids.append(encoded_dict['input_ids'])
            # attention masks notify where padding has been added 
            # and where is the sentence
            attention_masks.append(encoded_dict['attention_mask'])
        
        return [input_ids,attention_masks]
    
    ##-----------------------------------------------------------##
    ##------------------ Dataloader -----------------------------##
    ##-----------------------------------------------------------##
    def get_dataloader(self,samples, batch_size,is_train=False):
        inputs,masks,labels = samples

        # Convert the lists into tensors.
        inputs = torch.cat(inputs, dim=0)
        masks = torch.cat(masks, dim=0)
        labels = torch.tensor(labels)

        # convert to dataset
        data = TensorDataset(inputs,masks,labels)

        if(is_train==False):
            # use random sampler for training to shuffle
            # train data
            sampler = SequentialSampler(data)
        else:
            # order does not matter for validation as we just 
            # need the metrics
            sampler = RandomSampler(data)  

        dataloader = DataLoader(data, sampler=sampler, batch_size=batch_size,drop_last=True)

        return dataloader
    
    ##-----------------------------------------------------------##
    ##----------------- Training Utilities ----------------------##
    ##-----------------------------------------------------------## 
    def get_optimiser(self,learning_rate,model):
        # using AdamW optimiser from transformers library
        return AdamW(model.parameters(),
                  lr = learning_rate, 
                  eps = 1e-8
                )
    
    def get_scheduler(self,epochs,optimiser,train_dl):
        total_steps = len(train_dl) * epochs
        return get_linear_schedule_with_warmup(optimiser, 
                num_warmup_steps = 0, 
                num_training_steps = total_steps)
    
    def evalMetric(self, y_true, y_pred, prefix):
        # calculate all the metrics and add prefix to them
        # before saving in dictionary
        accuracy = accuracy_score(y_true, y_pred)
        mf1Score = f1_score(y_true, y_pred, average='macro')
        f1Score = f1_score(y_true, y_pred)
        area_under_c = roc_auc_score(y_true, y_pred)
        recallScore = recall_score(y_true, y_pred)
        precisionScore = precision_score(y_true, y_pred)

        nonhate_f1Score = f1_score(y_true, y_pred, pos_label=0)
        non_recallScore = recall_score(y_true, y_pred, pos_label=0)
        non_precisionScore = precision_score(y_true, y_pred, pos_label=0)
        return {prefix+"accuracy": accuracy, prefix+'mF1Score': mf1Score, 
            prefix+'f1Score': f1Score, prefix+'auc': area_under_c,
            prefix+'precision': precisionScore, 
            prefix+'recall': recallScore, 
            prefix+'non_hatef1Score': nonhate_f1Score, 
            prefix+'non_recallScore': non_recallScore, 
            prefix+'non_precisionScore': non_precisionScore}
    
    ##-----------------------------------------------------------##
    ##---------------- Different Train Loops --------------------##
    ##-----------------------------------------------------------## 
    def evaluate(self,model,loader,which):
        # to evaluate model on test and validation set

        model.eval() # put model in eval mode

        # maintain total loss to save in metrics
        total_eval_loss = 0

        # maintain predictions for each batch and calculate metrics
        # at the end of the epoch
        y_pred = np.zeros(shape=(0),dtype='int')
        y_true = np.empty(shape=(0),dtype='int')

        for batch in tqdm(loader):
            # separate input, labels and attention mask
            b_input_ids = batch[0].to(self.device)
            b_input_mask = batch[1].to(self.device)
            b_labels = batch[2].to(self.device)

            with torch.no_grad(): # do not construct compute graph
                outputs = model(b_input_ids, 
                                   token_type_ids=None, 
                                   attention_mask=b_input_mask,
                                   labels=b_labels)
            
            # output is always a tuple, thus we have to 
            # separate it manually
            loss = outputs[0]
            logits = outputs[1]

            # add the current loss
            # loss.item() extracts loss value as a float
            total_eval_loss += loss.item()

            # calculate true labels and convert it into numpy array
            b_y_true = b_labels.cpu().data.squeeze().numpy()
            
            # calculate predicted labels by taking max of 
            # prediction scores
            b_y_pred = torch.max(logits,1)[1]
            b_y_pred = b_y_pred.cpu().data.squeeze().numpy()

            y_pred = np.concatenate((y_pred,b_y_pred))
            y_true = np.concatenate((y_true,b_y_true))

        # calculate metrics
        metrics = self.evalMetric(y_true,y_pred,which+"_")

        # Calculate the average loss over all of the batches.
        avg_loss = total_eval_loss / len(loader)
        # add it to the metric
        metrics[which+'_avg_loss'] = avg_loss

        return metrics
    
    
    def run_train_loop(self,model,train_loader,optimiser,scheduler):

        model.train() # put model in train mode

        # maintain total loss to add to metric
        total_loss = 0

        # maintain predictions for each batch and calculate metrics
        # at the end of the epoch
        y_pred = np.zeros(shape=(0),dtype='int')
        y_true = np.empty(shape=(0),dtype='int')

        for batch in tqdm(train_loader):
            # separate inputs, labels and attention mask
            b_input_ids = batch[0].to(self.device)
            b_input_mask = batch[1].to(self.device)
            b_labels = batch[2].to(self.device)

            # Ref: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch#:~:text=In%20PyTorch%20%2C%20we%20need%20to,backward()%20call.
            model.zero_grad()                

            outputs = model(b_input_ids, 
                             token_type_ids=None, 
                             attention_mask=b_input_mask, 
                             labels=b_labels)

            # outputs is always returned as tuple
            # Separate it manually
            logits = outputs[1]

            # define new loss function so that we can include
            # weights
            loss_fct = nn.CrossEntropyLoss(weight=torch.tensor(
                        self.weights,dtype=torch.float)).to(self.device)
            
            loss = loss_fct(logits,b_labels)
            
            # calculate current loss
            # loss.item() extracts loss value as a float
            total_loss += loss.item()

            # Back-propagation
            loss.backward()

            # calculate true labels
            b_y_true = b_labels.cpu().data.squeeze().numpy()

            # calculate predicted labels by taking max of 
            # prediction scores
            b_y_pred = torch.max(logits,1)[1]
            b_y_pred = b_y_pred.cpu().data.squeeze().numpy()

            y_pred = np.concatenate((y_pred,b_y_pred))
            y_true = np.concatenate((y_true,b_y_true))

            # clip gradient to prevent exploding gradient
            # problems
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # gradient descent
            optimiser.step()
            
            # schedule learning rate accordingly
            scheduler.step()

        # calculate avg loss 
        avg_train_loss = total_loss / len(train_loader)

        # calculate metrics
        train_metrics = self.evalMetric(y_true,y_pred,"Train_")
        
        # print results
        print('avg_train_loss',avg_train_loss)
        print('train_f1Score',train_metrics['Train_f1Score'])
        print('train_accuracy',train_metrics['Train_accuracy'])

        # add loss to metrics
        train_metrics['Train_avg_loss'] = avg_train_loss

        return train_metrics
    
    
    ##------------------------------------------------------------##
    ##----------------- Main Train Loop --------------------------##
    ##------------------------------------------------------------##
    def train(self,model,data_loaders,optimiser,scheduler,epochs,save_model):
        # save train stats per epoch
        train_stats = []
        test_stats=[]
        best_test={}
        train_loader,val_loader,test_loader = data_loaders
        # maintain best mF1 Score to save best model
        best_mf1Score=-1.0
        for epoch_i in range(0, epochs):
            print("")
            print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
            
            print("")
            print('Training...')
            # run trian loop
            train_metrics = self.run_train_loop(model,train_loader,
                                            optimiser,scheduler)

            print("")
            print("Running Validation...") 
            # test on validation set
            val_metrics = self.evaluate(model,val_loader,"Val")
            
            print("Validation Loss: ",val_metrics['Val_avg_loss'])
            print("Validation Accuracy: ",val_metrics['Val_accuracy'])
            
            stats = {}
            
            if(val_metrics['Val_mF1Score']>best_mf1Score):
                print("Best mF1Score....")
                best_mf1Score=val_metrics['Val_mF1Score']
                if(save_model):
                    torch.save(model.state_dict(), self.model_save_path+
                        '/best_bert_'+self.name+'.pt')
                # evaluate model on test set
                best_test = self.evaluate(model,test_loader,'')
                best_test['name']=self.name+'_'+str(epoch_i)+'_best'


            stats['epoch']=epoch_i+1

            # add train and val metrics of the epoch to 
            # same dictionary
            stats.update(train_metrics)
            stats.update(val_metrics)

            train_stats.append(stats)
            
        test_stats.append(best_test)
        return train_stats,best_test
    
    ##-----------------------------------------------------------##
    ##----------------------- Main Pipeline ---------------------##
    ##-----------------------------------------------------------##
    def run(self,args,df_train,df_val,df_test):
        # get X and Y data points 
        X_train = df_train['Text'].values
        Y_train = df_train['Label'].values
        X_test = df_test['Text'].values
        Y_test = df_test['Label'].values
        X_val = df_val['Text'].values
        Y_val = df_val['Label'].values
        
        # encode data
        # returns list of data and attention masks
        train_data = self.encode(X_train,args['max_len'])
        val_data = self.encode(X_val,args['max_len'])
        test_data = self.encode(X_test,args['max_len'])
        
        # add labels to data so that we can send them to
        # dataloader function together
        train_data.append(Y_train)
        val_data.append(Y_val)
        test_data.append(Y_test)
        
        # convert to dataloader
        train_dl =self.get_dataloader(train_data,args['batch_size'],True)
        val_dl =self.get_dataloader(val_data,args['batch_size'])                          
        test_dl =self.get_dataloader(test_data,args['batch_size'])
        
        # intialise model
#         model = BertForSequenceClassification.from_pretrained(
#                 args['bert_model'], 
#                 num_labels = 2, 
#                 output_attentions = False, # Whether the model returns attentions weights.
#                 output_hidden_states = False, # Whether the model returns all hidden-states.
#             )
        model = self.load_model(args['model_path'],args)
        model.to(self.device)
        
        optimiser = self.get_optimiser(args['learning_rate'],model)
        
        scheduler = self.get_scheduler(args['epochs'],optimiser,train_dl)
        
        # Run train loop and evaluate on validation data set
        # on each epoch. Store best model from all epochs 
        # (best mF1 Score on Val set) and evaluate it on
        # test set
        train_stats,best_test = self.train(model,[train_dl,val_dl,test_dl],
                                optimiser,scheduler,args['epochs'],args['save_model'])
        
        return train_stats,best_test
        
    ##-----------------------------------------------------------##
    ##-------------------- Other Utilities ----------------------##
    ##-----------------------------------------------------------##
    def run_test(self,model,df_test,args):
        # to evaluate test set on the final saved model
        # to retrieve results if necessary
        X_test = df_test['Text'].values
        Y_test = df_test['Label'].values

        test_data = self.encode(X_test,args['max_len'])

        test_data.append(Y_test)

        test_dl =self.get_dataloader(test_data,32)

        metrics = self.evaluate(model,test_dl,"Test")

        return metrics
    
    def load_model(self,path,args):
        # load saved best model
        saved_model = BertForSequenceClassification.from_pretrained(
                args['bert_model'], 
                num_labels = 2, 
                output_attentions = False, # Whether the model returns attentions weights.
                output_hidden_states = False, # Whether the model returns all hidden-states.
            )
        
        saved_model.load_state_dict(torch.load(path))
        
        return saved_model

### Utility Functions

In [6]:
def load_dataset(args,index):
    # initialise constants 
    path = args['data_path']
    # read dataframes
    df_train = pd.read_csv(path+'train_'+str(index)+'.csv')
    df_val = pd.read_csv(path+'val_'+str(index)+'.csv')
    df_test = pd.read_csv(path+'test_'+str(index)+'.csv')

    # clean data
    df_train=preprocess(df_train,args['isArabic'])
    df_val=preprocess(df_val,args['isArabic'])
    df_test=preprocess(df_test,args['isArabic'])

    return df_train, df_val, df_test

In [7]:
def preprocess(df,isArabic):
    
    X = df['Text']
    X_new=[]
    if(isArabic):
        prep = ArabertPreprocessor('bert-base-arabertv02')
        for text in tqdm(X):
            text = prep.preprocess(text)
            X_new.append(text)
    else:
        processer = Data_Preprocessing()
        for text in tqdm(X):
            text= processer.removeEmojis(text)
            text = processer.removeUrls(text)
            text=processer.removeSpecialChar(text)
            X_new.append(text)

    df['Text']=X_new
    return df 

In [8]:
def save_metrics(path,metrics,which):
    df = pd.DataFrame(metrics)
    df.to_csv(path+"_"+which+".csv")

In [9]:
def fix_random(seed_val=42):
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

### Main Train Function

In [10]:
def train(args, index,all_test_metrics,model_args):
    model_name = args['model_name']
    model_args['name']=model_name+'_'+str(index)+'_all'
    print("\tInitialising Model....")
    model = BERT_FewShot(model_args)
    print("\tLoading Dataset....")
    df_train, df_val, df_test = load_dataset(args,index)
    print("\tTraining Starts....")
    train_metrics, test_metrics = model.run(model_args, 
                    df_train, df_val, df_test)

    # Save train metrics after generating path
    res_path=args['res_base_path']+model_name+'_'+model_args['name']
    save_metrics(res_path,train_metrics,"train")
    
    all_test_metrics.append(test_metrics)

### Main Run Function

In [11]:
def run(args,model_args):
    all_test_metrics=[]
    
    for fold in [1, 2, 3, 4, 5]:
        print("Fold: ",fold)
        fix_random()
        train(args,fold,all_test_metrics,model_args)
        print("Saving Test Metrics....")
        save_metrics(args['res_base_path']+args['model_name']+
             '_all',all_test_metrics,"test")

## Arabic

In [None]:
run_args={
    'model_name':'few_shot',
    'data_path':'Data_Processed/Let-Mi/',
    'train_cnt':256,
    'res_base_path': 'Results/Let-Mi/all_but_one/',
    'model_save_path': 'Saved_Models/Let-Mi/',
    'isArabic': True,
}

model_args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'isArabic': True,
        'model_path': "Saved_Models/Let-Mi/all_but_one/best_bert_bert_3_all.pt",
    }
run(run_args,model_args)

Fold:  1
	Initialising Model....


  0%|          | 0/3670 [00:00<?, ?it/s]

	Loading Dataset....


100%|██████████| 3670/3670 [00:00<00:00, 8584.08it/s]
100%|██████████| 523/523 [00:00<00:00, 7225.91it/s]
100%|██████████| 1047/1047 [00:00<00:00, 8885.27it/s]
  8%|▊         | 276/3670 [00:00<00:02, 1563.05it/s]

	Training Starts....


100%|██████████| 3670/3670 [00:01<00:00, 2335.22it/s]
100%|██████████| 523/523 [00:00<00:00, 2486.01it/s]
100%|██████████| 1047/1047 [00:00<00:00, 2508.89it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertF



Training...


  6%|▌         | 28/458 [00:04<01:04,  6.71it/s]

## Spanish

In [None]:
run_args={
    'model_name':'few_shot',
    'data_path':'Data_Processed/AMI-Spanish/',
    'train_cnt':256,
    'res_base_path': 'Results/AMI-Spanish/fewShot/',
    'model_save_path': 'Saved_Models/AMI-Spanish/',
    'isArabic': False,
}

model_args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'isArabic': False,
        'model_path': "Saved_Models/Shared_Task_eng_1/best_bert_3_all.pt",
    }
run(run_args,model_args)

Fold:  1
	Initialising Model....


 10%|█         | 235/2317 [00:00<00:00, 2347.45it/s]

	Loading Dataset....


100%|██████████| 2317/2317 [00:00<00:00, 2369.03it/s]
100%|██████████| 330/330 [00:00<00:00, 2426.60it/s]
100%|██████████| 660/660 [00:00<00:00, 2445.49it/s]
 27%|██▋       | 634/2317 [00:00<00:00, 3140.35it/s]

	Training Starts....


100%|██████████| 2317/2317 [00:00<00:00, 2776.25it/s]
100%|██████████| 330/330 [00:00<00:00, 3354.22it/s]
100%|██████████| 660/660 [00:00<00:00, 3318.21it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertFor



Training...


100%|██████████| 289/289 [00:42<00:00,  6.87it/s]
 12%|█▏        | 5/41 [00:00<00:00, 45.87it/s]

avg_train_loss 0.71527979778171
train_f1Score 0.485999050783104
train_accuracy 0.5315743944636678

Running Validation...


100%|██████████| 41/41 [00:00<00:00, 44.55it/s]
  1%|          | 2/289 [00:00<00:27, 10.29it/s]

Validation Loss:  0.6653372601764959
Validation Accuracy:  0.6310975609756098


Training...


100%|██████████| 289/289 [00:56<00:00,  5.07it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.23it/s]

avg_train_loss 0.5959913835290394
train_f1Score 0.7123176661264181
train_accuracy 0.6929065743944637

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 20.87it/s]
  4%|▎         | 3/82 [00:00<00:03, 20.48it/s]

Validation Loss:  0.5722530062605695
Validation Accuracy:  0.7317073170731707
Testing Model....


100%|██████████| 82/82 [00:04<00:00, 19.98it/s]
  0%|          | 1/289 [00:00<00:57,  5.02it/s]



Training...


100%|██████████| 289/289 [00:59<00:00,  4.83it/s]
  5%|▍         | 2/41 [00:00<00:01, 19.52it/s]

avg_train_loss 0.5192106330064754
train_f1Score 0.7799747580984433
train_accuracy 0.7737889273356401

Running Validation...


 78%|███████▊  | 32/41 [00:01<00:00, 19.92it/s]

## Hindi

In [None]:
run_args={
    'model_name':'few_shot',
    'data_path':'Data_Processed/Shared_Task_hin/',
    'train_cnt':256,
    'res_base_path': 'Results/Shared_Task_hin/fewShot/',
    'model_save_path': 'Saved_Models/Shared_Task_hin/',
    'isArabic': False,
}

model_args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 4.5],
        'save_model': False,
        'model_save_path': '',
        'isArabic': False,
        'model_path': "Saved_Models/Shared_Task_eng_1/best_bert_3_all.pt",
    }
run(run_args,model_args)

Fold:  1
	Initialising Model....


  7%|▋         | 285/4327 [00:00<00:01, 2839.92it/s]

	Loading Dataset....


100%|██████████| 4327/4327 [00:01<00:00, 2755.89it/s]
100%|██████████| 618/618 [00:00<00:00, 2824.46it/s]
100%|██████████| 1236/1236 [00:00<00:00, 2763.65it/s]
 17%|█▋        | 749/4327 [00:00<00:00, 3752.48it/s]

	Training Starts....


100%|██████████| 4327/4327 [00:01<00:00, 3812.91it/s]
100%|██████████| 618/618 [00:00<00:00, 3804.58it/s]
100%|██████████| 1236/1236 [00:00<00:00, 3946.10it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertF



Training...


100%|██████████| 540/540 [00:51<00:00, 10.44it/s]
  6%|▋         | 5/77 [00:00<00:01, 48.02it/s]

avg_train_loss 0.6370941555748383
train_f1Score 0.46449957228400346
train_accuracy 0.7101851851851851

Running Validation...


100%|██████████| 77/77 [00:01<00:00, 45.94it/s]
  0%|          | 2/540 [00:00<00:50, 10.67it/s]

Validation Loss:  0.44010104761495217
Validation Accuracy:  0.8652597402597403


Training...


100%|██████████| 540/540 [00:52<00:00, 10.24it/s]
  6%|▋         | 5/77 [00:00<00:01, 47.08it/s]

avg_train_loss 0.4936746807424007
train_f1Score 0.6772777167947311
train_accuracy 0.8638888888888889

Running Validation...


100%|██████████| 77/77 [00:01<00:00, 45.59it/s]
  3%|▎         | 5/154 [00:00<00:03, 45.72it/s]

Validation Loss:  0.4073442340671242
Validation Accuracy:  0.8603896103896104
Testing Model....


100%|██████████| 154/154 [00:03<00:00, 45.45it/s]
  0%|          | 2/540 [00:00<00:51, 10.53it/s]



Training...


100%|██████████| 540/540 [00:52<00:00, 10.26it/s]
  6%|▋         | 5/77 [00:00<00:01, 46.40it/s]

avg_train_loss 0.5174121857638022
train_f1Score 0.7406989853438556
train_accuracy 0.8935185185185185

Running Validation...


100%|██████████| 77/77 [00:01<00:00, 45.49it/s]
  0%|          | 2/540 [00:00<00:50, 10.57it/s]

Validation Loss:  0.33339020048643087
Validation Accuracy:  0.887987012987013


Training...


100%|██████████| 540/540 [00:52<00:00, 10.20it/s]
  6%|▋         | 5/77 [00:00<00:01, 46.02it/s]

avg_train_loss 0.4568759047891945
train_f1Score 0.8024622271964185
train_accuracy 0.9182870370370371

Running Validation...


100%|██████████| 77/77 [00:01<00:00, 45.04it/s]
  3%|▎         | 5/154 [00:00<00:03, 45.14it/s]

Validation Loss:  0.47130339979738384
Validation Accuracy:  0.8814935064935064
Testing Model....


100%|██████████| 154/154 [00:03<00:00, 39.14it/s]
  0%|          | 1/540 [00:00<01:42,  5.28it/s]



Training...


 24%|██▍       | 130/540 [00:26<01:23,  4.93it/s]

## Less data points few Shot

In [12]:
def load_dataset_part(train_cnt,args,index,seed):
    # initialise constants 
    path = args['data_path']
    # read dataframes
    df_train = pd.read_csv(path+'train_'+str(index)+'.csv')
    df_val = pd.read_csv(path+'val_'+str(index)+'.csv')
    df_test = pd.read_csv(path+'test_'+str(index)+'.csv')
    
    # split train into hate and non-hate and take train_cnt
    # samples of each
    df_train_hate = df_train[df_train['Label'] == 1].sample(train_cnt,random_state=seed)
    df_train_non_hate = df_train[df_train['Label'] == 0].sample(train_cnt,random_state=seed)
    # concatenate hate and non_hate
    df_train = pd.concat([df_train_hate, df_train_non_hate])
    # shuffle the train data
    df_train = df_train.sample(frac=1).reset_index(drop=True)

    # clean data
    df_train=preprocess(df_train,args['isArabic'])
    df_val=preprocess(df_val,args['isArabic'])
    df_test=preprocess(df_test,args['isArabic'])

    return df_train, df_val, df_test

In [13]:
def train_part(args,train_cnt,run,index,all_test_metrics,model_args,seed):
    model_name = args['model_name']
    model_args['name']=model_name+'_'+str(index)+'_'+str(train_cnt)+'_'+str(run)
    print("\tInitialising Model....")
    model = BERT_FewShot(model_args)
    print("\tLoading Dataset....")
    df_train, df_val, df_test = load_dataset_part(train_cnt,args,index,seed)
    print("\tTraining Starts....")
    train_metrics, test_metrics = model.run(model_args, 
                    df_train, df_val, df_test)

    # Save train metrics after generating path
    res_path=args['res_base_path']+model_name+'_'+model_args['name']
    save_metrics(res_path,train_metrics,"train")
    
    all_test_metrics.append(test_metrics)

In [14]:
def run_part(run_args,model_args,train_cnt):
    all_test_metrics=[]
    seeds = [42,43,44]
    for fold in [1, 2, 3, 4, 5]:
        print("Fold: ",fold)
        for run in [1,2,3]:
            print("Run: ",run)
            fix_random()
            train_part(run_args,train_cnt,run,fold,all_test_metrics,model_args,seeds[run-1])
            print("Saving Test Metrics....")
            save_metrics(run_args['res_base_path']+run_args['model_name']+
             '_'+str(train_cnt),all_test_metrics,"test")

## Arabic few data

In [18]:
run_args={
    'model_name':'few_shot',
    'data_path':'Data_Processed/Let-Mi/',
    'train_cnt':256,
    'res_base_path': 'Results/Let-Mi/all_but_one/',
    'model_save_path': 'Saved_Models/Let-Mi/',
    'isArabic': True,
}

model_args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'isArabic': True,
        'model_path': "Saved_Models/Let-Mi/all_but_one/best_bert_bert_3_all.pt",
    }
for train_cnt in [512]:
    print("Train Cnt: ",train_cnt)
    run_part(run_args,model_args,train_cnt)

Train Cnt:  512
Fold:  1
Run:  1
	Initialising Model....


KeyboardInterrupt: 

## Italian few Data

In [None]:
run_args={
    'model_name':'few_shot',
    'data_path':'Data_Processed/AMI-2020/',
    'train_cnt':256,
    'res_base_path': 'Results/AMI-2020/all_but_one/',
    'model_save_path': 'Saved_Models/AMI-2020/',
    'isArabic': False,
}

model_args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'isArabic': False,
        'model_path': "Saved_Models/AMI-2020/all_but_one/best_bert_bert_1_all.pt",
    }
for train_cnt in [64,128,256,512]:
    print("Train Cnt: ",train_cnt)
    run_part(run_args,model_args,train_cnt)

Train Cnt:  64
Fold:  1
Run:  1
	Initialising Model....


  0%|          | 0/128 [00:00<?, ?it/s]

	Loading Dataset....


100%|██████████| 128/128 [00:00<00:00, 402.58it/s]
100%|██████████| 991/991 [00:00<00:00, 2007.43it/s]
100%|██████████| 1983/1983 [00:00<00:00, 2103.19it/s]
100%|██████████| 128/128 [00:00<00:00, 2836.22it/s]
 51%|█████     | 502/991 [00:00<00:00, 2444.09it/s]

	Training Starts....


100%|██████████| 991/991 [00:00<00:00, 2448.36it/s]
100%|██████████| 1983/1983 [00:00<00:00, 2449.21it/s]


## Spanish Few Data

In [None]:
run_args={
    'model_name':'few_shot',
    'data_path':'Data_Processed/AMI-Spanish/',
    'train_cnt':256,
    'res_base_path': 'Results/AMI-Spanish/all_but_one/',
    'model_save_path': 'Saved_Models/AMI-Spanish/',
    'isArabic': False,
}

model_args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda:1',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'isArabic': False,
        'model_path': "Saved_Models/AMI-Spanish/all_but_one/best_bert_bert_4_all.pt",
    }
for train_cnt in [128,256,512]:
    print("Train Cnt: ",train_cnt)
    run_part(run_args,model_args,train_cnt)

Train Cnt:  128
Fold:  1
Run:  1
	Initialising Model....


  0%|          | 0/256 [00:00<?, ?it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 541.19it/s]
100%|██████████| 330/330 [00:00<00:00, 1361.64it/s]
100%|██████████| 660/660 [00:00<00:00, 1330.65it/s]
100%|██████████| 256/256 [00:00<00:00, 1896.90it/s]
 61%|██████    | 202/330 [00:00<00:00, 2018.97it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 1984.52it/s]
100%|██████████| 660/660 [00:00<00:00, 1923.66it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:06<00:00,  5.30it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.70it/s]

avg_train_loss 0.7364970203489065
train_f1Score 0.644927536231884
train_accuracy 0.6171875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.50it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.16it/s]

Validation Loss:  0.6313214033115201
Validation Accuracy:  0.6463414634146342
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.22it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.33it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.34it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.91it/s]

avg_train_loss 0.48157513281330466
train_f1Score 0.728813559322034
train_accuracy 0.75

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.58it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.00it/s]

Validation Loss:  0.8366948031797642
Validation Accuracy:  0.6737804878048781
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.55it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.53it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.41it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.94it/s]

avg_train_loss 0.4548530882457271
train_f1Score 0.8034188034188035
train_accuracy 0.8203125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.91it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.53it/s]

Validation Loss:  0.9055239405573868
Validation Accuracy:  0.6615853658536586


Training...


100%|██████████| 32/32 [00:05<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.87it/s]

avg_train_loss 0.2897845113184303
train_f1Score 0.8968253968253969
train_accuracy 0.8984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.43it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.24it/s]

Validation Loss:  1.0677874139169368
Validation Accuracy:  0.6646341463414634


Training...


100%|██████████| 32/32 [00:05<00:00,  5.36it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.91it/s]

avg_train_loss 0.20387223234865814
train_f1Score 0.9285714285714286
train_accuracy 0.9296875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.93it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.06it/s]

Validation Loss:  1.1958377492137071
Validation Accuracy:  0.676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.57it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.73it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.98it/s]

avg_train_loss 0.09239674860145897
train_f1Score 0.9721115537848606
train_accuracy 0.97265625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.92it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.07it/s]

Validation Loss:  1.5255103242106554
Validation Accuracy:  0.676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.18it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.54it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.56it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.86it/s]

avg_train_loss 0.06008007511263713
train_f1Score 0.984375
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.77it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.85it/s]

Validation Loss:  1.677328545622891
Validation Accuracy:  0.6829268292682927
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.13it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.68it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.77it/s]

avg_train_loss 0.04845416808166192
train_f1Score 0.9844961240310077
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.42it/s]
  4%|▎         | 3/82 [00:00<00:02, 28.62it/s]

Validation Loss:  1.740246649536236
Validation Accuracy:  0.6951219512195121
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.45it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.72it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.50it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.58it/s]

avg_train_loss 0.0304427049595688
train_f1Score 0.9921259842519685
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.28it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.33it/s]

Validation Loss:  1.7788316883691928
Validation Accuracy:  0.6920731707317073


Training...


100%|██████████| 32/32 [00:05<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 26.85it/s]

avg_train_loss 0.017366697466059122
train_f1Score 0.996078431372549
train_accuracy 0.99609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.18it/s]


Validation Loss:  1.7914148934367227
Validation Accuracy:  0.6920731707317073
Saving Test Metrics....
Run:  2
	Initialising Model....


 45%|████▍     | 114/256 [00:00<00:00, 1126.29it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1187.05it/s]
100%|██████████| 330/330 [00:00<00:00, 1305.84it/s]
100%|██████████| 660/660 [00:00<00:00, 1323.50it/s]
100%|██████████| 256/256 [00:00<00:00, 1764.15it/s]
 57%|█████▋    | 188/330 [00:00<00:00, 1878.71it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 1784.38it/s]
100%|██████████| 660/660 [00:00<00:00, 1942.64it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.751726096495986
train_f1Score 0.6159695817490494
train_accuracy 0.60546875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.44it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.93it/s]

Validation Loss:  1.0245989368456165
Validation Accuracy:  0.5640243902439024
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.43it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.85it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 27.73it/s]

avg_train_loss 0.602381894364953
train_f1Score 0.756554307116105
train_accuracy 0.74609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.23it/s]
  4%|▎         | 3/82 [00:00<00:03, 24.77it/s]

Validation Loss:  0.7254087565148749
Validation Accuracy:  0.6615853658536586
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.62it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.17it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.41it/s]
  7%|▋         | 3/41 [00:00<00:01, 24.26it/s]

avg_train_loss 0.4084287453442812
train_f1Score 0.7950819672131147
train_accuracy 0.8046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.38it/s]
  4%|▎         | 3/82 [00:00<00:02, 28.98it/s]

Validation Loss:  0.8439262458463994
Validation Accuracy:  0.6676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.62it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.27it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.07it/s]

avg_train_loss 0.2803826938616112
train_f1Score 0.900398406374502
train_accuracy 0.90234375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.96it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.11it/s]

Validation Loss:  0.9510593725050368
Validation Accuracy:  0.6676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.59it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.68it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 26.59it/s]

avg_train_loss 0.17782337215612642
train_f1Score 0.9402390438247011
train_accuracy 0.94140625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.59it/s]
  4%|▎         | 3/82 [00:00<00:02, 28.75it/s]

Validation Loss:  1.4128879766638687
Validation Accuracy:  0.6890243902439024
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.41it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.69it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.58it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.17613418088876642
train_f1Score 0.9571984435797667
train_accuracy 0.95703125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.42it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.57it/s]

Validation Loss:  1.3719460190069386
Validation Accuracy:  0.6920731707317073
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.44it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.66it/s]

avg_train_loss 0.1870238745905226
train_f1Score 0.953125
train_accuracy 0.953125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.50it/s]
  3%|▎         | 1/32 [00:00<00:04,  7.14it/s]

Validation Loss:  1.3326318543631492
Validation Accuracy:  0.6859756097560976


Training...


100%|██████████| 32/32 [00:05<00:00,  5.59it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.01it/s]

avg_train_loss 0.11101777147268876
train_f1Score 0.9767441860465116
train_accuracy 0.9765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.51it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.49it/s]

Validation Loss:  1.4164080225930709
Validation Accuracy:  0.6798780487804879


Training...


100%|██████████| 32/32 [00:05<00:00,  5.58it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.06629530852660537
train_f1Score 0.9844961240310077
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.56it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.57it/s]

Validation Loss:  1.4967996274552695
Validation Accuracy:  0.6798780487804879


Training...


100%|██████████| 32/32 [00:05<00:00,  5.59it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.06993191021319944
train_f1Score 0.9844961240310077
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.46it/s]


Validation Loss:  1.522623953738863
Validation Accuracy:  0.6737804878048781
Saving Test Metrics....
Run:  3
	Initialising Model....


 48%|████▊     | 122/256 [00:00<00:00, 1213.11it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1182.91it/s]
100%|██████████| 330/330 [00:00<00:00, 1321.36it/s]
100%|██████████| 660/660 [00:00<00:00, 1393.21it/s]
100%|██████████| 256/256 [00:00<00:00, 1831.25it/s]
 65%|██████▍   | 214/330 [00:00<00:00, 2130.92it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 1909.39it/s]
100%|██████████| 660/660 [00:00<00:00, 1836.31it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.06it/s]

avg_train_loss 0.6805222551338375
train_f1Score 0.6311787072243347
train_accuracy 0.62109375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.94it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.12it/s]

Validation Loss:  0.7304629552655104
Validation Accuracy:  0.6036585365853658
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.45it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.45it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.92it/s]

avg_train_loss 0.5065723019652069
train_f1Score 0.7871485943775101
train_accuracy 0.79296875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.55it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.09it/s]

Validation Loss:  0.8058178722858429
Validation Accuracy:  0.6585365853658537
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.53it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.54it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.07it/s]

avg_train_loss 0.320373130554799
train_f1Score 0.9034749034749036
train_accuracy 0.90234375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.66it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.58it/s]

Validation Loss:  1.1644554768757123
Validation Accuracy:  0.6615853658536586


Training...


100%|██████████| 32/32 [00:05<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 24.51it/s]

avg_train_loss 0.23648471283377148
train_f1Score 0.9365079365079365
train_accuracy 0.9375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.53it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.53it/s]

Validation Loss:  1.5911935818404324
Validation Accuracy:  0.6402439024390244


Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.12it/s]

avg_train_loss 0.14995012355211657
train_f1Score 0.9571984435797667
train_accuracy 0.95703125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.55it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.49it/s]

Validation Loss:  1.8721652972943537
Validation Accuracy:  0.625


Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.29it/s]

avg_train_loss 0.08960355023737065
train_f1Score 0.9763779527559054
train_accuracy 0.9765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.69it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.14it/s]

Validation Loss:  1.8673144435828053
Validation Accuracy:  0.6646341463414634
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.30it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.69it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.62it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.97it/s]

avg_train_loss 0.020127725416386966
train_f1Score 0.996078431372549
train_accuracy 0.99609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.96it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.67it/s]

Validation Loss:  2.0630596058397757
Validation Accuracy:  0.6615853658536586


Training...


100%|██████████| 32/32 [00:05<00:00,  5.57it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.03672489244127064
train_f1Score 0.9922480620155039
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.40it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.92it/s]

Validation Loss:  2.190553849790154
Validation Accuracy:  0.649390243902439


Training...


100%|██████████| 32/32 [00:05<00:00,  5.48it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.99it/s]

avg_train_loss 0.0016000472132873256
train_f1Score 1.0
train_accuracy 1.0

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.47it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.69it/s]

Validation Loss:  2.17009184782098
Validation Accuracy:  0.6585365853658537


Training...


100%|██████████| 32/32 [00:05<00:00,  5.48it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.84it/s]

avg_train_loss 0.001344039039395284
train_f1Score 1.0
train_accuracy 1.0

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.27it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.87it/s]

Validation Loss:  2.18814218044281
Validation Accuracy:  0.6615853658536586
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.21it/s]


Saving Test Metrics....
Fold:  2
Run:  1
	Initialising Model....


 57%|█████▋    | 147/256 [00:00<00:00, 1455.54it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1424.27it/s]
100%|██████████| 330/330 [00:00<00:00, 1378.79it/s]
100%|██████████| 660/660 [00:00<00:00, 1378.06it/s]
100%|██████████| 256/256 [00:00<00:00, 1926.44it/s]
 63%|██████▎   | 207/330 [00:00<00:00, 2068.08it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 2031.42it/s]
100%|██████████| 660/660 [00:00<00:00, 1983.13it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.40it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.76it/s]

avg_train_loss 0.690292140468955
train_f1Score 0.5977011494252874
train_accuracy 0.58984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.92it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.07it/s]

Validation Loss:  0.6672101689548027
Validation Accuracy:  0.649390243902439
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.24it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.88it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.48it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.07it/s]

avg_train_loss 0.4777680682018399
train_f1Score 0.7756653992395437
train_accuracy 0.76953125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.87it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.10it/s]

Validation Loss:  0.6296427119068984
Validation Accuracy:  0.698170731707317
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.54it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.65it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.40it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.97it/s]

avg_train_loss 0.33218234323430806
train_f1Score 0.8856088560885609
train_accuracy 0.87890625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.47it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.79it/s]

Validation Loss:  1.0630091984824437
Validation Accuracy:  0.6646341463414634


Training...


100%|██████████| 32/32 [00:05<00:00,  5.50it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.58it/s]

avg_train_loss 0.2849853450898081
train_f1Score 0.9144981412639406
train_accuracy 0.91015625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.90it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.92it/s]

Validation Loss:  0.9542852418451775
Validation Accuracy:  0.6859756097560976


Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.00it/s]

avg_train_loss 0.2578545607102569
train_f1Score 0.931297709923664
train_accuracy 0.9296875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.47it/s]
  4%|▎         | 3/82 [00:00<00:03, 25.88it/s]

Validation Loss:  1.2475069158642393
Validation Accuracy:  0.7012195121951219
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.62it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.55it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.82it/s]

avg_train_loss 0.1726301281742053
train_f1Score 0.9513108614232209
train_accuracy 0.94921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.48it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.67it/s]

Validation Loss:  1.3830201960191495
Validation Accuracy:  0.6859756097560976


Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.11777905542840017
train_f1Score 0.9657794676806085
train_accuracy 0.96484375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.76it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.40it/s]

Validation Loss:  1.5889351609469642
Validation Accuracy:  0.6737804878048781


Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.74it/s]

avg_train_loss 0.0789729339958285
train_f1Score 0.9731800766283524
train_accuracy 0.97265625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.05it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  1.683428892224073
Validation Accuracy:  0.6737804878048781


Training...


100%|██████████| 32/32 [00:05<00:00,  5.48it/s]
  7%|▋         | 3/41 [00:00<00:01, 24.75it/s]

avg_train_loss 0.08000021172483684
train_f1Score 0.9731800766283524
train_accuracy 0.97265625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.58it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  1.7423380289238128
Validation Accuracy:  0.676829268292683


Training...


100%|██████████| 32/32 [00:05<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.85it/s]

avg_train_loss 0.0930531607955345
train_f1Score 0.9657794676806085
train_accuracy 0.96484375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.50it/s]


Validation Loss:  1.7303117610650456
Validation Accuracy:  0.6890243902439024
Saving Test Metrics....
Run:  2
	Initialising Model....


 52%|█████▏    | 133/256 [00:00<00:00, 1326.68it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1303.93it/s]
100%|██████████| 330/330 [00:00<00:00, 1352.48it/s]
100%|██████████| 660/660 [00:00<00:00, 1364.79it/s]
100%|██████████| 256/256 [00:00<00:00, 1637.27it/s]
 58%|█████▊    | 192/330 [00:00<00:00, 1913.38it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 1876.77it/s]
100%|██████████| 660/660 [00:00<00:00, 1908.44it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.66it/s]

avg_train_loss 0.7200425053015351
train_f1Score 0.6263345195729537
train_accuracy 0.58984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.33it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.88it/s]

Validation Loss:  0.7610945926933754
Validation Accuracy:  0.5030487804878049
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.10it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.60it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.44it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.83it/s]

avg_train_loss 0.5036358595825732
train_f1Score 0.7209302325581396
train_accuracy 0.71875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.81it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.99it/s]

Validation Loss:  0.7537226233540512
Validation Accuracy:  0.6432926829268293
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.38it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.56it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.58it/s]

avg_train_loss 0.33872049127239734
train_f1Score 0.8559670781893003
train_accuracy 0.86328125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.43it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.77it/s]

Validation Loss:  1.093794773628072
Validation Accuracy:  0.6463414634146342
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.35it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.67it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.2780693310778588
train_f1Score 0.9105691056910569
train_accuracy 0.9140625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.38it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  1.2638705387348081
Validation Accuracy:  0.6402439024390244


Training...


100%|██████████| 32/32 [00:05<00:00,  5.45it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.90it/s]

avg_train_loss 0.18488870590226725
train_f1Score 0.944
train_accuracy 0.9453125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.42it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.98it/s]

Validation Loss:  1.4702851241681634
Validation Accuracy:  0.6463414634146342
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.63it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.76it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.48it/s]
  7%|▋         | 3/41 [00:00<00:01, 26.20it/s]

avg_train_loss 0.24079002501093782
train_f1Score 0.9296875
train_accuracy 0.9296875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.15it/s]
  3%|▎         | 1/32 [00:00<00:04,  7.07it/s]

Validation Loss:  1.3409521812345924
Validation Accuracy:  0.6432926829268293


Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.92it/s]

avg_train_loss 0.1660373063350562
train_f1Score 0.9554655870445343
train_accuracy 0.95703125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.93it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.65it/s]

Validation Loss:  1.4659080999653513
Validation Accuracy:  0.6432926829268293


Training...


100%|██████████| 32/32 [00:05<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.52it/s]

avg_train_loss 0.15794799543800764
train_f1Score 0.9554655870445343
train_accuracy 0.95703125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.35it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.00it/s]

Validation Loss:  1.4861576455395396
Validation Accuracy:  0.6554878048780488
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.34it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.40it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.00it/s]

avg_train_loss 0.14319194547715597
train_f1Score 0.9596774193548387
train_accuracy 0.9609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.67it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.76it/s]

Validation Loss:  1.556286559599202
Validation Accuracy:  0.6432926829268293


Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.132887964398833
train_f1Score 0.963855421686747
train_accuracy 0.96484375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.04it/s]


Validation Loss:  1.5739899248611637
Validation Accuracy:  0.649390243902439
Saving Test Metrics....
Run:  3
	Initialising Model....


 55%|█████▌    | 141/256 [00:00<00:00, 1408.60it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1399.30it/s]
100%|██████████| 330/330 [00:00<00:00, 1419.81it/s]
100%|██████████| 660/660 [00:00<00:00, 1463.61it/s]
100%|██████████| 256/256 [00:00<00:00, 1878.16it/s]
 63%|██████▎   | 208/330 [00:00<00:00, 2072.09it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 2037.75it/s]
100%|██████████| 660/660 [00:00<00:00, 2043.31it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.93it/s]

avg_train_loss 0.7310921428725123
train_f1Score 0.584
train_accuracy 0.59375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.26it/s]
  4%|▎         | 3/82 [00:00<00:02, 28.65it/s]

Validation Loss:  0.6821035023142652
Validation Accuracy:  0.6341463414634146
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.19it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.58it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.92it/s]

avg_train_loss 0.4600851461291313
train_f1Score 0.7734375
train_accuracy 0.7734375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.60it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.12it/s]

Validation Loss:  0.775782180632033
Validation Accuracy:  0.6859756097560976
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.30it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.61it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.97it/s]

avg_train_loss 0.3163386674132198
train_f1Score 0.8715953307392996
train_accuracy 0.87109375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.02it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.96it/s]

Validation Loss:  0.8664414620980984
Validation Accuracy:  0.7073170731707317
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.29it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.56it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.44it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.00it/s]

avg_train_loss 0.19305658264784142
train_f1Score 0.9323308270676692
train_accuracy 0.9296875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.53it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.58it/s]

Validation Loss:  1.1210604074706392
Validation Accuracy:  0.7012195121951219


Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.53it/s]

avg_train_loss 0.1322146024176618
train_f1Score 0.9694656488549618
train_accuracy 0.96875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.51it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.69it/s]

Validation Loss:  1.4295569112774258
Validation Accuracy:  0.698170731707317


Training...


100%|██████████| 32/32 [00:05<00:00,  5.42it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.91it/s]

avg_train_loss 0.12057014845777303
train_f1Score 0.973384030418251
train_accuracy 0.97265625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.81it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.68it/s]

Validation Loss:  1.464059452480841
Validation Accuracy:  0.6920731707317073


Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.09it/s]

avg_train_loss 0.10063723874918651
train_f1Score 0.9769230769230769
train_accuracy 0.9765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.24it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.58it/s]

Validation Loss:  1.6322799707172302
Validation Accuracy:  0.6798780487804879


Training...


100%|██████████| 32/32 [00:05<00:00,  5.61it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.07it/s]

avg_train_loss 0.1157873971387744
train_f1Score 0.9769230769230769
train_accuracy 0.9765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.59it/s]
  3%|▎         | 1/32 [00:00<00:04,  7.04it/s]

Validation Loss:  1.5410386136301406
Validation Accuracy:  0.6859756097560976


Training...


100%|██████████| 32/32 [00:05<00:00,  5.58it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.10021728453284595
train_f1Score 0.9806949806949806
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.22it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.67it/s]

Validation Loss:  1.581432479024842
Validation Accuracy:  0.6829268292682927


Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.97it/s]

avg_train_loss 0.07574594470497686
train_f1Score 0.9846153846153847
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.60it/s]


Validation Loss:  1.5749348828046605
Validation Accuracy:  0.6829268292682927
Saving Test Metrics....
Fold:  3
Run:  1
	Initialising Model....


100%|██████████| 256/256 [00:00<00:00, 1511.38it/s]

	Loading Dataset....



100%|██████████| 330/330 [00:00<00:00, 1392.78it/s]
100%|██████████| 660/660 [00:00<00:00, 1390.44it/s]
100%|██████████| 256/256 [00:00<00:00, 1968.50it/s]
 61%|██████    | 200/330 [00:00<00:00, 1998.52it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 1985.00it/s]
100%|██████████| 660/660 [00:00<00:00, 1988.64it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.38it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.81it/s]

avg_train_loss 0.7332184370607138
train_f1Score 0.6008230452674898
train_accuracy 0.62109375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.48it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.03it/s]

Validation Loss:  0.6586414728222824
Validation Accuracy:  0.6219512195121951
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.11it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.54it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.5352144022472203
train_f1Score 0.7703703703703704
train_accuracy 0.7578125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.91it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.99it/s]

Validation Loss:  0.7144115592648344
Validation Accuracy:  0.6615853658536586
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.06it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.44it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.39it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.86it/s]

avg_train_loss 0.41117525519803166
train_f1Score 0.8550185873605947
train_accuracy 0.84765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.44it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.55it/s]

Validation Loss:  0.7840476464934465
Validation Accuracy:  0.6585365853658537


Training...


100%|██████████| 32/32 [00:06<00:00,  5.31it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.07it/s]

avg_train_loss 0.3251977896434255
train_f1Score 0.8759124087591241
train_accuracy 0.8671875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.01it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.74it/s]

Validation Loss:  0.9978272868547498
Validation Accuracy:  0.6829268292682927
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.22it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.64it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.94it/s]

avg_train_loss 0.19307937379926443
train_f1Score 0.9201520912547527
train_accuracy 0.91796875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.88it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.50it/s]

Validation Loss:  1.2290017961910584
Validation Accuracy:  0.676829268292683


Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.12453839034424163
train_f1Score 0.9682539682539683
train_accuracy 0.96875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.41it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.03it/s]

Validation Loss:  1.4190688116203358
Validation Accuracy:  0.6829268292682927
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.18it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.74it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.21it/s]

avg_train_loss 0.09522706431016559
train_f1Score 0.9803921568627452
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.89it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.04it/s]

Validation Loss:  1.4721580424641327
Validation Accuracy:  0.6951219512195121
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.16it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.60it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.90it/s]

avg_train_loss 0.0491910603523138
train_f1Score 0.9881422924901185
train_accuracy 0.98828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.09it/s]
  4%|▎         | 3/82 [00:00<00:02, 27.27it/s]

Validation Loss:  1.5837133265691015
Validation Accuracy:  0.698170731707317
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.29it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.72it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.88it/s]

avg_train_loss 0.06622392871940974
train_f1Score 0.988235294117647
train_accuracy 0.98828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.40it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.57it/s]

Validation Loss:  1.6179937905427524
Validation Accuracy:  0.698170731707317


Training...


100%|██████████| 32/32 [00:05<00:00,  5.50it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.79it/s]

avg_train_loss 0.0854954306196305
train_f1Score 0.9842519685039369
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.42it/s]


Validation Loss:  1.6385955905496348
Validation Accuracy:  0.6920731707317073
Saving Test Metrics....
Run:  2
	Initialising Model....


 52%|█████▏    | 134/256 [00:00<00:00, 1330.62it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1251.20it/s]
100%|██████████| 330/330 [00:00<00:00, 1377.20it/s]
100%|██████████| 660/660 [00:00<00:00, 1361.66it/s]
100%|██████████| 256/256 [00:00<00:00, 1794.71it/s]
 62%|██████▏   | 204/330 [00:00<00:00, 2025.22it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 2000.94it/s]
100%|██████████| 660/660 [00:00<00:00, 1858.46it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.40it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.83it/s]

avg_train_loss 0.7619362249970436
train_f1Score 0.48181818181818187
train_accuracy 0.5546875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.88it/s]
  4%|▎         | 3/82 [00:00<00:02, 28.72it/s]

Validation Loss:  0.6494653944562121
Validation Accuracy:  0.6310975609756098
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.49it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.55it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.35it/s]
  7%|▋         | 3/41 [00:00<00:01, 27.17it/s]

avg_train_loss 0.5325181945227087
train_f1Score 0.742857142857143
train_accuracy 0.75390625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.73it/s]
  4%|▎         | 3/82 [00:00<00:02, 27.06it/s]

Validation Loss:  0.6997732872643122
Validation Accuracy:  0.6646341463414634
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.43it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.60it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.41it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.91it/s]

avg_train_loss 0.3912614861037582
train_f1Score 0.852589641434263
train_accuracy 0.85546875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.77it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.60it/s]

Validation Loss:  1.08564485191572
Validation Accuracy:  0.6432926829268293


Training...


100%|██████████| 32/32 [00:05<00:00,  5.45it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.3257002650061622
train_f1Score 0.888030888030888
train_accuracy 0.88671875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.02it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.96it/s]

Validation Loss:  1.0773127502421054
Validation Accuracy:  0.6951219512195121
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.32it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.64it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.11it/s]

avg_train_loss 0.23712973171495833
train_f1Score 0.9254901960784313
train_accuracy 0.92578125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.02it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.03it/s]

Validation Loss:  1.2210153724952806
Validation Accuracy:  0.698170731707317
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.36it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.78it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.14it/s]

avg_train_loss 0.16469080376555212
train_f1Score 0.9647058823529412
train_accuracy 0.96484375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.55it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.06it/s]

Validation Loss:  1.4756901114497607
Validation Accuracy:  0.698170731707317
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.20it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.68it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.55it/s]

avg_train_loss 0.09095969640475232
train_f1Score 0.9763779527559054
train_accuracy 0.9765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.27it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.02it/s]

Validation Loss:  1.4428376096368927
Validation Accuracy:  0.7042682926829268
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.23it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.68it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.73it/s]

avg_train_loss 0.07596040598582476
train_f1Score 0.9841269841269841
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.38it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.60it/s]

Validation Loss:  1.5925904909693978
Validation Accuracy:  0.7012195121951219


Training...


100%|██████████| 32/32 [00:05<00:00,  5.50it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.08357311476720497
train_f1Score 0.9802371541501976
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.03it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.41it/s]

Validation Loss:  1.6081424063389622
Validation Accuracy:  0.7012195121951219


Training...


100%|██████████| 32/32 [00:05<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.99it/s]

avg_train_loss 0.07661004104011226
train_f1Score 0.9842519685039369
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.64it/s]


Validation Loss:  1.6101995098927036
Validation Accuracy:  0.698170731707317
Saving Test Metrics....
Run:  3
	Initialising Model....


100%|██████████| 256/256 [00:00<00:00, 1494.47it/s]

	Loading Dataset....



100%|██████████| 330/330 [00:00<00:00, 1387.49it/s]
100%|██████████| 660/660 [00:00<00:00, 1385.84it/s]
100%|██████████| 256/256 [00:00<00:00, 1976.61it/s]
 60%|██████    | 198/330 [00:00<00:00, 1979.19it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 1900.89it/s]
100%|██████████| 660/660 [00:00<00:00, 1944.88it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.87it/s]

avg_train_loss 0.7552073672413826
train_f1Score 0.5517241379310345
train_accuracy 0.59375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.43it/s]
  4%|▎         | 3/82 [00:00<00:03, 25.48it/s]

Validation Loss:  0.6186577619575873
Validation Accuracy:  0.6615853658536586
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.46it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.63it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.76it/s]

avg_train_loss 0.5170264802873135
train_f1Score 0.7510548523206751
train_accuracy 0.76953125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.60it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.19it/s]

Validation Loss:  0.6820798872447595
Validation Accuracy:  0.6737804878048781
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.55it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.78it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.94it/s]

avg_train_loss 0.3752488805912435
train_f1Score 0.8682170542635659
train_accuracy 0.8671875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.94it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.55it/s]

Validation Loss:  0.7507622816213747
Validation Accuracy:  0.6585365853658537


Training...


100%|██████████| 32/32 [00:05<00:00,  5.40it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.01it/s]

avg_train_loss 0.2500982354977168
train_f1Score 0.9260700389105059
train_accuracy 0.92578125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.94it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.92it/s]

Validation Loss:  0.9858446488293205
Validation Accuracy:  0.7073170731707317
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.21it/s]
  3%|▎         | 1/32 [00:00<00:04,  7.23it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.13it/s]

avg_train_loss 0.1837078554672189
train_f1Score 0.9538461538461539
train_accuracy 0.953125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.55it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.65it/s]

Validation Loss:  1.4973205967874426
Validation Accuracy:  0.6798780487804879


Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.11it/s]

avg_train_loss 0.15508033755759243
train_f1Score 0.968503937007874
train_accuracy 0.96875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.80it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.65it/s]

Validation Loss:  1.668475813016568
Validation Accuracy:  0.6554878048780488


Training...


100%|██████████| 32/32 [00:05<00:00,  5.57it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.10it/s]

avg_train_loss 0.12187524214095902
train_f1Score 0.9765625
train_accuracy 0.9765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.98it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.67it/s]

Validation Loss:  1.4707743326860776
Validation Accuracy:  0.6859756097560976


Training...


100%|██████████| 32/32 [00:05<00:00,  5.58it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.92it/s]

avg_train_loss 0.06820293523196597
train_f1Score 0.984375
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.65it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.72it/s]

Validation Loss:  1.6785200723537765
Validation Accuracy:  0.6707317073170732


Training...


100%|██████████| 32/32 [00:05<00:00,  5.57it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.84it/s]

avg_train_loss 0.06718902036664076
train_f1Score 0.988235294117647
train_accuracy 0.98828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.62it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.72it/s]

Validation Loss:  1.823871917170813
Validation Accuracy:  0.6554878048780488


Training...


100%|██████████| 32/32 [00:05<00:00,  5.56it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.98it/s]

avg_train_loss 0.0584779879936832
train_f1Score 0.9883268482490272
train_accuracy 0.98828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.03it/s]


Validation Loss:  1.8217998030141178
Validation Accuracy:  0.6554878048780488
Saving Test Metrics....
Fold:  4
Run:  1
	Initialising Model....


 57%|█████▋    | 146/256 [00:00<00:00, 1458.54it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1476.74it/s]
100%|██████████| 330/330 [00:00<00:00, 1416.36it/s]
100%|██████████| 660/660 [00:00<00:00, 1387.72it/s]
100%|██████████| 256/256 [00:00<00:00, 1933.21it/s]
 62%|██████▏   | 206/330 [00:00<00:00, 2052.44it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 2024.54it/s]
100%|██████████| 660/660 [00:00<00:00, 1990.76it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.36it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.7022201521322131
train_f1Score 0.6356589147286822
train_accuracy 0.6328125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.54it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.92it/s]

Validation Loss:  0.648988590007875
Validation Accuracy:  0.649390243902439
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.19it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.97it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.40it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.93it/s]

avg_train_loss 0.48355314834043384
train_f1Score 0.8
train_accuracy 0.80859375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.03it/s]
  4%|▎         | 3/82 [00:00<00:02, 28.86it/s]

Validation Loss:  0.7502407765969997
Validation Accuracy:  0.6676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.61it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.60it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.44it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.86it/s]

avg_train_loss 0.36561806173995137
train_f1Score 0.8755020080321286
train_accuracy 0.87890625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.63it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  0.8989424614644632
Validation Accuracy:  0.6585365853658537


Training...


100%|██████████| 32/32 [00:05<00:00,  5.48it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.36322434298926964
train_f1Score 0.9
train_accuracy 0.8984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.26it/s]
  4%|▎         | 3/82 [00:00<00:02, 27.05it/s]

Validation Loss:  1.0501029724028053
Validation Accuracy:  0.6737804878048781
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.45it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.62it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.44it/s]

avg_train_loss 0.22863667021738365
train_f1Score 0.9328063241106719
train_accuracy 0.93359375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.08it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  1.2619143113857363
Validation Accuracy:  0.649390243902439


Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.31it/s]

avg_train_loss 0.12362467014463618
train_f1Score 0.96875
train_accuracy 0.96875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.65it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  1.584216206538968
Validation Accuracy:  0.6402439024390244


Training...


100%|██████████| 32/32 [00:05<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.10156880019349046
train_f1Score 0.9806949806949806
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.20it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.99it/s]

Validation Loss:  1.7404956868508967
Validation Accuracy:  0.6402439024390244


Training...


100%|██████████| 32/32 [00:05<00:00,  5.59it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.08641670132055879
train_f1Score 0.9844961240310077
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.00it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  1.7626204054530075
Validation Accuracy:  0.6524390243902439


Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.10368420762824826
train_f1Score 0.9806949806949806
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.30it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.41it/s]

Validation Loss:  1.8252827794881674
Validation Accuracy:  0.649390243902439


Training...


100%|██████████| 32/32 [00:05<00:00,  5.42it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.92it/s]

avg_train_loss 0.1048848495411221
train_f1Score 0.980544747081712
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.46it/s]


Validation Loss:  1.8214953796506472
Validation Accuracy:  0.649390243902439
Saving Test Metrics....
Run:  2
	Initialising Model....


 52%|█████▏    | 134/256 [00:00<00:00, 1333.30it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1298.24it/s]
100%|██████████| 330/330 [00:00<00:00, 1333.61it/s]
100%|██████████| 660/660 [00:00<00:00, 1171.60it/s]
100%|██████████| 256/256 [00:00<00:00, 1822.26it/s]
 59%|█████▉    | 194/330 [00:00<00:00, 1938.92it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 1895.58it/s]
100%|██████████| 660/660 [00:00<00:00, 1808.99it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.41it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.01it/s]

avg_train_loss 0.6996169583871961
train_f1Score 0.6541353383458646
train_accuracy 0.640625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.81it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.22it/s]

Validation Loss:  0.6443752319347568
Validation Accuracy:  0.6189024390243902
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.27it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.65it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.49002593802288175
train_f1Score 0.7530364372469636
train_accuracy 0.76171875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.00it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.10it/s]

Validation Loss:  0.8103510533891073
Validation Accuracy:  0.649390243902439
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.73it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.61it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.44it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.40111391665413976
train_f1Score 0.8375451263537906
train_accuracy 0.82421875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.97it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.03it/s]

Validation Loss:  0.7789541704625618
Validation Accuracy:  0.6524390243902439
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.39it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.55it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.92it/s]

avg_train_loss 0.2576841523987241
train_f1Score 0.9118773946360152
train_accuracy 0.91015625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.80it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.71it/s]

Validation Loss:  1.300437058617429
Validation Accuracy:  0.6432926829268293


Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.06it/s]

avg_train_loss 0.09892403305275366
train_f1Score 0.980544747081712
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.01it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.69it/s]

Validation Loss:  1.6870560515217665
Validation Accuracy:  0.6463414634146342


Training...


100%|██████████| 32/32 [00:05<00:00,  5.59it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.89it/s]

avg_train_loss 0.11175422499945853
train_f1Score 0.9763779527559054
train_accuracy 0.9765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.94it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.86it/s]

Validation Loss:  1.8201104039099159
Validation Accuracy:  0.6402439024390244


Training...


100%|██████████| 32/32 [00:05<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.02it/s]

avg_train_loss 0.04489634255878627
train_f1Score 0.9883268482490272
train_accuracy 0.98828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.60it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  2.0078637058415065
Validation Accuracy:  0.6189024390243902


Training...


100%|██████████| 32/32 [00:05<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.07it/s]

avg_train_loss 0.05218890558171552
train_f1Score 0.9881422924901185
train_accuracy 0.98828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.49it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.72it/s]

Validation Loss:  2.0703344388705927
Validation Accuracy:  0.6280487804878049


Training...


100%|██████████| 32/32 [00:05<00:00,  5.56it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.94it/s]

avg_train_loss 0.003050624902243726
train_f1Score 1.0
train_accuracy 1.0

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.91it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.56it/s]

Validation Loss:  2.0591071102677323
Validation Accuracy:  0.6432926829268293


Training...


100%|██████████| 32/32 [00:05<00:00,  5.48it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.26it/s]

avg_train_loss 0.009820800019952003
train_f1Score 0.9922480620155039
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.72it/s]


Validation Loss:  2.1520459535645275
Validation Accuracy:  0.6280487804878049
Saving Test Metrics....
Run:  3
	Initialising Model....


 62%|██████▎   | 160/256 [00:00<00:00, 1588.80it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1504.87it/s]
100%|██████████| 330/330 [00:00<00:00, 1430.20it/s]
100%|██████████| 660/660 [00:00<00:00, 1415.20it/s]
100%|██████████| 256/256 [00:00<00:00, 2058.86it/s]
 58%|█████▊    | 191/330 [00:00<00:00, 1909.56it/s]

	Training Starts....


100%|██████████| 330/330 [00:00<00:00, 1928.72it/s]
100%|██████████| 660/660 [00:00<00:00, 2039.21it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.45it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.98it/s]

avg_train_loss 0.7430158983916044
train_f1Score 0.6037735849056604
train_accuracy 0.58984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.77it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.05it/s]

Validation Loss:  0.615380436908908
Validation Accuracy:  0.676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.38it/s]
  3%|▎         | 1/32 [00:00<00:04,  7.12it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.08it/s]

avg_train_loss 0.47594062332063913
train_f1Score 0.7937743190661478
train_accuracy 0.79296875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.68it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.71it/s]

Validation Loss:  0.7262760989549684
Validation Accuracy:  0.6707317073170732


Training...


100%|██████████| 32/32 [00:05<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.38154550013132393
train_f1Score 0.8571428571428571
train_accuracy 0.85546875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.72it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.17it/s]

Validation Loss:  0.8211805934949619
Validation Accuracy:  0.6890243902439024
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.39it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.62it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.06it/s]

avg_train_loss 0.19749203929677606
train_f1Score 0.9375
train_accuracy 0.9375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.02it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.77it/s]

Validation Loss:  1.1695800613521077
Validation Accuracy:  0.6737804878048781


Training...


100%|██████████| 32/32 [00:05<00:00,  5.57it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.08834198192926124
train_f1Score 0.9765625
train_accuracy 0.9765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.56it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.56it/s]

Validation Loss:  1.5525365806207425
Validation Accuracy:  0.6676829268292683


Training...


100%|██████████| 32/32 [00:05<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.84it/s]

avg_train_loss 0.0292942233136273
train_f1Score 0.9921259842519685
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.61it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.71it/s]

Validation Loss:  1.7612654643087853
Validation Accuracy:  0.6615853658536586


Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.01it/s]

avg_train_loss 0.03539507014284027
train_f1Score 0.9921875
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.56it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.65it/s]

Validation Loss:  1.908191401784013
Validation Accuracy:  0.6646341463414634


Training...


100%|██████████| 32/32 [00:05<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.11it/s]

avg_train_loss 0.07443196637541405
train_f1Score 0.9802371541501976
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.55it/s]
  3%|▎         | 1/32 [00:00<00:05,  6.00it/s]

Validation Loss:  1.8934547414140004
Validation Accuracy:  0.6707317073170732


Training...


100%|██████████| 32/32 [00:05<00:00,  5.57it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.26it/s]

avg_train_loss 0.003654202570032794
train_f1Score 1.0
train_accuracy 1.0

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.74it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.72it/s]

Validation Loss:  2.002454284851144
Validation Accuracy:  0.6646341463414634


Training...


100%|██████████| 32/32 [00:05<00:00,  5.56it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.07it/s]

avg_train_loss 0.008446263382211328
train_f1Score 0.996078431372549
train_accuracy 0.99609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.72it/s]


Validation Loss:  2.025284378630359
Validation Accuracy:  0.6676829268292683
Saving Test Metrics....
Fold:  5
Run:  1
	Initialising Model....


 52%|█████▏    | 134/256 [00:00<00:00, 1323.29it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1381.96it/s]
100%|██████████| 329/329 [00:00<00:00, 1467.14it/s]
100%|██████████| 667/667 [00:00<00:00, 1425.28it/s]
100%|██████████| 256/256 [00:00<00:00, 1849.12it/s]
 63%|██████▎   | 207/329 [00:00<00:00, 2065.23it/s]

	Training Starts....


100%|██████████| 329/329 [00:00<00:00, 2057.83it/s]
100%|██████████| 667/667 [00:00<00:00, 2031.50it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.07it/s]

avg_train_loss 0.7294352911412716
train_f1Score 0.5975103734439834
train_accuracy 0.62109375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.30it/s]
  4%|▎         | 3/83 [00:00<00:02, 28.81it/s]

Validation Loss:  0.7479645682544243
Validation Accuracy:  0.5823170731707317
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.89it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.58it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.94it/s]

avg_train_loss 0.5620964248664677
train_f1Score 0.754863813229572
train_accuracy 0.75390625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.66it/s]
  4%|▎         | 3/83 [00:00<00:03, 23.16it/s]

Validation Loss:  0.9004378260635748
Validation Accuracy:  0.6067073170731707
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.70it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.61it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.16it/s]

avg_train_loss 0.39043747703544796
train_f1Score 0.852589641434263
train_accuracy 0.85546875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.56it/s]
  4%|▎         | 3/83 [00:00<00:03, 23.07it/s]

Validation Loss:  1.0837829552772569
Validation Accuracy:  0.6158536585365854
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.59it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.57it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.89it/s]

avg_train_loss 0.30095226783305407
train_f1Score 0.8976377952755906
train_accuracy 0.8984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.40it/s]
  4%|▎         | 3/83 [00:00<00:03, 22.98it/s]

Validation Loss:  1.2106067585508997
Validation Accuracy:  0.6585365853658537
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.55it/s]
  3%|▎         | 1/32 [00:00<00:04,  7.11it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.13it/s]

avg_train_loss 0.19980791909620166
train_f1Score 0.9354838709677419
train_accuracy 0.9375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.66it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.57it/s]

Validation Loss:  1.6839387230120781
Validation Accuracy:  0.6402439024390244


Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.73it/s]

avg_train_loss 0.1196760707825888
train_f1Score 0.9571984435797667
train_accuracy 0.95703125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.33it/s]
  4%|▎         | 3/83 [00:00<00:02, 28.66it/s]

Validation Loss:  1.5436745431365035
Validation Accuracy:  0.6798780487804879
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.43it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.64it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.56it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.93it/s]

avg_train_loss 0.06081358222581912
train_f1Score 0.9803921568627452
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.47it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.56it/s]

Validation Loss:  1.9176175391710386
Validation Accuracy:  0.6585365853658537


Training...


100%|██████████| 32/32 [00:05<00:00,  5.63it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.97it/s]

avg_train_loss 0.014871041079459246
train_f1Score 0.996078431372549
train_accuracy 0.99609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.42it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.60it/s]

Validation Loss:  2.0508320364964807
Validation Accuracy:  0.6524390243902439


Training...


100%|██████████| 32/32 [00:05<00:00,  5.64it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.76it/s]

avg_train_loss 0.0022870513894304167
train_f1Score 1.0
train_accuracy 1.0

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.44it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.66it/s]

Validation Loss:  2.1020177268400424
Validation Accuracy:  0.6554878048780488


Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.41it/s]

avg_train_loss 0.02545065202502883
train_f1Score 0.996078431372549
train_accuracy 0.99609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.23it/s]


Validation Loss:  2.1322460189098265
Validation Accuracy:  0.6524390243902439
Saving Test Metrics....
Run:  2
	Initialising Model....


 57%|█████▋    | 145/256 [00:00<00:00, 1449.01it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1421.58it/s]
100%|██████████| 329/329 [00:00<00:00, 1422.61it/s]
100%|██████████| 667/667 [00:00<00:00, 1364.53it/s]
100%|██████████| 256/256 [00:00<00:00, 1811.38it/s]
 59%|█████▉    | 195/329 [00:00<00:00, 1944.91it/s]

	Training Starts....


100%|██████████| 329/329 [00:00<00:00, 1973.93it/s]
100%|██████████| 667/667 [00:00<00:00, 2031.97it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.40it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.94it/s]

avg_train_loss 0.7137686684727669
train_f1Score 0.5511111111111111
train_accuracy 0.60546875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.43it/s]
  4%|▎         | 3/83 [00:00<00:03, 22.99it/s]

Validation Loss:  0.6435754604455901
Validation Accuracy:  0.6310975609756098
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.35it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.60it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.34it/s]
  7%|▋         | 3/41 [00:00<00:01, 24.70it/s]

avg_train_loss 0.41811031522229314
train_f1Score 0.8333333333333335
train_accuracy 0.8359375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.41it/s]
  4%|▎         | 3/83 [00:00<00:03, 22.96it/s]

Validation Loss:  0.9063047512275416
Validation Accuracy:  0.6371951219512195
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.04it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.56it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.42it/s]
  7%|▋         | 3/41 [00:00<00:01, 24.47it/s]

avg_train_loss 0.2989966423774604
train_f1Score 0.8987854251012145
train_accuracy 0.90234375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.13it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  1.5019564570450201
Validation Accuracy:  0.6189024390243902


Training...


100%|██████████| 32/32 [00:05<00:00,  5.50it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.77it/s]

avg_train_loss 0.269584639929235
train_f1Score 0.9302325581395349
train_accuracy 0.9296875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.61it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.58it/s]

Validation Loss:  1.533060379644356
Validation Accuracy:  0.6341463414634146


Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.02it/s]

avg_train_loss 0.18452909140614793
train_f1Score 0.9498069498069499
train_accuracy 0.94921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.62it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.68it/s]

Validation Loss:  1.731130418254108
Validation Accuracy:  0.625


Training...


100%|██████████| 32/32 [00:05<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.01it/s]

avg_train_loss 0.08740183838381199
train_f1Score 0.9689922480620154
train_accuracy 0.96875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.37it/s]
  4%|▎         | 3/83 [00:00<00:03, 24.45it/s]

Validation Loss:  1.9536170755944602
Validation Accuracy:  0.6432926829268293
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.39it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.74it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.93it/s]

avg_train_loss 0.07970953661424574
train_f1Score 0.9844961240310077
train_accuracy 0.984375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.51it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.36it/s]

Validation Loss:  2.2358344313575
Validation Accuracy:  0.6341463414634146


Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 26.15it/s]

avg_train_loss 0.03441728654433973
train_f1Score 0.9922480620155039
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.15it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.51it/s]

Validation Loss:  2.228581370376959
Validation Accuracy:  0.6371951219512195


Training...


100%|██████████| 32/32 [00:05<00:00,  5.57it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.41it/s]

avg_train_loss 0.008501465767039917
train_f1Score 0.9961089494163424
train_accuracy 0.99609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.53it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.70it/s]

Validation Loss:  2.2467860710330125
Validation Accuracy:  0.6341463414634146


Training...


100%|██████████| 32/32 [00:05<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.68it/s]

avg_train_loss 0.001634359661693452
train_f1Score 1.0
train_accuracy 1.0

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.48it/s]


Validation Loss:  2.2654936095563376
Validation Accuracy:  0.6310975609756098
Saving Test Metrics....
Run:  3
	Initialising Model....


 55%|█████▌    | 141/256 [00:00<00:00, 1402.56it/s]

	Loading Dataset....


100%|██████████| 256/256 [00:00<00:00, 1339.64it/s]
100%|██████████| 329/329 [00:00<00:00, 1399.05it/s]
100%|██████████| 667/667 [00:00<00:00, 1412.96it/s]
100%|██████████| 256/256 [00:00<00:00, 1701.23it/s]
 61%|██████▏   | 202/329 [00:00<00:00, 2017.48it/s]

	Training Starts....


100%|██████████| 329/329 [00:00<00:00, 2001.00it/s]
100%|██████████| 667/667 [00:00<00:00, 1971.70it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla



Training...


100%|██████████| 32/32 [00:05<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.06it/s]

avg_train_loss 0.6883867625147104
train_f1Score 0.6234817813765183
train_accuracy 0.63671875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.54it/s]
  4%|▎         | 3/83 [00:00<00:03, 23.14it/s]

Validation Loss:  0.6837369196298646
Validation Accuracy:  0.6310975609756098
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.58it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.84it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.4716383987106383
train_f1Score 0.819277108433735
train_accuracy 0.82421875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.54it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.68it/s]

Validation Loss:  0.978483539528963
Validation Accuracy:  0.625


Training...


100%|██████████| 32/32 [00:05<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.21it/s]

avg_train_loss 0.32845424552215263
train_f1Score 0.8803088803088803
train_accuracy 0.87890625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.58it/s]
  4%|▎         | 3/83 [00:00<00:03, 23.05it/s]

Validation Loss:  1.008092599852783
Validation Accuracy:  0.676829268292683
Best mF1Score....


100%|██████████| 83/83 [00:03<00:00, 23.53it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.61it/s]



Training...


100%|██████████| 32/32 [00:05<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.2309462181001436
train_f1Score 0.9288389513108614
train_accuracy 0.92578125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.36it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.97it/s]

Validation Loss:  1.3402062102061947
Validation Accuracy:  0.649390243902439


Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.09it/s]

avg_train_loss 0.0654252929816721
train_f1Score 0.9884169884169884
train_accuracy 0.98828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.48it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.58it/s]

Validation Loss:  2.1569661314701434
Validation Accuracy:  0.6432926829268293


Training...


100%|██████████| 32/32 [00:05<00:00,  5.60it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.95it/s]

avg_train_loss 0.0369116939327796
train_f1Score 0.9922480620155039
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.57it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.52it/s]

Validation Loss:  2.2279843735883422
Validation Accuracy:  0.6463414634146342


Training...


100%|██████████| 32/32 [00:05<00:00,  5.58it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.49it/s]

avg_train_loss 0.05161659893201431
train_f1Score 0.9921875
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.42it/s]
  3%|▎         | 1/32 [00:00<00:04,  6.75it/s]

Validation Loss:  2.204873922229858
Validation Accuracy:  0.6463414634146342


Training...


100%|██████████| 32/32 [00:05<00:00,  5.61it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.80it/s]

avg_train_loss 0.07487246227901778
train_f1Score 0.988235294117647
train_accuracy 0.98828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.52it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.38it/s]

Validation Loss:  2.187987628733603
Validation Accuracy:  0.649390243902439


Training...


100%|██████████| 32/32 [00:05<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.00it/s]

avg_train_loss 0.041121070989902364
train_f1Score 0.9921875
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.47it/s]
  3%|▎         | 1/32 [00:00<00:05,  5.69it/s]

Validation Loss:  2.1580838747152193
Validation Accuracy:  0.6524390243902439


Training...


100%|██████████| 32/32 [00:05<00:00,  5.61it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.03868971547126421
train_f1Score 0.9922480620155039
train_accuracy 0.9921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.57it/s]


Validation Loss:  2.1875630990976878
Validation Accuracy:  0.6524390243902439
Saving Test Metrics....
Train Cnt:  256
Fold:  1
Run:  1
	Initialising Model....


 26%|██▌       | 134/512 [00:00<00:00, 1335.11it/s]

	Loading Dataset....


100%|██████████| 512/512 [00:00<00:00, 1298.66it/s]
100%|██████████| 330/330 [00:00<00:00, 1353.64it/s]
100%|██████████| 660/660 [00:00<00:00, 1417.51it/s]
 75%|███████▌  | 385/512 [00:00<00:00, 1935.24it/s]

	Training Starts....


100%|██████████| 512/512 [00:00<00:00, 1919.06it/s]
100%|██████████| 330/330 [00:00<00:00, 2016.80it/s]
100%|██████████| 660/660 [00:00<00:00, 1930.03it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSe



Training...


100%|██████████| 64/64 [00:11<00:00,  5.39it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.25it/s]

avg_train_loss 0.6740988669916987
train_f1Score 0.6376811594202899
train_accuracy 0.658203125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.59it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.94it/s]

Validation Loss:  0.6248480306892861
Validation Accuracy:  0.6676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.14it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.54it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.41it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.08it/s]

avg_train_loss 0.4948103983188048
train_f1Score 0.7992351816443595
train_accuracy 0.794921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.59it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.12it/s]

Validation Loss:  0.6469556907328163
Validation Accuracy:  0.7195121951219512
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.50it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.32it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.83it/s]

avg_train_loss 0.34928426321130246
train_f1Score 0.883720930232558
train_accuracy 0.8828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.00it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.56it/s]

Validation Loss:  0.944993201461507
Validation Accuracy:  0.698170731707317


Training...


100%|██████████| 64/64 [00:11<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 24.90it/s]

avg_train_loss 0.30509429777157493
train_f1Score 0.91015625
train_accuracy 0.91015625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.55it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.59it/s]

Validation Loss:  1.097712110410013
Validation Accuracy:  0.6920731707317073


Training...


100%|██████████| 64/64 [00:11<00:00,  5.47it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.98it/s]

avg_train_loss 0.20654899260262027
train_f1Score 0.9361702127659575
train_accuracy 0.935546875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.00it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.71it/s]

Validation Loss:  1.3166753602009722
Validation Accuracy:  0.698170731707317


Training...


100%|██████████| 64/64 [00:11<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.86it/s]

avg_train_loss 0.19967415632709162
train_f1Score 0.9498069498069499
train_accuracy 0.94921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.90it/s]
  2%|▏         | 1/64 [00:00<00:08,  7.05it/s]

Validation Loss:  1.395322648285911
Validation Accuracy:  0.7042682926829268


Training...


100%|██████████| 64/64 [00:11<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.90it/s]

avg_train_loss 0.15706079264782602
train_f1Score 0.9656488549618321
train_accuracy 0.96484375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.67it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.77it/s]

Validation Loss:  1.4142714706703838
Validation Accuracy:  0.6859756097560976


Training...


100%|██████████| 64/64 [00:11<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.98it/s]

avg_train_loss 0.11467236433236394
train_f1Score 0.9747572815533979
train_accuracy 0.974609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.92it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.56it/s]

Validation Loss:  1.5889889752810322
Validation Accuracy:  0.6646341463414634


Training...


100%|██████████| 64/64 [00:11<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.91it/s]

avg_train_loss 0.10727537397906417
train_f1Score 0.9766536964980544
train_accuracy 0.9765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.52it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.72it/s]

Validation Loss:  1.6309390245346216
Validation Accuracy:  0.6707317073170732


Training...


100%|██████████| 64/64 [00:11<00:00,  5.56it/s]
  7%|▋         | 3/41 [00:00<00:01, 27.78it/s]

avg_train_loss 0.09375363602885045
train_f1Score 0.9806201550387595
train_accuracy 0.98046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.70it/s]


Validation Loss:  1.6953866296992979
Validation Accuracy:  0.6646341463414634
Saving Test Metrics....
Run:  2
	Initialising Model....


 27%|██▋       | 137/512 [00:00<00:00, 1367.84it/s]

	Loading Dataset....


100%|██████████| 512/512 [00:00<00:00, 1369.09it/s]
100%|██████████| 330/330 [00:00<00:00, 1452.14it/s]
100%|██████████| 660/660 [00:00<00:00, 1460.72it/s]
 72%|███████▏  | 370/512 [00:00<00:00, 1851.91it/s]

	Training Starts....


100%|██████████| 512/512 [00:00<00:00, 1877.52it/s]
100%|██████████| 330/330 [00:00<00:00, 2050.90it/s]
100%|██████████| 660/660 [00:00<00:00, 1995.35it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSe



Training...


100%|██████████| 64/64 [00:11<00:00,  5.57it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.88it/s]

avg_train_loss 0.7165822298265994
train_f1Score 0.6309751434034416
train_accuracy 0.623046875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.60it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.13it/s]

Validation Loss:  0.5991666905763673
Validation Accuracy:  0.698170731707317
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.63it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.71it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.58it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.45446586632169783
train_f1Score 0.8095238095238094
train_accuracy 0.8125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 24.05it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.14it/s]

Validation Loss:  0.737701820527635
Validation Accuracy:  0.7073170731707317
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.61it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.70it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.59it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.38338440778898075
train_f1Score 0.8721804511278196
train_accuracy 0.8671875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.48it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.74it/s]

Validation Loss:  0.9155611788354269
Validation Accuracy:  0.6737804878048781


Training...


100%|██████████| 64/64 [00:11<00:00,  5.59it/s]
  7%|▋         | 3/41 [00:00<00:01, 27.53it/s]

avg_train_loss 0.2896550298319198
train_f1Score 0.9111111111111111
train_accuracy 0.90625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.70it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.69it/s]

Validation Loss:  1.0787820427156076
Validation Accuracy:  0.7073170731707317


Training...


100%|██████████| 64/64 [00:11<00:00,  5.63it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.96it/s]

avg_train_loss 0.23679598683156655
train_f1Score 0.943609022556391
train_accuracy 0.94140625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.49it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.80it/s]

Validation Loss:  1.3839207138957046
Validation Accuracy:  0.676829268292683


Training...


100%|██████████| 64/64 [00:11<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.06it/s]

avg_train_loss 0.1619202313304413
train_f1Score 0.9615384615384616
train_accuracy 0.9609375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.45it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.35it/s]

Validation Loss:  1.4193294866055977
Validation Accuracy:  0.6920731707317073


Training...


100%|██████████| 64/64 [00:11<00:00,  5.57it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.07it/s]

avg_train_loss 0.12572845368413255
train_f1Score 0.9712092130518234
train_accuracy 0.970703125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.63it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.73it/s]

Validation Loss:  1.652596040469844
Validation Accuracy:  0.6615853658536586


Training...


100%|██████████| 64/64 [00:11<00:00,  5.61it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.02it/s]

avg_train_loss 0.06414461399981519
train_f1Score 0.9865125240847785
train_accuracy 0.986328125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.61it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.78it/s]

Validation Loss:  1.8616628007190983
Validation Accuracy:  0.6585365853658537


Training...


100%|██████████| 64/64 [00:11<00:00,  5.61it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.71it/s]

avg_train_loss 0.059352009353460744
train_f1Score 0.9883720930232558
train_accuracy 0.98828125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.57it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.74it/s]

Validation Loss:  1.8795765027767275
Validation Accuracy:  0.6554878048780488


Training...


100%|██████████| 64/64 [00:11<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.01it/s]

avg_train_loss 0.0710913860511937
train_f1Score 0.98635477582846
train_accuracy 0.986328125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.64it/s]


Validation Loss:  1.8625617477951981
Validation Accuracy:  0.6707317073170732
Saving Test Metrics....
Run:  3
	Initialising Model....


 27%|██▋       | 139/512 [00:00<00:00, 1389.96it/s]

	Loading Dataset....


100%|██████████| 512/512 [00:00<00:00, 1396.80it/s]
100%|██████████| 330/330 [00:00<00:00, 1453.24it/s]
100%|██████████| 660/660 [00:00<00:00, 1456.36it/s]
 74%|███████▍  | 379/512 [00:00<00:00, 1902.73it/s]

	Training Starts....


100%|██████████| 512/512 [00:00<00:00, 1912.93it/s]
100%|██████████| 330/330 [00:00<00:00, 2061.83it/s]
100%|██████████| 660/660 [00:00<00:00, 1976.37it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSe



Training...


100%|██████████| 64/64 [00:11<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.90it/s]

avg_train_loss 0.6588675403036177
train_f1Score 0.625250501002004
train_accuracy 0.634765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.53it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.03it/s]

Validation Loss:  0.6854819946172761
Validation Accuracy:  0.7012195121951219
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 22.96it/s]
  2%|▏         | 1/64 [00:00<00:08,  7.04it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.04it/s]

avg_train_loss 0.4472576028201729
train_f1Score 0.800751879699248
train_accuracy 0.79296875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.89it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.40it/s]

Validation Loss:  0.7007748680870708
Validation Accuracy:  0.6829268292682927


Training...


100%|██████████| 64/64 [00:11<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.09it/s]

avg_train_loss 0.3860161427001003
train_f1Score 0.8675623800383877
train_accuracy 0.865234375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.76it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.60it/s]

Validation Loss:  0.8684690918137388
Validation Accuracy:  0.676829268292683


Training...


100%|██████████| 64/64 [00:11<00:00,  5.50it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.08it/s]

avg_train_loss 0.2523634190001758
train_f1Score 0.92578125
train_accuracy 0.92578125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.75it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.25it/s]

Validation Loss:  1.342122506622861
Validation Accuracy:  0.7042682926829268
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.41it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.74it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.42it/s]

avg_train_loss 0.21113842339400435
train_f1Score 0.9508840864440079
train_accuracy 0.951171875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.84it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.74it/s]

Validation Loss:  1.5171165694482625
Validation Accuracy:  0.698170731707317


Training...


100%|██████████| 64/64 [00:11<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 24.30it/s]

avg_train_loss 0.09353708698472474
train_f1Score 0.9784735812133072
train_accuracy 0.978515625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.24it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.61it/s]

Validation Loss:  1.736752363785011
Validation Accuracy:  0.6737804878048781


Training...


100%|██████████| 64/64 [00:11<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.08it/s]

avg_train_loss 0.037571104661765276
train_f1Score 0.9941747572815534
train_accuracy 0.994140625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.15it/s]
  2%|▏         | 1/64 [00:00<00:08,  7.22it/s]

Validation Loss:  1.8427884357686086
Validation Accuracy:  0.6890243902439024


Training...


100%|██████████| 64/64 [00:11<00:00,  5.56it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.20it/s]

avg_train_loss 0.02540456443784933
train_f1Score 0.9941747572815534
train_accuracy 0.994140625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.14it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.83it/s]

Validation Loss:  1.8934080135531541
Validation Accuracy:  0.698170731707317


Training...


100%|██████████| 64/64 [00:11<00:00,  5.56it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.00it/s]

avg_train_loss 0.027884488512427197
train_f1Score 0.9941747572815534
train_accuracy 0.994140625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.76it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.81it/s]

Validation Loss:  2.05184194809053
Validation Accuracy:  0.6707317073170732


Training...


100%|██████████| 64/64 [00:11<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.23it/s]

avg_train_loss 0.027152908331117942
train_f1Score 0.9941291585127201
train_accuracy 0.994140625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.89it/s]


Validation Loss:  2.00679581049012
Validation Accuracy:  0.6859756097560976
Saving Test Metrics....
Fold:  2
Run:  1
	Initialising Model....


 30%|███       | 154/512 [00:00<00:00, 1525.45it/s]

	Loading Dataset....


100%|██████████| 512/512 [00:00<00:00, 1439.00it/s]
100%|██████████| 330/330 [00:00<00:00, 1431.04it/s]
100%|██████████| 660/660 [00:00<00:00, 1462.60it/s]
 77%|███████▋  | 394/512 [00:00<00:00, 1967.78it/s]

	Training Starts....


100%|██████████| 512/512 [00:00<00:00, 1963.82it/s]
100%|██████████| 330/330 [00:00<00:00, 2023.90it/s]
100%|██████████| 660/660 [00:00<00:00, 1937.30it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSe



Training...


100%|██████████| 64/64 [00:11<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 24.79it/s]

avg_train_loss 0.6532322680577636
train_f1Score 0.6211180124223603
train_accuracy 0.642578125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.61it/s]
  4%|▎         | 3/82 [00:00<00:02, 28.85it/s]

Validation Loss:  0.6941757979916363
Validation Accuracy:  0.6585365853658537
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.62it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.60it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.5379949283087626
train_f1Score 0.7710843373493976
train_accuracy 0.77734375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.61it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.60it/s]

Validation Loss:  0.8199167651374165
Validation Accuracy:  0.6371951219512195


Training...


100%|██████████| 64/64 [00:11<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.02it/s]

avg_train_loss 0.37879533786326647
train_f1Score 0.8754578754578755
train_accuracy 0.8671875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.61it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.15it/s]

Validation Loss:  1.228933194425048
Validation Accuracy:  0.6798780487804879
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.61it/s]
  2%|▏         | 1/64 [00:00<00:10,  5.74it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.57it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.04it/s]

avg_train_loss 0.4206831965711899
train_f1Score 0.8862745098039215
train_accuracy 0.88671875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.04it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.66it/s]

Validation Loss:  1.081828625100415
Validation Accuracy:  0.6798780487804879


Training...


100%|██████████| 64/64 [00:11<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 28.53it/s]

avg_train_loss 0.3300239437376149
train_f1Score 0.9059500959692899
train_accuracy 0.904296875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.46it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.18it/s]

Validation Loss:  1.0240970268482115
Validation Accuracy:  0.6798780487804879
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.52it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.60it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.72it/s]

avg_train_loss 0.2301114007132128
train_f1Score 0.9375
train_accuracy 0.9375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.31it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.56it/s]

Validation Loss:  1.3561771397547024
Validation Accuracy:  0.6737804878048781


Training...


100%|██████████| 64/64 [00:11<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.2505992611841066
train_f1Score 0.9365079365079365
train_accuracy 0.9375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.66it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.60it/s]

Validation Loss:  1.2994592130275033
Validation Accuracy:  0.6798780487804879


Training...


100%|██████████| 64/64 [00:11<00:00,  5.53it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.72it/s]

avg_train_loss 0.15961843288096134
train_f1Score 0.9642857142857142
train_accuracy 0.96484375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.72it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.72it/s]

Validation Loss:  1.3808739383636814
Validation Accuracy:  0.6646341463414634


Training...


100%|██████████| 64/64 [00:11<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.10it/s]

avg_train_loss 0.149359524511965
train_f1Score 0.9641434262948206
train_accuracy 0.96484375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.94it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.55it/s]

Validation Loss:  1.472725321970335
Validation Accuracy:  0.6676829268292683


Training...


100%|██████████| 64/64 [00:11<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.20it/s]

avg_train_loss 0.1415354524506256
train_f1Score 0.9660678642714572
train_accuracy 0.966796875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.79it/s]


Validation Loss:  1.4348153567350492
Validation Accuracy:  0.676829268292683
Saving Test Metrics....
Run:  2
	Initialising Model....


 29%|██▉       | 148/512 [00:00<00:00, 1473.16it/s]

	Loading Dataset....


100%|██████████| 512/512 [00:00<00:00, 1421.75it/s]
100%|██████████| 330/330 [00:00<00:00, 1437.61it/s]
100%|██████████| 660/660 [00:00<00:00, 1446.73it/s]
 76%|███████▌  | 390/512 [00:00<00:00, 1950.37it/s]

	Training Starts....


100%|██████████| 512/512 [00:00<00:00, 1965.89it/s]
100%|██████████| 330/330 [00:00<00:00, 2033.91it/s]
100%|██████████| 660/660 [00:00<00:00, 2003.60it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSe



Training...


100%|██████████| 64/64 [00:11<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.91it/s]

avg_train_loss 0.7323356005363166
train_f1Score 0.6425855513307985
train_accuracy 0.6328125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.86it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.17it/s]

Validation Loss:  0.6284035742282867
Validation Accuracy:  0.6676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.55it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.58it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.43it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.92it/s]

avg_train_loss 0.5950145935639739
train_f1Score 0.7432675044883302
train_accuracy 0.720703125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.87it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.30it/s]

Validation Loss:  0.6389880064057141
Validation Accuracy:  0.676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.37it/s]
  2%|▏         | 1/64 [00:00<00:08,  7.08it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.5338900981005281
train_f1Score 0.7733812949640287
train_accuracy 0.75390625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.01it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.17it/s]

Validation Loss:  0.6276487889813214
Validation Accuracy:  0.6920731707317073
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.71it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.61it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.48it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.03it/s]

avg_train_loss 0.42770838004071265
train_f1Score 0.8397790055248618
train_accuracy 0.830078125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.74it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.99it/s]

Validation Loss:  0.7477929057871423
Validation Accuracy:  0.7103658536585366
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.44it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.56it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.48it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.18it/s]

avg_train_loss 0.32862326374743134
train_f1Score 0.8917431192660551
train_accuracy 0.884765625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.70it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.57it/s]

Validation Loss:  1.070877101817509
Validation Accuracy:  0.6859756097560976


Training...


100%|██████████| 64/64 [00:11<00:00,  5.50it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.85it/s]

avg_train_loss 0.323436054692138
train_f1Score 0.9127272727272727
train_accuracy 0.90625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.67it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.61it/s]

Validation Loss:  1.1361449369933547
Validation Accuracy:  0.6829268292682927


Training...


100%|██████████| 64/64 [00:11<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.94it/s]

avg_train_loss 0.29849923797883093
train_f1Score 0.9216757741347905
train_accuracy 0.916015625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.99it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.26it/s]

Validation Loss:  0.9602977274576339
Validation Accuracy:  0.7164634146341463
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.66it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.60it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.46it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.91it/s]

avg_train_loss 0.25914611293410417
train_f1Score 0.9264705882352942
train_accuracy 0.921875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 22.88it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.27it/s]

Validation Loss:  1.0329351592354659
Validation Accuracy:  0.7195121951219512
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.10it/s]
  2%|▏         | 1/64 [00:00<00:09,  6.94it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.49it/s]
  7%|▋         | 3/41 [00:00<00:01, 22.97it/s]

avg_train_loss 0.21865478555264417
train_f1Score 0.9385474860335196
train_accuracy 0.935546875

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.82it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.71it/s]

Validation Loss:  1.1623556129180077
Validation Accuracy:  0.7134146341463414


Training...


100%|██████████| 64/64 [00:11<00:00,  5.55it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.22it/s]

avg_train_loss 0.2070040606049588
train_f1Score 0.942271880819367
train_accuracy 0.939453125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.05it/s]


Validation Loss:  1.1407176386110667
Validation Accuracy:  0.7164634146341463
Saving Test Metrics....
Run:  3
	Initialising Model....


 27%|██▋       | 139/512 [00:00<00:00, 1368.80it/s]

	Loading Dataset....


100%|██████████| 512/512 [00:00<00:00, 1354.62it/s]
100%|██████████| 330/330 [00:00<00:00, 1388.10it/s]
100%|██████████| 660/660 [00:00<00:00, 1420.54it/s]
 72%|███████▏  | 369/512 [00:00<00:00, 1851.04it/s]

	Training Starts....


100%|██████████| 512/512 [00:00<00:00, 1873.29it/s]
100%|██████████| 330/330 [00:00<00:00, 1962.87it/s]
100%|██████████| 660/660 [00:00<00:00, 2000.91it/s]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSe



Training...


100%|██████████| 64/64 [00:11<00:00,  5.54it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.04it/s]

avg_train_loss 0.6684360941872001
train_f1Score 0.6511627906976746
train_accuracy 0.6484375

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.64it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.17it/s]

Validation Loss:  0.6103045882248297
Validation Accuracy:  0.6676829268292683
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.73it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.57it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.52it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.05it/s]

avg_train_loss 0.4507978719775565
train_f1Score 0.8226415094339622
train_accuracy 0.81640625

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.70it/s]
  4%|▎         | 3/82 [00:00<00:03, 23.10it/s]

Validation Loss:  0.8744445295958985
Validation Accuracy:  0.7073170731707317
Best mF1Score....


100%|██████████| 82/82 [00:03<00:00, 23.71it/s]
  2%|▏         | 1/64 [00:00<00:11,  5.59it/s]



Training...


100%|██████████| 64/64 [00:11<00:00,  5.51it/s]
  7%|▋         | 3/41 [00:00<00:01, 23.01it/s]

avg_train_loss 0.3244188553071581
train_f1Score 0.8915187376725838
train_accuracy 0.892578125

Running Validation...


100%|██████████| 41/41 [00:01<00:00, 23.74it/s]
  4%|▎         | 3/82 [00:00<00:03, 22.98it/s]

Validation Loss:  1.066000823908281
Validation Accuracy:  0.7195121951219512
Best mF1Score....


 33%|███▎      | 27/82 [00:01<00:02, 23.00it/s]

## Hindi Few Data

In [None]:
run_args={
    'model_name':'few_shot',
    'data_path':'Data_Processed/Shared_Task_hin/',
    'train_cnt':256,
    'res_base_path': 'Results/Shared_Task_hin/all_but_one/',
    'model_save_path': 'Saved_Models/Shared_Task_hin/',
    'isArabic': False,
}

model_args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda:1',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'isArabic': False,
        'model_path': "Saved_Models/Shared_Task_hin/all_but_one/best_bert_bert_1_all.pt",
    }
for train_cnt in [512]:
    print("Train Cnt: ",train_cnt)
    run_part(run_args,model_args,train_cnt)

Train Cnt:  512
Fold:  1
Run:  1
	Initialising Model....


## Bengali Few Data

In [None]:
run_args={
    'model_name':'few_shot',
    'data_path':'Data_Processed/Shared_Task_iben/',
    'train_cnt':256,
    'res_base_path': 'Results/Shared_Task_iben/all_but_one/',
    'model_save_path': 'Saved_Models/Shared_Task_iben/',
    'isArabic': False,
}

model_args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'isArabic': False,
        'model_path': "Saved_Models/Shared_Task_iben/all_but_one/best_bert_bert_2_all.pt",
    }
for train_cnt in [32,64,128,256,512]:
    print("Train Cnt: ",train_cnt)
    run_part(run_args,model_args,train_cnt)

## English Few Data

In [None]:
run_args={
    'model_name':'few_shot',
    'data_path':'Data_Processed/Shared_Task_eng/',
    'train_cnt':256,
    'res_base_path': 'Results/Shared_Task_eng/all_but_one/',
    'model_save_path': 'Saved_Models/Shared_Task_eng/',
    'isArabic': False,
}

model_args={
        'seed_val': 42,
        'batch_size': 8,
        'bert_model': "bert-base-multilingual-cased",
        'learning_rate': 2e-5,
        'epochs': 10,
        'max_len': 128,
        'device': 'cuda:1',
        'weights': [1.0, 1.0],
        'save_model': False,
        'model_save_path': '',
        'isArabic': False,
        'model_path': "Saved_Models/Shared_Task_eng/all_but_one/best_bert_bert_1_all.pt",
    }
for train_cnt in [32,64,128,256,512]:
    print("Train Cnt: ",train_cnt)
    run_part(run_args,model_args,train_cnt)