In [1]:
import pandas as pd
import numpy as np
import torch
from torch import cuda
from torch.utils.data import Dataset, DataLoader
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from load_data import initialize_data
from reading_datasets import read_task
from labels_to_ids import labels_to_ids_kan
import time
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
def train(epoch, train_loader, model, optimizer, device, grad_step = 1, max_grad_norm = 10):
    tr_loss, tr_accuracy = 0, 0
    tr_precision, tr_recall = 0, 0
    tr_f1score = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_preds, tr_labels = [], []
    # put model in training mode
    model.train()
    optimizer.zero_grad()
    
    for idx, batch in enumerate(train_loader):
        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        labels = batch['labels'].to(device, dtype = torch.long)

        if (idx + 1) % 20 == 0:
            print('FINSIHED BATCH:', idx, 'of', len(train_loader))

        #loss, tr_logits = model(input_ids=ids, attention_mask=mask, labels=labels)
        output = model(input_ids=ids, attention_mask=mask, labels=labels)
        tr_loss += output[0]

        nb_tr_steps += 1
        nb_tr_examples += labels.size(0)
           
        # compute training accuracy
        flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
        active_logits = output[1].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
        
        # only compute accuracy at active labels
        active_accuracy = labels.view(-1) != -100 # shape (batch_size, seq_len)
        #active_labels = torch.where(active_accuracy, labels.view(-1), torch.tensor(-100).type_as(labels))
        
        labels = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)

        tr_labels.extend(labels)
        tr_preds.extend(predictions)

        tmp_tr_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy
        
        # Compute Precision
        tmp_tr_precision = precision_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0 )
        tr_precision += tmp_tr_precision
        
        # Compute Recall
        tmp_tr_recall = recall_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
        tr_recall += tmp_tr_recall
        
        # Compute f1score
        tmp_tr_f1score = f1_score(labels.cpu().numpy(), predictions.cpu().numpy(), average= 'macro', zero_division=0)
        tr_f1score += tmp_tr_f1score
    
        # gradient clipping
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=max_grad_norm
        )
        
        # backward pass
        output['loss'].backward()
        if (idx + 1) % grad_step == 0:
            optimizer.step()
            optimizer.zero_grad()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    tr_precision = tr_precision / nb_tr_steps
    tr_recall = tr_recall / nb_tr_steps
    tr_f1score= tr_f1score / nb_tr_steps
    #print(f"Training loss epoch: {epoch_loss}")
    #print(f"Training accuracy epoch: {tr_accuracy}")

    return model

In [3]:
def testing(model, testing_loader, labels_to_ids, device):
    # put model in evaluation mode
    model.eval()
    
    eval_loss, eval_accuracy = 0, 0
    eval_precision, eval_recall = 0, 0
    eval_f1score = 0
    nb_eval_examples, nb_eval_steps = 0, 0
    eval_preds, eval_labels = [], []
     
    
    ids_to_labels = dict((v,k) for k,v in labels_to_ids.items())

    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):
            
            ids = batch['input_ids'].to(device, dtype = torch.long)
            mask = batch['attention_mask'].to(device, dtype = torch.long)
            labels = batch['labels'].to(device, dtype = torch.long)
            
            #loss, eval_logits = model(input_ids=ids, attention_mask=mask, labels=labels)
            output = model(input_ids=ids, attention_mask=mask, labels=labels)

            eval_loss += output['loss'].item()

            nb_eval_steps += 1
            nb_eval_examples += labels.size(0)
        
            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")
              
            # compute evaluation accuracy
            flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
            active_logits = output[1].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
            
            # only compute accuracy at active labels
            active_accuracy = labels.view(-1) != -100 # shape (batch_size, seq_len)
        
            labels = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)
            
            eval_labels.extend(labels)
            eval_preds.extend(predictions)
            
            tmp_eval_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy
            
            # Compute Precision
            tmp_eval_precision = precision_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_precision += tmp_eval_precision
            
            # Compute Recall
            tmp_eval_recall = recall_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_recall += tmp_eval_recall
            
            # Compute f1score
            tmp_eval_f1score = f1_score(labels.cpu().numpy(), predictions.cpu().numpy(), average='macro', zero_division=0)
            eval_f1score += tmp_eval_f1score

    labels = [ids_to_labels[id.item()] for id in eval_labels]
    predictions = [ids_to_labels[id.item()] for id in eval_preds]
    
    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps
    eval_precision = eval_precision / nb_eval_steps
    eval_recall = eval_recall / nb_eval_steps
    eval_f1score = eval_f1score / nb_eval_steps
    #print(f"Validation Loss: {eval_loss}")
    #print(f"Validation Accuracy: {eval_accuracy}")

    return labels, predictions, eval_accuracy, eval_precision, eval_recall, eval_f1score

In [4]:
def main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location):
    #Initialization training parameters
    max_len = 256
    batch_size = 32
    grad_step = 1
    learning_rate = 1e-05
    initialization_input = (max_len, batch_size)

    #Reading datasets and initializing data loaders
    dataset_location = '../datasets/task_a/'

    train_data = read_task(dataset_location , split = 'kan_train_negative_augmented')
    dev_data = read_task(dataset_location , split = 'kan_sentiment_dev')
    #test_data = read_task(dataset_location , split = 'dev')#load test set
    labels_to_ids = labels_to_ids_kan
    #input_data = (train_data, dev_data, labels_to_ids)

    #Define tokenizer, model and optimizer
    device = 'cuda' if cuda.is_available() else 'cpu' #save the processing time
    if model_load_flag:
        tokenizer = AutoTokenizer.from_pretrained(model_load_location)
        model = AutoModelForSequenceClassification.from_pretrained(model_load_location)
    else: 
        tokenizer =  AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(labels_to_ids))
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
    model.to(device)

    #Get dataloaders
    train_loader = initialize_data(tokenizer, initialization_input, train_data, labels_to_ids, shuffle = True)
    dev_loader = initialize_data(tokenizer, initialization_input, dev_data, labels_to_ids, shuffle = True)
    #test_loader = initialize_data(tokenizer, initialization_input, test_data, labels_to_ids, shuffle = True)#create test loader

    best_dev_acc = 0
    best_test_acc = 0
    best_dev_precision = 0
    best_test_precision = 0
    best_dev_recall = 0
    best_test_recall = 0
    best_dev_f1score = 0
    best_test_f1score = 0
    best_epoch = -1
    
    list_dev_acc = [] 
    list_test_acc = []  
    list_dev_precision = []  
    list_test_precision  = []  
    list_dev_recall = []  
    list_test_recall = []  
    list_dev_f1score = []  
    list_test_f1score = []
    
    for epoch in range(n_epochs):
        start = time.time()
        print(f"Training epoch: {epoch + 1}")

        #train model
        if not model_load_flag:
            model = train(epoch, train_loader, model, optimizer, device, grad_step)
        
        #testing and logging
        labels_dev, predictions_dev, dev_accuracy, dev_precision, dev_recall, dev_f1score = testing(model, dev_loader, labels_to_ids, device)
        print('DEV ACC:', dev_accuracy)
        print('DEV Precision:' , dev_precision)
        print('DEV Recall:' , dev_recall)
        print('DEV F1Score:' , dev_f1score)
        
        list_dev_acc.append(dev_accuracy)     
        list_dev_precision.append(dev_precision)   
        list_dev_recall.append(dev_recall)  
        list_dev_f1score.append(dev_f1score)  
        
        
        #labels_test, predictions_test, test_accuracy, test_precision, test_recall, test_f1score = testing(model, test_loader, labels_to_ids, device)
        #print('TEST ACC:', test_accuracy)
        #print('TEST Precision:' , test_precision)
        #print('TEST Recall:' , test_recall)
        #print('TEST F1Score:' , test_f1score)
        
        #list_test_acc.append(test_accuracy) 
        #list_test_precision.append(test_precision)  
        #list_test_recall.append(test_recall)
        #list_test_f1score.append(test_f1score) 

        #saving model
        if dev_accuracy > best_dev_acc:
            best_dev_acc = dev_accuracy
            #best_test_acc = test_accuracy
        if dev_precision > best_dev_precision:
            best_dev_precision = dev_precision
            #best_test_precision = test_precision
        if dev_recall > best_dev_recall:
            best_dev_recall = dev_recall
            #best_test_recall = test_recall
        if dev_f1score > best_dev_f1score:
            best_dev_f1score = dev_f1score
            #best_test_f1score = test_f1score
            best_epoch = epoch
            
            if model_save_flag:
                os.makedirs(model_save_location, exist_ok=True)
                tokenizer.save_pretrained(model_save_location)
                model.save_pretrained(model_save_location)

        now = time.time()
        print('BEST ACCURACY --> ', 'DEV:', round(best_dev_acc, 5))
        print('BEST PRECISION --> ', 'DEV:', round(best_dev_precision, 5))
        print('BEST RECALL --> ', 'DEV:', round(best_dev_recall, 5))
        print('BEST F1SCORE --> ', 'DEV:', round(best_dev_f1score, 5))
        print('TIME PER EPOCH:', (now-start)/60 )
        print()

    return best_dev_acc, best_test_acc, best_epoch, best_dev_precision, best_test_precision, best_dev_recall, best_test_recall, best_dev_f1score, best_test_f1score, list_dev_acc, list_test_acc, list_dev_precision, list_test_precision, list_dev_recall, list_test_recall, list_dev_f1score, list_test_f1score

In [5]:
if __name__ == '__main__':
    n_epochs = 10
    models = ['ai4bharat/indic-bert']
    
    #model saving parameters
    model_save_flag = True
    model_load_flag = False
    
    overall_list_dev_acc = [] 
    overall_list_test_acc = []    
    overall_list_dev_precision = []  
    overall_list_test_precision  = []  
    overall_list_dev_recall = []  
    overall_list_test_recall = []  
    overall_list_dev_f1score = []  
    overall_list_test_f1score = [] 
    
    for i in range(5):
        
        for model_name in models:

            model_save_location = 'saved_models/' + model_name + 'Kannada' + 'Augmented' + str(i)
            model_load_location = None

            best_dev_acc, best_test_acc, best_epoch, best_dev_precision, best_test_precision, best_dev_recall, best_test_recall, best_dev_f1score, best_test_f1score, list_dev_acc, list_test_acc, list_dev_precision, list_test_precision, list_dev_recall, list_test_recall, list_dev_f1score, list_test_f1score = main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location)
            
            overall_list_dev_acc.append(list_dev_acc) 
            overall_list_test_acc.append(list_test_acc) 
            overall_list_dev_precision.append(list_dev_precision)  
            overall_list_test_precision.append(list_test_precision) 
            overall_list_dev_recall.append(list_dev_recall)  
            overall_list_test_recall.append(list_test_recall)  
            overall_list_dev_f1score.append(list_dev_f1score)  
            overall_list_test_f1score.append(list_test_f1score) 

Downloading:   0%|          | 0.00/129M [00:00<?, ?B/s]

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'sop_classifier.classifier.bias', 'predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 1.3624331951141357
DEV ACC: 0.3713367224880383
DEV Precision: 0.15116347254494134
DEV Recall: 0.22679242076371262
DEV F1Score: 0.1760930483256686
BEST ACCURACY -->  DEV: 0.37134
BEST PRECISION -->  DEV: 0.15116
BEST RECALL -->  DEV: 0.22679
BEST F1SCORE -->  DEV: 0.17609
TIME PER EPOCH: 5.144247460365295

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINS

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'sop_classifier.classifier.bias', 'predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 1.344710350036621
DEV ACC: 0.4045305023923445
DEV Precision: 0.1545091641771824
DEV Recall: 0.2251763635518561
DEV F1Score: 0.1786770960005882
BEST ACCURACY -->  DEV: 0.40453
BEST PRECISION -->  DEV: 0.15451
BEST RECALL -->  DEV: 0.22518
BEST F1SCORE -->  DEV: 0.17868
TIME PER EPOCH: 5.49535650809606

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'sop_classifier.classifier.bias', 'predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 1.1015130281448364
DEV ACC: 0.4727123205741627
DEV Precision: 0.2794815091706115
DEV Recall: 0.3313655138057052
DEV F1Score: 0.2952927144023049
BEST ACCURACY -->  DEV: 0.47271
BEST PRECISION -->  DEV: 0.27948
BEST RECALL -->  DEV: 0.33137
BEST F1SCORE -->  DEV: 0.29529
TIME PER EPOCH: 5.274339624245962

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIH

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'sop_classifier.classifier.bias', 'predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 1.296880841255188
DEV ACC: 0.37380382775119614
DEV Precision: 0.13744504570659136
DEV Recall: 0.20649935028382824
DEV F1Score: 0.16180316753368829
BEST ACCURACY -->  DEV: 0.3738
BEST PRECISION -->  DEV: 0.13745
BEST RECALL -->  DEV: 0.2065
BEST F1SCORE -->  DEV: 0.1618
TIME PER EPOCH: 5.486238614718119

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIH

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.dense.weight', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'sop_classifier.classifier.bias', 'predictions.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 1.5130629539489746
DEV ACC: 0.4505083732057416
DEV Precision: 0.4184701831848608
DEV Recall: 0.41050959626107386
DEV F1Score: 0.3725191969585616
BEST ACCURACY -->  DEV: 0.45051
BEST PRECISION -->  DEV: 0.41847
BEST RECALL -->  DEV: 0.41051
BEST F1SCORE -->  DEV: 0.37252
TIME PER EPOCH: 5.327264006932577

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSI

In [6]:
print(best_dev_acc)

0.6167763157894737


In [7]:
print(best_epoch)

9


In [8]:
print(best_dev_precision)

0.5185590353735156


In [9]:
print(best_dev_recall)

0.5041628942581483


In [10]:
print(best_dev_f1score)

0.4771103737338075


In [11]:
print(overall_list_dev_acc)

[[0.3713367224880383, 0.47607655502392343, 0.5480711722488039, 0.5489683014354066, 0.5808163875598086, 0.5963666267942584, 0.5936004784688995, 0.6120663875598086, 0.59688995215311, 0.609674043062201], [0.4045305023923445, 0.4453498803827751, 0.5788726076555024, 0.5954694976076556, 0.5916566985645934, 0.5840311004784688, 0.5793211722488039, 0.5486692583732057, 0.5959928229665071, 0.6006279904306221], [0.4727123205741627, 0.5589862440191388, 0.5917314593301435, 0.5694527511961722, 0.6034688995215312, 0.5547248803827751, 0.6006279904306221, 0.6129635167464115, 0.6053379186602871, 0.5684808612440192], [0.37380382775119614, 0.43480861244019137, 0.5414174641148325, 0.5523325358851675, 0.5589114832535885, 0.5854515550239234, 0.5906848086124402, 0.6030950956937798, 0.5627242822966507, 0.5983104066985646], [0.4505083732057416, 0.5220544258373206, 0.5655651913875598, 0.6072816985645934, 0.6167763157894737, 0.5930771531100478, 0.6129635167464115, 0.6049641148325359, 0.6044407894736842, 0.57117224

In [12]:
print(overall_list_dev_precision)

[[0.15116347254494134, 0.2114837533413729, 0.3361861167025113, 0.32320854353608364, 0.356484798887434, 0.3675090875041697, 0.3870414252656371, 0.4561685688579067, 0.41896583343635124, 0.5165935106769796], [0.1545091641771824, 0.1730203779531843, 0.3522474375646312, 0.36959759357626604, 0.33825387475777835, 0.34655893718112274, 0.33760187798084434, 0.3783964268055177, 0.4210400616286487, 0.43436245139137064], [0.2794815091706115, 0.4411047329882645, 0.4673578911290001, 0.4545161280849431, 0.46449014258172006, 0.4567105572836645, 0.49193744146759194, 0.5052538007540822, 0.5311467803091002, 0.4926474038439279], [0.13744504570659136, 0.1743963856328007, 0.33931857066460086, 0.3194519712593134, 0.33788704256037444, 0.35771284607127557, 0.3554244502580072, 0.3901254552550993, 0.43134050869802343, 0.44868747789096786], [0.4184701831848608, 0.4423041627119167, 0.48370343276083133, 0.46649850997396014, 0.4729820729275415, 0.4357115641730757, 0.49131548530289954, 0.48969260236745454, 0.518559035

In [13]:
print(overall_list_dev_recall)

[[0.22679242076371262, 0.24103215115247204, 0.3728724299163014, 0.41588951923158923, 0.4462313228957614, 0.4416755370298151, 0.4683564834600931, 0.4641573032582391, 0.46276555815888787, 0.49474536811188935], [0.2251763635518561, 0.25955819128490104, 0.3913326341134708, 0.4138616434070979, 0.40500518479550274, 0.4388957479315099, 0.41071703677846366, 0.42140550551646805, 0.44302311441216935, 0.44855539986485665], [0.3313655138057052, 0.45525000670124505, 0.4728058447122097, 0.47527983454454037, 0.5086751794074831, 0.45756535131535137, 0.5032221499534334, 0.5230424984372353, 0.517545577565631, 0.4910686702436632], [0.20649935028382824, 0.2601143954085131, 0.3857774823216, 0.3694952596716071, 0.4051180763680764, 0.4477659153828941, 0.44697181266524083, 0.4376625647080193, 0.44585715309645785, 0.44881775981937816], [0.41050959626107386, 0.4215967455004888, 0.4770781517205315, 0.5041628942581483, 0.48514977079711114, 0.48270441073257353, 0.4781397479525822, 0.49410396710741494, 0.4831670012

In [14]:
print(overall_list_dev_f1score)

[[0.1760930483256686, 0.19282612233564034, 0.33666473186562973, 0.3526674873792428, 0.38498324013542345, 0.38192736055804893, 0.40291104847848574, 0.42666457238732325, 0.41391015454569385, 0.48412734291990256], [0.1786770960005882, 0.2030353701456617, 0.35860861596834864, 0.37547899407601903, 0.35893026782861925, 0.37778597393573593, 0.36244971104135293, 0.3798359711807793, 0.4125125136196047, 0.41844345497813423], [0.2952927144023049, 0.4244972967864777, 0.44738878653362285, 0.442273095718279, 0.4679167115901521, 0.42114741623375684, 0.4723332094904459, 0.48527066742592223, 0.5001983371035451, 0.4669875164448456], [0.16180316753368829, 0.20355380897694053, 0.34252015754732906, 0.3329968063346393, 0.3569508784035132, 0.3847305764711236, 0.3812696061931639, 0.3942492943258298, 0.4137974426878086, 0.4341971778681344], [0.3725191969585616, 0.3947228111992062, 0.45998268654685753, 0.4597088170853654, 0.45762583529502054, 0.4402710173811363, 0.46129134662745463, 0.4699335580336509, 0.476986

In [15]:
#The best model is 3