In [1]:
import pandas as pd
import numpy as np
import torch
from torch import cuda
from torch.utils.data import Dataset, DataLoader
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from load_data import initialize_data
from reading_datasets import read_task
from labels_to_ids import labels_to_ids_kan
import time
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
def train(epoch, train_loader, model, optimizer, device, grad_step = 1, max_grad_norm = 10):
    """Run one training epoch over ``train_loader`` and return the updated model.

    Args:
        epoch: Index of the current epoch. Accepted for call compatibility;
            it is not used inside the function.
        train_loader: Iterable of batches. Each batch is indexed with the keys
            'input_ids', 'attention_mask' and 'labels', whose values are tensors.
        model: Module called as ``model(input_ids=..., attention_mask=..., labels=...)``.
            Element 0 of the returned object must be the scalar training loss.
        optimizer: Optimizer that owns ``model``'s parameters.
        device: Device the batch tensors are moved to before the forward pass.
        grad_step: Number of batches whose gradients are accumulated before
            each optimizer step.
        max_grad_norm: Upper bound on the total gradient norm applied right
            before every optimizer step.

    Returns:
        The same ``model`` object, updated in place.
    """
    # put model in training mode
    model.train()
    optimizer.zero_grad()

    # True while gradients have been accumulated but not yet applied.
    pending_update = False

    for idx, batch in enumerate(train_loader):
        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        labels = batch['labels'].to(device, dtype = torch.long)

        if (idx + 1) % 20 == 0:
            print('FINSIHED BATCH:', idx, 'of', len(train_loader))

        output = model(input_ids=ids, attention_mask=mask, labels=labels)
        loss = output[0]

        # No per-batch loss/accuracy/precision/recall/F1 bookkeeping here:
        # this function only returns the model, so those values were never
        # consumed, and keeping a running sum of the loss *tensor* would hold
        # on to every batch's autograd graph.

        # backward pass (gradients accumulate across batches until the step)
        loss.backward()
        pending_update = True

        if (idx + 1) % grad_step == 0:
            # Clip AFTER backward and BEFORE the step so the clipping acts on
            # the gradients that are actually applied.
            torch.nn.utils.clip_grad_norm_(
                parameters=model.parameters(), max_norm=max_grad_norm
            )
            optimizer.step()
            optimizer.zero_grad()
            pending_update = False

    # Apply the trailing partial accumulation when the number of batches is
    # not a multiple of grad_step, instead of silently discarding it.
    if pending_update:
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=max_grad_norm
        )
        optimizer.step()
        optimizer.zero_grad()

    return model

In [3]:
def testing(model, testing_loader, labels_to_ids, device):
    """Evaluate ``model`` on ``testing_loader`` without updating any weights.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=..., labels=...)``.
            The result must expose the loss under the key 'loss' and the logits
            at index 1; ``model.num_labels`` gives the width of the logits.
        testing_loader: Iterable of batches indexed with the keys 'input_ids',
            'attention_mask' and 'labels', whose values are tensors.
        labels_to_ids: Mapping from label name to integer class id. It is
            inverted here to turn ids back into label names.
        device: Device the batch tensors are moved to before the forward pass.

    Returns:
        A tuple ``(labels, predictions, accuracy, precision, recall, f1score)``.
        ``labels`` and ``predictions`` are lists of label names for every
        evaluated target, in loader order. The four scores are the mean over
        batches of the per-batch sklearn score (macro-averaged for precision,
        recall and F1).

    Raises:
        ValueError: If ``testing_loader`` yields no batches.
    """
    # put model in evaluation mode
    model.eval()

    ids_to_labels = {class_id: name for name, class_id in labels_to_ids.items()}

    eval_loss = 0
    nb_eval_steps = 0
    eval_accuracy, eval_precision, eval_recall, eval_f1score = 0, 0, 0, 0
    eval_labels, eval_preds = [], []

    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):

            ids = batch['input_ids'].to(device, dtype = torch.long)
            mask = batch['attention_mask'].to(device, dtype = torch.long)
            labels = batch['labels'].to(device, dtype = torch.long)

            output = model(input_ids=ids, attention_mask=mask, labels=labels)

            eval_loss += output['loss'].item()
            nb_eval_steps += 1

            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")

            # One row of logits per target after flattening.
            flattened_targets = labels.view(-1)
            active_logits = output[1].view(-1, model.num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1)

            # Targets equal to -100 are excluded from the metrics.
            active_mask = flattened_targets != -100

            # Move to CPU once per batch; every metric below reuses these arrays.
            batch_labels = torch.masked_select(flattened_targets, active_mask).cpu().numpy()
            batch_preds = torch.masked_select(flattened_predictions, active_mask).cpu().numpy()

            # Store plain Python ints rather than 0-d (possibly CUDA) tensors.
            eval_labels.extend(batch_labels.tolist())
            eval_preds.extend(batch_preds.tolist())

            # NOTE(review): scores are computed per batch and averaged over
            # batches, which is generally not equal to the same score computed
            # once over the whole set (especially for macro averages). Kept
            # as-is so reported numbers stay comparable; confirm whether a
            # corpus-level score is wanted.
            eval_accuracy += accuracy_score(batch_labels, batch_preds)
            eval_precision += precision_score(batch_labels, batch_preds, average = 'macro', zero_division=0)
            eval_recall += recall_score(batch_labels, batch_preds, average = 'macro', zero_division=0)
            eval_f1score += f1_score(batch_labels, batch_preds, average='macro', zero_division=0)

    if nb_eval_steps == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError below.
        raise ValueError("testing_loader yielded no batches; nothing to evaluate")

    labels = [ids_to_labels[class_id] for class_id in eval_labels]
    predictions = [ids_to_labels[class_id] for class_id in eval_preds]

    eval_accuracy = eval_accuracy / nb_eval_steps
    eval_precision = eval_precision / nb_eval_steps
    eval_recall = eval_recall / nb_eval_steps
    eval_f1score = eval_f1score / nb_eval_steps

    return labels, predictions, eval_accuracy, eval_precision, eval_recall, eval_f1score

In [4]:
def main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location):
    """Train (or reload) a sequence classifier and score it on the dev split every epoch.

    Args:
        n_epochs: Number of train/evaluate rounds to run.
        model_name: Pretrained checkpoint name used when ``model_load_flag`` is false.
        model_save_flag: When true, the tokenizer and model are written to
            ``model_save_location`` every time the dev F1 score improves.
        model_save_location: Directory for saved checkpoints (created if missing).
        model_load_flag: When true, tokenizer and model are loaded from
            ``model_load_location`` and the training step is skipped.
        model_load_location: Directory to load from when ``model_load_flag`` is true.

    Returns:
        A 17-tuple: best dev/test accuracy, best epoch, best dev/test precision,
        best dev/test recall, best dev/test F1, followed by the per-epoch
        dev/test lists for accuracy, precision, recall and F1 (dev and test
        interleaved). No test split is evaluated here, so every test entry
        keeps its initial value (0 or an empty list).
    """
    # Fixed training hyper-parameters
    learning_rate = 1e-05
    grad_step = 1
    initialization_input = (256, 32)  # (max_len, batch_size)

    # Raw train / dev splits and the label mapping
    dataset_location = '../datasets/task_a/'
    train_data = read_task(dataset_location, split='kan_train_negative_augmented')
    dev_data = read_task(dataset_location, split='kan_sentiment_dev')
    labels_to_ids = labels_to_ids_kan

    # Use the GPU when one is available
    device = 'cpu'
    if cuda.is_available():
        device = 'cuda'

    # Tokenizer and model: fresh pretrained checkpoint, or a previously saved one
    if not model_load_flag:
        tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(labels_to_ids))
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_load_location)
        model = AutoModelForSequenceClassification.from_pretrained(model_load_location)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
    model.to(device)

    # Data loaders (both are built with shuffle=True)
    train_loader = initialize_data(tokenizer, initialization_input, train_data, labels_to_ids, shuffle=True)
    dev_loader = initialize_data(tokenizer, initialization_input, dev_data, labels_to_ids, shuffle=True)

    # Running bests and per-epoch histories, keyed by metric
    metric_keys = ('acc', 'precision', 'recall', 'f1score')
    best_dev = dict.fromkeys(metric_keys, 0)
    best_test = dict.fromkeys(metric_keys, 0)
    dev_history = {key: [] for key in metric_keys}
    test_history = {key: [] for key in metric_keys}
    best_epoch = -1

    for epoch in range(n_epochs):
        start = time.time()
        print(f"Training epoch: {epoch + 1}")

        # A loaded model is only evaluated, never trained further
        if not model_load_flag:
            model = train(epoch, train_loader, model, optimizer, device, grad_step)

        # Evaluate on dev and log the scores
        _, _, dev_accuracy, dev_precision, dev_recall, dev_f1score = testing(model, dev_loader, labels_to_ids, device)
        dev_scores = dict(zip(metric_keys, (dev_accuracy, dev_precision, dev_recall, dev_f1score)))
        print('DEV ACC:', dev_accuracy)
        print('DEV Precision:' , dev_precision)
        print('DEV Recall:' , dev_recall)
        print('DEV F1Score:' , dev_f1score)

        for key, score in dev_scores.items():
            dev_history[key].append(score)

        # Each metric's best is tracked independently; only an F1 improvement
        # moves best_epoch and triggers a checkpoint.
        f1_improved = dev_f1score > best_dev['f1score']
        for key, score in dev_scores.items():
            if score > best_dev[key]:
                best_dev[key] = score

        if f1_improved:
            best_epoch = epoch
            if model_save_flag:
                os.makedirs(model_save_location, exist_ok=True)
                tokenizer.save_pretrained(model_save_location)
                model.save_pretrained(model_save_location)

        elapsed_seconds = time.time() - start
        print('BEST ACCURACY --> ', 'DEV:', round(best_dev['acc'], 5))
        print('BEST PRECISION --> ', 'DEV:', round(best_dev['precision'], 5))
        print('BEST RECALL --> ', 'DEV:', round(best_dev['recall'], 5))
        print('BEST F1SCORE --> ', 'DEV:', round(best_dev['f1score'], 5))
        print('TIME PER EPOCH:', elapsed_seconds/60 )
        print()

    return (
        best_dev['acc'], best_test['acc'], best_epoch,
        best_dev['precision'], best_test['precision'],
        best_dev['recall'], best_test['recall'],
        best_dev['f1score'], best_test['f1score'],
        dev_history['acc'], test_history['acc'],
        dev_history['precision'], test_history['precision'],
        dev_history['recall'], test_history['recall'],
        dev_history['f1score'], test_history['f1score'],
    )

In [5]:
if __name__ == '__main__':
    # Experiment configuration
    n_epochs = 10
    models = ['bert-base-uncased']

    # Save checkpoints; always start from the pretrained weights
    model_save_flag, model_load_flag = True, False

    # One entry per main() call; each entry is that run's per-epoch list
    overall_list_dev_acc, overall_list_test_acc = [], []
    overall_list_dev_precision, overall_list_test_precision = [], []
    overall_list_dev_recall, overall_list_test_recall = [], []
    overall_list_dev_f1score, overall_list_test_f1score = [], []

    # Five independent repetitions for every model in `models`
    for i in range(5):

        for model_name in models:

            model_load_location = None
            model_save_location = 'saved_models/' + model_name + 'Kannada' + 'Augmented' + str(i)

            # The best_* scalars are overwritten on every call, so after the
            # loops they describe the last run only.
            (
                best_dev_acc, best_test_acc, best_epoch,
                best_dev_precision, best_test_precision,
                best_dev_recall, best_test_recall,
                best_dev_f1score, best_test_f1score,
                list_dev_acc, list_test_acc,
                list_dev_precision, list_test_precision,
                list_dev_recall, list_test_recall,
                list_dev_f1score, list_test_f1score,
            ) = main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location)

            overall_list_dev_acc += [list_dev_acc]
            overall_list_test_acc += [list_test_acc]
            overall_list_dev_precision += [list_dev_precision]
            overall_list_test_precision += [list_test_precision]
            overall_list_dev_recall += [list_dev_recall]
            overall_list_test_recall += [list_test_recall]
            overall_list_dev_f1score += [list_dev_f1score]
            overall_list_test_f1score += [list_test_f1score]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 1.0286014080047607
DEV ACC: 0.5216806220095693
DEV Precision: 0.43590251697203564
DEV Recall: 0.477013054533108
DEV F1Score: 0.409461277167108
BEST ACCURACY -->  DEV: 0.52168
BEST PRECISION -->  DEV: 0.4359
BEST RECALL -->  DEV: 0.47701
BEST F1SCORE -->  DEV: 0.40946
TIME PER EPOCH: 5.155515897274017

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 0.990794837474823
DEV ACC: 0.5400717703349283
DEV Precision: 0.4299724739719109
DEV Recall: 0.46465295565501763
DEV F1Score: 0.4239230242342095
BEST ACCURACY -->  DEV: 0.54007
BEST PRECISION -->  DEV: 0.42997
BEST RECALL -->  DEV: 0.46465
BEST F1SCORE -->  DEV: 0.42392
TIME PER EPOCH: 5.5870315392812095

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSI

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 1.1335554122924805
DEV ACC: 0.5229515550239234
DEV Precision: 0.4104057080026331
DEV Recall: 0.41120313859530716
DEV F1Score: 0.3792432295336885
BEST ACCURACY -->  DEV: 0.52295
BEST PRECISION -->  DEV: 0.41041
BEST RECALL -->  DEV: 0.4112
BEST F1SCORE -->  DEV: 0.37924
TIME PER EPOCH: 5.619885969161987

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIH

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 1.2286428213119507
DEV ACC: 0.5570424641148325
DEV Precision: 0.41878657345773324
DEV Recall: 0.42683684564489516
DEV F1Score: 0.39714802357473683
BEST ACCURACY -->  DEV: 0.55704
BEST PRECISION -->  DEV: 0.41879
BEST RECALL -->  DEV: 0.42684
BEST F1SCORE -->  DEV: 0.39715
TIME PER EPOCH: 5.2042035063107805

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FI

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training epoch: 1
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHED BATCH: 239 of 246
Validation loss per 100 evaluation steps: 1.0568705797195435
DEV ACC: 0.5462021531100478
DEV Precision: 0.38168309391377925
DEV Recall: 0.3952267229205732
DEV F1Score: 0.36699741691950505
BEST ACCURACY -->  DEV: 0.5462
BEST PRECISION -->  DEV: 0.38168
BEST RECALL -->  DEV: 0.39523
BEST F1SCORE -->  DEV: 0.367
TIME PER EPOCH: 5.608150696754455

Training epoch: 2
FINSIHED BATCH: 19 of 246
FINSIHED BATCH: 39 of 246
FINSIHED BATCH: 59 of 246
FINSIHED BATCH: 79 of 246
FINSIHED BATCH: 99 of 246
FINSIHED BATCH: 119 of 246
FINSIHED BATCH: 139 of 246
FINSIHED BATCH: 159 of 246
FINSIHED BATCH: 179 of 246
FINSIHED BATCH: 199 of 246
FINSIHED BATCH: 219 of 246
FINSIHE

In [6]:
# Best dev accuracy of the LAST main() call only: the driver loop reassigns
# best_dev_acc on every run.
print(best_dev_acc)

0.6272428229665071


In [7]:
# 0-based epoch index at which dev F1 last improved, for the last run only.
print(best_epoch)

7


In [8]:
# Best dev precision of the last run. Each metric's best is tracked
# independently, so it need not come from the epoch reported by best_epoch.
print(best_dev_precision)

0.531985892113953


In [9]:
# Best dev recall of the last run (tracked independently of the other metrics).
print(best_dev_recall)

0.5469504256181008


In [10]:
# Best dev F1 of the last run; this is the score that decides best_epoch and
# when a checkpoint is saved.
print(best_dev_f1score)

0.5120970988807882


In [11]:
# One inner list per main() call, each holding that run's dev accuracy per epoch.
print(overall_list_dev_acc)

[[0.5216806220095693, 0.5641447368421052, 0.5556220095693779, 0.5617523923444976, 0.5716955741626794, 0.5751345693779905, 0.5988337320574163, 0.5751345693779905, 0.5868720095693779, 0.5869467703349283], [0.5400717703349283, 0.5195873205741627, 0.5978618421052632, 0.5827601674641149, 0.6291866028708134, 0.6125897129186603, 0.6139354066985646, 0.5987589712918661, 0.5859748803827751, 0.5835077751196173], [0.5229515550239234, 0.5741626794258373, 0.5589114832535885, 0.6078050239234449, 0.5661632775119617, 0.5702751196172249, 0.591581937799043, 0.5807416267942584, 0.578424043062201, 0.572816985645933], [0.5570424641148325, 0.54373504784689, 0.5841058612440192, 0.6101973684210527, 0.5888157894736842, 0.5850029904306221, 0.5761064593301435, 0.56563995215311, 0.5551734449760766, 0.5674342105263158], [0.5462021531100478, 0.5651913875598086, 0.5376794258373206, 0.5897129186602871, 0.6272428229665071, 0.5921800239234449, 0.6064593301435406, 0.6101973684210527, 0.5930023923444976, 0.588815789473684

In [12]:
# One inner list per main() call, each holding that run's dev precision per epoch.
print(overall_list_dev_precision)

[[0.43590251697203564, 0.5013408718806446, 0.4297883047582245, 0.43264933880892226, 0.475043149692038, 0.4923783658409327, 0.5026035985545554, 0.48501371623029377, 0.4792301808411434, 0.5127872994133021], [0.4299724739719109, 0.44528982871352, 0.49117628587442835, 0.46551059049387916, 0.51401379139866, 0.48549464529903585, 0.5021270020399278, 0.5101339034124709, 0.532214462875524, 0.49540220252118644], [0.4104057080026331, 0.4576850653880628, 0.4683713743999066, 0.5008470175804118, 0.48611537518834164, 0.49612010390085254, 0.5176417743684841, 0.4685585826829142, 0.5045534545668237, 0.5116843858755623], [0.41878657345773324, 0.4354576275781947, 0.4836796685192406, 0.49239881268307956, 0.4734608903425748, 0.4750975665748393, 0.4750255678664769, 0.5063050170269422, 0.4778281305663095, 0.4992635363666887], [0.38168309391377925, 0.4312276478720328, 0.4541260486039318, 0.4887919933172185, 0.5177131771893243, 0.4675385223768631, 0.5075050024833659, 0.531985892113953, 0.5291798050707776, 0.510

In [13]:
# One inner list per main() call, each holding that run's dev recall per epoch.
print(overall_list_dev_recall)

[[0.477013054533108, 0.47819428120898705, 0.49548988528671195, 0.4601436575901816, 0.49652005393402737, 0.5004319350201352, 0.5131395387534565, 0.5041445950849975, 0.47699319811126734, 0.49418368670646673], [0.46465295565501763, 0.44086875728252733, 0.5147590352348655, 0.4984319097420702, 0.5404633416197587, 0.5176108386328622, 0.5035654575902254, 0.5152788177594679, 0.5178741327003359, 0.48292094195169066], [0.41120313859530716, 0.4634433268517142, 0.45070368431331004, 0.4942806623826185, 0.5005355121939699, 0.5070264219159518, 0.5024873342965869, 0.49262504058627066, 0.5037142257389232, 0.48682644121991153], [0.42683684564489516, 0.485681034651623, 0.4968783036630629, 0.49501887001887, 0.5038032036294068, 0.4925715655095757, 0.5033412042502952, 0.47752028534448326, 0.4950931480878005, 0.5107320460054064], [0.3952267229205732, 0.47082019584693385, 0.41912416074910797, 0.5088934371442112, 0.5256659027637778, 0.4780445782053578, 0.4921185476733605, 0.5469504256181008, 0.4918248318276462

In [14]:
# One inner list per main() call, each holding that run's dev F1 per epoch.
print(overall_list_dev_f1score)

[[0.409461277167108, 0.458347493292581, 0.43963212658374906, 0.42608538140213104, 0.4566948742588641, 0.46764028287697995, 0.4814801980303933, 0.470095512943781, 0.4613288814261839, 0.4735707787836326], [0.4239230242342095, 0.4133751796986022, 0.4798655961353717, 0.4614476693032117, 0.5037701758094514, 0.48041600867333845, 0.48105407963554275, 0.48316958060130266, 0.4921604825325572, 0.45873699978215216], [0.3792432295336885, 0.43774974346066015, 0.43167356728467415, 0.47515047134654403, 0.4646668456875784, 0.4690712888881707, 0.4904428822676543, 0.45878139571883914, 0.48039575323606276, 0.4697047961339613], [0.39714802357473683, 0.42994756650348687, 0.46954584093123536, 0.4715658417467042, 0.46455381225920367, 0.4595611523480153, 0.4601003438348428, 0.464682989086257, 0.46036558849865233, 0.47290187689883606], [0.36699741691950505, 0.41996773082545713, 0.3986993232285526, 0.46938861534167614, 0.5013587660498909, 0.4550660966472862, 0.47209288659841636, 0.5120970988807882, 0.4882002877

In [15]:
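# The best model is 3.
# NOTE(review): it is unclear whether "3" is a 0-based run index (the `i` used in
# the save path) or a 1-based count, and which metric it was chosen by. By the dev
# scores printed above, the runs at index 1 and 4 reach the highest accuracy and
# F1 -- confirm which run and criterion this refers to.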