In [1]:
import pandas as pd
import numpy as np
import torch
from torch import cuda
from torch.utils.data import Dataset, DataLoader
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from load_data import initialize_data
from reading_datasets import read_task
from labels_to_ids import labels_to_ids_kan
import time
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
def train(epoch, train_loader, model, optimizer, device, grad_step = 1, max_grad_norm = 10):
    tr_loss, tr_accuracy = 0, 0
    tr_precision, tr_recall = 0, 0
    tr_f1score = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_preds, tr_labels = [], []
    # put model in training mode
    model.train()
    optimizer.zero_grad()
    
    for idx, batch in enumerate(train_loader):
        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        labels = batch['labels'].to(device, dtype = torch.long)

        if (idx + 1) % 20 == 0:
            print('FINSIHED BATCH:', idx, 'of', len(train_loader))

        #loss, tr_logits = model(input_ids=ids, attention_mask=mask, labels=labels)
        output = model(input_ids=ids, attention_mask=mask, labels=labels)
        tr_loss += output[0]

        nb_tr_steps += 1
        nb_tr_examples += labels.size(0)
           
        # compute training accuracy
        flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
        active_logits = output[1].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
        
        # only compute accuracy at active labels
        active_accuracy = labels.view(-1) != -100 # shape (batch_size, seq_len)
        #active_labels = torch.where(active_accuracy, labels.view(-1), torch.tensor(-100).type_as(labels))
        
        labels = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)

        tr_labels.extend(labels)
        tr_preds.extend(predictions)

        tmp_tr_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy
        
        # Compute Precision
        tmp_tr_precision = precision_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0 )
        tr_precision += tmp_tr_precision
        
        # Compute Recall
        tmp_tr_recall = recall_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
        tr_recall += tmp_tr_recall
        
        # Compute f1score
        tmp_tr_f1score = f1_score(labels.cpu().numpy(), predictions.cpu().numpy(), average= 'macro', zero_division=0)
        tr_f1score += tmp_tr_f1score
    
        # gradient clipping
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=max_grad_norm
        )
        
        # backward pass
        output['loss'].backward()
        if (idx + 1) % grad_step == 0:
            optimizer.step()
            optimizer.zero_grad()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    tr_precision = tr_precision / nb_tr_steps
    tr_recall = tr_recall / nb_tr_steps
    tr_f1score= tr_f1score / nb_tr_steps
    #print(f"Training loss epoch: {epoch_loss}")
    #print(f"Training accuracy epoch: {tr_accuracy}")

    return model

In [3]:
def testing(model, testing_loader, labels_to_ids, device):
    # put model in evaluation mode
    model.eval()
    
    eval_loss, eval_accuracy = 0, 0
    eval_precision, eval_recall = 0, 0
    eval_f1score = 0
    nb_eval_examples, nb_eval_steps = 0, 0
    eval_preds, eval_labels = [], []
     
    
    ids_to_labels = dict((v,k) for k,v in labels_to_ids.items())

    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):
            
            ids = batch['input_ids'].to(device, dtype = torch.long)
            mask = batch['attention_mask'].to(device, dtype = torch.long)
            labels = batch['labels'].to(device, dtype = torch.long)
            
            #loss, eval_logits = model(input_ids=ids, attention_mask=mask, labels=labels)
            output = model(input_ids=ids, attention_mask=mask, labels=labels)

            eval_loss += output['loss'].item()

            nb_eval_steps += 1
            nb_eval_examples += labels.size(0)
        
            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")
              
            # compute evaluation accuracy
            flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
            active_logits = output[1].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
            
            # only compute accuracy at active labels
            active_accuracy = labels.view(-1) != -100 # shape (batch_size, seq_len)
        
            labels = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)
            
            eval_labels.extend(labels)
            eval_preds.extend(predictions)
            
            tmp_eval_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy
            
            # Compute Precision
            tmp_eval_precision = precision_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_precision += tmp_eval_precision
            
            # Compute Recall
            tmp_eval_recall = recall_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_recall += tmp_eval_recall
            
            # Compute f1score
            tmp_eval_f1score = f1_score(labels.cpu().numpy(), predictions.cpu().numpy(), average='macro', zero_division=0)
            eval_f1score += tmp_eval_f1score

    labels = [ids_to_labels[id.item()] for id in eval_labels]
    predictions = [ids_to_labels[id.item()] for id in eval_preds]
    
    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps
    eval_precision = eval_precision / nb_eval_steps
    eval_recall = eval_recall / nb_eval_steps
    eval_f1score = eval_f1score / nb_eval_steps
    #print(f"Validation Loss: {eval_loss}")
    #print(f"Validation Accuracy: {eval_accuracy}")

    return labels, predictions, eval_accuracy, eval_precision, eval_recall, eval_f1score

In [4]:
def main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location):
    #Initialization training parameters
    max_len = 256
    batch_size = 32
    grad_step = 1
    learning_rate = 1e-05
    initialization_input = (max_len, batch_size)

    #Reading datasets and initializing data loaders
    dataset_location = '../datasets/task_a/transliterated/'

    train_data = read_task(dataset_location , split = 'kan_train_trans')
    dev_data = read_task(dataset_location , split = 'kan_dev_trans')
    #test_data = read_task(dataset_location , split = 'dev')#load test set
    labels_to_ids = labels_to_ids_kan
    #input_data = (train_data, dev_data, labels_to_ids)

    #Define tokenizer, model and optimizer
    device = 'cuda' if cuda.is_available() else 'cpu' #save the processing time
    if model_load_flag:
        tokenizer = AutoTokenizer.from_pretrained(model_load_location)
        model = AutoModelForSequenceClassification.from_pretrained(model_load_location)
    else: 
        tokenizer =  AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(labels_to_ids))
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
    model.to(device)

    #Get dataloaders
    train_loader = initialize_data(tokenizer, initialization_input, train_data, labels_to_ids, shuffle = True)
    dev_loader = initialize_data(tokenizer, initialization_input, dev_data, labels_to_ids, shuffle = True)
    #test_loader = initialize_data(tokenizer, initialization_input, test_data, labels_to_ids, shuffle = True)#create test loader

    best_dev_acc = 0
    best_test_acc = 0
    best_dev_precision = 0
    best_test_precision = 0
    best_dev_recall = 0
    best_test_recall = 0
    best_dev_f1score = 0
    best_test_f1score = 0
    best_epoch = -1
    
    list_dev_acc = [] 
    list_test_acc = []  
    list_dev_precision = []  
    list_test_precision  = []  
    list_dev_recall = []  
    list_test_recall = []  
    list_dev_f1score = []  
    list_test_f1score = []
    
    for epoch in range(n_epochs):
        start = time.time()
        print(f"Training epoch: {epoch + 1}")

        #train model
        if not model_load_flag:
            model = train(epoch, train_loader, model, optimizer, device, grad_step)
        
        #testing and logging
        labels_dev, predictions_dev, dev_accuracy, dev_precision, dev_recall, dev_f1score = testing(model, dev_loader, labels_to_ids, device)
        print('DEV ACC:', dev_accuracy)
        print('DEV Precision:' , dev_precision)
        print('DEV Recall:' , dev_recall)
        print('DEV F1Score:' , dev_f1score)
        
        list_dev_acc.append(dev_accuracy)     
        list_dev_precision.append(dev_precision)   
        list_dev_recall.append(dev_recall)  
        list_dev_f1score.append(dev_f1score)  
        
        
        #labels_test, predictions_test, test_accuracy, test_precision, test_recall, test_f1score = testing(model, test_loader, labels_to_ids, device)
        #print('TEST ACC:', test_accuracy)
        #print('TEST Precision:' , test_precision)
        #print('TEST Recall:' , test_recall)
        #print('TEST F1Score:' , test_f1score)
        
        #list_test_acc.append(test_accuracy) 
        #list_test_precision.append(test_precision)  
        #list_test_recall.append(test_recall)
        #list_test_f1score.append(test_f1score) 

        #saving model
        if dev_accuracy > best_dev_acc:
            best_dev_acc = dev_accuracy
            #best_test_acc = test_accuracy
        if dev_precision > best_dev_precision:
            best_dev_precision = dev_precision
            #best_test_precision = test_precision
        if dev_recall > best_dev_recall:
            best_dev_recall = dev_recall
            #best_test_recall = test_recall
        if dev_f1score > best_dev_f1score:
            best_dev_f1score = dev_f1score
            #best_test_f1score = test_f1score
            best_epoch = epoch
            
            if model_save_flag:
                os.makedirs(model_save_location, exist_ok=True)
                tokenizer.save_pretrained(model_save_location)
                model.save_pretrained(model_save_location)

        now = time.time()
        print('BEST ACCURACY --> ', 'DEV:', round(best_dev_acc, 5))
        print('BEST PRECISION --> ', 'DEV:', round(best_dev_precision, 5))
        print('BEST RECALL --> ', 'DEV:', round(best_dev_recall, 5))
        print('BEST F1SCORE --> ', 'DEV:', round(best_dev_f1score, 5))
        print('TIME PER EPOCH:', (now-start)/60 )
        print()

    return best_dev_acc, best_test_acc, best_epoch, best_dev_precision, best_test_precision, best_dev_recall, best_test_recall, best_dev_f1score, best_test_f1score, list_dev_acc, list_test_acc, list_dev_precision, list_test_precision, list_dev_recall, list_test_recall, list_dev_f1score, list_test_f1score

In [5]:
if __name__ == '__main__':
    n_epochs = 10
    models = ['bert-base-multilingual-uncased']
    
    #model saving parameters
    model_save_flag = True
    model_load_flag = False
    
    overall_list_dev_acc = [] 
    overall_list_test_acc = []    
    overall_list_dev_precision = []  
    overall_list_test_precision  = []  
    overall_list_dev_recall = []  
    overall_list_test_recall = []  
    overall_list_dev_f1score = []  
    overall_list_test_f1score = [] 
    
    for i in range(5):
        
        for model_name in models:

            model_save_location = 'saved_models/' + model_name + 'Kannada' + 'transliterated' + str(i)
            model_load_location = None

            best_dev_acc, best_test_acc, best_epoch, best_dev_precision, best_test_precision, best_dev_recall, best_test_recall, best_dev_f1score, best_test_f1score, list_dev_acc, list_test_acc, list_dev_precision, list_test_precision, list_dev_recall, list_test_recall, list_dev_f1score, list_test_f1score = main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location)
            
            overall_list_dev_acc.append(list_dev_acc) 
            overall_list_test_acc.append(list_test_acc) 
            overall_list_dev_precision.append(list_dev_precision)  
            overall_list_test_precision.append(list_test_precision) 
            overall_list_dev_recall.append(list_dev_recall)  
            overall_list_test_recall.append(list_test_recall)  
            overall_list_dev_f1score.append(list_dev_f1score)  
            overall_list_test_f1score.append(list_test_f1score) 

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/851k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.64M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/641M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Training epoch: 1
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.2432700395584106
DEV ACC: 0.5821620813397129
DEV Precision: 0.41868119964490913
DEV Recall: 0.4243446225452629
DEV F1Score: 0.3977691519004302
BEST ACCURACY -->  DEV: 0.58216
BEST PRECISION -->  DEV: 0.41868
BEST RECALL -->  DEV: 0.42434
BEST F1SCORE -->  DEV: 0.39777
TIME PER EPOCH: 4.668273933728536

Training epoch: 2
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 0.8528249859809875
DEV ACC: 0.6191686602870813
DEV Precision: 0.49209266888255865
DEV Recall: 0.48

DEV ACC: 0.6712769138755981
DEV Precision: 0.5945389194239369
DEV Recall: 0.5497401605298102
DEV F1Score: 0.5443186742509064
BEST ACCURACY -->  DEV: 0.67128
BEST PRECISION -->  DEV: 0.59454
BEST RECALL -->  DEV: 0.54974
BEST F1SCORE -->  DEV: 0.54432
TIME PER EPOCH: 4.505809843540192

Training epoch: 6
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.3562448024749756
DEV ACC: 0.6489982057416268
DEV Precision: 0.5127482602499428
DEV Recall: 0.515568674687729
DEV F1Score: 0.4815885624215337
BEST ACCURACY -->  DEV: 0.67128
BEST PRECISION -->  DEV: 0.59454
BEST RECALL -->  DEV: 0.54974
BEST F1SCORE -->  DEV: 0.54432
TIME PER EPOCH: 4.526136521498362

Training epoch: 7
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Training epoch: 1
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 0.9064346551895142
DEV ACC: 0.6015251196172249
DEV Precision: 0.4661595245608755
DEV Recall: 0.48764660422449607
DEV F1Score: 0.45531804127356085
BEST ACCURACY -->  DEV: 0.60153
BEST PRECISION -->  DEV: 0.46616
BEST RECALL -->  DEV: 0.48765
BEST F1SCORE -->  DEV: 0.45532
TIME PER EPOCH: 4.374613710244497

Training epoch: 2
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 0.7220532894134521
DEV ACC: 0.6504186602870813
DEV Precision: 0.5194086125823391
DEV Recall: 0.52

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Training epoch: 1
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 0.7367625832557678
DEV ACC: 0.6087021531100478
DEV Precision: 0.46368333998248706
DEV Recall: 0.4504480145522925
DEV F1Score: 0.4309543805247744
BEST ACCURACY -->  DEV: 0.6087
BEST PRECISION -->  DEV: 0.46368
BEST RECALL -->  DEV: 0.45045
BEST F1SCORE -->  DEV: 0.43095
TIME PER EPOCH: 4.77736003001531

Training epoch: 2
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 0.8041238784790039
DEV ACC: 0.6376345693779905
DEV Precision: 0.5081403356227256
DEV Recall: 0.50842

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

Training epoch: 1
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 0.7887489199638367
DEV ACC: 0.5912081339712919
DEV Precision: 0.48551661690846737
DEV Recall: 0.43951560916801546
DEV F1Score: 0.4294155109081484
BEST ACCURACY -->  DEV: 0.59121
BEST PRECISION -->  DEV: 0.48552
BEST RECALL -->  DEV: 0.43952
BEST F1SCORE -->  DEV: 0.42942
TIME PER EPOCH: 4.811619595686595

Training epoch: 2
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.1292914152145386
DEV ACC: 0.6452601674641149
DEV Precision: 0.5021546219674561
DEV Recall: 0.54

In [6]:
print(best_dev_acc)

0.6830891148325359


In [7]:
print(best_epoch)

4


In [8]:
print(best_dev_precision)

0.5837666505633716


In [9]:
print(best_dev_recall)

0.5476818430107904


In [10]:
print(best_dev_f1score)

0.530861442173454


In [11]:
print(overall_list_dev_acc)

[[0.5821620813397129, 0.6191686602870813, 0.652811004784689, 0.669407894736842, 0.6774820574162679, 0.669407894736842, 0.6484748803827751, 0.6755382775119617, 0.6579694976076556, 0.6475777511961722], [0.6021232057416268, 0.6380831339712919, 0.6542314593301435, 0.6504186602870813, 0.6712769138755981, 0.6489982057416268, 0.6172248803827751, 0.6446620813397129, 0.6566238038277512, 0.653782894736842], [0.6015251196172249, 0.6504186602870813, 0.6522876794258373, 0.6423444976076556, 0.6481010765550239, 0.6641746411483254, 0.62813995215311, 0.6386064593301435, 0.637186004784689, 0.6398773923444976], [0.6087021531100478, 0.6376345693779905, 0.6489982057416268, 0.6613337320574163, 0.6603618421052632, 0.6280651913875598, 0.6522876794258373, 0.6508672248803827, 0.6267194976076556, 0.6244766746411483], [0.5912081339712919, 0.6452601674641149, 0.6517643540669856, 0.6456339712918661, 0.6830891148325359, 0.6546800239234449, 0.6394288277511961, 0.6621561004784688, 0.6414473684210527, 0.646605861244019

In [12]:
print(overall_list_dev_precision)

[[0.41868119964490913, 0.49209266888255865, 0.5172703681298532, 0.5077533424740163, 0.5694627718420993, 0.5564023954705397, 0.5255778643363079, 0.5827333006846799, 0.5630086550407406, 0.5516894973713155], [0.51288215341393, 0.4799261147115072, 0.5265515860011708, 0.4985630924724648, 0.5945389194239369, 0.5127482602499428, 0.5446952290134108, 0.5170274660761573, 0.604716627971272, 0.5568975953995654], [0.4661595245608755, 0.5194086125823391, 0.505701374648743, 0.5456563926386964, 0.5354294475148331, 0.5299959237844706, 0.537642324565699, 0.5183593650336848, 0.5430993540890108, 0.5600372306689151], [0.46368333998248706, 0.5081403356227256, 0.5174490030171848, 0.5549497435125779, 0.519745484134029, 0.5290404923359469, 0.5365281953168951, 0.5278338007725145, 0.5088426703702527, 0.5457881705341597], [0.48551661690846737, 0.5021546219674561, 0.5436989249375261, 0.5134617577925591, 0.5688912681433087, 0.5492805931442295, 0.5569235674885828, 0.5555444643857252, 0.5409909151423365, 0.5837666505

In [13]:
print(overall_list_dev_recall)

[[0.4243446225452629, 0.48393339336688257, 0.5140114052614053, 0.5278227776148547, 0.5649097247781459, 0.5835618559248239, 0.550963636408155, 0.5746665690065634, 0.5337759403401114, 0.5518437934614405], [0.4524771161071471, 0.5238644424070472, 0.5158236239770331, 0.5343847063981456, 0.5497401605298102, 0.515568674687729, 0.5595713863332776, 0.5380631206598698, 0.5717702039883998, 0.5150489112788578], [0.48764660422449607, 0.5221886349355214, 0.5324280126068227, 0.5114313647855725, 0.5284447051759887, 0.5082916451453603, 0.5189531064597909, 0.5076181661475778, 0.5378343767046975, 0.551606100961927], [0.4504480145522925, 0.5084244182889693, 0.5661547921384679, 0.58178385326482, 0.5196332371072029, 0.5337447886884982, 0.5129407322592657, 0.49821210132707455, 0.48624745911123635, 0.5253658643558025], [0.43951560916801546, 0.5442534970345975, 0.5000201200978712, 0.4806835854204276, 0.5476818430107904, 0.5447641129705583, 0.5023893054282866, 0.5366473061994452, 0.508977773055665, 0.490892606

In [14]:
print(overall_list_dev_f1score)

[[0.3977691519004302, 0.4577973732738974, 0.4947641234213084, 0.5004161870400122, 0.5355027245249495, 0.5504958403135534, 0.5167558849740367, 0.5518426968826898, 0.5225741970714554, 0.5296376203209184], [0.4303749962160159, 0.48319353043714663, 0.49655835613576793, 0.4917089010836031, 0.5443186742509064, 0.4815885624215337, 0.5247434664320313, 0.5098211729824323, 0.5651739601074353, 0.5100354533030286], [0.45531804127356085, 0.48802813452706056, 0.49628771306726555, 0.49960859244980005, 0.5082445957926078, 0.49448848966840003, 0.4913095390797649, 0.49695694770118043, 0.519655768058736, 0.5324726782279703], [0.4309543805247744, 0.47968925111729255, 0.5139581782315266, 0.5393882486523428, 0.4928187324022737, 0.510056201814039, 0.5040132239113082, 0.48900958702771385, 0.47194953940972934, 0.5124733291523952], [0.4294155109081484, 0.5059502432497084, 0.49148813169304123, 0.4698870231485627, 0.530861442173454, 0.5279357072258851, 0.5035377232375435, 0.519009186188537, 0.4971568357883953, 0.

In [15]:
#The best model is 0