In [1]:
import pandas as pd
import numpy as np
import torch
from torch import cuda
from torch.utils.data import Dataset, DataLoader
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from load_data import initialize_data
from reading_datasets import read_task
from labels_to_ids import labels_to_ids_kan
import time
import os
# Expose only GPU 0 to CUDA for this process; `main` later picks 'cuda' when
# `cuda.is_available()` is true and falls back to 'cpu' otherwise.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
def train(epoch, train_loader, model, optimizer, device, grad_step = 1, max_grad_norm = 10):
    """Run one training pass over ``train_loader`` and return the updated model.

    Gradients are accumulated over ``grad_step`` consecutive batches; each
    accumulated gradient is clipped to ``max_grad_norm`` and then applied with
    ``optimizer``. A trailing group of fewer than ``grad_step`` batches is
    applied as well, so no computed gradient is thrown away.

    No training metrics are computed here because nothing consumes them; use
    ``testing`` on a held-out loader to measure quality.

    Args:
        epoch: Index of the current epoch. Accepted for call-site
            compatibility; not used inside the function.
        train_loader: Iterable of batches. Each batch is indexed with the keys
            'input_ids', 'attention_mask' and 'labels', and every value is
            moved to ``device`` as ``torch.long``.
        model: Called as ``model(input_ids=..., attention_mask=..., labels=...)``;
            the result must expose the training loss under the key 'loss'.
        optimizer: Optimizer that owns ``model``'s parameters.
        device: Device the batches are moved to.
        grad_step: Number of batches accumulated per optimizer step.
        max_grad_norm: Maximum total gradient norm passed to
            ``torch.nn.utils.clip_grad_norm_``.

    Returns:
        The same ``model`` object, left in training mode.
    """
    model.train()
    optimizer.zero_grad()

    def _apply_accumulated_gradients():
        # Clip AFTER backward() has populated .grad and right before the step;
        # clipping earlier only touches gradients that were already zeroed.
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=max_grad_norm
        )
        optimizer.step()
        optimizer.zero_grad()

    # True while there are back-propagated gradients not yet applied.
    pending_gradients = False

    for idx, batch in enumerate(train_loader):
        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        labels = batch['labels'].to(device, dtype = torch.long)

        if (idx + 1) % 20 == 0:
            print('FINSIHED BATCH:', idx, 'of', len(train_loader))

        output = model(input_ids=ids, attention_mask=mask, labels=labels)

        # backward pass (adds to gradients accumulated from earlier batches)
        output['loss'].backward()
        pending_gradients = True

        if (idx + 1) % grad_step == 0:
            _apply_accumulated_gradients()
            pending_gradients = False

    # Flush a final, incomplete accumulation window instead of discarding it.
    if pending_gradients:
        _apply_accumulated_gradients()

    return model

In [3]:
def testing(model, testing_loader, labels_to_ids, device):
    # put model in evaluation mode
    model.eval()
    
    eval_loss, eval_accuracy = 0, 0
    eval_precision, eval_recall = 0, 0
    eval_f1score = 0
    nb_eval_examples, nb_eval_steps = 0, 0
    eval_preds, eval_labels = [], []
     
    
    ids_to_labels = dict((v,k) for k,v in labels_to_ids.items())

    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):
            
            ids = batch['input_ids'].to(device, dtype = torch.long)
            mask = batch['attention_mask'].to(device, dtype = torch.long)
            labels = batch['labels'].to(device, dtype = torch.long)
            
            #loss, eval_logits = model(input_ids=ids, attention_mask=mask, labels=labels)
            output = model(input_ids=ids, attention_mask=mask, labels=labels)

            eval_loss += output['loss'].item()

            nb_eval_steps += 1
            nb_eval_examples += labels.size(0)
        
            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")
              
            # compute evaluation accuracy
            flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
            active_logits = output[1].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
            
            # only compute accuracy at active labels
            active_accuracy = labels.view(-1) != -100 # shape (batch_size, seq_len)
        
            labels = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)
            
            eval_labels.extend(labels)
            eval_preds.extend(predictions)
            
            tmp_eval_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy
            
            # Compute Precision
            tmp_eval_precision = precision_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_precision += tmp_eval_precision
            
            # Compute Recall
            tmp_eval_recall = recall_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_recall += tmp_eval_recall
            
            # Compute f1score
            tmp_eval_f1score = f1_score(labels.cpu().numpy(), predictions.cpu().numpy(), average='macro', zero_division=0)
            eval_f1score += tmp_eval_f1score

    labels = [ids_to_labels[id.item()] for id in eval_labels]
    predictions = [ids_to_labels[id.item()] for id in eval_preds]
    
    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps
    eval_precision = eval_precision / nb_eval_steps
    eval_recall = eval_recall / nb_eval_steps
    eval_f1score = eval_f1score / nb_eval_steps
    #print(f"Validation Loss: {eval_loss}")
    #print(f"Validation Accuracy: {eval_accuracy}")

    return labels, predictions, eval_accuracy, eval_precision, eval_recall, eval_f1score

In [4]:
def main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location):
    """Train and/or evaluate a sequence classifier on the Kannada sentiment splits.

    For each of ``n_epochs`` epochs the model is trained with ``train`` (skipped
    when ``model_load_flag`` is truthy) and then scored on the dev split with
    ``testing``. The best dev accuracy, precision, recall and F1 are tracked
    independently; whenever dev F1 improves, ``best_epoch`` is updated and, if
    ``model_save_flag`` is truthy, tokenizer and model are written to
    ``model_save_location``.

    Args:
        n_epochs: Number of epochs to run.
        model_name: Pretrained checkpoint name used when not loading a saved model.
        model_save_flag: Whether to save on dev-F1 improvement.
        model_save_location: Directory for the saved tokenizer and model.
        model_load_flag: If truthy, load from ``model_load_location`` and skip training.
        model_load_location: Directory of a previously saved tokenizer and model.

    Returns:
        A 17-tuple: best dev/test accuracy, best epoch (0-based, -1 if F1 never
        improved), best dev/test precision, recall and F1, followed by the
        per-epoch dev/test lists for accuracy, precision, recall and F1. No test
        split is evaluated here, so every test value stays 0 and every test
        list stays empty.
    """
    # --- fixed hyper-parameters -------------------------------------------
    max_len, batch_size = 256, 32
    grad_step = 1
    learning_rate = 1e-05
    initialization_input = (max_len, batch_size)

    # --- data --------------------------------------------------------------
    dataset_location = '../datasets/task_a/'
    train_data = read_task(dataset_location, split='kan_sentiment_train')
    dev_data = read_task(dataset_location, split='kan_sentiment_dev')
    labels_to_ids = labels_to_ids_kan

    # --- tokenizer, model, optimizer ----------------------------------------
    device = 'cuda' if cuda.is_available() else 'cpu'
    if not model_load_flag:
        tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=len(labels_to_ids)
        )
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_load_location)
        model = AutoModelForSequenceClassification.from_pretrained(model_load_location)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
    model.to(device)

    # --- data loaders --------------------------------------------------------
    train_loader = initialize_data(tokenizer, initialization_input, train_data, labels_to_ids, shuffle=True)
    dev_loader = initialize_data(tokenizer, initialization_input, dev_data, labels_to_ids, shuffle=True)

    # --- bookkeeping ---------------------------------------------------------
    best_dev_acc = best_dev_precision = best_dev_recall = best_dev_f1score = 0
    best_test_acc = best_test_precision = best_test_recall = best_test_f1score = 0
    best_epoch = -1

    list_dev_acc, list_dev_precision, list_dev_recall, list_dev_f1score = [], [], [], []
    list_test_acc, list_test_precision, list_test_recall, list_test_f1score = [], [], [], []

    for epoch in range(n_epochs):
        start = time.time()
        print(f"Training epoch: {epoch + 1}")

        # Training is skipped entirely when a saved model was loaded.
        if not model_load_flag:
            model = train(epoch, train_loader, model, optimizer, device, grad_step)

        # Score the dev split; the decoded labels/predictions are not needed here.
        _, _, dev_accuracy, dev_precision, dev_recall, dev_f1score = testing(
            model, dev_loader, labels_to_ids, device
        )
        print('DEV ACC:', dev_accuracy)
        print('DEV Precision:', dev_precision)
        print('DEV Recall:', dev_recall)
        print('DEV F1Score:', dev_f1score)

        list_dev_acc.append(dev_accuracy)
        list_dev_precision.append(dev_precision)
        list_dev_recall.append(dev_recall)
        list_dev_f1score.append(dev_f1score)

        # Each "best" value is tracked on its own and only replaced on a strict improvement.
        best_dev_acc = max(best_dev_acc, dev_accuracy)
        best_dev_precision = max(best_dev_precision, dev_precision)
        best_dev_recall = max(best_dev_recall, dev_recall)

        # Dev F1 is the criterion that selects the best epoch and triggers saving.
        if dev_f1score > best_dev_f1score:
            best_dev_f1score, best_epoch = dev_f1score, epoch

            if model_save_flag:
                os.makedirs(model_save_location, exist_ok=True)
                tokenizer.save_pretrained(model_save_location)
                model.save_pretrained(model_save_location)

        elapsed_minutes = (time.time() - start) / 60
        print('BEST ACCURACY --> ', 'DEV:', round(best_dev_acc, 5))
        print('BEST PRECISION --> ', 'DEV:', round(best_dev_precision, 5))
        print('BEST RECALL --> ', 'DEV:', round(best_dev_recall, 5))
        print('BEST F1SCORE --> ', 'DEV:', round(best_dev_f1score, 5))
        print('TIME PER EPOCH:', elapsed_minutes)
        print()

    return (
        best_dev_acc, best_test_acc, best_epoch,
        best_dev_precision, best_test_precision,
        best_dev_recall, best_test_recall,
        best_dev_f1score, best_test_f1score,
        list_dev_acc, list_test_acc,
        list_dev_precision, list_test_precision,
        list_dev_recall, list_test_recall,
        list_dev_f1score, list_test_f1score,
    )

In [5]:
if __name__ == '__main__':
    # Experiment setup: every checkpoint in `models` is fine-tuned in 5
    # independent runs of `n_epochs` epochs each.
    n_epochs = 10
    models = ['ai4bharat/indic-bert']

    # Save the best checkpoint of each run; never start from a saved one.
    model_save_flag = True
    model_load_flag = False

    # Per-run metric histories: one inner list (one value per epoch) per call to main().
    overall_list_dev_acc, overall_list_test_acc = [], []
    overall_list_dev_precision, overall_list_test_precision = [], []
    overall_list_dev_recall, overall_list_test_recall = [], []
    overall_list_dev_f1score, overall_list_test_f1score = [], []

    for i in range(5):

        for model_name in models:

            # Each run gets its own output directory, suffixed with the run index.
            model_save_location = f'saved_models/{model_name}Kannada{i}'
            model_load_location = None

            # These names are rebound on every run, so after the loop they
            # describe the last run only.
            (
                best_dev_acc, best_test_acc, best_epoch,
                best_dev_precision, best_test_precision,
                best_dev_recall, best_test_recall,
                best_dev_f1score, best_test_f1score,
                list_dev_acc, list_test_acc,
                list_dev_precision, list_test_precision,
                list_dev_recall, list_test_recall,
                list_dev_f1score, list_test_f1score,
            ) = main(
                n_epochs=n_epochs,
                model_name=model_name,
                model_save_flag=model_save_flag,
                model_save_location=model_save_location,
                model_load_flag=model_load_flag,
                model_load_location=model_load_location,
            )

            overall_list_dev_acc.append(list_dev_acc)
            overall_list_dev_precision.append(list_dev_precision)
            overall_list_dev_recall.append(list_dev_recall)
            overall_list_dev_f1score.append(list_dev_f1score)

            overall_list_test_acc.append(list_test_acc)
            overall_list_test_precision.append(list_test_precision)
            overall_list_test_recall.append(list_test_recall)
            overall_list_test_f1score.append(list_test_f1score)

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.decoder.bias', 'predictions.decoder.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.2274982929229736
DEV ACC: 0.4998504784688995
DEV Precision: 0.23439889319060622
DEV Recall: 0.24716862682771776
DEV F1Score: 0.19631536705767452
BEST ACCURACY -->  DEV: 0.49985
BEST PRECISION -->  DEV: 0.2344
BEST RECALL -->  DEV: 0.24717
BEST F1SCORE -->  DEV: 0.19632
TIME PER EPOCH: 4.098899261156718

Training epoch: 2
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.2137407064437866
DEV ACC: 0.5594348086124402
DEV Precision: 0.34003455933186166
DEV Recall: 0.3

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.decoder.bias', 'predictions.decoder.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.1695107221603394
DEV ACC: 0.4936453349282297
DEV Precision: 0.19882823684061168
DEV Recall: 0.23457232310173487
DEV F1Score: 0.18319648786481976
BEST ACCURACY -->  DEV: 0.49365
BEST PRECISION -->  DEV: 0.19883
BEST RECALL -->  DEV: 0.23457
BEST F1SCORE -->  DEV: 0.1832
TIME PER EPOCH: 3.9880432923634848

Training epoch: 2
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.0813294649124146
DEV ACC: 0.5054575358851675
DEV Precision: 0.24776670200303832
DEV Recall: 0.

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.decoder.bias', 'predictions.decoder.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.2669556140899658
DEV ACC: 0.5192135167464115
DEV Precision: 0.29084843313348624
DEV Recall: 0.31310696248196246
DEV F1Score: 0.27565921172626384
BEST ACCURACY -->  DEV: 0.51921
BEST PRECISION -->  DEV: 0.29085
BEST RECALL -->  DEV: 0.31311
BEST F1SCORE -->  DEV: 0.27566
TIME PER EPOCH: 4.289653027057648

Training epoch: 3
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.2063182592391968
DEV ACC: 0.5660137559808

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.decoder.bias', 'predictions.decoder.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.1722699403762817
DEV ACC: 0.4666566985645933
DEV Precision: 0.09699087918660289
DEV Recall: 0.2068181818181819
DEV F1Score: 0.1312196294941196
BEST ACCURACY -->  DEV: 0.46666
BEST PRECISION -->  DEV: 0.09699
BEST RECALL -->  DEV: 0.20682
BEST F1SCORE -->  DEV: 0.13122
TIME PER EPOCH: 3.9693413972854614

Training epoch: 2
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 0.8816156983375549
DEV ACC: 0.5513606459330144
DEV Precision: 0.3362211011485277
DEV Recall: 0.33

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.bias', 'predictions.LayerNorm.bias', 'sop_classifier.classifier.weight', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.decoder.bias', 'predictions.decoder.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.2984602451324463
DEV ACC: 0.49028110047846885
DEV Precision: 0.2070083928760236
DEV Recall: 0.24798424058116567
DEV F1Score: 0.2044728975312479
BEST ACCURACY -->  DEV: 0.49028
BEST PRECISION -->  DEV: 0.20701
BEST RECALL -->  DEV: 0.24798
BEST F1SCORE -->  DEV: 0.20447
TIME PER EPOCH: 3.980621902147929

Training epoch: 2
FINSIHED BATCH: 19 of 195
FINSIHED BATCH: 39 of 195
FINSIHED BATCH: 59 of 195
FINSIHED BATCH: 79 of 195
FINSIHED BATCH: 99 of 195
FINSIHED BATCH: 119 of 195
FINSIHED BATCH: 139 of 195
FINSIHED BATCH: 159 of 195
FINSIHED BATCH: 179 of 195
Validation loss per 100 evaluation steps: 1.0463811159133911
DEV ACC: 0.5130831339712919
DEV Precision: 0.3023472178595927
DEV Recall: 0.26

In [6]:
# Best dev accuracy of the LAST run only: the driver loop rebinds this name on
# every call to main(), so earlier runs are not reflected here.
print(best_dev_acc)

0.6248504784688995


In [7]:
# 0-based epoch index at which dev F1 last improved, for the last run only
# (main() iterates `range(n_epochs)` and stores that loop index).
print(best_epoch)

9


In [8]:
# Best dev precision of the last run only; tracked independently of the other
# metrics, so it may come from a different epoch than `best_epoch`.
print(best_dev_precision)

0.5180475567362949


In [9]:
# Best dev recall of the last run only; tracked independently of the other
# metrics, so it may come from a different epoch than `best_epoch`.
print(best_dev_recall)

0.5224907386137332


In [10]:
# Best dev F1 of the last run only; this is the value reached at `best_epoch`.
print(best_dev_f1score)

0.5054764452341657


In [11]:
# Dev accuracy history: one inner list per call to main(), one value per epoch.
print(overall_list_dev_acc)

[[0.4998504784688995, 0.5594348086124402, 0.5879186602870813, 0.5926285885167464, 0.5945723684210527, 0.5907595693779905, 0.6157296650717703, 0.5977870813397129, 0.611543062200957, 0.5944976076555024], [0.4936453349282297, 0.5054575358851675, 0.5996561004784688, 0.6106459330143541, 0.5740131578947368, 0.6200657894736842, 0.5907595693779905, 0.6057864832535885, 0.61251495215311, 0.5988337320574163], [0.4632177033492823, 0.5192135167464115, 0.5660137559808612, 0.5513606459330144, 0.6058612440191388, 0.6214862440191388, 0.5992075358851675, 0.6097488038277512, 0.5542015550239234, 0.5873953349282297], [0.4666566985645933, 0.5513606459330144, 0.5632476076555024, 0.5912081339712919, 0.5983851674641149, 0.5798444976076556, 0.6007027511961722, 0.605936004784689, 0.5893391148325359, 0.5988337320574163], [0.49028110047846885, 0.5130831339712919, 0.5964413875598086, 0.5882924641148325, 0.6167763157894737, 0.609674043062201, 0.6077302631578948, 0.6158791866028708, 0.6177482057416268, 0.624850478468

In [12]:
# Dev precision history: one inner list per call to main(), one value per epoch.
print(overall_list_dev_precision)

[[0.23439889319060622, 0.34003455933186166, 0.35327087522978323, 0.3421568744215099, 0.3500027988450165, 0.364599539138661, 0.4113871359821965, 0.4698378950942079, 0.4768156091414603, 0.4837257680868714], [0.19882823684061168, 0.24776670200303832, 0.46658781141608563, 0.5079344374146874, 0.43507166950383946, 0.5028820864201557, 0.4666637086195381, 0.4614283410550191, 0.4961262758413054, 0.4955844616261869], [0.13967371347172916, 0.29084843313348624, 0.3293216155930687, 0.3719417198962654, 0.4583092089695175, 0.4925954065490006, 0.4583740392883616, 0.47501614441629914, 0.44480614809245544, 0.5037514016770983], [0.09699087918660289, 0.3362211011485277, 0.3322860803147403, 0.35706726307591774, 0.3674242152834186, 0.3621008770641124, 0.3905099960801282, 0.4134682278740117, 0.45162383011334606, 0.5071948234240664], [0.2070083928760236, 0.3023472178595927, 0.367873818664565, 0.37363721593632937, 0.3661736731715715, 0.4273280483159459, 0.48059716761254195, 0.4803431453483346, 0.46456003511717

In [13]:
# Dev recall history: one inner list per call to main(), one value per epoch.
print(overall_list_dev_recall)

[[0.24716862682771776, 0.3867779712039843, 0.40626214624103735, 0.4186565443422326, 0.44390689602482447, 0.4532164982967121, 0.4852720697989661, 0.4594960320428235, 0.4869417358488215, 0.4572742221951515], [0.23457232310173487, 0.3066946999482885, 0.417944804632463, 0.46832201002550006, 0.4740961254205343, 0.48981905779119406, 0.46207745030597197, 0.460108331250324, 0.47440806281469095, 0.49527748617989253], [0.21369018236665302, 0.31310696248196246, 0.3614663134087013, 0.4406247867590395, 0.46173455186102946, 0.47072229408292254, 0.4468708356408891, 0.4923731157805355, 0.4574692771343489, 0.4784509124580895], [0.2068181818181819, 0.3323252751778821, 0.38720574800586416, 0.4113966498562263, 0.45245765414496064, 0.45455299817141925, 0.42648128406290514, 0.4502282215626572, 0.43038743086431663, 0.5023398351265645], [0.24798424058116567, 0.2680881744518108, 0.3937945317652607, 0.4470691765430368, 0.42843408788103404, 0.4884171067853871, 0.5203834331088588, 0.4994575591553038, 0.4716416230

In [14]:
# Dev F1 history: one inner list per call to main(), one value per epoch.
print(overall_list_dev_f1score)

[[0.19631536705767452, 0.3484295737843243, 0.36126019752643984, 0.36380151964818686, 0.37654677141390974, 0.38861149546444923, 0.4258281125933916, 0.43852302954575334, 0.45777429360452887, 0.4325886098806278], [0.18319648786481976, 0.256959799050397, 0.40999286929306017, 0.44404927961466006, 0.4333233028617174, 0.45905019265093705, 0.44509794390388263, 0.4376095245787948, 0.46686392734450705, 0.4760759680654953], [0.1433128588771253, 0.27565921172626384, 0.32738421808857987, 0.3836959340795732, 0.4369508444929462, 0.4634433954368217, 0.4286470417320584, 0.46539102573584396, 0.42901873227391385, 0.45807354530738015], [0.1312196294941196, 0.3052444393481167, 0.348445240467718, 0.36519614604567513, 0.387424026735515, 0.383041575885718, 0.38875050974192543, 0.4146925300355415, 0.4111610195426639, 0.48430186063110486], [0.2044728975312479, 0.23480919641376252, 0.35250784852567096, 0.38645157370929994, 0.37422912808556197, 0.4344532017267571, 0.4743774531419394, 0.46316595847704584, 0.444870

In [15]:
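# The best model is 3 (presumably the run index `i` used in the saved-model directory name — TODO confirm which metric this is based on)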