In [1]:
import pandas as pd
import numpy as np
import torch
from torch import cuda
from torch.utils.data import Dataset, DataLoader
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from load_data import initialize_data
from reading_datasets import read_task
from labels_to_ids import labels_to_ids_mal
import time
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
def train(epoch, train_loader, model, optimizer, device, grad_step = 1, max_grad_norm = 10):
    tr_loss, tr_accuracy = 0, 0
    tr_precision, tr_recall = 0, 0
    tr_f1score = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_preds, tr_labels = [], []
    # put model in training mode
    model.train()
    optimizer.zero_grad()
    
    for idx, batch in enumerate(train_loader):
        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        labels = batch['labels'].to(device, dtype = torch.long)

        if (idx + 1) % 20 == 0:
            print('FINSIHED BATCH:', idx, 'of', len(train_loader))

        #loss, tr_logits = model(input_ids=ids, attention_mask=mask, labels=labels)
        output = model(input_ids=ids, attention_mask=mask, labels=labels)
        tr_loss += output[0]

        nb_tr_steps += 1
        nb_tr_examples += labels.size(0)
           
        # compute training accuracy
        flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
        active_logits = output[1].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
        
        # only compute accuracy at active labels
        active_accuracy = labels.view(-1) != -100 # shape (batch_size, seq_len)
        #active_labels = torch.where(active_accuracy, labels.view(-1), torch.tensor(-100).type_as(labels))
        
        labels = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)

        tr_labels.extend(labels)
        tr_preds.extend(predictions)

        tmp_tr_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy
        
        # Compute Precision
        tmp_tr_precision = precision_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0 )
        tr_precision += tmp_tr_precision
        
        # Compute Recall
        tmp_tr_recall = recall_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
        tr_recall += tmp_tr_recall
        
        # Compute f1score
        tmp_tr_f1score = f1_score(labels.cpu().numpy(), predictions.cpu().numpy(), average= 'macro', zero_division=0)
        tr_f1score += tmp_tr_f1score
    
        # gradient clipping
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=max_grad_norm
        )
        
        # backward pass
        output['loss'].backward()
        if (idx + 1) % grad_step == 0:
            optimizer.step()
            optimizer.zero_grad()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    tr_precision = tr_precision / nb_tr_steps
    tr_recall = tr_recall / nb_tr_steps
    tr_f1score= tr_f1score / nb_tr_steps
    #print(f"Training loss epoch: {epoch_loss}")
    #print(f"Training accuracy epoch: {tr_accuracy}")

    return model

In [3]:
def testing(model, testing_loader, labels_to_ids, device):
    # put model in evaluation mode
    model.eval()
    
    eval_loss, eval_accuracy = 0, 0
    eval_precision, eval_recall = 0, 0
    eval_f1score = 0
    nb_eval_examples, nb_eval_steps = 0, 0
    eval_preds, eval_labels = [], []
     
    
    ids_to_labels = dict((v,k) for k,v in labels_to_ids.items())

    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):
            
            ids = batch['input_ids'].to(device, dtype = torch.long)
            mask = batch['attention_mask'].to(device, dtype = torch.long)
            labels = batch['labels'].to(device, dtype = torch.long)
            
            #loss, eval_logits = model(input_ids=ids, attention_mask=mask, labels=labels)
            output = model(input_ids=ids, attention_mask=mask, labels=labels)

            eval_loss += output['loss'].item()

            nb_eval_steps += 1
            nb_eval_examples += labels.size(0)
        
            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")
              
            # compute evaluation accuracy
            flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
            active_logits = output[1].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
            
            # only compute accuracy at active labels
            active_accuracy = labels.view(-1) != -100 # shape (batch_size, seq_len)
        
            labels = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)
            
            eval_labels.extend(labels)
            eval_preds.extend(predictions)
            
            tmp_eval_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy
            
            # Compute Precision
            tmp_eval_precision = precision_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_precision += tmp_eval_precision
            
            # Compute Recall
            tmp_eval_recall = recall_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_recall += tmp_eval_recall
            
            # Compute f1score
            tmp_eval_f1score = f1_score(labels.cpu().numpy(), predictions.cpu().numpy(), average='macro', zero_division=0)
            eval_f1score += tmp_eval_f1score

    labels = [ids_to_labels[id.item()] for id in eval_labels]
    predictions = [ids_to_labels[id.item()] for id in eval_preds]
    
    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps
    eval_precision = eval_precision / nb_eval_steps
    eval_recall = eval_recall / nb_eval_steps
    eval_f1score = eval_f1score / nb_eval_steps
    #print(f"Validation Loss: {eval_loss}")
    #print(f"Validation Accuracy: {eval_accuracy}")

    return labels, predictions, eval_accuracy, eval_precision, eval_recall, eval_f1score

In [4]:
def main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location):
    #Initialization training parameters
    max_len = 256
    batch_size = 32
    grad_step = 1
    learning_rate = 1e-05
    initialization_input = (max_len, batch_size)

    #Reading datasets and initializing data loaders
    dataset_location = '../datasets/task_a/'

    train_data = read_task(dataset_location , split = 'mal_sentiment_train')
    dev_data = read_task(dataset_location , split = 'mal_sentiment_dev')
    #test_data = read_task(dataset_location , split = 'dev')#load test set
    labels_to_ids = labels_to_ids_mal
    #input_data = (train_data, dev_data, labels_to_ids)

    #Define tokenizer, model and optimizer
    device = 'cuda' if cuda.is_available() else 'cpu' #save the processing time
    if model_load_flag:
        tokenizer = AutoTokenizer.from_pretrained(model_load_location)
        model = AutoModelForSequenceClassification.from_pretrained(model_load_location)
    else: 
        tokenizer =  AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(labels_to_ids))
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
    model.to(device)

    #Get dataloaders
    train_loader = initialize_data(tokenizer, initialization_input, train_data, labels_to_ids, shuffle = True)
    dev_loader = initialize_data(tokenizer, initialization_input, dev_data, labels_to_ids, shuffle = True)
    #test_loader = initialize_data(tokenizer, initialization_input, test_data, labels_to_ids, shuffle = True)#create test loader

    best_dev_acc = 0
    best_test_acc = 0
    best_dev_precision = 0
    best_test_precision = 0
    best_dev_recall = 0
    best_test_recall = 0
    best_dev_f1score = 0
    best_test_f1score = 0
    best_epoch = -1
    
    list_dev_acc = [] 
    list_test_acc = []  
    list_dev_precision = []  
    list_test_precision  = []  
    list_dev_recall = []  
    list_test_recall = []  
    list_dev_f1score = []  
    list_test_f1score = []
    
    for epoch in range(n_epochs):
        start = time.time()
        print(f"Training epoch: {epoch + 1}")

        #train model
        if not model_load_flag:
            model = train(epoch, train_loader, model, optimizer, device, grad_step)
        
        #testing and logging
        labels_dev, predictions_dev, dev_accuracy, dev_precision, dev_recall, dev_f1score = testing(model, dev_loader, labels_to_ids, device)
        print('DEV ACC:', dev_accuracy)
        print('DEV Precision:' , dev_precision)
        print('DEV Recall:' , dev_recall)
        print('DEV F1Score:' , dev_f1score)
        
        list_dev_acc.append(dev_accuracy)     
        list_dev_precision.append(dev_precision)   
        list_dev_recall.append(dev_recall)  
        list_dev_f1score.append(dev_f1score)  
        
        
        #labels_test, predictions_test, test_accuracy, test_precision, test_recall, test_f1score = testing(model, test_loader, labels_to_ids, device)
        #print('TEST ACC:', test_accuracy)
        #print('TEST Precision:' , test_precision)
        #print('TEST Recall:' , test_recall)
        #print('TEST F1Score:' , test_f1score)
        
        #list_test_acc.append(test_accuracy) 
        #list_test_precision.append(test_precision)  
        #list_test_recall.append(test_recall)
        #list_test_f1score.append(test_f1score) 

        #saving model
        if dev_accuracy > best_dev_acc:
            best_dev_acc = dev_accuracy
            #best_test_acc = test_accuracy
        if dev_precision > best_dev_precision:
            best_dev_precision = dev_precision
            #best_test_precision = test_precision
        if dev_recall > best_dev_recall:
            best_dev_recall = dev_recall
            #best_test_recall = test_recall
        if dev_f1score > best_dev_f1score:
            best_dev_f1score = dev_f1score
            #best_test_f1score = test_f1score
            best_epoch = epoch
            
            if model_save_flag:
                os.makedirs(model_save_location, exist_ok=True)
                tokenizer.save_pretrained(model_save_location)
                model.save_pretrained(model_save_location)

        now = time.time()
        print('BEST ACCURACY --> ', 'DEV:', round(best_dev_acc, 5))
        print('BEST PRECISION --> ', 'DEV:', round(best_dev_precision, 5))
        print('BEST RECALL --> ', 'DEV:', round(best_dev_recall, 5))
        print('BEST F1SCORE --> ', 'DEV:', round(best_dev_f1score, 5))
        print('TIME PER EPOCH:', (now-start)/60 )
        print()

    return best_dev_acc, best_test_acc, best_epoch, best_dev_precision, best_test_precision, best_dev_recall, best_test_recall, best_dev_f1score, best_test_f1score, list_dev_acc, list_test_acc, list_dev_precision, list_test_precision, list_dev_recall, list_test_recall, list_dev_f1score, list_test_f1score

In [5]:
if __name__ == '__main__':
    n_epochs = 10
    models = ['ai4bharat/indic-bert']
    
    #model saving parameters
    model_save_flag = True
    model_load_flag = False
    
    overall_list_dev_acc = [] 
    overall_list_test_acc = []    
    overall_list_dev_precision = []  
    overall_list_test_precision  = []  
    overall_list_dev_recall = []  
    overall_list_test_recall = []  
    overall_list_dev_f1score = []  
    overall_list_test_f1score = [] 
    
    for i in range(5):
        
        for model_name in models:

            model_save_location = 'saved_models/' + model_name + 'Malayalam' + str(i)
            model_load_location = None

            best_dev_acc, best_test_acc, best_epoch, best_dev_precision, best_test_precision, best_dev_recall, best_test_recall, best_dev_f1score, best_test_f1score, list_dev_acc, list_test_acc, list_dev_precision, list_test_precision, list_dev_recall, list_test_recall, list_dev_f1score, list_test_f1score = main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location)
            
            overall_list_dev_acc.append(list_dev_acc) 
            overall_list_test_acc.append(list_test_acc) 
            overall_list_dev_precision.append(list_dev_precision)  
            overall_list_test_precision.append(list_test_precision) 
            overall_list_dev_recall.append(list_dev_recall)  
            overall_list_test_recall.append(list_test_recall)  
            overall_list_dev_f1score.append(list_dev_f1score)  
            overall_list_test_f1score.append(list_test_f1score) 

Downloading:   0%|          | 0.00/129M [00:00<?, ?B/s]

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight', 'predictions.bias', 'predictions.decoder.bias', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.0326826572418213
DEV ACC: 0.5171130952380952
DEV Precision: 0.4001391188242072
DEV Recall: 0.3857184501003607
DEV F1Score: 0.34826655495953157
BEST ACCURACY -->  DEV: 0.51711
BEST PRECISION -->  DEV: 0.40014
BEST RECALL -->  DEV: 0.38572
BEST F1SCORE -->  DEV: 0.34827
TIME PER EPOCH: 11.2569555

BEST ACCURACY -->  DEV: 0.66388
BEST PRECISION -->  DEV: 0.56829
BEST RECALL -->  DEV: 0.5645
BEST F1SCORE -->  DEV: 0.54513
TIME PER EPOCH: 11.46560564438502

Training epoch: 10
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.7142410278320312
DEV ACC: 0.6205357142857143
DEV Precision: 0.558308130361702
DEV Recall: 0.5477764695621838
DEV F1Score: 0.5334457453277853
BEST ACCURACY -->  DEV: 0.66388
BEST PRECISION -->  DEV: 0.56829
BEST RECALL -->  DEV: 0.5645
BES

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight', 'predictions.bias', 'predictions.decoder.bias', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.3893400430679321
DEV ACC: 0.519345238095238
DEV Precision: 0.3424273289689784
DEV Recall: 0.35598790506878747
DEV F1Score: 0.33101501088285057
BEST ACCURACY -->  DEV: 0.51935
BEST PRECISION -->  DEV: 0.34243
BEST RECALL -->  DEV: 0.35599
BEST F1SCORE -->  DEV: 0.33102
TIME PER EPOCH: 11.450234150886535

Training epoch: 2
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
F

FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.7148226499557495
DEV ACC: 0.6203497023809524
DEV Precision: 0.5385427083858021
DEV Recall: 0.5531908944951432
DEV F1Score: 0.5254050789454988
BEST ACCURACY -->  DEV: 0.65681
BEST PRECISION -->  DEV: 0.57834
BEST RECALL -->  DEV: 0.55319
BEST F1SCORE -->  DEV: 0.53522
TIME PER EPOCH: 11.466040762265523

Training epoch: 10
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 2

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight', 'predictions.bias', 'predictions.decoder.bias', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.1577847003936768
DEV ACC: 0.5383184523809523
DEV Precision: 0.3731803795063728
DEV Recall: 0.41536794464775045
DEV F1Score: 0.37246203860480903
BEST ACCURACY -->  DEV: 0.53832
BEST PRECISION -->  DEV: 0.37318
BEST RECALL -->  DEV: 0.41537
BEST F1SCORE -->  DEV: 0.37246
TIME PER EPOCH: 11.428145

FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.8637686967849731
DEV ACC: 0.6402529761904762
DEV Precision: 0.5659167766757054
DEV Recall: 0.5807121405027476
DEV F1Score: 0.5512251104463942
BEST ACCURACY -->  DEV: 0.65234
BEST PRECISION -->  DEV: 0.57839
BEST RECALL -->  DEV: 0.58071
BEST F1SCORE -->  DEV: 0.55195
TIME PER EPOCH: 11.430582582950592

Training epoch: 10
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight', 'predictions.bias', 'predictions.decoder.bias', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.1612027883529663
DEV ACC: 0.4920014880952381
DEV Precision: 0.33645889898482634
DEV Recall: 0.36218779484404484
DEV F1Score: 0.32810889846147945
BEST ACCURACY -->  DEV: 0.492
BEST PRECISION -->  DEV: 0.33646
BEST RECALL -->  DEV: 0.36219
BEST F1SCORE -->  DEV: 0.32811
TIME PER EPOCH: 11.5941016

FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.3914207220077515
DEV ACC: 0.6434151785714286
DEV Precision: 0.5903907186571109
DEV Recall: 0.5747915506835787
DEV F1Score: 0.5591260774016126
BEST ACCURACY -->  DEV: 0.6635
BEST PRECISION -->  DEV: 0.60056
BEST RECALL -->  DEV: 0.58882
BEST F1SCORE -->  DEV: 0.57277
TIME PER EPOCH: 11.376593748728434

Training epoch: 10
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 15

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight', 'predictions.bias', 'predictions.decoder.bias', 'sop_classifier.classifier.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.3420857191085815
DEV ACC: 0.4884672619047619
DEV Precision: 0.3644801647392864
DEV Recall: 0.3555808168984376
DEV F1Score: 0.3162014739173598
BEST ACCURACY -->  DEV: 0.48847
BEST PRECISION -->  DEV: 0.36448
BEST RECALL -->  DEV: 0.35558
BEST F1SCORE -->  DEV: 0.3162
TIME PER EPOCH: 11.444638045

FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 0.9536765217781067
DEV ACC: 0.6551339285714286
DEV Precision: 0.5982469540846642
DEV Recall: 0.5936108692214261
DEV F1Score: 0.5722465250868252
BEST ACCURACY -->  DEV: 0.65737
BEST PRECISION -->  DEV: 0.59887
BEST RECALL -->  DEV: 0.59361
BEST F1SCORE -->  DEV: 0.57225
TIME PER EPOCH: 11.471261711915334

Training epoch: 10
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 

In [6]:
print(best_dev_acc)

0.6573660714285714


In [7]:
print(best_epoch)

8


In [8]:
print(best_dev_precision)

0.5988741860955828


In [9]:
print(best_dev_recall)

0.5936108692214261


In [10]:
print(best_dev_f1score)

0.5722465250868252


In [11]:
print(overall_list_dev_acc)

[[0.5171130952380952, 0.5881696428571429, 0.6287202380952381, 0.6374627976190476, 0.6638764880952381, 0.64453125, 0.6376488095238095, 0.6378348214285714, 0.6333705357142857, 0.6205357142857143], [0.519345238095238, 0.5963541666666667, 0.6341145833333333, 0.6568080357142857, 0.6534598214285714, 0.6421130952380952, 0.6302083333333333, 0.6356026785714286, 0.6203497023809524, 0.6380208333333333], [0.5383184523809523, 0.6026785714285714, 0.6316964285714286, 0.6402529761904762, 0.6517857142857143, 0.65234375, 0.6322544642857143, 0.6469494047619048, 0.6402529761904762, 0.6331845238095238], [0.4920014880952381, 0.6028645833333333, 0.6296502976190476, 0.6538318452380952, 0.6618303571428571, 0.6629464285714286, 0.6434151785714286, 0.6635044642857143, 0.6434151785714286, 0.6261160714285714], [0.4884672619047619, 0.6110491071428571, 0.626860119047619, 0.6432291666666667, 0.6499255952380952, 0.6573660714285714, 0.65625, 0.6473214285714286, 0.6551339285714286, 0.6235119047619048]]


In [12]:
print(overall_list_dev_precision)

[[0.4001391188242072, 0.460653766981581, 0.5076943238688248, 0.5194955769312386, 0.5421913229736441, 0.5682917667818568, 0.5367005340889269, 0.5509232320550597, 0.5676345921970694, 0.558308130361702], [0.3424273289689784, 0.5078427578733624, 0.5359899938114431, 0.5315593543959726, 0.550112445761184, 0.5783389148506907, 0.5653104043804371, 0.5443243316373293, 0.5385427083858021, 0.5607761044931407], [0.3731803795063728, 0.47554823485131414, 0.5247290422272787, 0.525412305334353, 0.5405535207964304, 0.5709322030094088, 0.5573239574246458, 0.5783852242014766, 0.5659167766757054, 0.5657252569319963], [0.33645889898482634, 0.4802939612616609, 0.5246307969744868, 0.5318310531257652, 0.5418371156891172, 0.5721125043200032, 0.5450287476855852, 0.6005584089696486, 0.5903907186571109, 0.5763977436837203], [0.3644801647392864, 0.5029118111786276, 0.5129235657878356, 0.5273070416479375, 0.5544024424923019, 0.5988741860955828, 0.5715013224722821, 0.5545276155333931, 0.5982469540846642, 0.5418381386

In [13]:
print(overall_list_dev_recall)

[[0.3857184501003607, 0.5055577697279378, 0.552792605970127, 0.5539277671602624, 0.5526473851486983, 0.5605898305026525, 0.5645043962369329, 0.5517026088744907, 0.5626525849327481, 0.5477764695621838], [0.35598790506878747, 0.4691231236812906, 0.5418138326556546, 0.525908400886425, 0.5295839397783711, 0.5401860233611602, 0.5448867332480778, 0.528502942859146, 0.5531908944951432, 0.5449008515963126], [0.41536794464775045, 0.4579764788345408, 0.5036627701051282, 0.5406202794307825, 0.5336111570210834, 0.5686025680668536, 0.5637661919601604, 0.5526265028270846, 0.5807121405027476, 0.5673072412239952], [0.36218779484404484, 0.5213556225727626, 0.5112226002150954, 0.5379449942855931, 0.5275988183006303, 0.5520643572459348, 0.5270070525296365, 0.5888197561950188, 0.5747915506835787, 0.5699320558451269], [0.3555808168984376, 0.47925525533563323, 0.5294669033843139, 0.5161028836068227, 0.5665640826552065, 0.5662951655749711, 0.5619831349336754, 0.551949421346259, 0.5936108692214261, 0.54345118

In [14]:
print(overall_list_dev_f1score)

[[0.34826655495953157, 0.45712420829032097, 0.5112307992809059, 0.5173074387864638, 0.5301756409988578, 0.5400923902444521, 0.5296553653115558, 0.5334505193535414, 0.5451335731728063, 0.5334457453277853], [0.33101501088285057, 0.4455587434208525, 0.5071971209715581, 0.5103822358399093, 0.5185190304021469, 0.535223841915602, 0.5312060285310946, 0.5224264783528991, 0.5254050789454988, 0.525455312046334], [0.37246203860480903, 0.42933718700579476, 0.48900862802073986, 0.5135232696335691, 0.5179077721705653, 0.5519535329012266, 0.5353473218042896, 0.5438275482661445, 0.5512251104463942, 0.5403112499754879], [0.32810889846147945, 0.48383973407119685, 0.49357141922180586, 0.5123823556935094, 0.5159054526839402, 0.5389099129964462, 0.5195169332821042, 0.5727705216360643, 0.5591260774016126, 0.5480875721513504], [0.3162014739173598, 0.4692640425530224, 0.4991499380546355, 0.49895341962525575, 0.5362056916009941, 0.5518465865056542, 0.5451349894467734, 0.5326436110248757, 0.5722465250868252, 0.

In [15]:
#The best model is 3