In [1]:
import pandas as pd
import numpy as np
import torch
from torch import cuda
from torch.utils.data import Dataset, DataLoader
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from load_data import initialize_data
from reading_datasets import read_task
from labels_to_ids import labels_to_ids_mal
import time
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
def train(epoch, train_loader, model, optimizer, device, grad_step = 1, max_grad_norm = 10):
    tr_loss, tr_accuracy = 0, 0
    tr_precision, tr_recall = 0, 0
    tr_f1score = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_preds, tr_labels = [], []
    # put model in training mode
    model.train()
    optimizer.zero_grad()
    
    for idx, batch in enumerate(train_loader):
        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        labels = batch['labels'].to(device, dtype = torch.long)

        if (idx + 1) % 20 == 0:
            print('FINSIHED BATCH:', idx, 'of', len(train_loader))

        #loss, tr_logits = model(input_ids=ids, attention_mask=mask, labels=labels)
        output = model(input_ids=ids, attention_mask=mask, labels=labels)
        tr_loss += output[0]

        nb_tr_steps += 1
        nb_tr_examples += labels.size(0)
           
        # compute training accuracy
        flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
        active_logits = output[1].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
        
        # only compute accuracy at active labels
        active_accuracy = labels.view(-1) != -100 # shape (batch_size, seq_len)
        #active_labels = torch.where(active_accuracy, labels.view(-1), torch.tensor(-100).type_as(labels))
        
        labels = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)

        tr_labels.extend(labels)
        tr_preds.extend(predictions)

        tmp_tr_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy
        
        # Compute Precision
        tmp_tr_precision = precision_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0 )
        tr_precision += tmp_tr_precision
        
        # Compute Recall
        tmp_tr_recall = recall_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
        tr_recall += tmp_tr_recall
        
        # Compute f1score
        tmp_tr_f1score = f1_score(labels.cpu().numpy(), predictions.cpu().numpy(), average= 'macro', zero_division=0)
        tr_f1score += tmp_tr_f1score
    
        # gradient clipping
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=max_grad_norm
        )
        
        # backward pass
        output['loss'].backward()
        if (idx + 1) % grad_step == 0:
            optimizer.step()
            optimizer.zero_grad()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    tr_precision = tr_precision / nb_tr_steps
    tr_recall = tr_recall / nb_tr_steps
    tr_f1score= tr_f1score / nb_tr_steps
    #print(f"Training loss epoch: {epoch_loss}")
    #print(f"Training accuracy epoch: {tr_accuracy}")

    return model

In [3]:
def testing(model, testing_loader, labels_to_ids, device):
    # put model in evaluation mode
    model.eval()
    
    eval_loss, eval_accuracy = 0, 0
    eval_precision, eval_recall = 0, 0
    eval_f1score = 0
    nb_eval_examples, nb_eval_steps = 0, 0
    eval_preds, eval_labels = [], []
     
    
    ids_to_labels = dict((v,k) for k,v in labels_to_ids.items())

    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):
            
            ids = batch['input_ids'].to(device, dtype = torch.long)
            mask = batch['attention_mask'].to(device, dtype = torch.long)
            labels = batch['labels'].to(device, dtype = torch.long)
            
            #loss, eval_logits = model(input_ids=ids, attention_mask=mask, labels=labels)
            output = model(input_ids=ids, attention_mask=mask, labels=labels)

            eval_loss += output['loss'].item()

            nb_eval_steps += 1
            nb_eval_examples += labels.size(0)
        
            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")
              
            # compute evaluation accuracy
            flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
            active_logits = output[1].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
            
            # only compute accuracy at active labels
            active_accuracy = labels.view(-1) != -100 # shape (batch_size, seq_len)
        
            labels = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)
            
            eval_labels.extend(labels)
            eval_preds.extend(predictions)
            
            tmp_eval_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy
            
            # Compute Precision
            tmp_eval_precision = precision_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_precision += tmp_eval_precision
            
            # Compute Recall
            tmp_eval_recall = recall_score(labels.cpu().numpy(), predictions.cpu().numpy(), average = 'macro', zero_division=0)
            eval_recall += tmp_eval_recall
            
            # Compute f1score
            tmp_eval_f1score = f1_score(labels.cpu().numpy(), predictions.cpu().numpy(), average='macro', zero_division=0)
            eval_f1score += tmp_eval_f1score

    labels = [ids_to_labels[id.item()] for id in eval_labels]
    predictions = [ids_to_labels[id.item()] for id in eval_preds]
    
    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps
    eval_precision = eval_precision / nb_eval_steps
    eval_recall = eval_recall / nb_eval_steps
    eval_f1score = eval_f1score / nb_eval_steps
    #print(f"Validation Loss: {eval_loss}")
    #print(f"Validation Accuracy: {eval_accuracy}")

    return labels, predictions, eval_accuracy, eval_precision, eval_recall, eval_f1score

In [4]:
def main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location):
    #Initialization training parameters
    max_len = 256
    batch_size = 32
    grad_step = 1
    learning_rate = 1e-05
    initialization_input = (max_len, batch_size)

    #Reading datasets and initializing data loaders
    dataset_location = '../datasets/task_a/transliterated/'

    train_data = read_task(dataset_location , split = 'mal_train_trans')
    dev_data = read_task(dataset_location , split = 'mal_dev_trans')
    #test_data = read_task(dataset_location , split = 'dev')#load test set
    labels_to_ids = labels_to_ids_mal
    #input_data = (train_data, dev_data, labels_to_ids)

    #Define tokenizer, model and optimizer
    device = 'cuda' if cuda.is_available() else 'cpu' #save the processing time
    if model_load_flag:
        tokenizer = AutoTokenizer.from_pretrained(model_load_location)
        model = AutoModelForSequenceClassification.from_pretrained(model_load_location)
    else: 
        tokenizer =  AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(labels_to_ids))
    optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
    model.to(device)

    #Get dataloaders
    train_loader = initialize_data(tokenizer, initialization_input, train_data, labels_to_ids, shuffle = True)
    dev_loader = initialize_data(tokenizer, initialization_input, dev_data, labels_to_ids, shuffle = True)
    #test_loader = initialize_data(tokenizer, initialization_input, test_data, labels_to_ids, shuffle = True)#create test loader

    best_dev_acc = 0
    best_test_acc = 0
    best_dev_precision = 0
    best_test_precision = 0
    best_dev_recall = 0
    best_test_recall = 0
    best_dev_f1score = 0
    best_test_f1score = 0
    best_epoch = -1
    
    list_dev_acc = [] 
    list_test_acc = []  
    list_dev_precision = []  
    list_test_precision  = []  
    list_dev_recall = []  
    list_test_recall = []  
    list_dev_f1score = []  
    list_test_f1score = []
    
    for epoch in range(n_epochs):
        start = time.time()
        print(f"Training epoch: {epoch + 1}")

        #train model
        if not model_load_flag:
            model = train(epoch, train_loader, model, optimizer, device, grad_step)
        
        #testing and logging
        labels_dev, predictions_dev, dev_accuracy, dev_precision, dev_recall, dev_f1score = testing(model, dev_loader, labels_to_ids, device)
        print('DEV ACC:', dev_accuracy)
        print('DEV Precision:' , dev_precision)
        print('DEV Recall:' , dev_recall)
        print('DEV F1Score:' , dev_f1score)
        
        list_dev_acc.append(dev_accuracy)     
        list_dev_precision.append(dev_precision)   
        list_dev_recall.append(dev_recall)  
        list_dev_f1score.append(dev_f1score)  
        
        
        #labels_test, predictions_test, test_accuracy, test_precision, test_recall, test_f1score = testing(model, test_loader, labels_to_ids, device)
        #print('TEST ACC:', test_accuracy)
        #print('TEST Precision:' , test_precision)
        #print('TEST Recall:' , test_recall)
        #print('TEST F1Score:' , test_f1score)
        
        #list_test_acc.append(test_accuracy) 
        #list_test_precision.append(test_precision)  
        #list_test_recall.append(test_recall)
        #list_test_f1score.append(test_f1score) 

        #saving model
        if dev_accuracy > best_dev_acc:
            best_dev_acc = dev_accuracy
            #best_test_acc = test_accuracy
        if dev_precision > best_dev_precision:
            best_dev_precision = dev_precision
            #best_test_precision = test_precision
        if dev_recall > best_dev_recall:
            best_dev_recall = dev_recall
            #best_test_recall = test_recall
        if dev_f1score > best_dev_f1score:
            best_dev_f1score = dev_f1score
            #best_test_f1score = test_f1score
            best_epoch = epoch
            
            if model_save_flag:
                os.makedirs(model_save_location, exist_ok=True)
                tokenizer.save_pretrained(model_save_location)
                model.save_pretrained(model_save_location)

        now = time.time()
        print('BEST ACCURACY --> ', 'DEV:', round(best_dev_acc, 5))
        print('BEST PRECISION --> ', 'DEV:', round(best_dev_precision, 5))
        print('BEST RECALL --> ', 'DEV:', round(best_dev_recall, 5))
        print('BEST F1SCORE --> ', 'DEV:', round(best_dev_f1score, 5))
        print('TIME PER EPOCH:', (now-start)/60 )
        print()

    return best_dev_acc, best_test_acc, best_epoch, best_dev_precision, best_test_precision, best_dev_recall, best_test_recall, best_dev_f1score, best_test_f1score, list_dev_acc, list_test_acc, list_dev_precision, list_test_precision, list_dev_recall, list_test_recall, list_dev_f1score, list_test_f1score

In [5]:
if __name__ == '__main__':
    n_epochs = 10
    models = ['ai4bharat/indic-bert']
    
    #model saving parameters
    model_save_flag = True
    model_load_flag = False
    
    overall_list_dev_acc = [] 
    overall_list_test_acc = []    
    overall_list_dev_precision = []  
    overall_list_test_precision  = []  
    overall_list_dev_recall = []  
    overall_list_test_recall = []  
    overall_list_dev_f1score = []  
    overall_list_test_f1score = [] 
    
    for i in range(5):
        
        for model_name in models:

            model_save_location = 'saved_models/' + model_name + 'Malayalam' + 'transliterated' + str(i)
            model_load_location = None

            best_dev_acc, best_test_acc, best_epoch, best_dev_precision, best_test_precision, best_dev_recall, best_test_recall, best_dev_f1score, best_test_f1score, list_dev_acc, list_test_acc, list_dev_precision, list_test_precision, list_dev_recall, list_test_recall, list_dev_f1score, list_test_f1score = main(n_epochs, model_name, model_save_flag, model_save_location, model_load_flag, model_load_location)
            
            overall_list_dev_acc.append(list_dev_acc) 
            overall_list_test_acc.append(list_test_acc) 
            overall_list_dev_precision.append(list_dev_precision)  
            overall_list_test_precision.append(list_test_precision) 
            overall_list_dev_recall.append(list_dev_recall)  
            overall_list_test_recall.append(list_test_recall)  
            overall_list_dev_f1score.append(list_dev_f1score)  
            overall_list_test_f1score.append(list_test_f1score) 

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'sop_classifier.classifier.bias', 'predictions.decoder.bias', 'predictions.bias', 'predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.042056918144226
DEV ACC: 0.5143229166666666
DEV Precision: 0.3908645520462023
DEV Recall: 0.3631297581678359
DEV F1Score: 0.3315849281867668
BEST ACCURACY -->  DEV: 0.51432
BEST PRECISION -->  DEV: 0.39086
BEST RECALL -->  DEV: 0.36313
BEST F1SCORE -->  DEV: 0.33158
TIME PER EPOCH: 10.092833892

FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.1218698024749756
DEV ACC: 0.6748511904761905
DEV Precision: 0.6018026341318027
DEV Recall: 0.5844366719589934
DEV F1Score: 0.5704709763601857
BEST ACCURACY -->  DEV: 0.68936
BEST PRECISION -->  DEV: 0.6018
BEST RECALL -->  DEV: 0.61118
BEST F1SCORE -->  DEV: 0.582
TIME PER EPOCH: 10.322660020987193

Training epoch: 10
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'sop_classifier.classifier.bias', 'predictions.decoder.bias', 'predictions.bias', 'predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.2849206924438477
DEV ACC: 0.5690104166666666
DEV Precision: 0.37549538840325686
DEV Recall: 0.4429309328741618
DEV F1Score: 0.3969121983081862
BEST ACCURACY -->  DEV: 0.56901
BEST PRECISION -->  DEV: 0.3755
BEST RECALL -->  DEV: 0.44293
BEST F1SCORE -->  DEV: 0.39691
TIME PER EPOCH: 10.26999377

FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 0.84977787733078
DEV ACC: 0.6396949404761905
DEV Precision: 0.568377814005924
DEV Recall: 0.5579239312428443
DEV F1Score: 0.5415595156762883
BEST ACCURACY -->  DEV: 0.66555
BEST PRECISION -->  DEV: 0.59882
BEST RECALL -->  DEV: 0.58315
BEST F1SCORE -->  DEV: 0.56404
TIME PER EPOCH: 10.833748086293538

Training epoch: 10
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'sop_classifier.classifier.bias', 'predictions.decoder.bias', 'predictions.bias', 'predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.0803619623184204
DEV ACC: 0.5846354166666667
DEV Precision: 0.3769153605178526
DEV Recall: 0.44566672414119607
DEV F1Score: 0.3963991547754074
BEST ACCURACY -->  DEV: 0.58464
BEST PRECISION -->  DEV: 0.37692
BEST RECALL -->  DEV: 0.44567
BEST F1SCORE -->  DEV: 0.3964
TIME PER EPOCH: 10.32159685

FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.2620519399642944
DEV ACC: 0.6428571428571429
DEV Precision: 0.5701442357092513
DEV Recall: 0.5630995319164963
DEV F1Score: 0.5436528873167151
BEST ACCURACY -->  DEV: 0.64286
BEST PRECISION -->  DEV: 0.57014
BEST RECALL -->  DEV: 0.5631
BEST F1SCORE -->  DEV: 0.54365
TIME PER EPOCH: 10.525147565205891

Training epoch: 10
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 19

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'sop_classifier.classifier.bias', 'predictions.decoder.bias', 'predictions.bias', 'predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.197865605354309
DEV ACC: 0.5621279761904762
DEV Precision: 0.37017626674182125
DEV Recall: 0.41454506432986654
DEV F1Score: 0.37255282554063934
BEST ACCURACY -->  DEV: 0.56213
BEST PRECISION -->  DEV: 0.37018
BEST RECALL -->  DEV: 0.41455
BEST F1SCORE -->  DEV: 0.37255
TIME PER EPOCH: 10.804154

FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.0325870513916016
DEV ACC: 0.6315104166666667
DEV Precision: 0.5548805162975816
DEV Recall: 0.5378718188907264
DEV F1Score: 0.5264079396338163
BEST ACCURACY -->  DEV: 0.64955
BEST PRECISION -->  DEV: 0.56478
BEST RECALL -->  DEV: 0.55783
BEST F1SCORE -->  DEV: 0.53647
TIME PER EPOCH: 10.679055881500243

Training epoch: 10
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForSequenceClassification: ['predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'sop_classifier.classifier.bias', 'predictions.decoder.bias', 'predictions.bias', 'predictions.decoder.weight', 'predictions.dense.weight', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indi

Training epoch: 1
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 of 497
FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.0653048753738403
DEV ACC: 0.5221354166666666
DEV Precision: 0.35197848328978226
DEV Recall: 0.3723779816413744
DEV F1Score: 0.3425525693594526
BEST ACCURACY -->  DEV: 0.52214
BEST PRECISION -->  DEV: 0.35198
BEST RECALL -->  DEV: 0.37238
BEST F1SCORE -->  DEV: 0.34255
TIME PER EPOCH: 10.0800369

FINSIHED BATCH: 119 of 497
FINSIHED BATCH: 139 of 497
FINSIHED BATCH: 159 of 497
FINSIHED BATCH: 179 of 497
FINSIHED BATCH: 199 of 497
FINSIHED BATCH: 219 of 497
FINSIHED BATCH: 239 of 497
FINSIHED BATCH: 259 of 497
FINSIHED BATCH: 279 of 497
FINSIHED BATCH: 299 of 497
FINSIHED BATCH: 319 of 497
FINSIHED BATCH: 339 of 497
FINSIHED BATCH: 359 of 497
FINSIHED BATCH: 379 of 497
FINSIHED BATCH: 399 of 497
FINSIHED BATCH: 419 of 497
FINSIHED BATCH: 439 of 497
FINSIHED BATCH: 459 of 497
FINSIHED BATCH: 479 of 497
Validation loss per 100 evaluation steps: 1.0491863489151
DEV ACC: 0.6646205357142857
DEV Precision: 0.5858284363923462
DEV Recall: 0.5432364343857207
DEV F1Score: 0.5377811930042861
BEST ACCURACY -->  DEV: 0.66462
BEST PRECISION -->  DEV: 0.58583
BEST RECALL -->  DEV: 0.57185
BEST F1SCORE -->  DEV: 0.55614
TIME PER EPOCH: 10.391606247425079

Training epoch: 10
FINSIHED BATCH: 19 of 497
FINSIHED BATCH: 39 of 497
FINSIHED BATCH: 59 of 497
FINSIHED BATCH: 79 of 497
FINSIHED BATCH: 99 

In [6]:
print(best_dev_acc)

0.670014880952381


In [7]:
print(best_epoch)

9


In [8]:
print(best_dev_precision)

0.6034487199369994


In [9]:
print(best_dev_recall)

0.5805819956456083


In [10]:
print(best_dev_f1score)

0.5663605659062549


In [11]:
print(overall_list_dev_acc)

[[0.5143229166666666, 0.6354166666666667, 0.658110119047619, 0.6809895833333333, 0.6882440476190476, 0.689360119047619, 0.6638764880952381, 0.6752232142857143, 0.6748511904761905, 0.6954985119047619], [0.5690104166666666, 0.6274181547619048, 0.6322544642857143, 0.6655505952380952, 0.6568080357142857, 0.6517857142857143, 0.6462053571428571, 0.6525297619047619, 0.6396949404761905, 0.634672619047619], [0.5846354166666667, 0.5851934523809524, 0.6393229166666667, 0.6415550595238095, 0.638578869047619, 0.63671875, 0.6335565476190476, 0.6246279761904762, 0.6428571428571429, 0.6322544642857143], [0.5621279761904762, 0.6158854166666667, 0.6476934523809524, 0.6458333333333333, 0.6495535714285714, 0.6471354166666667, 0.6378348214285714, 0.6313244047619048, 0.6315104166666667, 0.6283482142857143], [0.5221354166666666, 0.6263020833333333, 0.6419270833333333, 0.6553199404761905, 0.6374627976190476, 0.6488095238095238, 0.6488095238095238, 0.6400669642857143, 0.6646205357142857, 0.670014880952381]]


In [12]:
print(overall_list_dev_precision)

[[0.3908645520462023, 0.5173790606112719, 0.5405026472761658, 0.5490035878210013, 0.5874481363479942, 0.5943913826265385, 0.5875464110469364, 0.5955641680822739, 0.6018026341318027, 0.6354965846332706], [0.37549538840325686, 0.45956683651067876, 0.5042745731681214, 0.551866559733481, 0.5652264695993839, 0.5713848298643468, 0.5771377064006501, 0.5988170687126831, 0.568377814005924, 0.541664230085974], [0.3769153605178526, 0.41986825866777006, 0.5326829172466129, 0.5271535200760452, 0.5151812142896355, 0.5201291792212223, 0.5319847891846592, 0.5024290784204123, 0.5701442357092513, 0.5793518796228466], [0.37017626674182125, 0.5198168309619271, 0.5334056141319244, 0.5296140586187855, 0.5530001100819116, 0.5647762123497416, 0.5546897781471808, 0.5470242495806751, 0.5548805162975816, 0.5750469217407242], [0.35197848328978226, 0.5094406167030402, 0.5065877443481379, 0.5368818215264063, 0.5444865117943264, 0.5590597177099277, 0.583584811385909, 0.5621585555842381, 0.5858284363923462, 0.6034487

In [13]:
print(overall_list_dev_recall)

[[0.3631297581678359, 0.53215398707023, 0.5464813364271914, 0.5475544008127422, 0.5790488731185945, 0.5790118269044214, 0.5864137646800531, 0.6111764012131656, 0.5844366719589934, 0.6243251537211622], [0.4429309328741618, 0.4750325570204771, 0.5199785318062059, 0.5395741352339304, 0.53541031693778, 0.5831492082555634, 0.5628926238327496, 0.5687636686376182, 0.5579239312428443, 0.5374968449646759], [0.44566672414119607, 0.4455360414853587, 0.5514161741059754, 0.5465548277877584, 0.5190911715064814, 0.5447992542005148, 0.5441100831626664, 0.5174811580180472, 0.5630995319164963, 0.5871962973172337], [0.41454506432986654, 0.504038001637135, 0.5419044943152086, 0.5518193072683618, 0.5578307671407724, 0.5506789902655947, 0.5457772182299492, 0.5537392452878893, 0.5378718188907264, 0.560432114371328], [0.3723779816413744, 0.4886595830517214, 0.5257951002370645, 0.5422915733892624, 0.5456283402055461, 0.5718496230997615, 0.5688656596088317, 0.5593558412584148, 0.5432364343857207, 0.580581995645

In [14]:
print(overall_list_dev_f1score)

[[0.3315849281867668, 0.5024800740862939, 0.5217060750589392, 0.5294913869472567, 0.5599697781778302, 0.5670742847241954, 0.5665466039346978, 0.5820030225688625, 0.5704709763601857, 0.6117282513990149], [0.3969121983081862, 0.44099701631396104, 0.4862445424085465, 0.5259354737706694, 0.5243243282088235, 0.5550582541214734, 0.5450398968785969, 0.5640388194261847, 0.5415595156762883, 0.5229900996278213], [0.3963991547754074, 0.40374448751499215, 0.5248620204628031, 0.5193630215639378, 0.5040754296665406, 0.5129462208453363, 0.5199198700149913, 0.49226004276064667, 0.5436528873167151, 0.562065930482394], [0.37255282554063934, 0.4831325901705084, 0.5213274468085407, 0.5236459558659409, 0.5359130888186889, 0.5364656740186043, 0.5323603975109373, 0.5291762124394609, 0.5264079396338163, 0.5421587232948636], [0.3425525693594526, 0.4671561921272206, 0.49598508151416143, 0.509786031812416, 0.5189667581977899, 0.5399087109835541, 0.5561376673183266, 0.5414920554108273, 0.5377811930042861, 0.56636

In [15]:
#The best model is 0