In [None]:
import random
import os

## Evaluating three models fine-tuned on the corrected Telugu translation of the Stanford Question Answering Dataset (SQuAD)

## The test set consists of 1000 samples

## Evaluation metrics: character-level F1 score and exact match


In [None]:

def compute_f1score(sentence1, sentence2):
    """Return the character-set F1 score between a reference and a prediction.

    Both strings are reduced to their sets of distinct characters, so repeated
    characters and character order are ignored.

    Args:
        sentence1: Reference (ground-truth) string; recall is measured against it.
        sentence2: Predicted string; precision is measured against it.

    Returns:
        The harmonic mean of precision and recall, in [0, 1]. Returns 0.0 when
        either string is empty or the two strings share no character.
    """
    reference_chars = set(sentence1)
    predicted_chars = set(sentence2)

    common_chars = reference_chars & predicted_chars
    if not common_chars:
        # Covers empty inputs and disjoint character sets; also avoids 0/0 below.
        return 0.0

    # Precision is the share of *predicted* characters that are correct.
    # (Dividing by the union instead would yield the Jaccard index, not precision.)
    precision = len(common_chars) / len(predicted_chars)
    # Recall is the share of *reference* characters that were recovered.
    recall = len(common_chars) / len(reference_chars)

    return 2 * (precision * recall) / (precision + recall)


In [None]:
def exact_match(string1 , string2):
    """Return 1 if the two strings are identical once spaces are removed, else 0.

    Only the plain space character (" ") is stripped; other whitespace such as
    tabs or newlines still takes part in the comparison.
    """
    stripped_first = string1.replace(" ", "")
    stripped_second = string2.replace(" ", "")

    # String equality already implies equal length and equal characters.
    return 1 if stripped_first == stripped_second else 0


In [None]:
# Enable Git LFS for the `git clone` commands used later in this notebook.
!git lfs install
# Install the Hugging Face transformers library (tokenizers and BERT models).
!pip install transformers

# NOTE(review): each `!` line runs in its own subshell, so this variable
# assignment does not persist to later commands or cells. Confirm whether
# skipping LFS downloads was ever intended, since the cloned repositories are
# later used as local model paths.
!GIT_LFS_SKIP_SMUDGE=1

In [None]:
import requests
import json
import torch
import torch.nn as nn
import os
from tqdm import tqdm
from transformers import BertModel, BertTokenizerFast, AdamW
# AutoTokenizer, AutoModelForQuestionAnswering, BertTokenizer, BertForQuestionAnswering
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ExponentialLR
import matplotlib.pyplot as plt

In [None]:
## Clone the three pretrained checkpoints from the Hugging Face Hub.
## Presumably these clones land in /kaggle/working and back the local
## MODEL_PATH_* directories used to load the encoders — confirm the working directory.
!git clone 'https://huggingface.co/ai4bharat/indic-bert'
!git clone 'https://huggingface.co/l3cube-pune/telugu-bert'
!git clone 'https://huggingface.co/bert-base-multilingual-cased'


In [None]:
# Local directories holding the pretrained encoders, one per model under evaluation.
MODEL_PATH_L3Cube, MODEL_PATH_indicBert, MODEL_PATH_mBert = (
    '/kaggle/working/telugu-bert',
    '/kaggle/working/indic-bert',
    '/kaggle/working/bert-base-multilingual-cased',
)

In [None]:
from transformers import AutoModel,AutoTokenizer, AutoModelForMaskedLM

# Tokenizers are fetched by Hugging Face model ID. Each tokenizer must share its
# vocabulary with the encoder it is paired with at inference time.
tokenizer_l3cube = AutoTokenizer.from_pretrained("l3cube-pune/telugu-bert")
# Use the *cased* mBERT tokenizer: the mBERT encoder in this notebook is loaded
# from bert-base-multilingual-cased, and the uncased checkpoint has a different
# vocabulary, so its token IDs would index the wrong embeddings.
# NOTE(review): confirm the fine-tuning run also used the cased tokenizer.
tokenizer_mbert = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
tokenizer_indic = AutoTokenizer.from_pretrained('ai4bharat/indic-bert')

In [None]:
# Load the three pretrained encoders from their local directories, in the order
# L3Cube Telugu BERT, IndicBERT, multilingual BERT.
# NOTE(review): ai4bharat/indic-bert appears to be published as an ALBERT
# checkpoint — confirm that BertModel is the class used at fine-tuning time,
# since the fine-tuned state dicts loaded later must match this architecture.
bert_model_l3cube, bert_model_indic, bert_model_mbert = map(
    BertModel.from_pretrained,
    (MODEL_PATH_L3Cube, MODEL_PATH_indicBert, MODEL_PATH_mBert),
)

In [None]:
class QAModel(nn.Module):
    """Span-extraction question-answering head on top of a BERT-style encoder.

    The last and third-to-last encoder hidden states are concatenated per token
    and passed through a small MLP that emits two scores per token: one for the
    answer start and one for the answer end.

    Args:
        bert_model: Encoder called as
            ``bert_model(input_ids, attention_mask=..., token_type_ids=...,
            output_hidden_states=True)`` whose output exposes the tuple of
            per-layer hidden states at index 2.
    """

    def __init__(self, bert_model):
        super(QAModel, self).__init__()
        self.bert = bert_model

        # Take the encoder width from its config when available so encoders of
        # other sizes work too; fall back to 768, the previously hard-coded width.
        hidden_size = getattr(getattr(bert_model, "config", None), "hidden_size", 768)
        concat_size = hidden_size * 2  # two hidden-state layers are concatenated

        self.drop_out = nn.Dropout(0.1)
        self.l1 = nn.Linear(concat_size, concat_size)
        self.l2 = nn.Linear(concat_size, 2)
        # The layers are registered both as attributes and inside the Sequential,
        # so their weights appear in the state dict under both prefixes. Keep
        # this layout: saved checkpoints depend on these exact keys.
        self.linear_relu_stack = nn.Sequential(
            self.drop_out,
            self.l1,
            nn.LeakyReLU(),
            self.l2
        )

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return ``(start_logits, end_logits)``, each with one score per input token."""
        model_output = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, output_hidden_states=True)
        hidden_states = model_output[2]

        # Concatenate the last and third-to-last layers along the feature axis.
        # Both start and end logits are computed from this joint representation
        # through the shared MLP; neither logit is tied to a single layer.
        out = torch.cat((hidden_states[-1], hidden_states[-3]), dim=-1)
        logits = self.linear_relu_stack(out)

        # Split the trailing size-2 axis into separate start / end scores.
        start_logits, end_logits = logits.split(1, dim=-1)

        return start_logits.squeeze(-1), end_logits.squeeze(-1)

In [None]:
max_length = 512

def get_answer(question, context, model, tokenizer):
    """Predict the answer span for ``question`` within ``context``.

    Args:
        question: Question text.
        context: Passage expected to contain the answer.
        model: Module that, given the tokenizer's encoded fields as keyword
            arguments, returns ``(start_logits, end_logits)``.
        tokenizer: Tokenizer providing ``encode_plus``,
            ``convert_ids_to_tokens`` and ``convert_tokens_to_string``.

    Returns:
        The decoded text of the tokens between the highest-scoring start
        position (inclusive) and the highest-scoring end position (exclusive).
        The string is empty when the predicted end does not lie after the start.

    Side effects:
        Switches ``model`` to evaluation mode and leaves it there.
    """
    # Place the inputs on whatever device the model lives on, rather than
    # relying on a notebook-level `device` variable being defined beforehand.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    inputs = tokenizer.encode_plus(
        question,
        context,
        return_tensors='pt',
        max_length=max_length,
        truncation=True,
    ).to(target_device)

    # Evaluation mode disables dropout; without it predictions are stochastic.
    model.eval()
    with torch.no_grad():
        output_start, output_end = model(**inputs)

    answer_start = int(torch.argmax(output_start))
    answer_end = int(torch.argmax(output_end))

    # NOTE(review): the slice excludes the token at `answer_end`; confirm this
    # matches the end-position convention used when the model was fine-tuned.
    token_ids = inputs['input_ids'][0][answer_start:answer_end]
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(token_ids))

    return answer

In [None]:
# Wrap each pretrained encoder in its own QA head
# (order: L3Cube Telugu BERT, IndicBERT, multilingual BERT).
model_l3cube, model_indic, model_mbert = (
    QAModel(encoder)
    for encoder in (bert_model_l3cube, bert_model_indic, bert_model_mbert)
)

In [None]:
# Prefer the GPU when one is visible to PyTorch; otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

In [None]:
# Move each QA model to the selected device and switch it to inference mode.
# A freshly constructed nn.Module starts in training mode, which would leave the
# QA head's dropout active and make the evaluation non-deterministic.
# Looping (instead of ending the cell on a bare `.to(...)` expression) also
# avoids dumping a full model repr as the cell output.
for _qa_model in (model_l3cube, model_indic, model_mbert):
    _qa_model.to(device).eval()

In [None]:
# Restore the fine-tuned weights for each model.
# `map_location=device` remaps the stored tensors onto the active device, so a
# checkpoint saved from a GPU run still loads on a CPU-only runtime.
# NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
model_l3cube.load_state_dict(torch.load('/kaggle/input/l3cube-teluguqafinetuned/QA_finutunemodel.pt', map_location=device))
model_indic.load_state_dict(torch.load('/kaggle/input/indicbert-teluguqafinetuned/QA_finutunemodel.pt', map_location=device))
model_mbert.load_state_dict(torch.load('/kaggle/input/mbert-teluguqafinetuned/QA_finutunemodel.pt', map_location=device))

In [None]:
def _read_lines(path):
    """Return the lines of a UTF-8 text file with their trailing newlines removed."""
    # The files hold Telugu text, so decode explicitly as UTF-8 instead of
    # relying on the platform's default encoding.
    with open(path, 'r', encoding='utf-8') as f_in:
        return [line.rstrip('\n') for line in f_in]


# The three files are read as parallel lists: entry i of each list is used
# together as one test sample by the cells below.
actual_answers = _read_lines('/kaggle/input/code-and-data/two_epochs/proj_dataset/test_data/real_ans_tel_c.txt')
questions = _read_lines('/kaggle/input/code-and-data/two_epochs/proj_dataset/test_data/real_que_tel.txt')
context = _read_lines('/kaggle/input/code-and-data/two_epochs/proj_dataset/test_data/real_con_tel.txt')

# The three counts should be identical; a mismatch means the files are misaligned.
print(len(actual_answers))
print(len(questions))
print(len(context))

In [None]:
# Collect each model's predicted answer for every test sample.
# The three lists stay index-aligned with `questions` / `context`.
answers_l3Cube, answers_indic, answers_mbert = [], [], []

for sample_idx in range(len(context)):
    question_text = questions[sample_idx]
    context_text = context[sample_idx]

    # Query all three models before storing anything, one tokenizer per model.
    predictions = (
        get_answer(question_text, context_text, model_l3cube, tokenizer_l3cube),
        get_answer(question_text, context_text, model_indic, tokenizer_indic),
        get_answer(question_text, context_text, model_mbert, tokenizer_mbert),
    )

    for bucket, prediction in zip((answers_l3Cube, answers_indic, answers_mbert), predictions):
        bucket.append(prediction)
    
    
    
    

In [None]:
# Mean character F1 of the L3Cube model: score every sample, then average.
Scorelist = []
for sample_idx in range(len(context)):
    f1Score = compute_f1score(actual_answers[sample_idx], answers_l3Cube[sample_idx])
    Scorelist.append(f1Score)

# Accumulate left to right from 0.0, exactly as a running total would.
totalScore = 0.0
for sample_score in Scorelist:
    totalScore += sample_score

print(f'The avg f1Score of l3_Cube is {totalScore / len(Scorelist)}')

In [None]:
# Mean character F1 of the IndicBERT model: score every sample, then average.
Scorelist = []
for sample_idx in range(len(context)):
    f1Score = compute_f1score(actual_answers[sample_idx], answers_indic[sample_idx])
    Scorelist.append(f1Score)

# Accumulate left to right from 0.0, exactly as a running total would.
totalScore = 0.0
for sample_score in Scorelist:
    totalScore += sample_score

print(f'The avg f1Score of indic_Bert is {totalScore / len(Scorelist)}')

In [None]:
# Mean character F1 of the multilingual BERT model: score every sample, then average.
Scorelist = []
for sample_idx in range(len(context)):
    f1Score = compute_f1score(actual_answers[sample_idx], answers_mbert[sample_idx])
    Scorelist.append(f1Score)

# Accumulate left to right from 0.0, exactly as a running total would.
totalScore = 0.0
for sample_score in Scorelist:
    totalScore += sample_score

print(f'The avg f1Score of mbert is {totalScore / len(Scorelist)}')

In [None]:
# Exact-match rate of the L3Cube model: fraction of samples whose prediction
# equals the reference answer once spaces are ignored.
Scorelist = [
    exact_match(actual_answers[sample_idx], answers_l3Cube[sample_idx])
    for sample_idx in range(len(context))
]
# The flags are 0/1 integers; adding to 0.0 keeps the total a float.
totalScore = 0.0 + sum(Scorelist)

print(f'The exact_match score of l3_Cube is {totalScore / len(Scorelist)}')

In [None]:
# Exact-match rate of the IndicBERT model: fraction of samples whose prediction
# equals the reference answer once spaces are ignored.
Scorelist = [
    exact_match(actual_answers[sample_idx], answers_indic[sample_idx])
    for sample_idx in range(len(context))
]
# The flags are 0/1 integers; adding to 0.0 keeps the total a float.
totalScore = 0.0 + sum(Scorelist)

print(f'The exact_match score of indic_Bert is {totalScore / len(Scorelist)}')

In [None]:
# Exact-match rate of the multilingual BERT model: fraction of samples whose
# prediction equals the reference answer once spaces are ignored.
Scorelist = []
totalScore = 0.0
for sample_idx in range(len(context)):
  score = exact_match(actual_answers[sample_idx] , answers_mbert[sample_idx])
  # Store this sample's exact-match flag. Appending `f1Score` here would record
  # a stale value left over from the F1 cells instead of the exact-match result.
  Scorelist.append(score)
  totalScore += score

print(f'The exact_match of mbert is {totalScore / len(Scorelist)}')