# Quran_QA Ensemble 

In the previous notebook, __[quran_qa_v1](https://github.com/EmanElrefai/Quran_QA/blob/main/quran_qa_v1.ipynb)__, I mentioned that many different pretrained models were tried with different configuration parameters. I chose the models with the highest results, whose pRR lies in the range [48.4 to 52.9].

## The best models configuration.


## Imports

In [1]:
#Reading and spliting the data
import pandas as pd
from sklearn.model_selection import KFold
import numpy as np

#
import joblib
import os
import torch
import json
from datasets import Dataset
from transformers import AutoModelForQuestionAnswering
import os
from transformers import pipeline
from scipy.special import softmax
import torch
from transformers import AutoTokenizer
import json
import joblib
from transformers import PreTrainedTokenizerFast
import os


  from .autonotebook import tqdm as notebook_tqdm


## Loading the Testing Data

In [2]:
dev_data_path = r'./datasets/qrcd_v1.1_dev.jsonl'

def read_data(datapath):
    """Read a JSON Lines file and return its records as a list.

    Args:
        datapath: Path of the ``.jsonl`` file. Every non-blank line must hold
            one complete JSON document.

    Returns:
        list: The decoded JSON values, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    # Binary mode is kept on purpose: json.loads detects the encoding of
    # bytes input by itself, exactly as the original implementation relied on.
    with open(datapath, 'rb') as fp:
        # Stream line by line instead of materialising the whole file first.
        for json_str in fp:
            # Blank / whitespace-only lines (e.g. a trailing empty line) are
            # not records; json.loads would raise on them.
            if not json_str.strip():
                continue
            data.append(json.loads(json_str))
    return data
val_data=read_data(dev_data_path)

## Data spliting
The data is split into 8 parts. Each part holds a different 80% of the data, because we have 8 tuning configurations for the pretrained models.

In [3]:
def split_multi_answers(data_list):
    """Flatten samples so that every returned sample carries exactly one answer.

    Args:
        data_list: Iterable of sample dicts. Each needs an ``'answers'`` list;
            samples that get split also need ``'passage'``, ``'pq_id'``,
            ``'question'``, ``'surah'`` and ``'verses'``.

    Returns:
        list: Single-answer samples are passed through as the very same dict
        objects. A sample with several answers becomes one new dict per
        answer, holding only the six keys listed above. A sample whose
        ``'answers'`` list is empty contributes nothing.
    """
    flattened = []
    for record in data_list:
        answer_list = record['answers']
        if len(answer_list) == 1:
            # Already in the target shape: keep the original object untouched.
            flattened.append(record)
        else:
            # One copy of the question/passage context per individual answer.
            flattened.extend(
                {
                    'answers': [single_answer],
                    'passage': record['passage'],
                    'pq_id': record['pq_id'],
                    'question': record['question'],
                    'surah': record['surah'],
                    'verses': record['verses'],
                }
                for single_answer in answer_list
            )
    return flattened

# Flatten the multi-answer samples of the validation (dev) set so that each
# record holds a single answer, then load the records into a DataFrame.
val_data= split_multi_answers(val_data)
val_datadf = pd.DataFrame(val_data)


## Ensemble

In [4]:
# Model id whose pretrained tokenizer is used to encode the question/passage pairs.
token_transformer_name = "aubmindlab/bert-base-arabertv02"

# Load the pretrained tokenizer for that model id.
tokenizer = AutoTokenizer.from_pretrained(token_transformer_name) # per the original note: yields input ids and attention mask


In [5]:
def read_model_file(model_path, model_name):
    """Load one serialized model from disk with joblib.

    Args:
        model_path: Directory that holds the model file. A trailing path
            separator is optional.
        model_name: File name of the model inside ``model_path``.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    # os.path.join gives the same path as before when model_path ends with a
    # separator, and a correct one when it does not (plain concatenation
    # glued directory and file name together in that case).
    full_path = os.path.join(model_path, model_name)
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code - only load model files from a trusted source.
    return joblib.load(full_path)

In [9]:
# Directory that is scanned for serialized models.
models_path = './/models//'
# Every file found in models_path is loaded and collected here, in the order
# returned by os.listdir.
qa_models = []
for model_name in os.listdir(models_path):
    # Echo the file name so the loaded models show up in the cell output.
    print(model_name)
    
    qa_model = read_model_file(models_path, model_name)
    qa_models.append(qa_model)
    
# Earlier single-model load, kept for reference:
#qa_model = joblib.load('.//models//stars_run12//stars_run12_model.hd5')

stars_run00.hd5
stars_run01.hd5
stars_run02.hd5
stars_run03.hd5
stars_run04.hd5
stars_run05.hd5
stars_run06.hd5
stars_run07.hd5


In [11]:
def predict_quran_qav1(passage , question):
    """Answer ``question`` from ``passage`` using the summed logits of all ``qa_models``.

    The start logits and the end logits of every model are added up. The
    top-5 start positions are paired rank-by-rank with the top-5 end
    positions (1st start with 1st end, 2nd with 2nd, ...), each pair is
    decoded back to text, and non-empty answers whose score exceeds 0.1 are
    returned.

    Args:
        passage: Context text in which the answer is searched.
        question: Question text.

    Returns:
        list[dict]: One dict per kept answer with the keys ``'answer'``
        (decoded text), ``'rank'`` (1-based position in the returned list)
        and ``'score'`` (float). The list may be empty, in which case a
        notice is printed.

    Raises:
        ValueError: If ``qa_models`` is empty.
    """
    if not qa_models:
        raise ValueError("qa_models is empty; load at least one model first")

    score_threshold = 0.1
    ranked_answers = []

    # Encode the (question, passage) pair; the tensors are moved to the GPU.
    inputs = tokenizer(question, passage, add_special_tokens=True, return_tensors="pt").to("cuda")

    # Token ids of the encoded pair, used to decode answer spans back to text.
    input_ids = inputs["input_ids"].tolist()[0]

    # Sum the logits over all models. Inference only, so autograd is disabled
    # to avoid keeping one computation graph per model alive. The first
    # model's logits are cloned so the accumulation never mutates a model
    # output in place.
    sum_start_answer_scores = None
    sum_end_answer_scores = None
    with torch.no_grad():
        for model in qa_models:
            output = model(**inputs)
            if sum_start_answer_scores is None:
                sum_start_answer_scores = output.start_logits.clone()
                sum_end_answer_scores = output.end_logits.clone()
            else:
                sum_start_answer_scores += output.start_logits
                sum_end_answer_scores += output.end_logits

    # Best candidate positions; never ask for more candidates than tokens.
    top_k = min(5, sum_start_answer_scores.shape[-1])
    top_starts = torch.topk(sum_start_answer_scores, top_k)
    top_ends = torch.topk(sum_end_answer_scores, top_k)

    # Softmax restricted to the top-k logits of each side.
    answer_starts_probs = softmax(top_starts.values.detach().cpu().numpy())
    answer_ends_probs = softmax(top_ends.values.detach().cpu().numpy())

    answer_starts = top_starts.indices.tolist()[0]
    # +1 turns the inclusive end token index into an exclusive slice bound.
    answer_ends = (top_ends.indices + 1).tolist()[0]

    # Score of the i-th pair: softmax over the element-wise product of the
    # start and end probabilities (scoring rule kept as in the original).
    full_probs = softmax(np.multiply(answer_starts_probs, answer_ends_probs))[0]

    # Decode each (start, end) pair back to words.
    for position, (answer_start, answer_end) in enumerate(zip(answer_starts, answer_ends)):
        answer = tokenizer.convert_tokens_to_string(
            tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
        )

        # An end that does not lie after the start decodes to an empty string.
        if answer.strip() == '':
            continue

        if full_probs[position] > score_threshold:
            ranked_answers.append({
                'answer': answer,
                'rank': len(ranked_answers) + 1,
                'score': float(full_probs[position]),
            })

    # This is just for checking the ranked_answers if empty.
    if len(ranked_answers) == 0:
        print(' Empty Answer List')
    return ranked_answers

In [6]:
torch.cuda.empty_cache()

def predict_quran_qa(passage , question):
    """Answer ``question`` from ``passage`` by scoring all top start/end combinations.

    The start logits and the end logits of every model in ``qa_models`` are
    added up. Each of the top-5 start positions is combined with each of the
    top-5 end positions; a combination is scored with the product of its
    start and end probabilities, and the best five valid spans are returned.

    Args:
        passage: Context text in which the answer is searched.
        question: Question text.

    Returns:
        list[dict]: At most five dicts with the keys ``'answer'`` (decoded
        text), ``'rank'`` (1-based, best first) and ``'score'`` (float),
        sorted by descending score.

    Raises:
        ValueError: If ``qa_models`` is empty or no valid answer span exists.
    """
    if not qa_models:
        raise ValueError("qa_models is empty; load at least one model first")

    min_answer_length = 1
    number_of_required_answers = 5
    ranked_answers = []

    # Encode the (question, passage) pair; the tensors are moved to the GPU.
    inputs = tokenizer(question, passage, add_special_tokens=True, return_tensors="pt").to("cuda")

    # Token ids of the encoded pair, used to decode answer spans back to text.
    input_ids = inputs["input_ids"].tolist()[0]

    # Sum the logits over all models. Inference only, so autograd is disabled
    # to avoid keeping one computation graph per model alive. The first
    # model's logits are cloned so the accumulation never mutates a model
    # output in place.
    sum_start_answer_scores = None
    sum_end_answer_scores = None
    with torch.no_grad():
        for model in qa_models:
            output = model(**inputs)
            if sum_start_answer_scores is None:
                sum_start_answer_scores = output.start_logits.clone()
                sum_end_answer_scores = output.end_logits.clone()
            else:
                sum_start_answer_scores += output.start_logits
                sum_end_answer_scores += output.end_logits

    # Best candidate positions; never ask for more candidates than tokens.
    top_k = min(5, sum_start_answer_scores.shape[-1])
    top_starts = torch.topk(sum_start_answer_scores, top_k)
    top_ends = torch.topk(sum_end_answer_scores, top_k)

    # Softmax restricted to the top-k logits of each side.
    answer_starts_probs = softmax(top_starts.values.detach().cpu().numpy())[0]
    answer_ends_probs = softmax(top_ends.values.detach().cpu().numpy())[0]

    answer_starts = top_starts.indices.tolist()[0]
    # +1 turns the inclusive end token index into an exclusive slice bound.
    answer_ends = (top_ends.indices + 1).tolist()[0]

    for i, answer_start in enumerate(answer_starts):
        for j, answer_end in enumerate(answer_ends):
            # answer_end is exclusive, so the span holds answer_end - answer_start
            # tokens. The previous "+ 1" in this check let zero-length spans
            # through, which put empty answers into the ranking.
            if answer_end - answer_start < min_answer_length:
                continue
            answer = tokenizer.convert_tokens_to_string(
                tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
            )
            ranked_answers.append({
                'answer': answer,
                'rank': len(ranked_answers) + 1,
                'score': float(answer_starts_probs[i] * answer_ends_probs[j]),
            })

    # Sort by probability (stable, so ties keep their insertion order) and
    # keep only the required number of answers.
    ranked_answers.sort(key=lambda x: x['score'], reverse=True)
    ranked_answers = ranked_answers[:number_of_required_answers]

    # Reset the rank to the position after sorting.
    for i, answer in enumerate(ranked_answers):
        answer['rank'] = i + 1

    if len(ranked_answers) == 0:
        # ValueError is still a BaseException subclass, so existing handlers
        # keep working, but it no longer bypasses ``except Exception``.
        raise ValueError("empty list ")

    return ranked_answers
    



    

In [10]:
'''
# Pass the question and the passage to the tokenizer
passage= 'سبحان الذي أسرى بعبده ليلا من المسجد الحرام إلى المسجد الأقصى الذي باركنا حوله لنريه من آياتنا إنه هو السميع البصير . واتينا موسى الكتاب وجعلناه هدى لبني إسرائيل أن لا تتخذوا من دوني وكيلا .'
question='لماذا آسرى الله برسوله صلى الله عليه وسلم ؟'
models_outputs=[]
inputs = tokenizer(question, passage, add_special_tokens=True, return_tensors="pt").to("cuda") 

# Obtain the input_ids from inputs
input_ids = inputs["input_ids"].tolist()[0]

for model in qa_models:
    # predict the inputs from the qa_model
    models_outputs.append(model(**inputs))

print(models_outputs[0].start_logits.size())
print(models_outputs[0].end_logits.size())

print(models_outputs[0].start_logits)   
print(models_outputs[0].end_logits)

print()
print(models_outputs[1].start_logits.size())
print(models_outputs[1].end_logits.size())

sum_start_answer = models_outputs[1].start_logits
sum_end_answer = models_outputs[1].end_logits
print(models_outputs[1].start_logits)   
print(models_outputs[1].end_logits)


print()
print(models_outputs[2].start_logits.size())
print(models_outputs[2].end_logits.size())

sum_start_answer = models_outputs[2].start_logits
sum_end_answer = models_outputs[2].end_logits
print(models_outputs[2].start_logits)   
print(models_outputs[2].end_logits)

print()
print(models_outputs[3].start_logits.size())
print(models_outputs[3].end_logits.size())

sum_start_answer = models_outputs[3].start_logits
sum_end_answer = models_outputs[3].end_logits
print(models_outputs[3].start_logits)   
print(models_outputs[3].end_logits)

print()
print(models_outputs[4].start_logits.size())
print(models_outputs[4].end_logits.size())

sum_start_answer = models_outputs[4].start_logits
sum_end_answer = models_outputs[4].end_logits
print(models_outputs[4].start_logits)   
print(models_outputs[4].end_logits)

print()
print(models_outputs[5].start_logits.size())
print(models_outputs[5].end_logits.size())

sum_start_answer = models_outputs[5].start_logits
sum_end_answer = models_outputs[5].end_logits
print(models_outputs[5].start_logits)   
print(models_outputs[5].end_logits)

print()
print(models_outputs[6].start_logits.size())
print(models_outputs[6].end_logits.size())

sum_start_answer = models_outputs[6].start_logits
sum_end_answer = models_outputs[6].end_logits
print(models_outputs[6].start_logits)   
print(models_outputs[6].end_logits)

print()
print(models_outputs[7].start_logits.size())
print(models_outputs[7].end_logits.size())

sum_start_answer = models_outputs[7].start_logits
sum_end_answer = models_outputs[7].end_logits
print(models_outputs[7].start_logits)   
print(models_outputs[7].end_logits)


sum_start_answer = models_outputs[0].start_logits
sum_end_answer = models_outputs[0].end_logits


for i in range(1, len(models_outputs)):
    #print(i)
    sum_start_answer += (models_outputs[i].start_logits)
    sum_end_answer += (models_outputs[i].end_logits)
print('summation \n')    
print(sum_start_answer)
print(sum_end_answer)

print()
print(torch.topk(sum_start_answer , 5))
print(torch.topk(sum_end_answer , 5))

'''

'\n# Pass the question and the passage to the tokenizer\npassage= \'سبحان الذي أسرى بعبده ليلا من المسجد الحرام إلى المسجد الأقصى الذي باركنا حوله لنريه من آياتنا إنه هو السميع البصير . واتينا موسى الكتاب وجعلناه هدى لبني إسرائيل أن لا تتخذوا من دوني وكيلا .\'\nquestion=\'لماذا آسرى الله برسوله صلى الله عليه وسلم ؟\'\nmodels_outputs=[]\ninputs = tokenizer(question, passage, add_special_tokens=True, return_tensors="pt").to("cuda") \n\n# Obtain the input_ids from inputs\ninput_ids = inputs["input_ids"].tolist()[0]\n\nfor model in qa_models:\n    # predict the inputs from the qa_model\n    models_outputs.append(model(**inputs))\n\nprint(models_outputs[0].start_logits.size())\nprint(models_outputs[0].end_logits.size())\n\nprint(models_outputs[0].start_logits)   \nprint(models_outputs[0].end_logits)\n\nprint()\nprint(models_outputs[1].start_logits.size())\nprint(models_outputs[1].end_logits.size())\n\nsum_start_answer = models_outputs[1].start_logits\nsum_end_answer = models_outputs[1].end_

In [12]:
# Run the ensemble on every row of the dev DataFrame and store the ranked
# answers under the row's pq_id. Rows produced by split_multi_answers share
# their pq_id, so they write to the same key.
result ={}
for index, sample in val_datadf.iterrows():
    result[sample['pq_id']] = predict_quran_qav1(sample['passage'], sample['question'])

In [13]:
import json 
# Write the predictions to the run file as UTF-8 JSON. ensure_ascii=False
# writes non-ASCII characters as-is instead of \uXXXX escapes.
with open('stars_run100.json', 'w' , encoding= 'utf8') as fp:
    json.dump(result , fp, ensure_ascii=False)

In [14]:
!python ./evaluation/quranqa22_eval.py --gold_answers_file=./datasets/qrcd_v1.1_dev.jsonl --run_file=stars_run100.json

Loaded 109 records from ./datasets/qrcd_v1.1_dev.jsonl
The run file is correct.
{"pRR": 0.6114219515081039, "exact_match": 0.30275229357798167, "f1": 0.5737629712427744}





 train:
[ 32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49
  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67
  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103
 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
 122 123 124 125 126 127]

 test:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31]

 train:
[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  64  65  66  67
  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103
 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
 122 123 124 125 126 127]

 test:
[32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
 56 57 58 59