<a href="https://colab.research.google.com/github/PremalMatalia/An_Ensemble_Model_with_Semantic_Awareness_-_Answer_Verification_for_Q-A/blob/main/An_Ensemble_Model_with_Semantic_Awareness_%26_Answer_Verification_for_Q%26A.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install required Libraries

In [None]:
! pip install datasets transformers gensim pyemd sentencepiece 
! pip install tqdm==4.61.1
# Installs the Cloud TPU client and the PyTorch/XLA 1.9 wheel (built for CPython 3.7, Linux x86_64) to use PyTorch on Cloud TPUs
!pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl

Collecting torch-xla==1.9
  Using cached https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl (149.9 MB)


## Import Libraries

***Notes:***


> '***Ensemble_model_configuration.py***' contains all the model and training configuration values

> Set the mount point and configuration file path below



In [None]:
# Where Google Drive is mounted, and the Drive folder that holds
# 'Ensemble_model_configuration.py' (derived from the mount point).
mount_path = '/content/gdrive'
config_path = f'{mount_path}/My Drive/Colab_Notebooks/Master_Project/Final'

In [None]:
## If Google Colab is used then mount gdrive
# NOTE(review): `google.colab` is presumably only importable inside a Colab runtime,
# so this cell will fail elsewhere -- confirm before running locally.
from google.colab import drive
import sys 
# Mount Google Drive at `mount_path`, requesting a remount on every run.
drive.mount(mountpoint=mount_path,force_remount=True)

# Put the configuration folder first on the module search path so that the
# import below resolves to the file stored under `config_path`.
sys.path.insert(0,config_path)
import Ensemble_model_configuration as conf

Mounted at /content/gdrive


In [None]:
from dataclasses import dataclass, field
from typing import Optional
from tqdm.auto import tqdm
from tqdm.contrib.concurrent import process_map
from datasets import load_dataset, load_metric
import transformers
from transformers import AutoTokenizer,AutoModelForQuestionAnswering,TrainingArguments,Trainer
from transformers import default_data_collator,DataCollatorWithPadding
from transformers import AdamW,get_scheduler,EvalPrediction
import sentencepiece

import torch
from torch.utils.data import DataLoader
import collections
import numpy as np
import pandas as pd
import csv
import pdb #For debugging purpose...
import gc

###To use TPU with Pytorch
import torch_xla.core.xla_model as xm 
import torch_xla.distributed.xla_multiprocessing as xmp
import torch_xla.distributed.parallel_loader as pl

import warnings
warnings.filterwarnings("ignore")



### ***Model Arguments class***: 
Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.

In [None]:
@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.

    Only `model_checkpoint` is required; every other field has a default.
    The `help` entry in each field's metadata describes what the field controls.
    """

    model_checkpoint: str = field(
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"})

    token_checkpoint: Optional[str] = field(default=None, 
        metadata={"help": "Pretrained tokenizer name or path if not the same as model_checkpoint"})

    squad_v2: bool = field(default=False, 
        metadata={"help": "If true, some of the examples do not have an answer."})

    max_length: int = field(default=512,
        metadata={"help": "The maximum total input sequence length after tokenization. Sequences longer " 
                  "than this will be truncated, sequences shorter will be padded."},)
        
    doc_stride: int = field(default=128,
        metadata={"help": "When splitting up a long document into chunks, how much stride to take between chunks."},)
    
    batch_size: int = field(default=16,
        metadata={"help": "Batch size to train"})

    n_best_size: int = field(default=20,
        metadata={"help": "The total number of n-best predictions to generate when looking for an answer."},)
    
    max_answer_length: int = field(default=30,
        metadata={"help": "The maximum length of an answer that can be generated. This is needed because the start "
                  "and end predictions are not conditioned on one another."},)
    
    min_null_score: float = field(default=0.0,
        metadata={"help": "The threshold used to select the null answer: if the best answer has a score that is less than "
                  "the score of the null answer minus this threshold, the null answer is selected for this example. "
                  "Only useful when `squad_v2=True`."},)
    
    pad_side: str = field(default="right",
        metadata={"help": "Padding side determines if we do (question|context) or (context|question)"})
    
    # The threshold is compared against (null-answer score - best-answer score),
    # which is a float, so it is typed as float; integer values such as -3
    # remain valid and compare equal to the default.
    NA_threshold: float = field(default=-3.0,
        metadata={"help": "Threshod to decide if answer is null based on difference between CLS Score and predicted answer score"})

As part of the training-feature preparation:

at the end of the process, each encoded feature (question and context pair) of the training dataset has the start and end token positions of its answer added as labels.

In [None]:
def prepare_train_features(examples):
    """Tokenize a batch of examples and label each resulting feature with its answer span.

    Reads the module-level `tokenizer` and `model_args`.

    Args:
        examples: batch with "question", "context" and "answers" columns; each
            entry of "answers" has "answer_start" and "text" lists.

    Returns:
        The tokenizer output with "overflow_to_sample_mapping" and
        "offset_mapping" removed and "start_positions" / "end_positions" added.
        Features whose span does not contain the answer (or whose example has
        no answer) are labeled with the CLS token index.
    """
    # Tokenize our examples with truncation and padding, but keep the overflows using a stride. This results
    # in one example possible giving several features when a context is long, each of those features having a
    # context that overlaps a bit the context of the previous feature.

    # `pad_side` is a string, so testing it for truthiness treated "left" exactly like "right".
    # Resolve it once: any truthy value other than "left" keeps the (question | context) order.
    pad_side = model_args.pad_side
    pad_on_right = bool(pad_side) and str(pad_side).lower() != "left"
    # Sequence id of the context inside each tokenized pair.
    context_sequence_id = 1 if pad_on_right else 0

    # NOTE: questions are not left-stripped here; the caller filters out the one
    # training example whose question starts with a long run of whitespace.
    tokenized_examples = tokenizer(
        examples["question" if pad_on_right else "context"],
        examples["context" if pad_on_right else "question"],
        truncation="only_second" if pad_on_right else "only_first",
        max_length=model_args.max_length,
        stride=model_args.doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # Since one example might give us several features if it has a long context, we need a map from a feature to
    # its corresponding example. This key gives us just that.
    sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")
    # The offset mappings will give us a map from token to character position in the original context. This will
    # help us compute the start_positions and end_positions.
    offset_mapping = tokenized_examples.pop("offset_mapping")

    # Let's label those examples!
    tokenized_examples["start_positions"] = []
    tokenized_examples["end_positions"] = []

    for i, offsets in enumerate(offset_mapping):
        # We will label impossible answers with the index of the CLS token.
        input_ids = tokenized_examples["input_ids"][i]
        cls_index = input_ids.index(tokenizer.cls_token_id)

        # Grab the sequence corresponding to that example (to know what is the context and what is the question).
        sequence_ids = tokenized_examples.sequence_ids(i)

        # One example can give several spans, this is the index of the example containing this span of text.
        sample_index = sample_mapping[i]
        answers = examples["answers"][sample_index]
        # If no answers are given, set the cls_index as answer.
        if len(answers["answer_start"]) == 0:
            tokenized_examples["start_positions"].append(cls_index)
            tokenized_examples["end_positions"].append(cls_index)
            continue

        # Start/end character index of the answer in the text (only the first answer is used).
        start_char = answers["answer_start"][0]
        end_char = start_char + len(answers["text"][0])

        # Start token index of the current span in the text.
        token_start_index = 0
        while sequence_ids[token_start_index] != context_sequence_id:
            token_start_index += 1

        # End token index of the current span in the text.
        token_end_index = len(input_ids) - 1
        while sequence_ids[token_end_index] != context_sequence_id:
            token_end_index -= 1

        # Detect if the answer is out of the span (in which case this feature is labeled with the CLS index).
        if not (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char):
            tokenized_examples["start_positions"].append(cls_index)
            tokenized_examples["end_positions"].append(cls_index)
        else:
            # Otherwise move the token_start_index and token_end_index to the two ends of the answer.
            # Note: we could go after the last offset if the answer is the last word (edge case).
            while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char:
                token_start_index += 1
            tokenized_examples["start_positions"].append(token_start_index - 1)
            while offsets[token_end_index][1] >= end_char:
                token_end_index -= 1
            tokenized_examples["end_positions"].append(token_end_index + 1)

    return tokenized_examples

We need to add two things to our validation features:

1. The ID of the example that generated the feature (since each example can generate several features, as seen before);
2. The offset mapping that will give us a map from token indices to character positions in the context.

In [None]:
def prepare_validation_features(examples):
    """Tokenize a batch of examples into validation features for post-processing.

    Reads the module-level `tokenizer` and `model_args`.

    Args:
        examples: batch with "question", "context" and "id" columns.

    Returns:
        The tokenizer output with "overflow_to_sample_mapping" removed, an
        "example_id" entry per feature, and "offset_mapping" entries set to
        None for every token that is not part of the context.
    """
    # `pad_side` is a string, so testing it for truthiness treated "left" exactly like "right".
    # Resolve it once: any truthy value other than "left" keeps the (question | context) order.
    pad_side = model_args.pad_side
    pad_on_right = bool(pad_side) and str(pad_side).lower() != "left"
    # Sequence id of the context inside each tokenized pair.
    context_index = 1 if pad_on_right else 0

    # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results
    # in one example possible giving several features when a context is long, each of those features having a
    # context that overlaps a bit the context of the previous feature.
    tokenized_examples = tokenizer(
        examples["question" if pad_on_right else "context"],
        examples["context" if pad_on_right else "question"],
        truncation="only_second" if pad_on_right else "only_first",
        max_length=model_args.max_length,
        stride=model_args.doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # Since one example might give us several features if it has a long context, we need a map from a feature to
    # its corresponding example. This key gives us just that.
    sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")
    # We keep the example_id that gave us this feature and we will store the offset mappings.
    tokenized_examples["example_id"] = []

    for i in range(len(tokenized_examples["input_ids"])):
        # Grab the sequence corresponding to that example (to know what is the context and what is the question).
        sequence_ids = tokenized_examples.sequence_ids(i)

        # One example can give several spans, this is the index of the example containing this span of text.
        sample_index = sample_mapping[i]
        tokenized_examples["example_id"].append(examples["id"][sample_index])

        # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token
        # position is part of the context or not.
        tokenized_examples["offset_mapping"][i] = [
            (o if sequence_ids[k] == context_index else None)
            for k, o in enumerate(tokenized_examples["offset_mapping"][i])
        ]

    return tokenized_examples

### Valid Answer Selection Module Logic:

A candidate answer span is discarded if it meets any of the conditions below; if no candidate survives, the question is considered unanswerable.

	(Sindex OR Eindex) ≥ Length (OM)  OR  (Sindex OR Eindex) ∉ Cindexes  OR  Sindex > Eindex  OR  (Eindex − Sindex + 1) > maximum answer length

Where 
- Sindex and Eindex is the Start and End Index of answer tokens. 
- Cindexes represents the context token indexes. 
- OM presents the Offset Mapping of Question Context pair.

From the above equations, Start and End Indexes of the possible answer are obtained which are used to find out the Best Answer (BA) score using the below equation. 

	BAscore=Slogit[Sindex]+ Elogit[Eindex]

Minimum Null Score (NSmin) (a hyperparameter to be tuned) is used to decide if a question is answerable using the below equation. If this condition is not satisfied then the question is treated as unanswerable.

	BAscore > NSmin

The Null Answer (NA) score is calculated from the CLS token's start and end logit scores using the below equation.

	NA_score=Slogit[CLS]+ Elogit[CLS]

The Delta (D) score is the NA score minus the BA score, as in the equation below. If it is greater than the Null Answer Threshold (a hyperparameter to be tuned), the question is treated as unanswerable; otherwise the best answer is kept.

	Dscore  =NAscore-BAscore

In [None]:
def Valid_Answer_Selection(examples, features, raw_predictions):
    """Turn raw start/end logits into one final answer string per example.

    Reads the module-level `tokenizer` and `model_args`.

    Args:
        examples: dataset of original examples; must expose an "id" column and
            yield rows with "id" and "context".
        features: tokenized features; each has "example_id", "input_ids" and
            "offset_mapping" (None for tokens outside the context).
        raw_predictions: pair `(all_start_logits, all_end_logits)`, indexed by
            feature position.

    Returns:
        `(predictions, score_predictions)`, two ordered dicts keyed by example
        id. `predictions` maps to the answer text ("" for no answer);
        `score_predictions` maps to "answer || best-answer score || null score".
    """
    # Assign start_logits and end_logits from raw_predictions
    all_start_logits, all_end_logits = raw_predictions

    # Build a map example to its corresponding features.
    ## This will create a dictionary of question id as key and it's index as value For example: {'56ddde6b9a695914005b9628': 0, '56ddde6b9a695914005b9629': 1...}
    example_id_to_index = {k: i for i, k in enumerate(examples["id"])}
    ## This will create mapping of Features per example. For example: {0: [0], 1: [1], 2: [2, 3] ...}
    features_per_example = collections.defaultdict(list)
    for i, feature in enumerate(features):
        features_per_example[example_id_to_index[feature["example_id"]]].append(i)

    # The dictionaries we have to fill.
    predictions = collections.OrderedDict()
    score_predictions = collections.OrderedDict()  ## This dictionary is to store score along with best answer

    # Logging.
    print(f"Post-processing {len(examples)} example predictions split into {len(features)} features.")

    # Let's loop over all the examples!
    for example_index, example in enumerate(tqdm(examples)):
        # Those are the indices of the features associated to the current example.
        feature_indices = features_per_example[example_index]

        min_null_score = model_args.min_null_score # Only used if squad_v2 is True.
        valid_answers = []
        # Null-answer (CLS) score of this example. Reset for every example so that an
        # example without features can never reuse logits left over from a previous one.
        NA_score = 0.0

        context = example["context"]
        # Looping through all the features associated to the current example.
        for feature_index in feature_indices:
            # We grab the predictions of the model for this feature.
            start_logits = all_start_logits[feature_index]
            end_logits = all_end_logits[feature_index]
            # This is what will allow us to map some the positions in our logits to span of texts in the original
            # context.
            offset_mapping = features[feature_index]["offset_mapping"]

            # Update minimum null prediction.
            cls_index = features[feature_index]["input_ids"].index(tokenizer.cls_token_id)
            feature_null_score = start_logits[cls_index] + end_logits[cls_index]
            if min_null_score is None or min_null_score < feature_null_score:
                min_null_score = feature_null_score
            # Use the located CLS position rather than a hard-coded index 0.
            # NOTE(review): as before, the null score of the LAST feature of the example is the
            # one compared with the best answer -- confirm that is the intended feature.
            NA_score = feature_null_score

            # Go through all possibilities for the `n_best_size` greater start and end logits.
            start_indexes = np.argsort(start_logits)[-1 : -model_args.n_best_size - 1 : -1].tolist()
            end_indexes   = np.argsort(end_logits)[-1 : -model_args.n_best_size - 1 : -1].tolist()
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond
                    # to part of the input_ids that are not in the context.
                    if (
                        start_index  >= len(offset_mapping)
                        or end_index >= len(offset_mapping)
                        or offset_mapping[start_index] is None
                        or offset_mapping[end_index] is None
                    ):
                        continue
                    # Don't consider answers with a length that is either < 0 or > max_answer_length.
                    if end_index < start_index or end_index - start_index + 1 > model_args.max_answer_length:
                        continue

                    start_char = offset_mapping[start_index][0]
                    end_char   = offset_mapping[end_index][1]
                    valid_answers.append(
                        {
                            "score": start_logits[start_index] + end_logits[end_index],
                            "text": context[start_char: end_char]
                        }
                    )

        if valid_answers:
            # max() returns the first highest-scoring candidate, the same one a stable
            # descending sort would put first.
            best_answer = max(valid_answers, key=lambda x: x["score"])
        else:
            # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid failure.
            best_answer = {"text": "", "score": 0.0}

        # Let's pick our final answer: the best one or the null answer (only for squad_v2)
        answer    = best_answer["text"]  if best_answer["score"] > min_null_score else ""
        BA_score  = best_answer["score"] if best_answer["score"] > 0 else 0.0

        # Extra logic added on 17-JUL-2021:
        ## Score difference between the NA (Null Answer) score of the CLS token and the Best Answer score,
        ## to be compared with the NULL threshold.
        Delta_score = NA_score - BA_score

        ## If the difference exceeds the threshold, the null answer wins: report "no answer".
        if Delta_score > model_args.NA_threshold:
            answer    = ""
            BA_score  = 0.0

        predictions[example["id"]] = answer
        score_predictions[example["id"]] = answer +" || "+ str(round(BA_score,2)) +" || "+ str(round(NA_score,2))

    return predictions, score_predictions

In [None]:
def model_selection(model_to_train='ALBERT'):
  '''
  Look up the checkpoints and save path configured for one model type.

  input: model type to train 'ALBERT' OR 'RoBERTa' OR 'ELECTRA' (matched case-insensitively)

  Output:
  Pre-trained model and token checkpoints from configuration file which will be used to fine-tune or to do direct prediction 
  model_savepath should be the Google Colab/Local path where fine-tuned model will be saved based on save strategy

  Returned as the tuple (token_checkpoint, model_checkpoint, model_savepath).

  Raises:
  ValueError if model_to_train is not one of the supported model types.
  '''
  # Compare case-insensitively so spellings such as 'RoBERTA' and 'RoBERTa' both work.
  model_key = str(model_to_train).upper()

  if model_key=='ALBERT':
    token_checkpoint = conf.albert_token_cp
    model_checkpoint = conf.albert_model_cp
    model_savepath   = conf.albert_savepath
  elif model_key=='ROBERTA':
    token_checkpoint = conf.roberta_token_cp
    model_checkpoint = conf.roberta_model_cp
    model_savepath   = conf.roberta_savepath
  elif model_key=='ELECTRA':
    token_checkpoint = conf.electra_token_cp
    model_checkpoint = conf.electra_model_cp
    model_savepath   = conf.electra_savepath
  else:
    # Fail with a clear message: falling through to the return below would
    # raise UnboundLocalError because none of the three names was assigned.
    raise ValueError(
      "***Please select a model from ALBERT, RoBERTa OR ELECTRA only*** "
      f"(got {model_to_train!r})")

  return token_checkpoint,model_checkpoint,model_savepath

In [None]:
def predict_answers():
  '''
  This method provides predicted answers (using trainer.predict) with prediction score and actual answer for reference

  Reads the module-level `datasets`, `trainer`, `tokenizer` and `model_args`.

  Returns a tuple of three lists:
  formatted_predictions       - {"id", "prediction_text", "no_answer_probability"} per example
  formatted_score_predictions - {"id", "predicted_answer", "predicted_score", "NA_score", "actual_answers"} per example
  references                  - {"id", "answers"} per validation example
  '''  
  
  xm.master_print("***Prepare Validation Features***")
  validation_features = datasets["validation"].map(prepare_validation_features,batched=True, remove_columns=datasets["validation"].column_names)
  
  xm.master_print("***Get Predications using validation Features***")
  raw_predictions = trainer.predict(validation_features)
  
  ##The Trainer hides the columns that are not used by the model (here example_id and offset_mapping which we will need for our post-processing), so we set them back:
  validation_features.set_format(type=validation_features.format["type"], columns=list(validation_features.features.keys()))

  xm.master_print("***Valid Answer Selection Process***")
  final_predictions, final_score_predictions=Valid_Answer_Selection(datasets["validation"], 
                                                                    validation_features, 
                                                                    raw_predictions.predictions)

  formatted_predictions = [{"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in final_predictions.items()]

  formatted_score_predictions = []
  for k, v in final_score_predictions.items():
    # The value is "answer || score || NA score". Split from the right so that an
    # answer which itself contains " || " is not cut into the score fields.
    predicted_answer, predicted_score, na_score = v.rsplit(" || ", 2)
    formatted_score_predictions.append({"id": k,
                                        "predicted_answer": predicted_answer,
                                        "predicted_score": predicted_score,
                                        "NA_score": na_score})

  references = [{"id": ex["id"], "answers": ex["answers"]} for ex in datasets["validation"]]
  
  # Attach the reference answers by id with one dictionary lookup per prediction
  # instead of scanning every reference for every prediction.
  actual_answers_by_id = {r["id"]: r["answers"]["text"] for r in references}
  for v in formatted_score_predictions:
    if v["id"] in actual_answers_by_id:
      v["actual_answers"] = actual_answers_by_id[v["id"]]

  return formatted_predictions, formatted_score_predictions,references

In [None]:
def compute_metrics(p: EvalPrediction):
  '''
  Metric callback handed to the Trainer.

  The incoming `p` is not used: predictions are produced again through
  predict_answers() and scored with the module-level `metric`.
  '''
  predicted, _, gold = predict_answers()
  scores = metric.compute(predictions=predicted, references=gold)
  return scores

In [None]:
def compute_metrics_save(model_to_train='ALBERT'):
  '''
  Predict on the validation set, save the scored answers to a CSV file and print the final metrics.

  input: model type; it is used in the CSV file name and in the printed results header.

  The CSV path is conf.final_answer_path + model_to_train + conf.final_answer_filename.
  A failure to write the file is reported and does not stop the metrics from being printed.
  '''
  
  formatted_predictions,formatted_score_predictions,references=predict_answers()

  csv_columns = ['id', 'predicted_answer','predicted_score','NA_score','actual_answers']
  csv_file = conf.final_answer_path +  model_to_train + conf.final_answer_filename
    
  try:
    # newline='' lets the csv module control line endings (avoids blank rows on some
    # platforms); an explicit encoding keeps non-ASCII answers portable.
    with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
      writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
      writer.writeheader()
      writer.writerows(formatted_score_predictions)
    xm.master_print("***CSV file is created***")
  except OSError as err:
    # Report the actual error; the original printed the exception class itself.
    print("I/O error", err)
  
  xm.master_print("***",model_to_train,": FINAL RESULTS***")
  xm.master_print(metric.compute(predictions=formatted_predictions, references=references))

In [None]:
def pipeline(index, flags):
  '''
  Per-process entry point for xmp.spawn: build the model, optionally fine-tune it, then evaluate.

  index: process index supplied by xmp.spawn (not used directly).
  flags: dict with the keys 'model_to_train', 'batch_size', 'NA_threshold',
         'num_train_epochs' and 'do_train'.

  Publishes datasets, tokenizer, model, model_args, training_args, trainer and
  metric as module-level globals because the feature-preparation and
  post-processing functions read them from there.
  '''
  
  global datasets,tokenizer,model,model_args,training_args,trainer,metric

  # Use the `flags` argument handed over by xmp.spawn instead of reaching for the global FLAGS.
  token_checkpoint, model_checkpoint,model_savepath = model_selection(flags['model_to_train'])
  data_collator = default_data_collator
  
  model_args = ModelArguments(
    model_checkpoint=model_checkpoint,
    token_checkpoint=token_checkpoint,
    squad_v2=conf.squad_v2,
    max_length=conf.max_length,
    doc_stride=conf.doc_stride,
    batch_size=flags['batch_size'],
    n_best_size=conf.n_best_size,
    max_answer_length=conf.max_answer_length,
    min_null_score=conf.min_null_score, 
    NA_threshold=flags['NA_threshold'],
    pad_side=conf.pad_side)


  training_args = TrainingArguments(
    output_dir=model_savepath,
    overwrite_output_dir=True,
    evaluation_strategy=conf.evaluation_strategy,
    save_strategy=conf.save_strategy,
    save_steps=conf.save_steps,
    save_total_limit=conf.save_total_limit,
    learning_rate=conf.learning_rate,
    adam_epsilon=conf.adam_epsilon,
    adam_beta1=conf.adam_beta1,
    adam_beta2=conf.adam_beta2,
    per_device_train_batch_size=model_args.batch_size,
    per_device_eval_batch_size=model_args.batch_size,
    num_train_epochs=flags['num_train_epochs'],
    warmup_steps=conf.warmup_steps,
    weight_decay=conf.weight_decay,
    tpu_num_cores=8,
    do_train=flags['do_train'],
    gradient_accumulation_steps=conf.gradient_accumulation_steps,
    )
  
  # Acquires the (unique) Cloud TPU core corresponding to this process's index
  device = xm.xla_device()  

  # Note: master goes first and downloads the dataset only once (xm.rendezvous)
  #   all the other workers wait for the master to be done downloading.
  if not xm.is_master_ordinal():
    xm.rendezvous('download_only_once')

  xm.master_print("***Load SQuAD 1.0 OR 2.0 dataset***")
  datasets = load_dataset("squad_v2" if model_args.squad_v2 else "squad")

  if xm.is_master_ordinal():
    xm.rendezvous('download_only_once')

  xm.master_print("***Initialize Tokenizer and Model***")
  tokenizer = AutoTokenizer.from_pretrained(token_checkpoint)
  model     = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
  model = model.to(device)

  xm.master_print("***Prepare Train Features***")
  ## This indice = 107709 has question with lot of white space at the start which is not supported by RoBERTa model
  datasets = datasets.filter(lambda example, indice: indice != 107709, with_indices=True)  
  tokenized_datasets = datasets.map(prepare_train_features, batched=True, batch_size=1000,remove_columns=datasets["train"].column_names)
  
  # The scheduler advances once per optimizer step, i.e. once every
  # `gradient_accumulation_steps` batches, so the accumulation factor has to be
  # part of the step count (a factor of 1 gives the previous value).
  accumulation_steps = max(1, training_args.gradient_accumulation_steps)
  num_training_steps = int(len(tokenized_datasets["train"])/model_args.batch_size/xm.xrt_world_size()/accumulation_steps * training_args.num_train_epochs)
  xm.master_print(f"***Number of Training Steps*** {num_training_steps}")
  optimizer    = AdamW(model.parameters(),lr=training_args.learning_rate,betas=(training_args.adam_beta1,training_args.adam_beta2),eps=training_args.adam_epsilon) 
  lr_scheduler = get_scheduler(name="polynomial",optimizer=optimizer,num_warmup_steps=training_args.warmup_steps,num_training_steps=num_training_steps)
  xm.master_print(f"***Learning Rate*** {training_args.learning_rate}")
  
  metric = load_metric("squad_v2")
  
  trainer = Trainer(  
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    optimizers=(optimizer, lr_scheduler)
    )

  if flags['do_train']:
    xm.master_print("***Initialize Training***")
    trainer.train()
    trainer.save_model()
  
  compute_metrics_save(flags['model_to_train'])

In [None]:
import os

# `!export XLA_USE_BF16=1` ran in a throw-away subshell and never reached this
# process. Set the variable in the notebook's own environment so that the
# forked TPU worker processes inherit it.
# NOTE(review): this actually turns bfloat16 on; confirm that is wanted, since
# earlier runs of this cell effectively ran without it.
os.environ['XLA_USE_BF16'] = '1'

FLAGS={}
FLAGS['do_train']=conf.do_train
FLAGS['batch_size'] = conf.batch_size

# Training uses all 8 TPU cores; prediction-only runs use a single process.
nprocs = 8 if FLAGS['do_train'] else 1

### Models to run, in order, each with its own epoch count and NA threshold ###
### (remove an entry to skip that model) ###
model_runs = (
  ('ELECTRA', conf.electra_train_epoch, conf.electra_NA_threshold),
  ('ALBERT',  conf.albert_train_epoch,  conf.albert_NA_threshold),
  ('RoBERTa', conf.roberta_train_epoch, conf.roberta_NA_threshold),
)

for model_name, train_epochs, na_threshold in model_runs:
  FLAGS['model_to_train'] = model_name
  FLAGS['num_train_epochs'] = train_epochs
  FLAGS['NA_threshold'] = na_threshold
  xmp.spawn(pipeline, args=(FLAGS,), nprocs=nprocs, start_method='fork')

***Load SQuAD 1.0 OR 2.0 dataset***




***Initialize Tokenizer and Model***




***Prepare Train Features***
***Number of Training Steps*** 49407
***Learning Rate*** 1.5e-05


Downloading:   0%|          | 0.00/2.26k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.18k [00:00<?, ?B/s]

***Prepare Validation Features***


  0%|          | 0/12 [00:00<?, ?ba/s]

The following columns in the test set  don't have a corresponding argument in `ElectraForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Prediction *****
  Num examples = 12134
  Batch size = 8


***Get Predications using validation Features***


***Valid Answer Selection Process***
Post-processing 11873 example predictions split into 12134 features.


  0%|          | 0/11873 [00:00<?, ?it/s]

***CSV file is created***
*** ELECTRA : FINAL RESULTS***


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


{'exact': 79.29756590583678, 'f1': 83.23129836279163, 'total': 11873, 'HasAns_exact': 76.43387314439946, 'HasAns_f1': 84.31261900496399, 'HasAns_total': 5928, 'NoAns_exact': 82.15306980656014, 'NoAns_f1': 82.15306980656014, 'NoAns_total': 5945, 'best_exact': 79.29756590583678, 'best_exact_thresh': 0.0, 'best_f1': 83.23129836279142, 'best_f1_thresh': 0.0}
***Load SQuAD 1.0 OR 2.0 dataset***




***Initialize Tokenizer and Model***


https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp7pix3whg


Downloading:   0%|          | 0.00/705 [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/5b0120aefd590756a9d9a1796d4bdac51b5b64d1600e33401100c3687c514a34.c70ae4c3edc322066ce2f95d104c23a0e6a38e9c793ed74441eac3909995b46a
creating metadata file for /root/.cache/huggingface/transformers/5b0120aefd590756a9d9a1796d4bdac51b5b64d1600e33401100c3687c514a34.c70ae4c3edc322066ce2f95d104c23a0e6a38e9c793ed74441eac3909995b46a
https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp1e7_kh2m


Downloading:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/08025f1695c0bad2570a4ab66cb271d801fd1d1bd9651ae7da2a56c99111ba46.5e472f34fae7f9d0ff5afefbe7f6a98d1d3ae3db1c9537d1aef1783a18b6175e
creating metadata file for /root/.cache/huggingface/transformers/08025f1695c0bad2570a4ab66cb271d801fd1d1bd9651ae7da2a56c99111ba46.5e472f34fae7f9d0ff5afefbe7f6a98d1d3ae3db1c9537d1aef1783a18b6175e
https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpp9lg72g6


Downloading:   0%|          | 0.00/156 [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/9188fa4f066c34eb7180c46af601ea73e7c05e7cffe36cd0dcf1a00dc220f618.623993453f3f6b9f6ad831899812f482e5cde100e664124feb3a6446d69a26bf
creating metadata file for /root/.cache/huggingface/transformers/9188fa4f066c34eb7180c46af601ea73e7c05e7cffe36cd0dcf1a00dc220f618.623993453f3f6b9f6ad831899812f482e5cde100e664124feb3a6446d69a26bf
loading file https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/spiece.model from cache at None
loading file https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/08025f1695c0bad2570a4ab66cb271d801fd1d1bd9651ae7da2a56c99111ba46.5e472f34fae7f9d0ff5afefbe7f6a98d1d3ae3db1c9537d1aef1783a18b6175e
loading file https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/added_tokens.json from cache 

Downloading:   0%|          | 0.00/942 [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/b73f799e8dc7d9cb32c30646ad70bd91eae60016ef29877b7ec8f2248344e7bb.ad07075a32247c85a565b1e8f782660a92923b48076d6fdb530b2eff2f2048e7
creating metadata file for /root/.cache/huggingface/transformers/b73f799e8dc7d9cb32c30646ad70bd91eae60016ef29877b7ec8f2248344e7bb.ad07075a32247c85a565b1e8f782660a92923b48076d6fdb530b2eff2f2048e7
loading configuration file https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/b73f799e8dc7d9cb32c30646ad70bd91eae60016ef29877b7ec8f2248344e7bb.ad07075a32247c85a565b1e8f782660a92923b48076d6fdb530b2eff2f2048e7
Model config AlbertConfig {
  "_name_or_path": "/content/gdrive/MyDrive/Colab_Notebooks/Master_Project/Saved_models/ALBERT",
  "architectures": [
    "AlbertForQuestionAnswering"
  ],
  "attention_probs_dropout_prob": 0,
  "bos_token_id

Downloading:   0%|          | 0.00/44.4M [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/e2a981473dca2cf8e999b2a991cd82c337daa94d0c5e829b24316bb63ed6dce4.aeb54fc0325e0c88f243fe764bd0a8a25cc02777570909423075f72f62e8c713
creating metadata file for /root/.cache/huggingface/transformers/e2a981473dca2cf8e999b2a991cd82c337daa94d0c5e829b24316bb63ed6dce4.aeb54fc0325e0c88f243fe764bd0a8a25cc02777570909423075f72f62e8c713
loading weights file https://huggingface.co/PremalMatalia/albert-base-best-squad2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/e2a981473dca2cf8e999b2a991cd82c337daa94d0c5e829b24316bb63ed6dce4.aeb54fc0325e0c88f243fe764bd0a8a25cc02777570909423075f72f62e8c713
All model checkpoint weights were used when initializing AlbertForQuestionAnswering.

All the weights of AlbertForQuestionAnswering were initialized from the model checkpoint at PremalMatalia/albert-base-best-squad2.
If your task 

***Prepare Train Features***


  0%|          | 0/131 [00:00<?, ?ba/s]

  0%|          | 0/12 [00:00<?, ?ba/s]

***Number of Training Steps*** 49483
***Learning Rate*** 1.5e-05
***Prepare Validation Features***


  0%|          | 0/12 [00:00<?, ?ba/s]

The following columns in the test set  don't have a corresponding argument in `AlbertForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Prediction *****
  Num examples = 12171
  Batch size = 8


***Get Predications using validation Features***


***Valid Answer Selection Process***
Post-processing 11873 example predictions split into 12171 features.


  0%|          | 0/11873 [00:00<?, ?it/s]

***CSV file is created***
*** ALBERT : FINAL RESULTS***


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


{'exact': 78.32055925208456, 'f1': 81.58613864036595, 'total': 11873, 'HasAns_exact': 73.61673414304994, 'HasAns_f1': 80.15725777278443, 'HasAns_total': 5928, 'NoAns_exact': 83.01093355761144, 'NoAns_f1': 83.01093355761144, 'NoAns_total': 5945, 'best_exact': 78.3289817232376, 'best_exact_thresh': 0.0, 'best_f1': 81.59456111151897, 'best_f1_thresh': 0.0}
***Load SQuAD 1.0 OR 2.0 dataset***




***Initialize Tokenizer and Model***


https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpozkpsf2w


Downloading:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/e6276e7a1dc89d802461c8af0ae53d3325a763e0d8c515a37a37d8684ecc67e7.adc4fd6045765b8c3f401e1daa2e854486d022d3c6c3648d14daf4871dbcbac4
creating metadata file for /root/.cache/huggingface/transformers/e6276e7a1dc89d802461c8af0ae53d3325a763e0d8c515a37a37d8684ecc67e7.adc4fd6045765b8c3f401e1daa2e854486d022d3c6c3648d14daf4871dbcbac4
https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/vocab.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp2jxptch6


Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/vocab.json in cache at /root/.cache/huggingface/transformers/24c2489c871245af09e62a131326da54a995f3041711e1437765b3dec711ac00.bfdcc444ff249bca1a95ca170ec350b442f81804d7df3a95a2252217574121d7
creating metadata file for /root/.cache/huggingface/transformers/24c2489c871245af09e62a131326da54a995f3041711e1437765b3dec711ac00.bfdcc444ff249bca1a95ca170ec350b442f81804d7df3a95a2252217574121d7
https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/merges.txt not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpkr1q4sf3


Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/merges.txt in cache at /root/.cache/huggingface/transformers/4437ed21176a24325af31d25ffa2ff9fb054d61d17de56ef319ba354d9f3e313.f5b91da9e34259b8f4d88dbc97c740667a0e8430b96314460cdb04e86d4fc435
creating metadata file for /root/.cache/huggingface/transformers/4437ed21176a24325af31d25ffa2ff9fb054d61d17de56ef319ba354d9f3e313.f5b91da9e34259b8f4d88dbc97c740667a0e8430b96314460cdb04e86d4fc435
https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpgkfa2lkq


Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/587163293a08046b424dbf84b6cd3c94ca529b6a9d79300c3a9e492c48d65f70.f4476f3a555c71770a366f45094abd0aa33146a612bbfe0af5be11384f66af62
creating metadata file for /root/.cache/huggingface/transformers/587163293a08046b424dbf84b6cd3c94ca529b6a9d79300c3a9e492c48d65f70.f4476f3a555c71770a366f45094abd0aa33146a612bbfe0af5be11384f66af62
https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpxb6yzg2d


Downloading:   0%|          | 0.00/772 [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/4ecf9b0d616e415218095439a016a251a5c2822db84bf8eb53e56416654f5dc8.cb2244924ab24d706b02fd7fcedaea4531566537687a539ebb94db511fd122a0
creating metadata file for /root/.cache/huggingface/transformers/4ecf9b0d616e415218095439a016a251a5c2822db84bf8eb53e56416654f5dc8.cb2244924ab24d706b02fd7fcedaea4531566537687a539ebb94db511fd122a0
loading file https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/24c2489c871245af09e62a131326da54a995f3041711e1437765b3dec711ac00.bfdcc444ff249bca1a95ca170ec350b442f81804d7df3a95a2252217574121d7
loading file https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/4437ed21176a24325af31d25ffa2ff9fb054d61d17de56ef319ba354d9f3e313.f5b91da9e34259b8f4d88

Downloading:   0%|          | 0.00/740 [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/e152a04d67d4f4c37055deeb7b0cbe1ca0f85f4c2870e18d4104e2fc2764fc0b.fa97b08540d6bd850edd073e0c633e5fef148b76dbe8a9ca0b68d97846927112
creating metadata file for /root/.cache/huggingface/transformers/e152a04d67d4f4c37055deeb7b0cbe1ca0f85f4c2870e18d4104e2fc2764fc0b.fa97b08540d6bd850edd073e0c633e5fef148b76dbe8a9ca0b68d97846927112
loading configuration file https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/e152a04d67d4f4c37055deeb7b0cbe1ca0f85f4c2870e18d4104e2fc2764fc0b.fa97b08540d6bd850edd073e0c633e5fef148b76dbe8a9ca0b68d97846927112
Model config RobertaConfig {
  "_name_or_path": "/Saved_models/RoBERTa",
  "architectures": [
    "RobertaForQuestionAnswering"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "gradient_checkpoi

Downloading:   0%|          | 0.00/496M [00:00<?, ?B/s]

storing https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/b8b84fef22be2cf551283c0576465a4be02255ca394581c89ed9975505628c9f.21b4f32df468a9b452511c99f38cc46cfd2dbcd16c53e079aea0013992a02942
creating metadata file for /root/.cache/huggingface/transformers/b8b84fef22be2cf551283c0576465a4be02255ca394581c89ed9975505628c9f.21b4f32df468a9b452511c99f38cc46cfd2dbcd16c53e079aea0013992a02942
loading weights file https://huggingface.co/PremalMatalia/roberta-base-best-squad2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/b8b84fef22be2cf551283c0576465a4be02255ca394581c89ed9975505628c9f.21b4f32df468a9b452511c99f38cc46cfd2dbcd16c53e079aea0013992a02942
All model checkpoint weights were used when initializing RobertaForQuestionAnswering.

All the weights of RobertaForQuestionAnswering were initialized from the model checkpoint at PremalMatalia/roberta-base-best-squad2.
If your 

***Prepare Train Features***


  0%|          | 0/131 [00:00<?, ?ba/s]

  0%|          | 0/12 [00:00<?, ?ba/s]

***Number of Training Steps*** 98866
***Learning Rate*** 1.5e-05
***Prepare Validation Features***


  0%|          | 0/12 [00:00<?, ?ba/s]

The following columns in the test set  don't have a corresponding argument in `RobertaForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id.
***** Running Prediction *****
  Num examples = 12165
  Batch size = 8


***Get Predications using validation Features***


***Valid Answer Selection Process***
Post-processing 11873 example predictions split into 12165 features.


  0%|          | 0/11873 [00:00<?, ?it/s]

***CSV file is created***
*** RoBERTa : FINAL RESULTS***
{'exact': 81.01575002105618, 'f1': 83.96053304951707, 'total': 11873, 'HasAns_exact': 74.03846153846153, 'HasAns_f1': 79.93647248598477, 'HasAns_total': 5928, 'NoAns_exact': 87.973086627418, 'NoAns_f1': 87.973086627418, 'NoAns_total': 5945, 'best_exact': 81.01575002105618, 'best_exact_thresh': 0.0, 'best_f1': 83.96053304951694, 'best_f1_thresh': 0.0}


# MERGE 3 FILES #

In [None]:
# Load the per-question ALBERT predictions CSV, indexed by question id.
# The path is assembled from the shared configuration module; an example of a
# resolved path is kept below for reference.
#albert_file = "/content/gdrive/MyDrive/Colab_Notebooks/Master_Project/answer_scores/ALBERT_final_score_prediction.csv"
albert_file = conf.final_answer_path + "ALBERT" + conf.final_answer_filename
albert = pd.read_csv(albert_file, index_col='id')
# Remove the 'actual_answers' column from the frame and keep it separately.
a_actual_answer=albert.pop('actual_answers')

In [None]:
# Load the per-question RoBERTa predictions CSV, indexed by question id.
# The path is assembled from the shared configuration module; an example of a
# resolved path is kept below for reference.
#roberta_file = "/content/gdrive/MyDrive/Colab_Notebooks/Master_Project/answer_scores/RoBERTa_final_score_prediction.csv"
roberta_file =  conf.final_answer_path + "RoBERTa" + conf.final_answer_filename
roberta = pd.read_csv(roberta_file, index_col='id')
# Remove the 'actual_answers' column from the frame and keep it separately.
r_actual_answer=roberta.pop('actual_answers')

In [None]:
# Load the per-question ELECTRA predictions CSV, indexed by question id.
# Unlike the ALBERT and RoBERTa cells, 'actual_answers' is not popped here, so
# it stays a column of this frame (presumably so that one copy survives the
# join -- TODO confirm).
#electra_file = "/content/gdrive/MyDrive/Colab_Notebooks/Master_Project/answer_scores/ELECTRA_final_score_prediction.csv"
electra_file =  conf.final_answer_path + "ELECTRA" + conf.final_answer_filename
electra = pd.read_csv(electra_file, index_col='id')

In [None]:
# Join the three prediction frames on their shared 'id' index.
# Overlapping ALBERT/RoBERTa columns get the '_al' / '_ro' suffixes; the code
# below reads the ELECTRA columns without a suffix ('predicted_score',
# 'predicted_answer').
combined= albert.join(roberta,lsuffix='_al',rsuffix='_ro').join(electra,lsuffix='',rsuffix='_el')
# Replace every missing value in the joined frame with an empty string.
# NOTE(review): this applies to all columns, score columns included -- confirm
# the score columns never contain NaN, since '' cannot be compared to a number.
combined.fillna(value='',inplace=True)
#combined.head()

In [None]:
# Load the SQuAD v2 dataset and its evaluation metric.
# NOTE: `datasets` is rebound here from the imported library name to the loaded
# dataset object.
datasets = load_dataset("squad_v2")
metric   = load_metric("squad_v2")
# Gold answers for every validation example, in the shape expected by
# `metric.compute(references=...)` below.
references = [{"id": ex["id"], "answers": ex["answers"]} for ex in datasets["validation"]]

def compute_new_metrics(new_predictions):
  """Score a set of ensembled answers with the SQuAD v2 metric and print the result.

  Args:
    new_predictions: mapping of question id -> predicted answer text.

  The module-level `metric` and `references` objects are used for scoring.
  Every prediction is submitted with a no-answer probability of 0.0.
  """
  formatted_new_predictions = []
  for question_id, answer_text in new_predictions.items():
    formatted_new_predictions.append({
      "id": question_id,
      "prediction_text": answer_text,
      "no_answer_probability": 0.0,
    })
  scores = metric.compute(predictions=formatted_new_predictions, references=references)
  print(scores)



In [None]:
def _pick_highest_score(al, al_answer, ro, ro_answer, el, el_answer):
  """Return the answer with the largest score among ALBERT, RoBERTa and ELECTRA.

  Ties are resolved exactly as the original if/elif ladder did:
  al==el -> ELECTRA, al==ro -> RoBERTa, el==ro -> RoBERTa, all equal -> ELECTRA.
  """
  if al > el:
    return al_answer if al > ro else ro_answer
  # From here on el >= al.
  if al > ro or el > ro:
    return el_answer
  # From here on ro >= el >= al.
  if el > al or ro > al:
    return ro_answer
  # All three scores are equal.
  return el_answer


def merge_answers_score(max_threshold=10):
  """Ensemble the three models by score and print the resulting SQuAD metrics.

  For every row of the module-level `combined` frame the final answer is:
    * '' when at least two of the three scores are exactly 0;
    * when exactly one score is 0, the higher-scoring answer of the other two
      models, provided both of their scores are >= `max_threshold`;
    * when all three scores are >= `max_threshold`, the highest-scoring answer;
    * '' in every other case.

  Args:
    max_threshold: minimum score a model must reach for its answer to be
      eligible.

  Side effects:
    Overwrites the 'answer' column of `combined` and calls
    `compute_new_metrics` with the id -> answer mapping.
  """
  new_predictions = collections.OrderedDict()
  merged_answers = []

  # Walk the columns in lock-step instead of doing `combined[col][idx]` lookups
  # and chained `combined['answer'][idx] = ...` writes: chained assignment may
  # write to a temporary copy and is a no-op under pandas Copy-on-Write.
  rows = zip(
    combined.index,
    combined['predicted_score_al'], combined['predicted_answer_al'],
    combined['predicted_score_ro'], combined['predicted_answer_ro'],
    combined['predicted_score'], combined['predicted_answer'],
  )
  for idx, al, al_answer, ro, ro_answer, el, el_answer in rows:
    zero_scores = sum(1 for score in (al, ro, el) if score == 0)

    if zero_scores >= 2:
      answer = ''
    elif al == 0 and (el >= max_threshold and ro >= max_threshold):
      answer = el_answer if el > ro else ro_answer
    elif el == 0 and (al >= max_threshold and ro >= max_threshold):
      answer = al_answer if al > ro else ro_answer
    elif ro == 0 and (el >= max_threshold and al >= max_threshold):
      answer = al_answer if al > el else el_answer
    elif al >= max_threshold and ro >= max_threshold and el >= max_threshold:
      answer = _pick_highest_score(al, al_answer, ro, ro_answer, el, el_answer)
    else:
      answer = ''

    merged_answers.append(answer)
    new_predictions[idx] = answer

  # Single column assignment keeps `combined['answer']` in sync with the
  # predictions that are scored below.
  combined['answer'] = merged_answers
  compute_new_metrics(new_predictions)


# Sweep the score threshold from 15 down to 7 (inclusive) and print the
# ensemble metrics obtained at each value.
for threshold in range(15,6,-1):    
  print("With Threshold:",threshold)
  merge_answers_score(threshold)

With Threshold: 15
{'exact': 65.68685252253012, 'f1': 66.42344187129909, 'total': 11873, 'HasAns_exact': 33.28272604588394, 'HasAns_f1': 34.75801709479313, 'HasAns_total': 5928, 'NoAns_exact': 97.99831791421363, 'NoAns_f1': 97.99831791421363, 'NoAns_total': 5945, 'best_exact': 65.68685252253012, 'best_exact_thresh': 0.0, 'best_f1': 66.42344187129918, 'best_f1_thresh': 0.0}
With Threshold: 14
{'exact': 70.32763412785312, 'f1': 71.49461494593993, 'total': 11873, 'HasAns_exact': 43.960863697705804, 'HasAns_f1': 46.29817193878948, 'HasAns_total': 5928, 'NoAns_exact': 96.61900756938604, 'NoAns_f1': 96.61900756938604, 'NoAns_total': 5945, 'best_exact': 70.32763412785312, 'best_exact_thresh': 0.0, 'best_f1': 71.49461494594004, 'best_f1_thresh': 0.0}
With Threshold: 13
{'exact': 73.82295965636318, 'f1': 75.51886583956322, 'total': 11873, 'HasAns_exact': 52.58097165991903, 'HasAns_f1': 55.977647454982055, 'HasAns_total': 5928, 'NoAns_exact': 95.00420521446594, 'NoAns_f1': 95.00420521446594, 'No

In [None]:
# NOTE: a `global` statement at module level has no effect; these names only
# become module globals when the cells further down assign to them.
global new_predictions,al_ro_dis_list,al_el_dis_list,el_ro_dis_list

def pre_requisite_wmdistance():
  """Import the packages needed for Word Mover's Distance and load the embeddings.

  Returns:
    The object returned by gensim's downloader for 'word2vec-google-news-300'.
  """
  # These imports are not referenced below; they are kept so that a missing
  # package is reported here, before the model is fetched.
  from nltk.corpus import stopwords
  from nltk import download
  from pyemd import emd
  from gensim.similarities import WmdSimilarity
  from gensim.models import Word2Vec
  import gensim.downloader as api

  #download('stopwords')
  return api.load('word2vec-google-news-300')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

def pre_requisite_cosine_distance():
  """Download the NLTK resources used by `cosine_similarity`.

  Fetches the 'punkt' tokenizer data and the 'stopwords' corpus, in that order.
  """
  import nltk

  for resource in ('punkt', 'stopwords'):
    nltk.download(resource)


def cosine_similarity(X,Y):
  """Cosine similarity of two sentences over binary bag-of-words vectors.

  Both sentences are tokenized with NLTK, tokens found in the English stopword
  list are dropped, and each sentence is reduced to its set of distinct tokens.
  The similarity is |X & Y| / sqrt(|X| * |Y|).

  Args:
    X: first sentence.
    Y: second sentence.

  Returns:
    The similarity as a float, or the integer 0 when either sentence has no
    tokens left after stopword removal.
  """
  x_tokens = word_tokenize(X)
  y_tokens = word_tokenize(Y)
  stop_words = stopwords.words('english')

  # Distinct non-stopword tokens of each sentence.
  x_terms = set(x_tokens).difference(stop_words)
  y_terms = set(y_tokens).difference(stop_words)

  # For 0/1 indicator vectors the squared norm equals the number of distinct
  # tokens and the dot product equals the number of shared tokens.
  norm_product = len(x_terms) * len(y_terms)
  if norm_product == 0:
    return 0

  shared_terms = len(x_terms & y_terms)
  return shared_terms / float(norm_product ** 0.5)

def merge_answers_distance(wmddistance_model,distanace_type='wmd',max_threshold=10):
  """Ensemble the three models using answer-to-answer similarity and print metrics.

  For every row of the module-level `combined` frame the final answer is:
    * '' when at least two of the three scores are exactly 0;
    * when exactly one score is 0, the higher-scoring answer of the other two
      models, provided both of their scores are >= `max_threshold`;
    * when all three scores are >= `max_threshold`, an answer chosen from the
      pairwise similarities between the three predicted answers (smaller is
      closer for 'wmd', larger is closer for 'cos');
    * '' in every other case.

  Args:
    wmddistance_model: object exposing `wmdistance(doc1, doc2)`; only used when
      the distance type is 'wmd'.
    distanace_type: 'wmd' (Word Mover's Distance) or 'cos' (`cosine_similarity`).
      The misspelled parameter name is kept so existing keyword callers keep
      working.
    max_threshold: minimum score a model must reach for its answer to be
      eligible.

  Returns:
    A 7-tuple: the id -> answer mapping, the three per-row distance lists
    (ALBERT/RoBERTa, ALBERT/ELECTRA, ELECTRA/RoBERTa) and the three
    predicted-answer columns of `combined` (ALBERT, RoBERTa, ELECTRA).

  Raises:
    ValueError: if the distance type is neither 'wmd' nor 'cos'.

  Side effects:
    Overwrites the 'answer' column of `combined` and calls
    `compute_new_metrics` with the id -> answer mapping.
  """
  import operator

  # The body previously read a module-level `distance_type` and ignored this
  # argument; bind the argument explicitly.
  distance_type = distanace_type
  if distance_type not in ('wmd', 'cos'):
    # The previous bare `exit` was a no-op, so an invalid type fell through to
    # undefined or stale distances. Fail fast instead.
    raise ValueError("You haven't selected correct distance type. Please select from 'wmd' or 'cos'")

  use_wmd = distance_type == 'wmd'
  # WMD is a distance (smaller = closer); cosine is a similarity (larger = closer).
  closer_or_equal = operator.le if use_wmd else operator.ge

  new_predictions = collections.OrderedDict()
  merged_answers = []
  al_ro_dis_list = []
  al_el_dis_list = []
  el_ro_dis_list = []

  # Walk the columns in lock-step instead of doing `combined[col][idx]` lookups
  # and chained `combined['answer'][idx] = ...` writes: chained assignment may
  # write to a temporary copy and is a no-op under pandas Copy-on-Write.
  rows = zip(
    combined.index,
    combined['predicted_score_al'], combined['predicted_answer_al'],
    combined['predicted_score_ro'], combined['predicted_answer_ro'],
    combined['predicted_score'], combined['predicted_answer'],
  )
  for idx, al, al_answer, ro, ro_answer, el, el_answer in rows:
    if use_wmd:
      # wmdistance expects documents as lists of word tokens; a raw string
      # would be iterated character by character.
      al_doc = al_answer.split()
      ro_doc = ro_answer.split()
      el_doc = el_answer.split()
      al_ro_dis = wmddistance_model.wmdistance(al_doc, ro_doc)
      al_el_dis = wmddistance_model.wmdistance(al_doc, el_doc)
      el_ro_dis = wmddistance_model.wmdistance(el_doc, ro_doc)
    else:
      al_ro_dis = cosine_similarity(al_answer, ro_answer)
      al_el_dis = cosine_similarity(al_answer, el_answer)
      el_ro_dis = cosine_similarity(el_answer, ro_answer)

    al_ro_dis_list.append(al_ro_dis)
    al_el_dis_list.append(al_el_dis)
    el_ro_dis_list.append(el_ro_dis)

    zero_scores = sum(1 for score in (al, ro, el) if score == 0)

    # Default covers every case in which no model answer is eligible.
    answer = ''
    if zero_scores >= 2:
      answer = ''
    elif al == 0 and (el >= max_threshold and ro >= max_threshold):
      answer = el_answer if el > ro else ro_answer
    elif el == 0 and (al >= max_threshold and ro >= max_threshold):
      answer = al_answer if al > ro else ro_answer
    elif ro == 0 and (el >= max_threshold and al >= max_threshold):
      answer = al_answer if al > el else el_answer
    elif al >= max_threshold and ro >= max_threshold and el >= max_threshold:
      # Same decision ladder as before, expressed once for both distance types.
      if closer_or_equal(al_ro_dis, al_el_dis):
        answer = al_answer if closer_or_equal(al_ro_dis, el_ro_dis) else ro_answer
      elif closer_or_equal(al_el_dis, el_ro_dis):
        # ALBERT/ELECTRA is strictly the closest pair here.
        answer = al_answer
      elif closer_or_equal(el_ro_dis, al_el_dis):
        # ELECTRA/RoBERTa is strictly the closest pair here.
        answer = el_answer

    merged_answers.append(answer)
    new_predictions[idx] = answer

  # Single column assignment keeps `combined['answer']` in sync with the
  # predictions that are scored below.
  combined['answer'] = merged_answers
  compute_new_metrics(new_predictions)
  return (
    new_predictions,
    al_ro_dis_list,
    al_el_dis_list,
    el_ro_dis_list,
    combined['predicted_answer_al'],
    combined['predicted_answer_ro'],
    combined['predicted_answer'],
  )

In [None]:
# Ensemble run that compares the three predicted answers with Word Mover's
# Distance.
distance_type='wmd'
print("***SSP [Sentence Similarity Prediction] Using WMD Distance***")
# Loads the word2vec embeddings that provide `wmdistance`.
distance_model=pre_requisite_wmdistance()

# Sweep the score threshold from 15 down to 7 (inclusive); the module-level
# result names are overwritten on every iteration, so only the last run's
# values remain afterwards.
for threshold in range(15,6,-1):    
#threshold=7
  print("With Threshold:",threshold)
  new_predictions,al_ro_dis_list,al_el_dis_list,el_ro_dis_list,al_answer,ro_answer,el_answer=merge_answers_distance(distance_model,distance_type,threshold)

***SSP [Sentence Similarity Prediction] Using WMD Distance***
With Threshold: 15
{'exact': 65.77107723406047, 'f1': 66.46964961354813, 'total': 11873, 'HasAns_exact': 33.45141700404859, 'HasAns_f1': 34.850565091372594, 'HasAns_total': 5928, 'NoAns_exact': 97.99831791421363, 'NoAns_f1': 97.99831791421363, 'NoAns_total': 5945, 'best_exact': 65.77107723406047, 'best_exact_thresh': 0.0, 'best_f1': 66.46964961354821, 'best_f1_thresh': 0.0}
With Threshold: 14
{'exact': 70.52135096437294, 'f1': 71.58161045380632, 'total': 11873, 'HasAns_exact': 44.34885290148448, 'HasAns_f1': 46.472412435567165, 'HasAns_total': 5928, 'NoAns_exact': 96.61900756938604, 'NoAns_f1': 96.61900756938604, 'NoAns_total': 5945, 'best_exact': 70.52135096437294, 'best_exact_thresh': 0.0, 'best_f1': 71.5816104538064, 'best_f1_thresh': 0.0}
With Threshold: 13
{'exact': 74.21039332940285, 'f1': 75.68181691529963, 'total': 11873, 'HasAns_exact': 53.35695006747638, 'HasAns_f1': 56.30401690879759, 'HasAns_total': 5928, 'NoAns_

In [None]:
# Ensemble run that compares the three predicted answers with the
# bag-of-words cosine similarity; no embedding model is needed for it.
distance_type='cos'
distance_model=None
# Downloads the NLTK tokenizer and stopword data used by cosine_similarity.
pre_requisite_cosine_distance()
  
# Sweep the score threshold from 15 down to 7 (inclusive); the module-level
# result names are overwritten on every iteration.
# NOTE(review): the last three values returned by merge_answers_distance are
# the ALBERT / RoBERTa / ELECTRA predicted-answer columns, not distances, so
# the names al_ro_dis, al_el_dis and el_ro_dis used below are misleading (the
# WMD cell binds the same positions to al_answer, ro_answer, el_answer).
for threshold in range(15,6,-1):    
  print("With Threshold:",threshold)
  new_predictions,al_ro_dis_list,al_el_dis_list,el_ro_dis_list,al_ro_dis,al_el_dis,el_ro_dis=merge_answers_distance(distance_model,distance_type,threshold)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
With Threshold: 15
{'exact': 65.72054240714226, 'f1': 66.46108312178447, 'total': 11873, 'HasAns_exact': 33.350202429149796, 'HasAns_f1': 34.83340754132032, 'HasAns_total': 5928, 'NoAns_exact': 97.99831791421363, 'NoAns_f1': 97.99831791421363, 'NoAns_total': 5945, 'best_exact': 65.72054240714226, 'best_exact_thresh': 0.0, 'best_f1': 66.46108312178457, 'best_f1_thresh': 0.0}
With Threshold: 14
{'exact': 70.4876610797608, 'f1': 71.57918510184842, 'total': 11873, 'HasAns_exact': 44.28137651821862, 'HasAns_f1': 46.46755477635726, 'HasAns_total': 5928, 'NoAns_exact': 96.61900756938604, 'NoAns_f1': 96.61900756938604, 'NoAns_total': 5945, 'best_exact': 70.4876610797608, 'best_exact_thresh': 0.0, 'best_f1': 71.57918510184852, 'best_f1_thresh': 0.0}
With Threshold: 13
{'exact': 74.