In [1]:
!pip install transformers
% pip install sentencepiece

import torch
from transformers import AlbertTokenizer, AlbertForQuestionAnswering
from collections import OrderedDict
import sentencepiece as spm
from google.colab import files
import json



Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/ed/db/98c3ea1a78190dac41c0127a063abf92bd01b4b0b6970a6db1c2f5b66fa0/transformers-4.0.1-py3-none-any.whl (1.4MB)
[K     |████████████████████████████████| 1.4MB 9.0MB/s 
[?25hCollecting tokenizers==0.9.4
[?25l  Downloading https://files.pythonhosted.org/packages/0f/1c/e789a8b12e28be5bc1ce2156cf87cb522b379be9cadc7ad8091a4cc107c4/tokenizers-0.9.4-cp36-cp36m-manylinux2010_x86_64.whl (2.9MB)
[K     |████████████████████████████████| 2.9MB 26.7MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 19.8MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893261 sha256=f8c26d3d373a3

In [2]:
uploaded = files.upload()

Saving dev-v2.0.json to dev-v2.0.json


In [3]:
def read_file(file_name):
  with open(file_name) as f:
    data = json.load(f)
  contexts = list()
  questions = list()
  answers = list()
    
  for group in data['data']:
      for passage in group['paragraphs']:
          context = passage['context']
          for qa in passage['qas']:
              question = qa['question']
              an = []
              if(qa['is_impossible']):
                an.append('')
              else:
                for answer in qa['answers']:
                  an.append(answer['text'])
              contexts.append(context)
              questions.append(question)
              answers.append(an)

  return contexts, questions, answers

In [4]:
context, question, answer = read_file('dev-v2.0.json')

In [28]:
context_1 = context[0:500]
question_1 = question[0:500]
answer_1 = answer[0:500]

In [29]:
def get_answers(context, question, model, tokenizer):
  answers = []
  for c, q in zip(context, question):
    encoding = tokenizer.encode_plus(q,c, return_tensors='pt')
    inputs = encoding['token_type_ids'].lt(1)  #Token embeddings

    qt = torch.masked_select(encoding['input_ids'], inputs)

    chunk_size = model.config.max_position_embeddings - qt.size()[0] - 1

    chunked_input = OrderedDict()
    for k,v in encoding.items():
      q = torch.masked_select(v, inputs)
      c = torch.masked_select(v, ~inputs)
      chunks = torch.split(c, chunk_size)

      for i, chunk in enumerate(chunks):
        if i not in chunked_input:
          chunked_input[i] = {}

        thing = torch.cat((q, chunk))
        if i != len(chunks)-1:
          if k == 'input_ids':
            thing = torch.cat((thing, torch.tensor([102])))
          else:
            thing = torch.cat((thing, torch.tensor([1])))

        chunked_input[i][k] = torch.unsqueeze(thing, dim=0)
 
    def convert_ids_to_string(tokenizer, input_ids):
      return tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids))
    answer = ''

    # iterate over our chunks to look for the best answer from each chunk
    for _, chunk in chunked_input.items():
        m = model(**chunk)
        answer_start_scores = m[0]
        answer_end_scores = m[1]

        answer_start = torch.argmax(answer_start_scores)
        answer_end = torch.argmax(answer_end_scores) + 1

        ans = convert_ids_to_string(tokenizer, chunk['input_ids'][0][answer_start:answer_end])
        
        # if the ans == [CLS] then the model did not find a real answer in this chunk
        if ans != '[CLS]':
            answer += ans + " "
#    answers.append({'answer_start': start_index, 'text': corrected_answer})
    answers.append(answer.strip())
  return answers

In [30]:
def normalize_text(s):
    """Removing articles and punctuation, and standardizing whitespace are all typical text processing steps."""
    import string, re

    def remove_articles(text):
        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
        return re.sub(regex, " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))

def compute_exact_match(prediction, truth):
    return int(normalize_text(prediction) == normalize_text(truth))

def compute_f1(prediction, truth):
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()
    
    # if either the prediction or the truth is no-answer then f1 = 1 if they agree, 0 otherwise
    if len(pred_tokens) == 0 or len(truth_tokens) == 0:
        return int(pred_tokens == truth_tokens)
    
    common_tokens = set(pred_tokens) & set(truth_tokens)
    
    # if there are no common tokens then f1 = 0
    if len(common_tokens) == 0:
        return 0
    
    prec = len(common_tokens) / len(pred_tokens)
    rec = len(common_tokens) / len(truth_tokens)
    
    return 2 * (prec * rec) / (prec + rec)

In [31]:
from transformers import AlbertTokenizerFast
albert_tokenizer = AlbertTokenizerFast.from_pretrained('albert-base-v2')
#albert_tokenizer = AlbertTokenizer.from_pretrained('ahotrod/albert_xxlargev1_squad2_512')
albert_model = torch.load('ALBERT_tuned_for_SQUAD_2.0_complete', map_location = torch.device('cpu'))


In [32]:
albert_answer = get_answers(context_1, question_1, albert_model, albert_tokenizer)

In [33]:

import re
for i in range(len(albert_answer)):
  albert_answer[i] = re.sub(' +', ' ',albert_answer[i])

In [34]:
albert_em_score = []
for p, true_answer in zip(albert_answer, answer_1):
  albert_em_score.append(max((compute_exact_match(p, a)) for a in true_answer))

In [35]:
albert_f1_score = []
for p, true_answer in zip(albert_answer, answer_1):
  albert_f1_score.append(max((compute_f1(p, a)) for a in true_answer))

In [36]:
print('Fine-tuned Albert accuracy: ', sum(albert_em_score)/len(albert_em_score))

Fine-tuned Albert accuracy:  0.334


In [37]:
print('Fine-tuned Albert Average F1: ', sum(albert_f1_score)/len(albert_f1_score))

Fine-tuned Albert Average F1:  0.36980266380678145
