### Requirements

In [1]:
# Install requirements
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m37.8 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m44.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.1


In [30]:
# Import requirements
from transformers import pipeline
from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
import torch
import collections

### QA classes

In [32]:
# QA Factory

class QA_model:
    """Common base for the question-answering wrappers in this notebook.

    Stores the configuration object and declares the two hooks,
    ``download_model`` and ``answer_question``, that subclasses override.
    """

    def __init__(self, config):
        # Keep the config so subclass hooks can look up their settings.
        self.config = config

    def download_model(self):
        """Hook for loading a model; the base version does nothing and returns None."""
        return None

    def answer_question(self):
        """Hook for answering a question; the base version does nothing and returns None."""
        return None


def QA_factory(qa_model_type, config):
  """Instantiate a QA wrapper selected by name.

  ``qa_model_type`` is used as a key into this module's globals, and whatever
  object is found there is called with ``config`` as its only argument.
  A name that is not defined in the module raises ``KeyError``.
  """
  model_cls = globals()[qa_model_type]
  return model_cls(config)


In [None]:
# Pipeline QA class

class pipeline_QA(QA_model):
    """QA wrapper backed by a ``transformers.pipeline`` built from the config.

    Config keys read:
      "model_name": passed as the only (first positional) argument to
        ``pipeline``. NOTE(review): the notebook passes "question-answering"
        here, i.e. a pipeline task name rather than a checkpoint name --
        confirm the key name is intended.
    """

    def __init__(self, config, download = True):
        """Store the config and, unless ``download`` is False, build the pipeline now."""
        super().__init__(config)
        # Always define the attribute so an instance created with
        # download=False is still well-formed; the pipeline is then built on
        # first use instead of raising AttributeError.
        self.model = self.download_model() if download else None

    def download_model(self):
        """Build and return the pipeline described by ``config["model_name"]``."""
        return pipeline(self.config["model_name"])

    def answer_question(self, question, context):
        """Return the 'answer' field of the pipeline's result for one question/context pair."""
        if self.model is None:
            # Deferred load for instances created with download=False.
            self.model = self.download_model()
        answer = self.model(question = question, context = context)
        return answer['answer']

In [26]:
# Hugging Face QA class that builds the model and tokenizer together through a single pipeline call

class QA_one_model(QA_model):
    """QA wrapper that builds a 'question-answering' pipeline from one checkpoint name.

    Config keys read:
      "model_name": used for both the ``model`` and ``tokenizer`` arguments
        of ``pipeline``.
      "download": when truthy the pipeline is built in ``__init__``. If the
        key is absent it is treated as True, matching the ``download=True``
        default of the other wrappers in this notebook.
    """

    def __init__(self, config):
        super().__init__(config)
        # Define both attributes up front so they exist even when the
        # download is skipped.
        self.reply_model = None
        self.model = None
        if self.config.get("download", True):
            self.model = self.download_model()

    def download_model(self):
        """Build the pipeline, remember it as ``self.reply_model`` and return it."""
        model_name = self.config["model_name"]
        self.reply_model = pipeline('question-answering', model=model_name, tokenizer=model_name)
        # Return the pipeline so ``self.model`` holds it instead of None.
        return self.reply_model

    def answer_question(self, QA_input):
        """Run the pipeline on ``QA_input`` and return its result unchanged."""
        if self.reply_model is None:
            # Deferred load for instances created with "download" disabled.
            self.model = self.download_model()
        return self.reply_model(QA_input)


In [33]:
# Huggingface QA class

class huggingface_QA(QA_model):
  """Extractive QA wrapper that drives a model and tokenizer directly.

  Config keys read:
    "model_name": checkpoint for the model. Names containing "distilbert"
      use ``DistilBertForQuestionAnswering`` and also read "return_dict";
      any other name uses ``AutoModelForQuestionAnswering``.
    "tokenizer_name": checkpoint for the tokenizer. Names containing
      "distilbert" use ``DistilBertTokenizer`` and also read
      "return_token_type_ids"; any other name uses ``AutoTokenizer``.
    "skip_special_tokens": forwarded to ``convert_ids_to_tokens``.
  """

  def __init__(self, config, download = True):
    """Store the config and, unless ``download`` is False, load model and tokenizer."""
    super().__init__(config)
    # Define both attributes even when loading is deferred, so
    # answer_question can load on demand instead of raising AttributeError.
    self.model = None
    self.tokenizer = None
    if download:
      self.model = self.download_model()
      self.tokenizer = self.download_tokenizer()

  def download_model(self):
    """Load the model named in the config, store it on ``self.model`` and return it."""
    model_name = self.config["model_name"]
    if "distilbert" in model_name:
      self.model = DistilBertForQuestionAnswering.from_pretrained(model_name, return_dict = self.config["return_dict"])
    else:
      self.model = AutoModelForQuestionAnswering.from_pretrained(model_name)
    return self.model

  def download_tokenizer(self):
    """Load the tokenizer named in the config, store it on ``self.tokenizer`` and return it."""
    tokenizer_name = self.config["tokenizer_name"]
    if "distilbert" in tokenizer_name:
      self.tokenizer = DistilBertTokenizer.from_pretrained(tokenizer_name, return_token_type_ids = self.config["return_token_type_ids"])
    else:
      self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    return self.tokenizer

  def answer_question(self, question, context):
    """Return the answer span the model selects for ``question`` in ``context``.

    The span runs from the highest-scoring start position to the
    highest-scoring end position (inclusive) and is decoded back to a
    string. If the end position precedes the start, the slice is empty.
    """
    if self.model is None:
      self.download_model()
    if self.tokenizer is None:
      self.download_tokenizer()

    encoding = self.tokenizer.encode_plus(question, context)
    input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
      outputs = self.model(torch.tensor([input_ids]), attention_mask=torch.tensor([attention_mask]))

    # Positional indexing works for a plain tuple (return_dict=False) and for
    # a transformers ModelOutput; unpacking a ModelOutput would iterate its
    # keys instead of the score tensors.
    start_scores, end_scores = outputs[0], outputs[1]

    # Plain ints make the list slice explicit.
    start_index = int(torch.argmax(start_scores))
    end_index = int(torch.argmax(end_scores))
    ans_tokens = input_ids[start_index : end_index + 1]

    answer_tokens = self.tokenizer.convert_ids_to_tokens(ans_tokens , skip_special_tokens=self.config["skip_special_tokens"])
    answer_tokens_to_string = self.tokenizer.convert_tokens_to_string(answer_tokens)
    return answer_tokens_to_string

### Evaluation Metrics

In [15]:
# Exact Match

def exact_match(generated_answer, ground_truth):
  """Return True when the two answers compare equal, otherwise False."""
  return bool(ground_truth == generated_answer)

In [16]:
# F1 Score

def f1_score(generated_answer, ground_truth):
  """Token-level F1 between a generated answer and the ground truth.

  Both strings are split on runs of whitespace and compared as multisets of
  tokens.

  Returns:
    A float in [0.0, 1.0]. If either side has no tokens the score is 1.0
    when both are empty and 0.0 otherwise. If no tokens are shared the
    score is 0.0.
  """
  # split() with no argument drops empty tokens. split(" ") would turn ""
  # into [""] and "a  b" into ["a", "", "b"], which made the empty-answer
  # guard unreachable and skewed the token counts.
  ground_truth_tokens = ground_truth.split()
  generated_answer_tokens = generated_answer.split()

  if not ground_truth_tokens or not generated_answer_tokens:
    return float(ground_truth_tokens == generated_answer_tokens)

  # Multiset intersection: each shared token counts min(count_a, count_b) times.
  common_tokens = collections.Counter(ground_truth_tokens) & collections.Counter(generated_answer_tokens)
  num_common_tokens_same = sum(common_tokens.values())

  if num_common_tokens_same == 0:
    return 0.0

  precision = num_common_tokens_same / len(generated_answer_tokens)
  recall = num_common_tokens_same / len(ground_truth_tokens)

  return (2 * precision * recall) / (precision + recall)

### Workflows

Sid wrote this part

In [27]:
# Sample question/context pair in the dict form accepted by the
# question-answering pipeline.
QA_input = dict(
    question='Why is model conversion important?',
    context='The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.',
)

In [28]:
# Settings for the pipeline-backed wrapper: the checkpoint to use and whether
# to build the pipeline at construction time.
config = dict(
    model_name="deepset/tinyroberta-squad2",
    download=True,
)

# NOTE(review): this rebinds the module-level name `QA_model`, which is also
# the name of the base class the wrapper classes inherit from, to an instance.
# Re-running a class-definition cell after this one will fail until the base
# class cell is run again -- consider a lower-case variable name here and in
# the cell that calls answer_question.
QA_model = QA_factory("QA_one_model", config)

In [29]:
# Run the sample input through the wrapper; the returned dict is displayed as
# this cell's output.
QA_model.answer_question(QA_input)

{'score': 0.2624489367008209,
 'start': 59,
 'end': 132,
 'answer': 'gives freedom to the user and let people easily switch between frameworks'}

EL Shravs area

In [None]:
# Evaluation examples as three parallel lists: entry i of each list belongs
# to the same example.
questions = [
    "Where is the ball?",
]
contexts = [
    "The chair is near the table, and the ball is behind the chair.",
]
ground_truths = [
    "it is behind the chair",
]

In [None]:
# Workflow Two

def workflow_2():
  """Evaluate the ``huggingface_QA`` wrapper on the notebook's example lists.

  Reads the module-level ``questions``, ``contexts`` and ``ground_truths``
  lists, answers every question, and prints the mean exact-match and F1
  scores.

  Raises:
    ValueError: if the three lists differ in length.
  """
  config = {
      "tokenizer_name": "distilbert-base-uncased",
      "model_name": "distilbert-base-uncased-distilled-squad",
      "return_token_type_ids": True,
      "skip_special_tokens": True,
      "return_dict": False
  }

  # Validate the inputs before loading any model.
  if not (len(questions) == len(contexts) == len(ground_truths)):
    raise ValueError("questions, contexts and ground_truths must have the same length")
  if not questions:
    # Avoid dividing by zero when averaging over an empty dataset.
    print("No examples to evaluate.")
    return

  # Lower-case local name so the QA_model base class is not shadowed here.
  qa_model = QA_factory("huggingface_QA", config)

  exact_match_scores = list()
  f1_scores = list()

  for question, context, ground_truth in zip(questions, contexts, ground_truths):
    predicted_answer = qa_model.answer_question(question, context)

    exact_match_scores.append(exact_match(predicted_answer, ground_truth))
    f1_scores.append(f1_score(predicted_answer, ground_truth))

  avg_em_score = sum(exact_match_scores) / len(exact_match_scores)
  print("Exact Match Score: ", avg_em_score)

  avg_f1_score = sum(f1_scores) / len(f1_scores)
  print("F1 Score: ", avg_f1_score)

workflow_2()

Exact Match Score:  0.0
F1 Score:  0.7499999999999999


In [None]:
# Workflow One

def workflow_1():
  """Evaluate the ``pipeline_QA`` wrapper on the notebook's example lists.

  Reads the module-level ``questions``, ``contexts`` and ``ground_truths``
  lists, answers every question, and prints the mean exact-match and F1
  scores.

  Raises:
    ValueError: if the three lists differ in length.
  """
  # "model_name" is what pipeline_QA hands to transformers.pipeline as its
  # only argument; the value here is a task name, so the library picks its
  # default checkpoint for that task (see the warning in the cell output).
  config = {
      "model_name": "question-answering"
  }

  # Validate the inputs before loading any model.
  if not (len(questions) == len(contexts) == len(ground_truths)):
    raise ValueError("questions, contexts and ground_truths must have the same length")
  if not questions:
    # Avoid dividing by zero when averaging over an empty dataset.
    print("No examples to evaluate.")
    return

  # Lower-case local name so the QA_model base class is not shadowed here.
  qa_model = QA_factory("pipeline_QA", config)

  exact_match_scores = list()
  f1_scores = list()

  for question, context, ground_truth in zip(questions, contexts, ground_truths):
    predicted_answer = qa_model.answer_question(question, context)

    exact_match_scores.append(exact_match(predicted_answer, ground_truth))
    f1_scores.append(f1_score(predicted_answer, ground_truth))

  avg_em_score = sum(exact_match_scores) / len(exact_match_scores)
  print("Exact Match Score: ", avg_em_score)

  avg_f1_score = sum(f1_scores) / len(f1_scores)
  print("F1 Score: ", avg_f1_score)

workflow_1()

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


Exact Match Score:  0.0
F1 Score:  0.7499999999999999
