In [1]:
from onnxruntime import InferenceSession
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from pathlib import Path
from transformers.convert_graph_to_onnx import convert

In [2]:
onnx_model_name="./onnx_model/roberta-base-squad2.onnx"
model_name="deepset/roberta-base-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

In [3]:
def convert_for_onnx(model_path, onnx_model_name, tokenizer):
    if Path(onnx_model_name).exists():
        print("ONNX input exists")
        return
    convert(
        framework="pt",
        model=model_path,
        tokenizer=tokenizer,
        output=Path(onnx_model_name),
        pipeline_name="question-answering",
        opset=12
    ) 

In [4]:
convert_for_onnx(model_path=model, onnx_model_name=onnx_model_name, tokenizer=tokenizer)

ONNX input exists


In [5]:
# need example.context_text and example.question_text
example_dict={"context": "Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune a model on a SQuAD task, you may leverage the `run_squad.py`.", "question": "What is extractive question answering?"}

In [6]:
from onnx_utils import get_examples, get_features, predict_qa

In [7]:
examples=get_examples(example_dict)

In [8]:
features_list=get_features(examples=examples, tokenizer=tokenizer)

In [9]:
predict_qa(onnx_model_name, tokenizer, features_list, examples)

{'score': 0.21330079436302185,
 'start': 71,
 'end': 94,
 'answer': 'a text given a question'}

In [18]:
pred_onnx=predict_qa(model_type="onnx", features_list=features_list, examples=examples)

In [10]:
session=InferenceSession(onnx_model_name)
inputs = tokenizer(example_dict["question"], example_dict["context"], add_special_tokens=True, return_tensors="np")
input_ids = inputs["input_ids"][0]
text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
output = session.run(None, inputs.__dict__["data"])

In [12]:
import numpy as np

In [13]:
start = output[0]
end = output[1]
outer = np.matmul(np.expand_dims(start, -1), np.expand_dims(end, 1))
candidates = np.tril(np.triu(outer), 99)

scores_flat = candidates.flatten()
topk=3
if topk == 1:
    idx_sort = [np.argmax(scores_flat)]
elif len(scores_flat) < topk:
    idx_sort = np.argsort(-scores_flat)
else:
    idx = np.argpartition(-scores_flat, topk)[0:topk]
    idx_sort = idx[np.argsort(-scores_flat[idx])]
s, e = np.unravel_index(idx_sort, candidates.shape)[1:]
print(f"Score: {candidates[0, s, e]}")

Score: [91.88429  91.55705  91.011696]


In [14]:
s, e

(array([43, 43, 43]), array([65, 52, 64]))

In [15]:
for start, end in zip(s, e):
    answer=tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[start:end]))
    print(answer)

, which is entirely based on that task. If you would like to fine-tune a model on a
, which is entirely based on that task.
, which is entirely based on that task. If you would like to fine-tune a model on


In [16]:
text = r""" 
🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
TensorFlow 2.0 and PyTorch.
"""

questions = [
    "How many pretrained models are available in 🤗 Transformers?",
    "What does 🤗 Transformers provide?",
    "🤗 Transformers provides interoperability between which frameworks?",
]

session=InferenceSession(onnx_model_name)
for question in questions:
    inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="np")
    input_ids = inputs["input_ids"][0]
    text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
    outputs = session.run(None, inputs.__dict__["data"])
    answer_start_scores = outputs[0]
    answer_end_scores = outputs[1]
    answer_start = np.argmax(answer_start_scores)  # Get the most likely beginning of answer with the argmax of the score
    answer_end = np.argmax(answer_end_scores) + 1  # Get the most likely end of answer with the argmax of the score
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
    print(f"Question: {question}")
    print(f"Answer: {answer}")

Question: How many pretrained models are available in 🤗 Transformers?
Answer:  over 32+
Question: What does 🤗 Transformers provide?
Answer:  general-purpose
architectures
Question: 🤗 Transformers provides interoperability between which frameworks?
Answer: <s>🤗 Transformers provides interoperability between which frameworks?</s></s> 
🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
TensorFlow 2.0 and PyTorch
