In [1]:
!pip install datasets transformers
!pip install onnxruntime onnxruntime_tools



In [2]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
import numpy as np
from onnxruntime import ExecutionMode, InferenceSession, SessionOptions


tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased-distilled-squad") 
model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-cased-distilled-squad")

In [3]:
question = "When did Kim Jong-il died??"

context = """On 17 December 2011, Kim Jong-il died from a heart attack. His youngest son Kim Jong-un was announced as his successor.[92] In the face of international condemnation, North Korea continued to develop its nuclear arsenal, possibly including a hydrogen bomb and a missile capable of reaching the United States.[93]

Throughout 2017, following Donald Trump's assumption of the US presidency, tensions between the United States and North Korea increased, and there was heightened rhetoric between the two, with Trump threatening "fire and fury"[94] and North Korea threatening to test missiles that would land near Guam.[95] The tensions substantially decreased in 2018, and a détente developed.[96] A series of summits took place between Kim Jong-un of North Korea, President Moon Jae-in of South Korea, and President Trump.[97] It has been 3 years, 3 months since North Korea's last ICBM test."""


In [4]:

inputs = tokenizer.encode_plus(question, context, return_tensors="pt") 

answer = model(**inputs)
answer_start = torch.argmax(answer[0])  
answer_end = torch.argmax(answer[1]) + 1 
output = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end]))
output

'17 December 2011'

In [5]:
ids = inputs["input_ids"]
mask = inputs["attention_mask"]

In [6]:
torch.onnx.export(model,
   (ids, mask),
    "john_model.onnx",
    input_names=["input_ids", "attention_mask"],
    output_names=["output"],
    dynamic_axes = {
        "input_ids": {0: "batch_size"},
        "attention_mask": {0: "batch_size"},
        "output": {0: "batch_size"}
    }
    
)

# Handles all the above steps for you


  input_tensor.shape[chunk_dim] == tensor_shape for input_tensor in input_tensors
  "Passing an tensor of different rank in execution will be incorrect.")
  "intended to be used with dynamic input shapes, please use opset version 11 to export the model.")


In [8]:
session = InferenceSession("john_model.onnx")

tokens = tokenizer.encode_plus(question, context, return_tensors="np")
inputs_onnx = {"input_ids":tokens["input_ids"], "attention_mask":tokens["attention_mask"]}
output = session.run(None, inputs_onnx)
answer_start = np.argmax(output[0])  # get the most likely beginning of answer with the argmax of the score
answer_end = np.argmax(output[1]) + 1 
output = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end]))
output


'17 December 2011'