# BERT-SQuAD Inference Example with AMD MIGraphX

This tutorial shows how to run the BERT-SQuAD ONNX model on an AMD GPU with the MIGraphX Python API, using the pre- and post-processing helpers from `run_onnx_squad`.

## Requirements 

In [1]:
!pip3 install -r requirements_bertsquad.txt

distutils: /usr/local/lib/python3.6/dist-packages
sysconfig: /usr/lib/python3.6/site-packages[0m
distutils: /usr/local/lib/python3.6/dist-packages
sysconfig: /usr/lib/python3.6/site-packages[0m
distutils: /usr/local/include/python3.6/UNKNOWN
sysconfig: /usr/include/python3.6m[0m
distutils: /usr/local/bin
sysconfig: /usr/bin[0m
distutils: /usr/local
sysconfig: /usr[0m
user = False
home = None
root = None
prefix = None[0m


distutils: /usr/local/lib/python3.6/dist-packages
sysconfig: /usr/lib/python3.6/site-packages[0m
distutils: /usr/local/lib/python3.6/dist-packages
sysconfig: /usr/lib/python3.6/site-packages[0m
distutils: /usr/local/include/python3.6/UNKNOWN
sysconfig: /usr/include/python3.6m[0m
distutils: /usr/local/bin
sysconfig: /usr/bin[0m
distutils: /usr/local
sysconfig: /usr[0m
user = False
home = None
root = None
prefix = None[0m


In [2]:
import numpy as np
import json
import time
import os.path
from os import path
import sys

import tokenizers
from run_onnx_squad import *

import migraphx

## Download BERT ONNX file

In [3]:
!wget -nc https://github.com/onnx/models/raw/master/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx

File ‘bertsquad-10.onnx’ already there; not retrieving.



## Download the uncased BERT archive (vocabulary)

In [4]:
!apt-get install unzip
!wget -q -nc https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip
!unzip -n uncased_L-12_H-768_A-12.zip

Reading package lists... Done
Building dependency tree       
Reading state information... Done
unzip is already the newest version (6.0-21ubuntu1.1).
The following packages were automatically installed and are no longer required:
  cmake-data libarchive13 libjsoncpp1 liblzo2-2 librhash0 libuv1
Use 'apt autoremove' to remove them.
0 upgraded, 0 newly installed, 0 to remove and 27 not upgraded.
Archive:  uncased_L-12_H-768_A-12.zip


## Input data

In [5]:
# Load the questions and context from the JSON input file, then pretty-print
# the parsed structure so the reader can see what will be fed to the model.
input_file = 'inputs.json'
with open(input_file) as json_file:
    test_data = json.load(json_file)
print(json.dumps(test_data, indent=2))

{
  "version": "1.4",
  "data": [
    {
      "paragraphs": [
        {
          "context": "In its early years, the new convention center failed to meet attendance and revenue expectations.[12] By 2002, many Silicon Valley businesses were choosing the much larger Moscone Center in San Francisco over the San Jose Convention Center due to the latter's limited space. A ballot measure to finance an expansion via a hotel tax failed to reach the required two-thirds majority to pass. In June 2005, Team San Jose built the South Hall, a $6.77 million, blue and white tent, adding 80,000 square feet (7,400 m2) of exhibit space",
          "qas": [
            {
              "question": "where is the businesses choosing to go?",
              "id": "1"
            },
            {
              "question": "how may votes did the ballot measure need?",
              "id": "2"
            },
            {
              "question": "By what year many Silicon Valley businesses were choosing the Mos

## Configuration for inference

In [6]:
# Model input geometry. The compiled model's parameter shapes are {1, 256},
# so these two values have to agree with the ONNX file being used.
batch_size = 1
max_seq_length = 256

# Limits handed to convert_examples_to_features when building the inputs.
doc_stride = 128
max_query_length = 64

# Limits handed to write_predictions when extracting answers.
n_best_size = 20
max_answer_length = 30

## Read vocabulary file and tokenize

In [7]:
# The vocabulary lives inside the directory unpacked from the uncased BERT archive.
vocab_dir = 'uncased_L-12_H-768_A-12'
vocab_file = os.path.join(vocab_dir, 'vocab.txt')

# Build the WordPiece tokenizer from that vocabulary.
tokenizer = tokenizers.BertWordPieceTokenizer(vocab_file)

## Convert the examples to input features

In [8]:
# Pre-processing: read the questions, then turn them into model inputs.
predict_file = 'inputs.json'

# read_squad_examples (from run_onnx_squad) parses the input file into examples.
eval_examples = read_squad_examples(input_file=predict_file)

# convert_examples_to_features (from run_onnx_squad) produces the three model
# inputs plus extra_data, which is handed to write_predictions later on.
features = convert_examples_to_features(
    eval_examples, tokenizer, max_seq_length, doc_stride, max_query_length)
input_ids, input_mask, segment_ids, extra_data = features

## Compile with MIGraphX for GPU

In [9]:
# Parse the ONNX graph and compile it for the GPU target.
model = migraphx.parse_onnx("bertsquad-10.onnx")
model.compile(migraphx.get_target("gpu"))

# Display the input names and shapes the compiled program expects. The keys
# shown here are the names that must be used when feeding model.run().
model.get_parameter_shapes()

{'unique_ids_raw_output___9:0': int64_type, {1}, {1},
 'input_mask:0': int64_type, {1, 256}, {256, 1},
 'segment_ids:0': int64_type, {1, 256}, {256, 1},
 'input_ids:0': int64_type, {1, 256}, {256, 1}}

## Run the input through the model

In [10]:
# Run one forward pass per index and collect the raw start/end logits.
# NOTE(review): the loop advances one index at a time while slicing `bs` rows,
# and qas_id is taken from eval_examples[idx]. This lines up when batch_size
# is 1 and every example yields exactly one feature -- confirm before
# changing either setting.
n = len(input_ids)
bs = batch_size
all_results = []

for idx in range(n):
    item = eval_examples[idx]
    print(item)

    # Keys are the parameter names reported by model.get_parameter_shapes().
    stop = idx + bs
    feed = {
        "unique_ids_raw_output___9:0": np.array([item.qas_id], dtype=np.int64),
        "input_ids:0": input_ids[idx:stop],
        "input_mask:0": input_mask[idx:stop],
        "segment_ids:0": segment_ids[idx:stop],
    }
    result = model.run(feed)

    # The leading dimension of output 1 is used as the row count of this batch.
    in_batch = result[1].get_shape().lens()[0]
    print(in_batch)

    # Output 1 is treated as the start logits and output 0 as the end logits.
    start_logits = list(map(float, result[1].tolist()))
    end_logits = list(map(float, result[0].tolist()))

    # unique_id is the running position of the result in all_results.
    for i in range(in_batch):
        unique_id = len(all_results)
        all_results.append(
            RawResult(unique_id=unique_id,
                      start_logits=start_logits,
                      end_logits=end_logits))

qas_id: 1, question_text: where is the businesses choosing to go?, doc_tokens: [In its early years, the new convention center failed to meet attendance and revenue expectations.[12] By 2002, many Silicon Valley businesses were choosing the much larger Moscone Center in San Francisco over the San Jose Convention Center due to the latter's limited space. A ballot measure to finance an expansion via a hotel tax failed to reach the required two-thirds majority to pass. In June 2005, Team San Jose built the South Hall, a $6.77 million, blue and white tent, adding 80,000 square feet (7,400 m2) of exhibit space]
1
qas_id: 2, question_text: how may votes did the ballot measure need?, doc_tokens: [In its early years, the new convention center failed to meet attendance and revenue expectations.[12] By 2002, many Silicon Valley businesses were choosing the much larger Moscone Center in San Francisco over the San Jose Convention Center due to the latter's limited space. A ballot measure to finance

## Get the predictions

In [11]:
# Post-process the raw logits into answers and store them under ./predictions.
output_dir = 'predictions'
os.makedirs(output_dir, exist_ok=True)
output_prediction_file, output_nbest_file = (
    os.path.join(output_dir, name)
    for name in ("predictions.json", "nbest_predictions.json"))

# write_predictions is presumably provided by run_onnx_squad; the positional
# True looks like a lower-casing flag -- TODO confirm against its signature.
write_predictions(
    eval_examples, extra_data, all_results,
    n_best_size, max_answer_length, True,
    output_prediction_file, output_nbest_file)

# Read the answers back from disk and pretty-print them.
with open(output_prediction_file) as json_file:
    test_data = json.load(json_file)
print(json.dumps(test_data, indent=2))

{
  "1": "Moscone Center",
  "2": "two-thirds majority",
  "3": "2002"
}
