# Hugging Face Transformers executed on Graphcore's IPU devices


In [3]:
import transformers
import torch
import poptorch

In [5]:
batch_size = 2
context_file="context.txt"
questions_file="questions_file.txt"

In [None]:
with open(context_file, "w+") as f:
    f.write("Scotland is a country that is part of the United Kingdom. Covering the northern third of the island of Great Britain, mainland Scotland has a 96 mile (154 km) border with England to the southeast and is otherwise surrounded by the Atlantic Ocean to the north and west, the North Sea to the northeast and the Irish Sea to the south. In addition, Scotland includes more than 790 islands; principally within the Northern Isles and the Hebrides archipelagos.")

In [9]:
with open(questions_file, "w+") as f:
    f.writelines(
        [
            "How many islands are there in Scotland?\n",
            "What sea is to the south of Scotland?\n",
            "How long is Scotland's border in km?\n",
            "Where is England in relation to scotland?\n"
        ]
    )

In [16]:
def read_inputs(context_file, questions_file, batch_size):
    context = context_file.read()
    questions = questions_file.readlines()
    questions = [q.rstrip() for q in questions]

    # Pad last batch with empty question if required
    questions += [""] * (len(questions) % batch_size)
    return context, questions

In [27]:
# Pre-trained model and tokenizer.
tokenizer = transformers.BertTokenizer.from_pretrained(
    'mrm8488/bert-medium-finetuned-squadv2', 
    return_token_type_ids=True,
    return_dict=False
)

model = transformers.BertForQuestionAnswering.from_pretrained(
    'mrm8488/bert-medium-finetuned-squadv2',
    return_dict=False
)

# Parse command-line arguments.
context, questions = read_inputs(open(context_file), open(questions_file), batch_size)

num_questions = len(questions)
num_batches = num_questions // batch_size

# Pipeline the model over two IPUs. You must have at least as many batches (questions) as you have IPUs.
model.bert.embeddings.position_embeddings = poptorch.BeginBlock(
    layer_to_call=model.bert.embeddings.position_embeddings, 
    ipu_id=1
)

In [28]:
# Wrap PyTorch model insde a PopTorch InferenceModel. This will make the model run on the IPU.
opts = poptorch.Options().deviceIterations(batch_size)
inference_model = poptorch.inferenceModel(model, options=opts)

In [29]:
from tqdm import trange
from tqdm.contrib import tenumerate

# Process inputs in batches.
for batch_idx in trange(num_batches):
    print("XXXXXXXXXx")
    input_pairs = [
        (questions[batch_size*batch_idx + i], context)
        for i in range(batch_size)]

    batched_encoding = tokenizer.batch_encode_plus(
        input_pairs,
        max_length=110,
        pad_to_max_length='right'
    )

    # Convert to PyTorch tensors.
    input_batch = torch.tensor(batched_encoding["input_ids"])
    attention_batch = torch.tensor(batched_encoding["attention_mask"])

    # Execute on IPU.
    start_score_pop, end_scores_pop = inference_model(input_batch, attention_batch)

    # Process outputs.
    for i, (start_score, end_score) in tenumerate(zip(start_score_pop, end_scores_pop)):
        answer_start, answer_stop = start_score.argmax(), end_score.argmax()
        answer_ids = input_batch[i][answer_start:answer_stop + 1]
        answer_tokens = tokenizer.convert_ids_to_tokens(answer_ids,
                                                        skip_special_tokens=True)
        answer = tokenizer.convert_tokens_to_string(answer_tokens)

        print(f"Question: {questions[batch_size*batch_idx + i]}")
        print(f"Answer: {answer}")

  0%|          | 0/2 [00:00<?, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


XXXXXXXXXx



Graph compilation:   0%|          | 0/100 [00:00<?][A
Graph compilation:   3%|▎         | 3/100 [00:01<01:00][A
Graph compilation:   4%|▍         | 4/100 [00:02<00:44][A
Graph compilation:   7%|▋         | 7/100 [00:03<00:41][A
Graph compilation:  16%|█▌        | 16/100 [00:03<00:11][A
Graph compilation:  20%|██        | 20/100 [00:04<00:14][A
Graph compilation:  23%|██▎       | 23/100 [00:05<00:14][A
Graph compilation:  26%|██▌       | 26/100 [00:05<00:11][A
Graph compilation:  28%|██▊       | 28/100 [00:06<00:15][A
Graph compilation:  30%|███       | 30/100 [00:06<00:13][A
Graph compilation:  32%|███▏      | 32/100 [00:06<00:12][A
Graph compilation:  34%|███▍      | 34/100 [00:07<00:13][A
Graph compilation:  42%|████▏     | 42/100 [00:08<00:08][A
Graph compilation:  45%|████▌     | 45/100 [00:08<00:06][A
Graph compilation:  49%|████▉     | 49/100 [00:08<00:05][A
Graph compilation:  51%|█████     | 51/100 [00:10<00:12][A
Graph compilation:  53%|█████▎    | 53/100 [00:

0it [00:00, ?it/s]

 50%|█████     | 1/2 [01:00<01:00, 60.02s/it]

Question: How many islands are there in Scotland?
Answer: more than 790
Question: What sea is to the south of Scotland?
Answer: irish sea
XXXXXXXXXx




0it [00:00, ?it/s]

100%|██████████| 2/2 [01:00<00:00, 30.02s/it]

Question: How long is Scotland's border in km?
Answer: 154
Question: Where is England in relation to scotland?
Answer: southeast





In [31]:
inference_model.detachFromDevice()