In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Importing the tokenizer and model.
tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/mbart-large-cc25")

# Model parameters.
param = sum(p.numel() for p in model.parameters())
print("Total Parameters: ", param)

In [2]:
# Creating the inputs for the model with the help of the tokenizer. 
input_ids = tokenizer("Just an example", add_special_tokens=True, return_tensors="pt").input_ids
attn_mask = tokenizer("Just an example", add_special_tokens=True, return_tensors="pt").attention_mask
decoder_input_ids = tokenizer("example continued", add_special_tokens=True, return_tensors="pt").input_ids

print("Input_ids:", input_ids)
print("Attention_mask", attn_mask)
print("Decoder_input_ids", decoder_input_ids)

Input_ids: tensor([[  9563,    142,  27781,      2, 250004]])
Attention_mask tensor([[1, 1, 1, 1, 1]])
Decoder_input_ids tensor([[ 27781, 136475,      2, 250004]])


In [3]:
# Storing the pytorch model's output in pt_outputs variable.
pt_outputs = model(input_ids, attn_mask, decoder_input_ids)

print('\x1b[6;30;42m' + 'PyTorch output:' + '\x1b[0m', pt_outputs)

[6;30;42mPyTorch output:[0m Seq2SeqLMOutput(loss=None, logits=tensor([[[169.1353,  -4.9708, 180.5166,  ..., 168.7248, 162.5110,  84.3728],
         [124.7149,  -3.7138, 139.2386,  ..., 124.0178, 118.8718,  64.9085],
         [114.2669,  -3.1706, 122.9266,  ..., 113.8349, 108.5745,  55.9742],
         [ 88.6076,  -1.6644,  61.8927,  ...,  63.6106,  61.7157,  26.3426]]],
       grad_fn=<AddBackward0>), past_key_values=((tensor([[[[ 3.0555e-01, -9.1517e-01, -6.0642e-01,  ...,  6.2951e-01,
            3.6249e-01, -1.9330e-01],
          [-1.8615e-01, -1.6122e-01, -1.0600e+00,  ...,  4.2224e-01,
            3.1476e-01, -7.1112e-03],
          [ 4.3802e-01,  5.4728e-02, -3.5712e-01,  ...,  6.1628e-01,
            1.0333e+00,  2.2887e-01],
          [-1.0643e+00, -2.2007e-01, -1.1838e-01,  ..., -7.9920e-01,
           -5.0208e-01,  3.0128e-01]],

         [[ 6.2090e-01, -2.4985e-01,  1.1483e+00,  ..., -1.0682e+00,
           -5.4376e-01,  1.1747e+00],
          [-9.8667e-02,  6.3262e-01,  2

In [6]:
# Exporting from torch to onnx fails because this model
# when converted to onnx is bigger than 2 gb.
torch.onnx.export(model,
                 (input_ids, attn_mask, decoder_input_ids),
                 "mbart-large-cc25.onnx",
                 input_names=["input_ids", "attn_mask", "decoder_input_ids"],
                 output_names=["outputs"],
                 opset_version=12)

  src_len,
  src_len,
  self.head_dim,
  if torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any():
  if input_shape[-1] > 1:


RuntimeError: Exporting model exceed maximum protobuf size of 2GB. Please call torch.onnx.export with use_external_data_format=True.

In [7]:
# To get past the previous file size limit torch.onnx.export provides
# a param named use_external_data_format which when set to true can 
# export models bigger than 2 gb in which stores some of the model
# parameters in external binary files rather than in the single onnx file.
torch.onnx.export(model,
                 (input_ids, attn_mask, decoder_input_ids),
                 "mbart-large-cc25.onnx",
                 input_names=["input_ids", "attn_mask", "decoder_input_ids"],
                 output_names=["outputs"],
                 opset_version=12,
                 use_external_data_format=True)

In [1]:
# Converting from onnx to openvino is successful but takes
# a lot of memory so if you are on a linux based system
# you can create the swap area if you have less RAM.
!python3 /opt/intel/openvino_2021.2.200/deployment_tools/model_optimizer/mo.py --input_model mbart-large-cc25.onnx

Model Optimizer arguments:
Common parameters:
	- Path to the Input Model: 	/home/aakash/box_of_ai_tools/Natural_Language_Processing/Machine_Translation/Mbart-large-cc25/mbart-large-cc25.onnx
	- Path for generated IR: 	/home/aakash/box_of_ai_tools/Natural_Language_Processing/Machine_Translation/Mbart-large-cc25/.
	- IR output name: 	mbart-large-cc25
	- Log level: 	ERROR
	- Batch: 	Not specified, inherited from the model
	- Input layers: 	Not specified, inherited from the model
	- Output layers: 	Not specified, inherited from the model
	- Input shapes: 	Not specified, inherited from the model
	- Mean values: 	Not specified
	- Scale values: 	Not specified
	- Scale factor: 	Not specified
	- Precision of IR: 	FP32
	- Enable fusing: 	True
	- Enable grouped convolutions fusing: 	True
	- Move mean values to preprocess section: 	None
	- Reverse input channels: 	False
ONNX specific parameters:
Model Optimizer version: 	2021.2.0-1877-176bdf51370-releases/2021/2

[ SUCCESS ] Generated IR version 1

In [3]:
from openvino.inference_engine import IECore
ie = IECore()

openvino_mbart = ie.read_network(model="mbart-large-cc25.xml", weights="mbart-large-cc25.bin")
exec_mbart = ie.load_network(network=openvino_mbart, device_name="CPU", num_requests=1)
openvino_outputs = exec_mbart.infer(inputs={"input_ids": input_ids,
                                            "attn_mask": attn_mask,
                                            "decoder_input_ids": decoder_input_ids})

In [6]:
print('\x1b[6;30;42m' + 'Openvino output:' + '\x1b[0m', openvino_outputs)

[6;30;42mOpenvino output:[0m {'2434': array([[[-8.6025614e-03,  7.9059601e-03, -1.1764058e-02, ...,
         -3.7537418e-02, -9.1848336e-04,  1.6217344e-02],
        [-1.0595697e+00,  6.7613035e-01,  6.1263770e-02, ...,
         -4.9537528e-01,  5.7463038e-01,  6.8071461e-01],
        [-4.2326415e-01,  4.4639364e-01, -1.2840933e-01, ...,
          3.1769216e-01, -8.3564490e-02,  6.1315931e-02],
        [-2.0410563e-01, -1.6575670e-02,  4.4710234e-02, ...,
          3.9313087e-01, -2.8597248e-01,  2.3620409e-01],
        [ 1.8710844e-02, -7.5129438e-03, -2.3711197e-02, ...,
         -3.0127564e-02, -2.6817219e-03,  2.4575405e-02]]], dtype=float32), '2597': array([[[[ 3.0554870e-01, -9.1517055e-01, -6.0641730e-01, ...,
           6.2951297e-01,  3.6248922e-01, -1.9329900e-01],
         [-1.8615250e-01, -1.6122401e-01, -1.0600333e+00, ...,
           4.2223525e-01,  3.1475547e-01, -7.1115419e-03],
         [ 4.3801913e-01,  5.4728106e-02, -3.5711625e-01, ...,
           6.1627734e-01,  