In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Importing the tokenizer and model.
tokenizer = AutoTokenizer.from_pretrained("t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-large")

# Model parameters.
param = sum(p.numel() for p in model.parameters())
print("Total Parameters: ", param)

Total Parameters:  737668096


In [2]:
# Creating the inputs for the model with the help of the tokenizer. 
input_ids = tokenizer("Just an example", add_special_tokens=True, return_tensors="pt").input_ids
attn_mask = tokenizer("Just an example", add_special_tokens=True, return_tensors="pt").attention_mask
decoder_input_ids = tokenizer("example continued", add_special_tokens=True, return_tensors="pt").input_ids

print("Input_ids:", input_ids)
print("Attention_mask", attn_mask)
print("Decoder_input_ids", decoder_input_ids)

Input_ids: tensor([[1142,   46,  677,    1]])
Attention_mask tensor([[1, 1, 1, 1]])
Decoder_input_ids tensor([[ 677, 2925,    1]])


In [3]:
# Storing the pytorch model's output in pt_outputs variable.
pt_outputs = model(input_ids, attn_mask, decoder_input_ids)

print("Pytorch's Output: ", pt_outputs)

Pytorch's Output:  Seq2SeqLMOutput(loss=None, logits=tensor([[[-35.0933, -10.8688, -22.6821,  ..., -60.8281, -61.0236, -60.9531],
         [-39.5849, -18.0455, -24.0839,  ..., -64.7345, -65.0839, -65.1017],
         [-40.0805, -15.8571, -24.7450,  ..., -64.1570, -64.3340, -64.2042]]],
       grad_fn=<UnsafeViewBackward>), past_key_values=((tensor([[[[ 1.5357e+00, -6.1100e-01, -5.7725e-01,  ...,  5.0432e-01,
            1.1943e+00,  4.5772e-01],
          [ 1.3150e+00,  8.0343e-01,  1.5736e-01,  ..., -1.9693e-01,
            2.2044e-01, -2.9261e-01],
          [ 1.8306e-01, -1.4531e+00, -1.4290e-01,  ...,  1.1862e+00,
            5.8147e-01,  1.7820e+00]],

         [[-8.4602e-01,  7.1135e-01,  1.3015e+00,  ...,  6.5264e-01,
            7.9320e-02, -9.5181e-02],
          [ 1.1592e+00, -1.0552e-01, -1.2651e-01,  ..., -1.2981e-03,
           -4.7836e-02, -2.4157e-01],
          [-2.3674e-01, -8.4556e-01, -9.8635e-01,  ..., -6.6309e-02,
           -8.2804e-01,  2.0766e-01]],

         [[-

In [5]:
# Exporting from torch to onnx fails because this model
# when converted to onnx is bigger than 2 gb.
torch.onnx.export(model,
                 (input_ids, attn_mask, decoder_input_ids),
                 "t5-large.onnx",
                 input_names=["input_ids", "attn_mask", "decoder_input_ids"],
                 output_names=["outputs"],
                 opset_version=12)

  if torch.isinf(hidden_states).any():
  if torch.isinf(hidden_states).any():
  if causal_mask.shape[1] < attention_mask.shape[1]:
  if torch.isinf(hidden_states).any():


RuntimeError: Exporting model exceed maximum protobuf size of 2GB. Please call torch.onnx.export with use_external_data_format=True.

In [7]:
# To get past the previous file size limit torch.onnx.export provides
# a param named use_external_data_format which when set to true can 
# export models bigger than 2 gb in which stores some of the model
# parameters in external binary files rather than in the single onnx file.
torch.onnx.export(model,
                 (input_ids, attn_mask, decoder_input_ids),
                 "t5-large.onnx",
                 input_names=["input_ids", "attn_mask", "decoder_input_ids"],
                 output_names=["outputs"],
                 opset_version=12,
                 use_external_data_format=True)

In [8]:
# Converting from onnx to openvino fails because of how the
# model was saved in different binary files rather than in
# just one but another reason could just be that pytorch was
# not able to generate a proper graph because of the python values.
!python3 /opt/intel/openvino_2021.2.200/deployment_tools/model_optimizer/mo.py --input_model t5-large.onnx

Model Optimizer arguments:
Common parameters:
	- Path to the Input Model: 	/home/aakash/box_of_ai_tools/Natural_Language_Processing/Machine_Translation/t5-large/t5-large.onnx
	- Path for generated IR: 	/home/aakash/box_of_ai_tools/Natural_Language_Processing/Machine_Translation/t5-large/.
	- IR output name: 	t5-large
	- Log level: 	ERROR
	- Batch: 	Not specified, inherited from the model
	- Input layers: 	Not specified, inherited from the model
	- Output layers: 	Not specified, inherited from the model
	- Input shapes: 	Not specified, inherited from the model
	- Mean values: 	Not specified
	- Scale values: 	Not specified
	- Scale factor: 	Not specified
	- Precision of IR: 	FP32
	- Enable fusing: 	True
	- Enable grouped convolutions fusing: 	True
	- Move mean values to preprocess section: 	None
	- Reverse input channels: 	False
ONNX specific parameters:
Model Optimizer version: 	2021.2.0-1877-176bdf51370-releases/2021/2
  operation = staticmethod(lambda x: np.log(x))
[ ERROR ]  Cannot i