In [1]:
import torch
import torchvision
# Display the installed PyTorch version so readers know which release
# produced the outputs recorded in this notebook.
torch.__version__

'1.0.0'

### AlexNet

In [22]:
# Random batch of ten 3x224x224 inputs created directly on the GPU; it is only
# used to drive the model once so the exporter can record the executed operators.
dummy_input = torch.randn((10, 3, 224, 224), device=torch.device('cuda'))

# Pretrained AlexNet from torchvision, moved to the same CUDA device as the input.
torch_model = torchvision.models.alexnet(pretrained=True).to('cuda')

In [23]:
# Names attached to the graph's inputs and outputs in the exported ONNX file:
# the real data input first, followed by AlexNet's 16 learned parameter tensors
# (weight and bias for each of the 5 conv and 3 linear layers).
input_names = [ "actual_input_1" ] + [ "learned_%d" % i for i in range(16) ]
output_names = [ "output1" ]

# Put the model in inference mode (disables Dropout) and compute the reference
# output explicitly, instead of relying on the return value of the private
# torch.onnx._export helper.
torch_model.eval()
torch_out = torch_model(dummy_input)

# Trace the model with the dummy batch and write the ONNX graph to
# "alexnet.onnx" in the current working directory, using the public exporter.
torch.onnx.export(torch_model, dummy_input, "alexnet.onnx", verbose=True, input_names=input_names, output_names=output_names)

graph(%actual_input_1 : Float(10, 3, 224, 224)
      %learned_0 : Float(64, 3, 11, 11)
      %learned_1 : Float(64)
      %learned_2 : Float(192, 64, 5, 5)
      %learned_3 : Float(192)
      %learned_4 : Float(384, 192, 3, 3)
      %learned_5 : Float(384)
      %learned_6 : Float(256, 384, 3, 3)
      %learned_7 : Float(256)
      %learned_8 : Float(256, 256, 3, 3)
      %learned_9 : Float(256)
      %learned_10 : Float(4096, 9216)
      %learned_11 : Float(4096)
      %learned_12 : Float(4096, 4096)
      %learned_13 : Float(4096)
      %learned_14 : Float(1000, 4096)
      %learned_15 : Float(1000)) {
  %17 : Float(10, 64, 55, 55) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[11, 11], pads=[2, 2, 2, 2], strides=[4, 4]](%actual_input_1, %learned_0, %learned_1), scope: AlexNet/Sequential[features]/Conv2d[0]
  %18 : Float(10, 64, 55, 55) = onnx::Relu(%17), scope: AlexNet/Sequential[features]/ReLU[1]
  %19 : Float(10, 64, 27, 27) = onnx::MaxPool[kernel_shape=[3, 3], pads=[0, 0, 

In [26]:
# Show the PyTorch-side output for the dummy batch; it is the reference that the
# Caffe2 result is compared against at the end of the notebook.
torch_out

tensor([[-0.2694, -1.4682, -1.4477,  ..., -1.4013, -1.1519,  1.3629],
        [ 0.0960, -1.4116, -1.3971,  ..., -1.0261, -1.0680,  1.1595],
        [-0.0173, -1.3886, -1.2807,  ..., -1.1756, -1.1435,  1.0807],
        ...,
        [ 0.1755, -1.4784, -1.2024,  ..., -1.1207, -1.0591,  1.4796],
        [ 0.1555, -1.6090, -1.2815,  ..., -0.9031, -1.1223,  1.5993],
        [ 0.2649, -0.9999, -1.3977,  ..., -1.2150, -0.8930,  0.9923]],
       device='cuda:0', grad_fn=<AddmmBackward>)

### Predict with the ONNX Model in Caffe2

In [28]:
import onnx

# Load the ONNX model from the path the export cell wrote it to
# ("alexnet.onnx" in the current working directory), so a fresh
# Restart & Run All reads the file that was just exported.
model = onnx.load("alexnet.onnx")

# Check that the IR is well formed
onnx.checker.check_model(model)

# Print a human readable representation of the graph
# print(onnx.helper.printable_graph(model.graph))

In [29]:
# Caffe2 dependencies (uncomment the line below to install them)
# !pip install -r https://raw.githubusercontent.com/pytorch/pytorch/master/requirements.txt

In [30]:
import caffe2.python.onnx.backend as backend
import numpy as np

In [31]:
# Build a Caffe2 backend representation of the loaded ONNX model, targeting CUDA device 0.
rep = backend.prepare(model, device="CUDA:0")

CUDA operators do not support 64-bit doubles, please use arr.astype(np.float32) or np.int32 for ints. Blob: actual_input_1 type: float64


In [33]:
# Caffe2 CUDA operators reject float64 blobs, so hand the backend a float32
# NumPy copy of the dummy batch (moved off the GPU first).
batch_np = dummy_input.cpu().numpy().astype(np.float32)
outputs = rep.run(batch_np)

In [34]:
# Results are exposed under the output names chosen at export time;
# "output1" is the only name in output_names.
outputs.output1

array([[-0.26941356, -1.4682022 , -1.4477373 , ..., -1.4012641 ,
        -1.1518751 ,  1.3629025 ],
       [ 0.0959611 , -1.4116307 , -1.397087  , ..., -1.026058  ,
        -1.0679855 ,  1.1594914 ],
       [-0.01731079, -1.3886348 , -1.2806541 , ..., -1.1756328 ,
        -1.1434869 ,  1.0807338 ],
       ...,
       [ 0.17552656, -1.4783663 , -1.2023933 , ..., -1.1206607 ,
        -1.0591127 ,  1.4795922 ],
       [ 0.15547907, -1.6090451 , -1.2814711 , ..., -0.90313953,
        -1.1223465 ,  1.5992917 ],
       [ 0.2648559 , -0.9998633 , -1.3977414 , ..., -1.2149727 ,
        -0.8930128 ,  0.99228346]], dtype=float32)

In [36]:
# Verify that the PyTorch and Caffe2 outputs agree up to 3 decimal places.
# The reference tensor is detached from autograd and copied to host memory
# before it is converted to a NumPy array.
reference = torch_out.detach().cpu().numpy()
np.testing.assert_almost_equal(reference, outputs.output1, decimal=3)

### Limitations

- The ONNX exporter is a trace-based exporter, which means that it operates by executing your model once, and exporting the operators which were actually run during this run. This means that if your model is dynamic, e.g., changes behavior depending on input data, the export won’t be accurate. Similarly, a trace is likely to be valid only for a specific input size (which is one reason why we require explicit inputs on tracing). We recommend examining the model trace and making sure the traced operators look reasonable.


- PyTorch and Caffe2 often have implementations of operators with some numeric differences. Depending on model structure, these differences may be negligible, but they can also cause major divergences in behavior (especially on untrained models). In a future release, we plan to allow Caffe2 to call directly to Torch implementations of operators, to help you smooth over these differences when precision is important, and to also document these differences.