## Exporting a convnet using ONNX in PyTorch

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np

# Run on the first CUDA GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Show which device was selected ("cuda:0" on a CUDA machine, "cpu" otherwise).
print(device)

# When True, train_one_epoch prints the mini-batch loss every 100 steps;
# set it to False to suppress that output.
VERBOSE = True

Note the 2-D input_size.  Read the MNIST dataset, resizing the images to 32x32 pixels (the images are rescaled by `transforms.Resize`, not zero-padded).  They are grayscale already, so the input will be 32x32x1.

In [None]:
# ---- Hyperparameters ----

# Data: every image is resized to this (height, width); MNIST has 10 digit classes.
input_size = (32, 32)
num_classes = 10

# Optimisation schedule.
num_epochs = 10
learning_rate = 0.002

# Mini-batch sizes per split (batch_size_val is not referenced by the code shown here).
batch_size_train = batch_size_val = 256
batch_size_test = 1024

# Number of folds for V-fold cross validation (not referenced by the code shown here).
num_folds = 6

# Width exponent of the network: the three convolutional layers of LeNet5
# produce 2**v, 3**v and 5**v activation maps.
v = 4

# Summarise tensors with more than 1000 elements when they are printed.
torch.set_printoptions(threshold=1000)

This initially downloads two datasets, one for training and validation (called train_dataset) and one for test.

In [None]:
# Both splits use the same preprocessing: rescale every image to input_size,
# then convert it to a tensor.
_mnist_transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
])

# download=True fetches the MNIST files into ./data when they are not there yet.
# The training split is used for training (and validation) ...
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=_mnist_transform,
    download=True,
)
# ... and the held-out split for the final test.
test_dataset = dsets.MNIST(
    './data',
    train=False,
    transform=_mnist_transform,
    download=True,
)

In [None]:
# Dataset loaders (handle mini-batching of data)
# Training batches are drawn in a freshly shuffled order on each pass (shuffle=True).
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size_train, shuffle=True) 
# Test batches follow the dataset's own order (shuffle=False).
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size_test, shuffle=False)

A version of the well-known LeNet-5 CNN model architecture.

In [None]:
class LeNet5(nn.Module):
    """A width-parameterised version of the LeNet-5 convolutional network.

    The three convolutional layers produce 2**v, 3**v and 5**v activation
    maps respectively, so the hyperparameter ``v`` scales the width of the
    whole network.

    The fully connected head takes 5**v features, i.e. it expects the
    convolutional stack to reduce each image to a single spatial position.
    That is the case for 1x32x32 inputs:
    32 -> 28 (c1) -> 14 (s2) -> 10 (c3) -> 5 (s4) -> 1 (c5).

    Args:
        v: Width exponent.  The default of 0 gives one activation map per
            convolutional layer.
        num_classes: Number of output scores produced by the last layer.
            Defaults to 10.
    """

    def __init__(self, v=0, num_classes=10):
        super().__init__()
        # Number of activation maps produced by c1, c3 and c5.
        c1_maps, c3_maps, c5_maps = 2**v, 3**v, 5**v

        # Feature extractor: 1 input channel, 5x5 kernels, 2x2 max pooling.
        self.convnet = nn.Sequential(
            nn.Conv2d(1, c1_maps, kernel_size=(5, 5)),  # c1
            nn.ReLU(),  # relu1
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),  # s2
            nn.Conv2d(c1_maps, c3_maps, kernel_size=(5, 5)),  # c3
            nn.ReLU(),  # relu3
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),  # s4
            nn.Conv2d(c3_maps, c5_maps, kernel_size=(5, 5)),  # c5
            nn.ReLU(),  # relu5
        )

        # Classifier head: flattened features -> 84 hidden units -> class scores.
        self.fc = nn.Sequential(
            nn.Linear(c5_maps, 84),  # f6
            nn.ReLU(),  # relu6
            nn.Linear(84, num_classes),  # f7
        )

    def forward(self, input):
        """Return the raw class scores, one row per image in the batch.

        Args:
            input: Batch of single-channel images; 32x32 images give the
                1x1 feature maps the classifier head is sized for.

        Returns:
            Tensor of shape (batch, num_classes); no softmax is applied.
        """
        convout = self.convnet(input)
        # Flatten everything except the batch dimension for the linear layers.
        convout = convout.view(input.size(0), -1)
        output = self.fc(convout)
        return output
        

The following function will train the model for a single epoch and report the training loss.

In [None]:
def train_one_epoch(epoch_num, verbose=VERBOSE):
    """Train the global ``model`` for one pass over ``train_loader``.

    Uses the module-level ``model``, ``criterion`` and ``optimizer`` that
    ``run_training`` sets up.

    Args:
        epoch_num: Zero-based index of the current epoch (only used in the
            progress message).
        verbose: When truthy, print the mini-batch loss every 100 steps.
    """
    # epoch_error() switches the model to eval mode, so make sure it is back
    # in training mode before the weights are updated.
    model.train()
    # len(loader) counts the final, possibly smaller, batch as well.
    steps_per_epoch = len(train_loader)
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward pass
        loss.backward()
        # Optimize
        optimizer.step()
        if verbose and (i + 1) % 100 == 0:
            print('Epoch: [% d/% d], Step: [% d/% d], Loss: %.4f'
                  % (epoch_num + 1, num_epochs, i + 1,
                     steps_per_epoch, loss.item()))

The following function computes the error for one epoch of data.

In [None]:
def epoch_error(loader, length, split='validation'):
    """ Computes the error of the global ``model`` for all data points in a loader.

        The model is switched to evaluation mode and is left in that mode.

        Inputs:
            loader: Pytorch data loader (object)
            length: Number of data points (integer); only used in the printed report
            split: Name of split, typically 'train', 'test', or 'validation' (string)

        Returns:
            error: fraction of misclassified data points (floating point)
    """
    model.eval()
    # Measure the error for the entire loader split.
    total = 0
    incorrect = 0
    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time.
    with torch.no_grad():
        for images, labels in loader:  # One batch at a time!
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # The predicted class is the index of the largest score in each row.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            # .item() keeps the running count a plain Python int.
            incorrect += (predicted != labels).sum().item()

    error = incorrect / total
    print(f'Error of the model on the {length} {split} images: {error:3.1%}')
    return error

This procedure will initialize the model and run a training loop.

This is a 10-class classification problem.  Adam is used for optimization.

In [None]:
def run_training(v):
    """Build a fresh LeNet5 of width ``v`` and train it for ``num_epochs`` epochs.

    The model, loss and optimizer are stored as module-level globals because
    train_one_epoch (and epoch_error, for the model) look them up there.
    """
    global model, criterion, optimizer
    # Start from scratch on every call: new weights, new loss, new Adam state.
    network = LeNet5(v)
    model = network.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    for epoch_index in range(num_epochs):
        train_one_epoch(epoch_index)

In [None]:
# Train LeNet-5, then assess its accuracy on the test and training sets.
run_training(v)
# epoch_error expects the number of data points, not the dataset object itself.
test_accuracy = 1. - epoch_error(test_loader, len(test_dataset), 'test')
train_accuracy = 1. - epoch_error(train_loader, len(train_dataset), 'train')

In [None]:
# Set the model to inference (evaluation) mode ahead of the export.
model.eval()

In [None]:
# Pull a single batch from the test loader; it serves as the example input
# for the export and for the PyTorch / ONNX Runtime comparison.
for images, labels in test_loader:
    batch_of_images = images.to(device)  # one batch
    print(batch_of_images.shape)
    break  # only the first batch is needed

In [None]:
# Reference PyTorch output for the example batch (compared with the
# ONNX Runtime output further down).
torch_out = model(batch_of_images)

# Export the model
torch.onnx.export(model,                     # model being run
                  batch_of_images,           # model input (or a tuple for multiple inputs)
                  "convnet.onnx",   # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX operator set (opset) version to target
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},    # axis 0 (the batch) may vary in length
                                'output' : {0 : 'batch_size'}})

In [None]:
import onnx

# Read the exported file back in ...
onnx_model = onnx.load("convnet.onnx")
# ... and run the ONNX checker on it (it raises if the model fails the check).
onnx.checker.check_model(onnx_model)

Verify that the ONNX runtime and PyTorch models are computing the same values for the network.  Do this by creating an inference session for the model and evaluating it.

In [None]:
import onnxruntime

# Create an ONNX Runtime inference session from the exported model file.
ort_session = onnxruntime.InferenceSession("convnet.onnx")

def to_numpy(tensor):
    """Return the contents of a torch tensor as a NumPy array on the CPU.

    A tensor that tracks gradients is detached from the autograd graph
    first; any other tensor is converted as it is.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()

# compute ONNX Runtime output prediction
# The feed maps the session's first input name to the example batch as a NumPy array.
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(batch_of_images)}
# Passing None for the output names requests all of the model's outputs.
ort_outs = ort_session.run(None, ort_inputs)

# compare ONNX Runtime and PyTorch results
# (raises AssertionError if the two outputs differ by more than the tolerances)
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-02, atol=1e-03)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

In [None]:
# List the working directory (all entries, long format, newest first).
!ls -alt

In [None]:
# Print the exported model as text by decoding its protobuf with protoc.
# NOTE(review): this needs protoc on the PATH and a copy of onnx.proto in the
# working directory -- confirm both are available.
!protoc --decode=onnx.ModelProto onnx.proto < convnet.onnx