In [1]:
import os
import time

import numpy as np
import onnx
import onnxruntime as ort

import torch
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader

In [2]:
BATCH_SIZE = 128

# Load data

In [3]:
train_transforms = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(mean=[0.4914, 0.4822, 0.4465],
                std=[0.2023, 0.1994, 0.2010])
])

val_transforms = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.4914, 0.4822, 0.4465],
                std=[0.2023, 0.1994, 0.2010])
])

In [4]:
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transforms)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=val_transforms)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

Files already downloaded and verified
Files already downloaded and verified


# Optimize model

## onnxoptimizer

In [5]:
!python -m onnxoptimizer "model.onnx" "model_optium.onnx"

## onnxsim

In [6]:
!onnxsim "model_optium.onnx" "model_optium.onnx"

Simplifying[33m...[0m
Finish! Here is the difference:
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
┃[1m [0m[1m          [0m[1m [0m┃[1m [0m[1mOriginal Model[0m[1m [0m┃[1m [0m[1mSimplified Model[0m[1m [0m┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
│ Concat     │ 1              │ 1                │
│ Constant   │ 25             │ 25               │
│ Conv       │ 8              │ 8                │
│ Gather     │ 1              │ 1                │
│ Gemm       │ 3              │ 3                │
│ MaxPool    │ 4              │ 4                │
│ Relu       │ 10             │ 10               │
│ Reshape    │ 1              │ 1                │
│ Shape      │ 1              │ 1                │
│ Unsqueeze  │ 1              │ 1                │
│ Model Size │ 41.9MiB        │ 41.9MiB          │
└────────────┴────────────────┴──────────────────┘


# Eval

In [7]:
os.path.getsize('model_optium.onnx') / (1024**2)

41.931105613708496

In [25]:
onnx_session = ort.InferenceSession('model_optium.onnx', providers=["CPUExecutionProvider"])

test_images, test_labels = next(iter(val_loader))
test_images = test_images[:1]  
test_images_np = test_images.numpy() 
onnx_inputs = {"input": test_images_np}
onnx_out = onnx_session.run(["output"], onnx_inputs)[0]

num_samples = 100
start_time = time.time()
for _ in range(num_samples):
    onnx_out = onnx_session.run(["output"], onnx_inputs)[0]
end_time = time.time()

infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'CPU Avg inference time: {infer_time:.4f} ms')

CPU Avg inference time: 1.2873 ms


In [24]:
onnx_session = ort.InferenceSession('model_optium.onnx', providers=["CUDAExecutionProvider"])

test_images, test_labels = next(iter(val_loader))
test_images = test_images[:1]  
test_images_np = test_images.numpy() 
onnx_inputs = {"input": test_images_np}
onnx_out = onnx_session.run(["output"], onnx_inputs)[0]

num_samples = 100
start_time = time.time()
for _ in range(num_samples):
    onnx_out = onnx_session.run(["output"], onnx_inputs)[0]
end_time = time.time()

infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'CPU Avg inference time: {infer_time:.4f} ms')

CPU Avg inference time: 0.3646 ms


In [10]:
correct = 0
total = 0
for X_, y_ in val_loader:
    onnx_inputs = {"input": np.array(X_)}
    outputs = onnx_session.run(["output"], onnx_inputs)[0]
    _, predicted = torch.max(torch.tensor(outputs), 1)
    correct += (predicted == y_).sum().item()
    total += y_.size(0)

accuracy = 100.0 * correct / total
print("Accuracy:", accuracy)

Accuracy: 88.05
