In [29]:
import os
import time

import numpy as np
import onnx
import onnxruntime as ort
from openvino.tools import ovc
from openvino.runtime import serialize, Core

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader
from torchsummary import summary

In [2]:
# Hyperparameters; BATCH_SIZE is also the batch size of both data loaders below.
EPOCHS = 20
BATCH_SIZE = 128
LEARNING_RATE = 1e-3

# Use the first CUDA GPU when PyTorch can see one, otherwise run on the CPU.
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Load data

In [3]:
# Per-channel normalization statistics shared by the training and validation pipelines.
NORM_MEAN = [0.4914, 0.4822, 0.4465]
NORM_STD = [0.2023, 0.1994, 0.2010]

# Training pipeline: random crop (with 4px padding) and horizontal flip for
# augmentation, then tensor conversion and normalization.
train_transforms = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Validation pipeline: no augmentation, only tensor conversion and normalization.
val_transforms = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

In [4]:
# CIFAR-10 train/test splits stored under ./data (download=True is passed for both).
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=train_transforms,
)
val_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=val_transforms,
)

# Only the training loader shuffles; validation keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=8)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=8)

Files already downloaded and verified
Files already downloaded and verified


# Init model

In [5]:
class CNN(nn.Module):
    """Convolutional image classifier: four conv blocks followed by a three-layer linear head.

    Every block ends in a 2x2 max-pool, so a 3x32x32 input reaches the head as a
    512x2x2 feature map, which is the size the first linear layer expects.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Feature extractor with 64, 128, 256 and 512 output channels per block.
        self.block1 = self._make_conv_block(3, 64)
        self.block2 = self._make_conv_block(64, 128)
        self.block3 = self._make_conv_block(128, 256)
        self.block4 = self._make_conv_block(256, 512)

        # Head: flattened 512x2x2 feature map -> 2048 -> 1024 -> num_classes.
        flat_features = 512 * 2 * 2
        self.classifier = nn.Sequential(
            nn.Linear(flat_features, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes),
        )

    def _make_conv_block(self, in_ch, out_ch):
        """Build two 3x3 Conv -> BatchNorm -> ReLU stages followed by a 2x2 max-pool.

        Args:
            in_ch: Channels entering the block.
            out_ch: Channels produced by both convolutions.

        Returns:
            An ``nn.Sequential`` holding the seven layers in order.
        """
        layers = []
        # The first stage maps in_ch -> out_ch, the second keeps out_ch.
        for stage_in in (in_ch, out_ch):
            layers += [
                nn.Conv2d(stage_in, out_ch, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
            ]
        layers.append(nn.MaxPool2d(kernel_size=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the conv blocks, flatten per sample, and return the class logits."""
        for block in (self.block1, self.block2, self.block3, self.block4):
            x = block(x)

        # Collapse everything except the batch dimension before the linear head.
        flat = x.view(x.size(0), -1)
        return self.classifier(flat)

# Build the network on DEVICE and restore the trained weights from ./model.pt.
model = CNN(num_classes=10).to(DEVICE)
# map_location remaps the checkpoint tensors onto DEVICE, so weights that were
# saved from a GPU still load on a CPU-only machine.
state_dict = torch.load('./model.pt', map_location=DEVICE, weights_only=True)
model.load_state_dict(state_dict)
# This notebook only runs inference: switch BatchNorm to its running statistics.
model.eval()

# NOTE(review): the captured output lists parameter counts only (no output
# shapes) — confirm that `input_size` is the keyword this summary() expects.
_ = summary(model, input_size=(BATCH_SIZE, 3, 32, 32), device=DEVICE, depth=4)

Layer (type:depth-idx)                   Param #
├─Sequential: 1-1                        --
|    └─Conv2d: 2-1                       1,728
|    └─BatchNorm2d: 2-2                  128
|    └─ReLU: 2-3                         --
|    └─Conv2d: 2-4                       36,864
|    └─BatchNorm2d: 2-5                  128
|    └─ReLU: 2-6                         --
|    └─MaxPool2d: 2-7                    --
├─Sequential: 1-2                        --
|    └─Conv2d: 2-8                       73,728
|    └─BatchNorm2d: 2-9                  256
|    └─ReLU: 2-10                        --
|    └─Conv2d: 2-11                      147,456
|    └─BatchNorm2d: 2-12                 256
|    └─ReLU: 2-13                        --
|    └─MaxPool2d: 2-14                   --
├─Sequential: 1-3                        --
|    └─Conv2d: 2-15                      294,912
|    └─BatchNorm2d: 2-16                 512
|    └─ReLU: 2-17                        --
|    └─Conv2d: 2-18                      589,

# Convert

## ONNX

In [6]:
# Export the PyTorch model to model.onnx using a single random 3x32x32 image
# as the example input. Axis 0 of both input and output is declared dynamic
# under the name "batch_size".
dummy_input = torch.randn(1, 3, 32, 32).to(DEVICE)
torch.onnx.export(
    model,
    dummy_input,
    'model.onnx',
    export_params=True,
    opset_version=14,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={
        "input": {0: "batch_size"},
        "output": {0: "batch_size"},
    },
)

## OpenVINO

In [10]:
# Target paths for the serialized OpenVINO model.
openvino_xml_path = "openvino_model/model.xml"
openvino_bin_path = "openvino_model/model.bin"

# Convert the exported ONNX file and write it into ./openvino_model.
ov_model = ovc.convert_model('model.onnx')
os.makedirs("openvino_model", exist_ok=True)
serialize(ov_model, openvino_xml_path)

# Eval

## ONNX

In [14]:
# Size of the exported ONNX file, converted from bytes to MiB for display.
onnx_size_bytes = os.path.getsize('model.onnx')
onnx_size_bytes / (1024**2)

41.92947006225586

In [18]:
# ONNX Runtime latency on the CPU provider for a single image (batch size 1).
onnx_session = ort.InferenceSession('model.onnx', providers=["CPUExecutionProvider"])

# Take the first image of the first validation batch as the benchmark input.
test_images, test_labels = next(iter(val_loader))
test_images = test_images[:1]
test_images_np = test_images.numpy()
onnx_inputs = {"input": test_images_np}
# Warm-up run so one-time initialization is not part of the measurement.
onnx_out = onnx_session.run(["output"], onnx_inputs)[0]

num_samples = 100
# perf_counter() is a monotonic high-resolution clock meant for measuring short
# durations; time.time() is wall-clock time and can jump or have coarse resolution.
start_time = time.perf_counter()
for _ in range(num_samples):
    onnx_out = onnx_session.run(["output"], onnx_inputs)[0]
end_time = time.perf_counter()

# Mean latency per run, in milliseconds.
infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'CPU Avg inference time: {infer_time:.4f} ms')

CPU Avg inference time: 1.2587 ms


In [19]:
# ONNX Runtime latency on the CUDA provider for a single image (batch size 1).
# CPU is listed explicitly as the fallback provider.
onnx_session = ort.InferenceSession(
    'model.onnx',
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
# ONNX Runtime falls back to CPU when CUDA cannot be used; detect that so the
# reported number is labelled with the device that actually ran.
cuda_active = "CUDAExecutionProvider" in onnx_session.get_providers()
if not cuda_active:
    print('WARNING: CUDAExecutionProvider is not available, measuring on CPU instead')

# Take the first image of the first validation batch as the benchmark input.
test_images, test_labels = next(iter(val_loader))
test_images = test_images[:1]
test_images_np = test_images.numpy()
onnx_inputs = {"input": test_images_np}
# Warm-up run so one-time initialization is not part of the measurement.
onnx_out = onnx_session.run(["output"], onnx_inputs)[0]

num_samples = 100
# perf_counter() is a monotonic high-resolution clock meant for measuring short durations.
start_time = time.perf_counter()
for _ in range(num_samples):
    onnx_out = onnx_session.run(["output"], onnx_inputs)[0]
end_time = time.perf_counter()

# Mean latency per run, in milliseconds. The input is a NumPy array, so each
# run is fed from host memory.
infer_time = ((end_time - start_time) / num_samples) * 1000
device_label = "GPU" if cuda_active else "CPU"
print(f'{device_label} Avg inference time: {infer_time:.4f} ms')

CPU Avg inference time: 0.4011 ms


In [43]:
# Top-1 accuracy of the ONNX model over the full validation loader, using the
# most recently created `onnx_session`.
correct, total = 0, 0
for images, labels in val_loader:
    onnx_inputs = {"input": np.array(images)}
    outputs = onnx_session.run(["output"], onnx_inputs)[0]
    # Index of the largest logit per row is the predicted class.
    predicted = torch.tensor(outputs).max(dim=1).indices
    correct += (predicted == labels).sum().item()
    total += labels.size(0)

accuracy = 100.0 * correct / total
print("Accuracy:", accuracy)

Accuracy: 88.05


## OpenVINO

In [41]:
# Combined size in MiB of every file inside ./openvino_model.
ov_dir = './openvino_model'
sum(os.path.getsize(f'{ov_dir}/{fn}') / (1024**2) for fn in os.listdir(ov_dir))

41.95438098907471

In [42]:
# OpenVINO latency on CPU for a single image (batch size 1).
core = Core()
# Bind the loaded network to `ov_model`, not `model`: `model` is the PyTorch
# network, and rebinding it here would break a re-run of the ONNX export cell.
ov_model = core.read_model('openvino_model/model.xml')
compiled_model = core.compile_model(ov_model, "CPU")
output_any_name = compiled_model.outputs[0]

# Reuse the single-image NumPy input prepared in the ONNX benchmark cell.
test_images_ov = test_images_np.astype(np.float32)
# Warm-up run so one-time initialization is not part of the measurement.
res_ov = compiled_model([test_images_ov])[output_any_name]

num_samples = 100
# perf_counter() is a monotonic high-resolution clock meant for measuring short durations.
start_time = time.perf_counter()
for _ in range(num_samples):
    ov_out = compiled_model([test_images_ov])[output_any_name]
end_time = time.perf_counter()

# Mean latency per run, in milliseconds.
infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'CPU Avg inference time: {infer_time:.4f} ms')

CPU Avg inference time: 1.3848 ms


In [50]:
# Top-1 accuracy of the compiled OpenVINO model over the full validation loader.
correct, total = 0, 0
for images, labels in val_loader:
    batch = images.cpu().numpy().astype(np.float32)
    outputs = compiled_model([batch])[output_any_name]
    # Index of the largest logit per row is the predicted class.
    predicted = torch.tensor(outputs).max(dim=1).indices
    correct += (predicted == labels).sum().item()
    total += labels.size(0)

accuracy = 100.0 * correct / total
print("Accuracy:", accuracy)

Accuracy: 88.05
