# openvino vs pytorch

## common things

In [1]:
import sys
import os
from time import time

import cv2
import numpy as np

In [2]:
img_path = './data/root-test.jpg'
test_loops = 100
batch_size = 4

## openvino test

In [3]:
from openvino.inference_engine import IECore

In [4]:
'''
do NOT scale in model since VPU OP is not accurate enough (at least for regression)

convert model with:
python mo.py --input_model score_net.onnx --data_type FP16

for PIL.Image, convert model with:
python mo.py --input_model score_net.onnx --reverse_input_channels --data_type FP16
'''

model_xml = 'score_net.xml'
model_bin = os.path.splitext(model_xml)[0] + '.bin'
#device = 'CPU'
#device = 'MYRIAD'
device = 'MULTI'

In [5]:
# create model
ie = IECore()

print(ie.available_devices)

model = ie.read_network(model_xml, model_bin)

input_layer_name = next(iter(model.inputs))
out_layer_name = next(iter(model.outputs))

dev_count = 0

'''
if device == 'MULTI':
    for dev in ie.available_devices:
        if 'MYRIAD' in dev:
            print(dev_count, dev)
            device = '{}{}{}'.format(device, ':' if dev_count==0 else ',', dev)
            dev_count += 1
print(device)
'''

exec_model = ie.load_network(model, device)

# prepare data
n, c, h, w = model.inputs[input_layer_name].shape
print(model.inputs[input_layer_name].shape)

img = cv2.imread(img_path)
img = cv2.resize(img, (w, h))
img = img.transpose((2, 0, 1))

img = img / 255.

# infer
reqs = []

t0 = cv2.getTickCount()
for i in range(test_loops):
    output = exec_model.infer(inputs={input_layer_name: img})
    '''
    reqs.append(exec_model.start_async(
        request_id=0,
        inputs={input_layer_name: img}
    ))
    
exec_model.wait(num_requests=test_loops)
'''
infer_time = (cv2.getTickCount() - t0) / cv2.getTickFrequency()

print(infer_time)
print(output)

['CPU', 'GNA', 'MYRIAD.1.1-ma2480', 'MYRIAD.1.2-ma2480']
MYRIAD
[1, 3, 224, 224]
0.775584101


In [5]:
# multi device experiment

# create model
ie = IECore()

print(ie.available_devices)

model = ie.read_network(model_xml, model_bin)

input_layer_name = next(iter(model.inputs))
out_layer_name = next(iter(model.outputs))

exec_models = []

if device == 'MULTI':
    for dev in ie.available_devices:
        if 'MYRIAD' in dev:
            exec_models.append(ie.load_network(model, dev))
print(len(exec_models))


# prepare data
n, c, h, w = model.inputs[input_layer_name].shape
print(model.inputs[input_layer_name].shape)

img = cv2.imread(img_path)
img = cv2.resize(img, (w, h))
img = img.transpose((2, 0, 1))

img = img / 255.

reqs = []

# infer
t0 = cv2.getTickCount()

for i in range(test_loops):
    done = False
    
    while not done:
        for m in exec_models:
            rid = m.get_idle_request_id()
            if rid > -1:
                reqs.append(m.start_async(request_id=rid, inputs={input_layer_name: img}))
                done = True
                break

for req in reqs:
    req.wait(-1)
    print(req.outputs)
    
infer_time = (cv2.getTickCount() - t0) / cv2.getTickFrequency()

print(infer_time)

['CPU', 'GNA', 'MYRIAD.1.1-ma2480', 'MYRIAD.1.2-ma2480']
2
[1, 3, 224, 224]
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9145508]], dtype=float32)}
{'Gemm_179': array([[0.9

## pytorch test

In [7]:
import torch
import torchvision
import torchvision.transforms.functional as TF

from resnet import resnet as score_model

In [8]:
score_model_path = './data/score_state_dict.pth'
device_name = 'cuda:0'

W = 224
H = 224

In [9]:
device = torch.device(device_name)
# create model
score_net = score_model.resnet50(num_classes=1)
score_net.load_state_dict(torch.load(score_model_path, map_location=torch.device(device_name)))
score_net = score_net.to(device)
score_net.eval()

# prepare data
img = cv2.imread(img_path)
img = cv2.resize(img, (W, H))
img = TF.to_tensor(img).unsqueeze(0).to(device)

# infer
t0 = cv2.getTickCount()
with torch.no_grad():
    for i in range(test_loops):
        output = score_net(img)
infer_time = (cv2.getTickCount() - t0) / cv2.getTickFrequency()

print(infer_time)

0.182976663


In [10]:
print(output)

tensor([[0.9206]], device='cuda:0')
