In [1]:
!pip install -q ultralytics

In [2]:
!pip install -q onnx onnxruntime

In [24]:
!pip install -q openvino-dev

In [3]:
from ultralytics import YOLO
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
from torchvision import models
from torch.nn.utils import prune
from torch.utils.data import DataLoader
import time
from tqdm.notebook import tqdm
import onnx
import onnxruntime as ort
import os
import numpy as np

In [4]:
model = YOLO('yolov8l.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt to 'yolov8l.pt'...
100%|██████████| 83.7M/83.7M [00:01<00:00, 53.5MB/s]


In [None]:
param_size = 0
for param in model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

size_all_mb = (param_size + buffer_size) / 1024**2
print('model size: {:.3f}MB'.format(size_all_mb))

model size: 166.848MB


In [21]:
inp = torch.randn(1, 3, 640, 640)

num_samples = 100
start_time = time.time()
for _ in tqdm(range(num_samples)):
    output = model(inp / 255)
end_time = time.time()

infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'Avg inference time: {infer_time:.4f} ms')

  0%|          | 0/100 [00:00<?, ?it/s]


0: 640x640 (no detections), 2745.9ms
Speed: 0.0ms preprocess, 2745.9ms inference, 11.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 3518.8ms
Speed: 0.0ms preprocess, 3518.8ms inference, 6.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 3395.1ms
Speed: 0.0ms preprocess, 3395.1ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2671.5ms
Speed: 0.0ms preprocess, 2671.5ms inference, 6.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2702.0ms
Speed: 0.0ms preprocess, 2702.0ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2666.2ms
Speed: 0.0ms preprocess, 2666.2ms inference, 4.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 4207.7ms
Speed: 0.0ms preprocess, 4207.7ms inference, 4.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 2659.3ms
Sp

Avg inference time: 3060.1253 ms


In [None]:
infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'Avg inference time: {infer_time:.4f} ms')

Avg inference time: 622.9927 ms


In [5]:
model.export(format='onnx')

Ultralytics YOLOv8.0.199 🚀 Python-3.10.12 torch-2.0.1+cu118 CPU (Intel Xeon 2.20GHz)
YOLOv8l summary (fused): 268 layers, 43668288 parameters, 0 gradients, 165.2 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8l.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (83.7 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 17...


verbose: False, log level: Level.ERROR



[34m[1mONNX:[0m export success ✅ 6.6s, saved as 'yolov8l.onnx' (166.8 MB)

Export complete (17.2s)
Results saved to [1m/content[0m
Predict:         yolo predict task=detect model=yolov8l.onnx imgsz=640  
Validate:        yolo val task=detect model=yolov8l.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov8l.onnx'

In [6]:
onnx_model = ort.InferenceSession('yolov8l.onnx', providers=['AzureExecutionProvider', 'CPUExecutionProvider'])

In [7]:
file_size_bytes = os.path.getsize('yolov8l.onnx')
file_size_mb = file_size_bytes / (1024 * 1024) # Перевод из байтов в мегабайты

print('model size: {:.3f}MB'.format(file_size_mb))

model size: 166.813MB


In [20]:
inp = torch.randn(1, 3, 640, 640)

num_samples = 100
start_time = time.time()
for _ in tqdm(range(num_samples)):
    output = onnx_model.run(None, {"images": np.array(inp / 255)})
end_time = time.time()

infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'Avg inference time: {infer_time:.4f} ms')

  0%|          | 0/100 [00:00<?, ?it/s]

Avg inference time: 2851.7783 ms


In [22]:
model.export(format='openvino')

Ultralytics YOLOv8.0.199 🚀 Python-3.10.12 torch-2.0.1+cu118 CPU (Intel Xeon 2.20GHz)

[34m[1mPyTorch:[0m starting from 'yolov8l.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (83.7 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 17...


verbose: False, log level: Level.ERROR



[34m[1mONNX:[0m export success ✅ 6.4s, saved as 'yolov8l.onnx' (166.8 MB)
[31m[1mrequirements:[0m Ultralytics requirement ['openvino-dev>=2023.0'] not found, attempting AutoUpdate...
Collecting openvino-dev>=2023.0
  Downloading openvino_dev-2023.1.0-12185-py3-none-any.whl (5.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.8/5.8 MB 26.5 MB/s eta 0:00:00
Collecting addict>=2.4.0 (from openvino-dev>=2023.0)
  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)
Collecting jstyleson>=0.0.2 (from openvino-dev>=2023.0)
  Downloading jstyleson-0.0.2.tar.gz (2.0 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting openvino-telemetry>=2022.1.0 (from openvino-dev>=2023.0)
  Downloading openvino_telemetry-2023.2.0-py3-none-any.whl (22 kB)
Collecting texttable>=1.6.3 (from openvino-dev>=2023.0)
  Downloading texttable-1.7.0-py2.py3-none-any.whl (10 kB)
Collecting openvino==2023.1.0 (from openvino-dev>=2023.0)
  Downloading 

'yolov8l_openvino_model'

In [23]:
file_size_bytes = os.path.getsize('/content/yolov8l_openvino_model/yolov8l.bin')
file_size_mb = file_size_bytes / (1024 * 1024) # Перевод из байтов в мегабайты

print('model size: {:.3f}MB'.format(file_size_mb))

model size: 166.678MB


In [25]:
import openvino as ov

core = ov.Core()
classification_model_xml = "/content/yolov8l_openvino_model/yolov8l.xml"

model = core.read_model(model=classification_model_xml)
compiled_model = core.compile_model(model=model, device_name="CPU")

In [26]:
inp = torch.randn(1, 3, 640, 640)

num_samples = 100
start_time = time.time()
for _ in tqdm(range(num_samples)):
    output = compiled_model(inp / 255)
end_time = time.time()

infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'Avg inference time: {infer_time:.4f} ms')

  0%|          | 0/100 [00:00<?, ?it/s]

Avg inference time: 3372.4768 ms
