# Загрузка библиотек

In [1]:
import torch
import numpy as np
from ultralytics import YOLO
import os
import time
from PIL import Image
import onnxruntime as onnx
from pathlib import Path
from IPython.display import clear_output
from tqdm import tqdm
import onnxruntime as ort
import pandas as pd

# Загрузка моделей и данных

In [2]:
path_models = Path('models')

model_terminal_od_pt = YOLO(path_models / 'model_terminal_od.pt')
model_defect_od_pt = YOLO(path_models / 'model_defect_od_crop.pt')

In [3]:
model_defect_od_onnx_fixed = ort.InferenceSession(
    path_models / 'model_defect_od_crop.onnx', 
    providers=['CPUExecutionProvider']
)
model_terminal_od_onnx_fixed = ort.InferenceSession(
    path_models / 'model_terminal_od.onnx', 
    providers=['CPUExecutionProvider']
)

In [4]:
path_filenames = Path('samples')
filenames = [path_filenames / x for x in os.listdir(path_filenames)]
print(f'Количество семплов: {len(filenames)}')

Количество семплов: 273


In [5]:
# models heat-up
temp = model_terminal_od_pt(filenames[0], imgsz=640, verbose=False)
temp = model_defect_od_pt(filenames[0], imgsz=1280, verbose=False)
del temp

Loading models\model_terminal_od.onnx for ONNX Runtime inference...
Loading models\model_defect_od_crop.onnx for ONNX Runtime inference...


# Options comparison

In [6]:
n_iters = 200
df_speed_comparison = pd.DataFrame()

## Pytorch model inference

In [7]:
duration_pt = [0] * n_iters

In [8]:
%%time
for i in tqdm(range(n_iters)):
    start = time.time()
    sample_file = filenames[i]
    model_terminal_od_pt(sample_file, imgsz=640, verbose=False)
    end = time.time()
    duration_pt[i] = end - start

100%|██████████| 200/200 [00:55<00:00,  3.64it/s]

CPU times: total: 18.2 s
Wall time: 55 s





In [9]:
df_speed_comparison['pt'] = duration_pt
df_speed_comparison.describe()

Unnamed: 0,pt
count,200.0
mean,0.274277
std,0.020276
min,0.231703
25%,0.264719
50%,0.273902
75%,0.285637
max,0.333182


## ONNX model inference

In [11]:
def iou(box1,box2):
    return intersection(box1,box2)/union(box1,box2)

def union(box1,box2):
    box1_x1,box1_y1,box1_x2,box1_y2 = box1[:4]
    box2_x1,box2_y1,box2_x2,box2_y2 = box2[:4]
    box1_area = (box1_x2-box1_x1)*(box1_y2-box1_y1)
    box2_area = (box2_x2-box2_x1)*(box2_y2-box2_y1)
    return box1_area + box2_area - intersection(box1,box2)

def intersection(box1,box2):
    box1_x1,box1_y1,box1_x2,box1_y2 = box1[:4]
    box2_x1,box2_y1,box2_x2,box2_y2 = box2[:4]
    x1 = max(box1_x1,box2_x1)
    y1 = max(box1_y1,box2_y1)
    x2 = min(box1_x2,box2_x2)
    y2 = min(box1_y2,box2_y2)
    return (x2-x1)*(y2-y1)

In [13]:
duration_onnx = [0] * n_iters

In [14]:
%%time
for i in tqdm(range(n_iters)):
    start = time.time()
    sample_file = filenames[i]
    img = Image.open(
        sample_file
    )
    img_width, img_height = img.size
    img_terminal = img.resize((640, 640)).convert("RGB")
    img_terminal = (np.array(img_terminal).transpose(2, 0, 1).reshape(1, 3, 640, 640)/255.0).astype(np.float32)

    outputs = model_terminal_od_onnx_fixed.run(
        ["output0"], {"images":img_terminal}
    )
    output = outputs[0][0].transpose()
    filtered_rows = output[
        output[:, 4:].max(axis=1) > 0.5
    ]
    filtered_rows = filtered_rows[
        filtered_rows[:, 4].argsort()
    ]
    x1s = (filtered_rows[:, 0] - filtered_rows[:, 2]/2) / 640 * img_width
    y1s = (filtered_rows[:, 1] - filtered_rows[:, 3]/2) / 640 * img_height
    x2s = (filtered_rows[:, 0] + filtered_rows[:, 2]/2) / 640 * img_width
    y2s = (filtered_rows[:, 1] + filtered_rows[:, 3]/2) / 640 * img_height
    boxes = np.array([x1s, y1s, x2s, y2s]).T
    result = []
    while len(boxes)>0:
        result.append(boxes[0])
        boxes = [box for box in boxes if iou(box, boxes[0])<0.7]
    end = time.time()
    duration_onnx[i] = end - start

100%|██████████| 200/200 [00:47<00:00,  4.21it/s]

CPU times: total: 9min 24s
Wall time: 47.5 s





In [15]:
df_speed_comparison['onnx'] = duration_onnx
df_speed_comparison.describe()

Unnamed: 0,pt,onnx
count,200.0,200.0
mean,0.274277,0.23641
std,0.020276,0.027227
min,0.231703,0.16663
25%,0.264719,0.216892
50%,0.273902,0.236424
75%,0.285637,0.250223
max,0.333182,0.452277


## ONNX + numpy (impr, boxes)

In [16]:
duration_onnx_impr = [0] * n_iters

In [17]:
%%time
for i in tqdm(range(n_iters)):
    start = time.time()
    sample_file = filenames[i]
    img = Image.open(
        sample_file
    )
    img_width, img_height = img.size
    img_terminal = img.resize((640, 640)).convert("RGB")
    img_terminal = (np.array(img_terminal).transpose(2, 0, 1).reshape(1, 3, 640, 640)/255.0).astype(np.float32)
    outputs = model_terminal_od_onnx_fixed.run(
        ["output0"], {"images":img_terminal}
    )
    output = outputs[0][0].transpose()
    filtered_rows = output[
        output[:, 4:].max(axis=1) > 0.5
    ]
    filtered_rows = filtered_rows[
        filtered_rows[:, 4].argsort()
    ]
    x1s = (filtered_rows[:, 0] - filtered_rows[:, 2]/2) / 640 * img_width
    y1s = (filtered_rows[:, 1] - filtered_rows[:, 3]/2) / 640 * img_height
    x2s = (filtered_rows[:, 0] + filtered_rows[:, 2]/2) / 640 * img_width
    y2s = (filtered_rows[:, 1] + filtered_rows[:, 3]/2) / 640 * img_height
    boxes = np.array([x1s, y1s, x2s, y2s]).T
    
    result = []
    while len(boxes)>0:
        result.append(boxes[0])
        box2_x1, box2_y1, box2_x2, box2_y2 = boxes[0]
        box1_x1 = boxes[:, 0]
        box1_y1 = boxes[:, 1]
        box1_x2 = boxes[:, 2]
        box1_y2 = boxes[:, 3]
        x1 = np.maximum(box1_x1, box2_x1)
        y1 = np.maximum(box1_y1, box2_y1)
        x2 = np.minimum(box1_x2, box2_x2)
        y2 = np.minimum(box1_y2, box2_y2)
        intersec = (x2-x1)*(y2-y1)
        box1_area = (box1_x2-box1_x1)*(box1_y2-box1_y1)
        box2_area = (box2_x2-box2_x1)*(box2_y2-box2_y1)
        uni = box1_area + box2_area - intersec
        metric = intersec / uni
        boxes = boxes[metric<0.7]
    end = time.time()
    duration_onnx_impr[i] = end - start

100%|██████████| 200/200 [00:48<00:00,  4.13it/s]

CPU times: total: 9min 23s
Wall time: 48.4 s





In [18]:
df_speed_comparison['onnx_impr'] = duration_onnx_impr
df_speed_comparison.describe()

Unnamed: 0,pt,onnx,onnx_impr
count,200.0,200.0,200.0
mean,0.274277,0.23641,0.240396
std,0.020276,0.027227,0.022078
min,0.231703,0.16663,0.17453
25%,0.264719,0.216892,0.2243
50%,0.273902,0.236424,0.246208
75%,0.285637,0.250223,0.254021
max,0.333182,0.452277,0.315783


Стандартный инференс через onnx оказался самым быстрым. Попробуем ускорить функции обработки numpy

## ONNX + jax

In [19]:
import jax.numpy as jnp
from jax import jit

@jit
def get_boxes(filtered_rows, img_width, img_height):
    x1s = (filtered_rows[:, 0] - filtered_rows[:, 2]/2) / 640 * img_width
    y1s = (filtered_rows[:, 1] - filtered_rows[:, 3]/2) / 640 * img_height
    x2s = (filtered_rows[:, 0] + filtered_rows[:, 2]/2) / 640 * img_width
    y2s = (filtered_rows[:, 1] + filtered_rows[:, 3]/2) / 640 * img_height
    boxes = jnp.array([x1s, y1s, x2s, y2s]).T
    return boxes

@jit
def convert_pil_tonumpy(img):
    return (
        img.transpose(2, 0, 1).reshape(1, 3, 640, 640)/255.0
    ).astype(np.float32)

In [20]:
duration_onnx_jax = [0] * n_iters

In [21]:
%%time
for i in tqdm(range(n_iters)):
    start = time.time()
    sample_file = filenames[i]
    img = Image.open(
        sample_file
    )
    img_width, img_height = img.size
    img_terminal = img.resize((640, 640)).convert("RGB")
    
    img_terminal = (np.array(img_terminal).transpose(2, 0, 1).reshape(1, 3, 640, 640)/255.0).astype(np.float32)
    outputs = model_terminal_od_onnx_fixed.run(
        ["output0"], {"images":img_terminal}
    )
    output = outputs[0][0].transpose()
    filtered_rows = output[
        output[:, 4:].max(axis=1) > 0.5
    ]
    filtered_rows = filtered_rows[
        filtered_rows[:, 4].argsort()
    ]
    boxes = get_boxes(filtered_rows, img_width, img_height)
    result = []
    while len(boxes)>0:
        result.append(boxes[0])
        boxes = [box for box in boxes if iou(box, boxes[0])<0.7]
    end = time.time()
    duration_onnx_jax[i] = end - start

100%|██████████| 200/200 [00:50<00:00,  3.98it/s]

CPU times: total: 9min
Wall time: 50.2 s





In [22]:
df_speed_comparison['onnx_jax'] = duration_onnx_jax
df_speed_comparison.describe()

Unnamed: 0,pt,onnx,onnx_impr,onnx_jax
count,200.0,200.0,200.0,200.0
mean,0.274277,0.23641,0.240396,0.249841
std,0.020276,0.027227,0.022078,0.030698
min,0.231703,0.16663,0.17453,0.182125
25%,0.264719,0.216892,0.2243,0.232339
50%,0.273902,0.236424,0.246208,0.252049
75%,0.285637,0.250223,0.254021,0.266373
max,0.333182,0.452277,0.315783,0.383943


## ONNX + jax + jax image processing

In [23]:
duration_onnx_jax_img_preproc = [0] * n_iters

In [24]:
%%time
for i in tqdm(range(n_iters)):
    start = time.time()
    sample_file = filenames[i]
    img = Image.open(
        sample_file
    )
    img_width, img_height = img.size
    img_terminal = img.resize((640, 640)).convert("RGB")
    img_terminal = jnp.array(img_terminal)
    img_terminal = convert_pil_tonumpy(img_terminal)
    outputs = model_terminal_od_onnx_fixed.run(
        ["output0"], {"images":np.array(img_terminal)}
    )
    output = outputs[0][0].transpose()
    filtered_rows = output[
        output[:, 4:].max(axis=1) > 0.5
    ]
    filtered_rows = filtered_rows[
        filtered_rows[:, 4].argsort()
    ]
    boxes = get_boxes(filtered_rows, img_width, img_height)
    result = []
    while len(boxes)>0:
        result.append(boxes[0])
        boxes = [box for box in boxes if iou(box, boxes[0])<0.7]
    end = time.time()
    duration_onnx_jax_img_preproc[i] = end - start

100%|██████████| 200/200 [00:46<00:00,  4.34it/s]

CPU times: total: 8min 32s
Wall time: 46.1 s





In [25]:
df_speed_comparison['onnx_jax_img_prepoc'] = duration_onnx_jax_img_preproc
df_speed_comparison.describe()

Unnamed: 0,pt,onnx,onnx_impr,onnx_jax,onnx_jax_img_prepoc
count,200.0,200.0,200.0,200.0,200.0
mean,0.274277,0.23641,0.240396,0.249841,0.229232
std,0.020276,0.027227,0.022078,0.030698,0.025059
min,0.231703,0.16663,0.17453,0.182125,0.18242
25%,0.264719,0.216892,0.2243,0.232339,0.211945
50%,0.273902,0.236424,0.246208,0.252049,0.231616
75%,0.285637,0.250223,0.254021,0.266373,0.249049
max,0.333182,0.452277,0.315783,0.383943,0.297797


Jaxlib не ставится на сигму :(

## Numba

In [26]:
numba = [0] * n_iters

In [27]:
from numba import njit

@njit
def get_boxes(filtered_rows, img_width, img_height):
    x1s = (filtered_rows[:, 0] - filtered_rows[:, 2]/2) / 640 * img_width
    y1s = (filtered_rows[:, 1] - filtered_rows[:, 3]/2) / 640 * img_height
    x2s = (filtered_rows[:, 0] + filtered_rows[:, 2]/2) / 640 * img_width
    y2s = (filtered_rows[:, 1] + filtered_rows[:, 3]/2) / 640 * img_height
    return x1s, y1s, x2s, y2s

@njit
def convert_pil_tonumpy(img):
    return (img/255.0).astype(np.float32)

In [28]:
%%time
for i in tqdm(range(n_iters)):
    start = time.time()
    sample_file = filenames[i]
    img = Image.open(
        sample_file
    )
    img_width, img_height = img.size
    img_terminal = img.resize((640, 640)).convert("RGB")
    img_terminal = np.array(img_terminal).transpose(2, 0, 1).reshape(1, 3, 640, 640)
    img_terminal = convert_pil_tonumpy(img_terminal)
    outputs = model_terminal_od_onnx_fixed.run(
        ["output0"], {"images": img_terminal}
    )
    output = outputs[0][0].transpose()
    filtered_rows = output[
        output[:, 4:].max(axis=1) > 0.5
    ]
    filtered_rows = filtered_rows[
        filtered_rows[:, 4].argsort()
    ]
    x1s, y1s, x2s, y2s = get_boxes(filtered_rows, img_width, img_height)
    boxes = np.array([x1s, y1s, x2s, y2s]).T
    result = []
    while len(boxes)>0:
        result.append(boxes[0])
        boxes = [box for box in boxes if iou(box, boxes[0])<0.7]
    end = time.time()
    numba[i] = end - start

100%|██████████| 200/200 [00:48<00:00,  4.08it/s]

CPU times: total: 9min 27s
Wall time: 49 s





In [29]:
df_speed_comparison['numba'] = numba
df_speed_comparison.describe()

Unnamed: 0,pt,onnx,onnx_impr,onnx_jax,onnx_jax_img_prepoc,numba
count,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.274277,0.23641,0.240396,0.249841,0.229232,0.24402
std,0.020276,0.027227,0.022078,0.030698,0.025059,0.067842
min,0.231703,0.16663,0.17453,0.182125,0.18242,0.167603
25%,0.264719,0.216892,0.2243,0.232339,0.211945,0.216746
50%,0.273902,0.236424,0.246208,0.252049,0.231616,0.234115
75%,0.285637,0.250223,0.254021,0.266373,0.249049,0.254091
max,0.333182,0.452277,0.315783,0.383943,0.297797,0.995256


## Numba + Numba IOU

In [30]:
numba_iou = [0] * n_iters

In [31]:
@njit
def get_metric(boxes):
    box2_x1, box2_y1, box2_x2, box2_y2 = boxes[0]
    box1_x1 = boxes[:, 0]
    box1_y1 = boxes[:, 1]
    box1_x2 = boxes[:, 2]
    box1_y2 = boxes[:, 3]
    x1 = np.maximum(box1_x1, box2_x1)
    y1 = np.maximum(box1_y1, box2_y1)
    x2 = np.minimum(box1_x2, box2_x2)
    y2 = np.minimum(box1_y2, box2_y2)
    intersec = (x2-x1)*(y2-y1)
    box1_area = (box1_x2-box1_x1)*(box1_y2-box1_y1)
    box2_area = (box2_x2-box2_x1)*(box2_y2-box2_y1)
    uni = box1_area + box2_area - intersec
    metric = intersec / uni
    return metric

In [32]:
%%time
for i in tqdm(range(n_iters)):
    start = time.time()
    sample_file = filenames[i]
    img = Image.open(
        sample_file
    )
    img_width, img_height = img.size
    img_terminal = img.resize((640, 640)).convert("RGB")
    img_terminal = (np.array(img_terminal).transpose(2, 0, 1).reshape(1, 3, 640, 640)/255.0).astype(np.float32)
    outputs = model_terminal_od_onnx_fixed.run(
        ["output0"], {"images":img_terminal}
    )
    output = outputs[0][0].transpose()
    filtered_rows = output[
        output[:, 4:].max(axis=1) > 0.5
    ]
    filtered_rows = filtered_rows[
        filtered_rows[:, 4].argsort()
    ]
    x1s = (filtered_rows[:, 0] - filtered_rows[:, 2]/2) / 640 * img_width
    y1s = (filtered_rows[:, 1] - filtered_rows[:, 3]/2) / 640 * img_height
    x2s = (filtered_rows[:, 0] + filtered_rows[:, 2]/2) / 640 * img_width
    y2s = (filtered_rows[:, 1] + filtered_rows[:, 3]/2) / 640 * img_height
    boxes = np.array([x1s, y1s, x2s, y2s]).T
    result = []
    while len(boxes)>0:
        result.append(boxes[0])
        metric = get_metric(boxes)
        boxes = boxes[metric<0.7]
    end = time.time()
    numba_iou[i] = end - start

100%|██████████| 200/200 [00:48<00:00,  4.09it/s]

CPU times: total: 9min 8s
Wall time: 48.9 s





In [33]:
df_speed_comparison['numba_iou'] = numba_iou
df_speed_comparison.describe()

Unnamed: 0,pt,onnx,onnx_impr,onnx_jax,onnx_jax_img_prepoc,numba,numba_iou
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.274277,0.23641,0.240396,0.249841,0.229232,0.24402,0.243303
std,0.020276,0.027227,0.022078,0.030698,0.025059,0.067842,0.051673
min,0.231703,0.16663,0.17453,0.182125,0.18242,0.167603,0.166496
25%,0.264719,0.216892,0.2243,0.232339,0.211945,0.216746,0.217221
50%,0.273902,0.236424,0.246208,0.252049,0.231616,0.234115,0.245026
75%,0.285637,0.250223,0.254021,0.266373,0.249049,0.254091,0.255152
max,0.333182,0.452277,0.315783,0.383943,0.297797,0.995256,0.748493


In [39]:
print(f'Уменьшение времени в процентах: {100 - (0.236410 / 0.274277 * 100):.4f}%')

Уменьшение времени в процентах: 13.8061%


Стало хуже, лучшее время показывает вариант ONNX

In [32]:
# from PIL import ImageDraw

# img = Image.open(sample_file)
# draw = ImageDraw.Draw(img)

# for box in result:
#     x1,y1,x2,y2 = box
#     draw.rectangle((x1,y1,x2,y2),None,"#00ff00")
# img