In [1]:
import onnxruntime as rt
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import random

In [2]:
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, r, (dw, dh)

DETECTION_NAMES = ['object']
colors = {name:[random.randint(0, 255) for _ in range(3)] for i,name in enumerate(DETECTION_NAMES)}

In [3]:
session = rt.InferenceSession(
    "../data/models/yolov7/yolov7-tiny.onnx", providers=rt.get_available_providers()
)

In [4]:
session.get_inputs()[0].name, session.get_outputs()[0].name

('images', 'output')

In [5]:
img = cv2.imread("../data/input/train_shelf_images/train1.jpg")
pilimage = Image.open("../data/input/train_shelf_images/train1.jpg")

In [6]:
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

image = img.copy()
image, ratio, dwdh = letterbox(image, auto=False)
image = image.transpose((2, 0, 1))
image = np.expand_dims(image, 0)
image = np.ascontiguousarray(image)

im = image.astype(np.float32)
im /= 255
im.shape

(1, 3, 640, 640)

In [7]:
outname = [i.name for i in session.get_outputs()]
outname

inname = [i.name for i in session.get_inputs()]
inname

inp = {inname[0]:im}

In [8]:
outputs = session.run(outname, inp)[0]
outputs[0]

array([  0.        ,  44.407883  , 382.23026   , 106.23451   ,
       460.87308   ,   0.        ,   0.91727257], dtype=float32)

In [9]:
detections = []

In [10]:
for i, (batch_id, x0, y0, x1, y1, cls_id, score) in enumerate(outputs):
    box = np.array([x0, y0, x1, y1])
    box -= np.array(dwdh * 2)
    box /= ratio
    box = box.round().astype(np.int32).tolist()
    cls_id = int(cls_id)
    score = round(float(score), 3)
    name = DETECTION_NAMES[cls_id]
    detections.append({
        'num': i,
        'name': name,
        'score': score,
        'coords': box
    })

In [11]:
detections

[{'num': 0, 'name': 'object', 'score': 0.917, 'coords': [44, 302, 106, 381]},
 {'num': 1, 'name': 'object', 'score': 0.915, 'coords': [168, 204, 229, 286]},
 {'num': 2, 'name': 'object', 'score': 0.914, 'coords': [104, 203, 166, 286]},
 {'num': 3, 'name': 'object', 'score': 0.914, 'coords': [173, 301, 232, 380]},
 {'num': 4, 'name': 'object', 'score': 0.909, 'coords': [233, 200, 292, 289]},
 {'num': 5, 'name': 'object', 'score': 0.909, 'coords': [0, 80, 51, 179]},
 {'num': 6, 'name': 'object', 'score': 0.904, 'coords': [52, 80, 120, 181]},
 {'num': 7, 'name': 'object', 'score': 0.901, 'coords': [110, 299, 169, 380]},
 {'num': 8, 'name': 'object', 'score': 0.895, 'coords': [535, 0, 599, 67]},
 {'num': 9, 'name': 'object', 'score': 0.894, 'coords': [296, 199, 356, 289]},
 {'num': 10, 'name': 'object', 'score': 0.885, 'coords': [426, 203, 487, 287]},
 {'num': 11, 'name': 'object', 'score': 0.884, 'coords': [137, 1, 206, 60]},
 {'num': 12, 'name': 'object', 'score': 0.881, 'coords': [186, 

In [12]:
cropped_images = []
new_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

for i, detection in enumerate(detections):
    coords = detection['coords']
    # box = 
    # image = ori_images[int(batch_id)]
    # box -= np.array(dwdh*2)
    # box /= ratio
    # box = box.round().astype(np.int32).tolist()
    # cls_id = int(cls_id)
    # score = round(float(score),3)
    # name = names[cls_id]
    # color = colors[name]
    # name += ' '+str(score)
    # cv2.rectangle(image,box[:2],box[2:],color,2)
    # cv2.putText(image,name,(box[0], box[1] - 2),cv2.FONT_HERSHEY_SIMPLEX,0.75,[225, 255, 255],thickness=2)
    print(f"{i}:{coords}")
    try:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cropped_images.append(new_img[coords[1]:coords[3], coords[0]:coords[2]])
    except Exception as e:
        print(f"Unable to save detection {i}, with boxes: {box}")
        print(e)
# Image.fromarray(ori_images[0])

0:[44, 302, 106, 381]
1:[168, 204, 229, 286]
2:[104, 203, 166, 286]
3:[173, 301, 232, 380]
4:[233, 200, 292, 289]
5:[0, 80, 51, 179]
6:[52, 80, 120, 181]
7:[110, 299, 169, 380]
8:[535, 0, 599, 67]
9:[296, 199, 356, 289]
10:[426, 203, 487, 287]
11:[137, 1, 206, 60]
12:[186, 79, 252, 183]
13:[300, 302, 359, 381]
14:[61, 2, 137, 58]
15:[254, 89, 319, 182]
16:[120, 78, 185, 184]
17:[358, 204, 429, 286]
18:[172, 411, 234, 481]
19:[470, 0, 533, 64]
20:[-2, 301, 45, 383]
21:[364, 399, 418, 480]
22:[36, 203, 101, 288]
23:[319, 89, 388, 181]
24:[520, 76, 576, 153]
25:[594, 83, 618, 132]
26:[107, 410, 167, 481]
27:[583, 149, 620, 206]
28:[421, 400, 479, 476]
29:[507, 201, 566, 274]
30:[621, 83, 640, 133]
31:[452, 79, 517, 184]
32:[479, 400, 533, 476]
33:[598, 4, 639, 67]
34:[0, 19, 54, 56]
35:[382, 77, 453, 180]
36:[240, 399, 299, 481]
37:[477, 307, 550, 373]
38:[2, 203, 36, 285]
39:[362, 301, 420, 380]
40:[619, 414, 640, 454]
41:[-1, 400, 43, 482]
42:[420, 303, 480, 375]
43:[336, 0, 478, 63]
44

In [13]:
cv2.imshow("test", cropped_images[0])
cv2.waitKey(0)

-1

In [16]:
for i,image in enumerate(cropped_images):
    try:
        # cv2.imshow("test", cropped_images[0])
        # cv2.waitKey(0)
        cv2.imwrite(f"../data/processed_data/detections/image_{i}.jpg", image)
    except Exception:
        print('Could not save image')

Could not save image
Could not save image
Could not save image
Could not save image
Could not save image
Could not save image


In [None]:
ori_images = [img.copy()]
# cropped_images = [


for i,(batch_id,x0,y0,x1,y1,cls_id,score) in enumerate(outputs):
    
    image = ori_images[int(batch_id)]
    box = np.array([x0,y0,x1,y1])
    box -= np.array(dwdh*2)
    box /= ratio
    box = box.round().astype(np.int32).tolist()
    cls_id = int(cls_id)
    score = round(float(score),3)
    name = names[cls_id]
    color = colors[name]
    name += ' '+str(score)
    cv2.rectangle(image,box[:2],box[2:],color,2)
    cv2.putText(image,name,(box[0], box[1] - 2),cv2.FONT_HERSHEY_SIMPLEX,0.75,[225, 255, 255],thickness=2)
    print(f"{i}:{box}")
    try:
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        save_path = f"../data/output/image_{int(i)}_{i}.jpg"
        cv2.imwrite(save_path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR)[box[1]:box[3], box[0]:box[2]])
    except Exception as e:
        print(f"Unable to save detection {i}, with boxes: {box}")
        print(e)
Image.fromarray(ori_images[0])

In [None]:
# [274, -1, 341, 30]
image[-1:30, 273:341]

In [None]:
cv2.imshow("img",img[box[1]:box[3], box[0]:box[2]])
cv2.waitKey(0)

In [None]:
image_bgr[box[1]:box[3], box[0]:box[2]]

In [None]:
detection['x0'], detection['y0'], detection['x1'], detection['x1']