In [1]:
import sys
import torch
# Report the interpreter and PyTorch versions this notebook is running against.
print(f"Python version: {sys.version}, {sys.version_info} ")
print(f"Pytorch version: {torch.__version__} ")

In [2]:
# Print the GPU / driver status reported by nvidia-smi for this runtime.
!nvidia-smi

In [3]:
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-tiny.pt

In [4]:
!python detect.py --weights ./yolov7-tiny.pt --conf 0.25 --img-size 640 --source inference/images

In [5]:
from PIL import Image
# Display bus.jpg from runs/detect/exp (presumably the annotated output of the detect.py run above).
Image.open('runs/detect/exp/bus.jpg')

In [6]:
# Display horses.jpg from runs/detect/exp (presumably the annotated output of the detect.py run above).
Image.open('runs/detect/exp/horses.jpg')

In [7]:
# Display image1.jpg from runs/detect/exp (presumably the annotated output of the detect.py run above).
Image.open('runs/detect/exp/image1.jpg')

In [8]:
# Display image2.jpg from runs/detect/exp (presumably the annotated output of the detect.py run above).
Image.open('runs/detect/exp/image2.jpg')

In [9]:
# Display image3.jpg from runs/detect/exp (presumably the annotated output of the detect.py run above).
Image.open('runs/detect/exp/image3.jpg')

In [10]:
# Display zidane.jpg from runs/detect/exp (presumably the annotated output of the detect.py run above).
Image.open('runs/detect/exp/zidane.jpg')

In [11]:
# Export a temporary ONNX model from the yolov7-tiny weights; it is the input of the
# TensorRT converter used later (which reads it back as ../yolov7-tiny.onnx).
# NOTE(review): the meaning of the individual export.py flags is not visible here —
# confirm against export.py before changing the thresholds or --topk-all.
!python export.py --weights ./yolov7-tiny.pt --grid --end2end --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640 --dynamic-batch
# List the working directory so the exported file(s) can be checked by eye.
!ls

In [12]:
# Download ONNX to TensorRT converter
!git clone https://github.com/triple-Mu/YOLO-TensorRT8.git

In [13]:
# Switch the kernel's working directory into the converter checkout. Later cells rely on
# this: they address the parent checkout with ../ (e.g. ../yolov7-tiny.onnx, ../inference/images).
# NOTE: the path is relative, so running this cell a second time without restarting the
# kernel will try to enter YOLO-TensorRT8/YOLO-TensorRT8.
%cd YOLO-TensorRT8
# List the converter's files.
!ls

In [14]:
# Build the TensorRT engine from the exported ONNX model (-o) and write it to
# ./yolov7-tiny-nms.trt (-e), the file loaded further down as `w`.
# NOTE(review): `--batch-size 1 16 32` is presumably the min/opt/max batch of the
# dynamic-shape profile — confirm against build_engine.py.
!python build_engine.py -o ../yolov7-tiny.onnx -e ./yolov7-tiny-nms.trt --fp16 --batch-size 1 16 32

In [15]:
import cv2
import torch
import random
import time
import numpy as np
import tensorrt as trt
from PIL import Image
from pathlib import Path
from collections import OrderedDict,namedtuple

In [16]:
# Serialized engine to load, and the CUDA device every tensor in this notebook lives on.
w = './yolov7-tiny-nms.trt'
device = torch.device('cuda:0')

# Sample images used as benchmark input (paths are relative to the current directory).
image_paths = ['../inference/images/horses.jpg',
               '../inference/images/bus.jpg',
               '../inference/images/zidane.jpg',
               '../inference/images/image1.jpg',
               '../inference/images/image2.jpg',
               '../inference/images/image3.jpg']

source_images = []
for image_path in image_paths:
    bgr = cv2.imread(image_path)
    # cv2.imread does not raise on a missing/unreadable file, it returns None;
    # fail here with the offending path instead of deep inside cvtColor later.
    if bgr is None:
        raise FileNotFoundError(f'cv2.imread could not read {image_path}')
    source_images.append(bgr)

# Repeat the six images cyclically until there are 32 entries (the largest batch
# benchmarked below). Entries are references to the same six arrays, not copies.
imgList = [source_images[i % len(source_images)] for i in range(32)]

In [17]:
# Infer TensorRT Engine
logger = trt.Logger(trt.Logger.INFO)
# Register the plugin library with this logger before the engine is deserialized.
trt.init_libnvinfer_plugins(logger, namespace="")
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
    model = runtime.deserialize_cuda_engine(f.read())
# deserialize_cuda_engine reports failure by returning None rather than raising;
# surface that here instead of as an AttributeError on the next line.
if model is None:
    raise RuntimeError(f'Failed to deserialize TensorRT engine from {w}')
context = model.create_execution_context()


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
    """Scale `im` to fit inside `new_shape` without distortion, then pad with a constant border.

    Args:
        im: image array; its first two dimensions are read as (height, width).
        new_shape: target (height, width), or a single int for a square target.
        color: border fill value handed to cv2.copyMakeBorder.
        auto: if true, pad only by the remainder modulo `stride` instead of
            padding all the way out to `new_shape`.
        scaleup: if false, the scale factor is capped at 1.0 (never enlarge).
        stride: modulus applied to the padding when `auto` is true.

    Returns:
        Tuple `(image, r, (dw, dh))`: the padded image, the scale factor that was
        applied, and the per-side horizontal / vertical padding (half of the
        total padding, so the values may be fractional).
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Largest uniform scale (new / old) that still fits both dimensions.
    r = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        r = min(r, 1.0)

    # Size after scaling, as (width, height) because that is the order cv2.resize takes.
    new_unpad = (int(round(src_w * r)), int(round(src_h * r)))
    dw = dst_w - new_unpad[0]
    dh = dst_h - new_unpad[1]
    if auto:
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)

    # Split the total padding evenly between the two opposite sides.
    dw, dh = dw / 2, dh / 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 nudges send an odd total's extra pixel to bottom/right.
    pad_top, pad_bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    pad_left, pad_right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, pad_top, pad_bottom, pad_left, pad_right,
                            cv2.BORDER_CONSTANT, value=color)
    return im, r, (dw, dh)

def postprocess(boxes, r, dwdh, max_coord=6400):
    """Map boxes from letterboxed coordinates back to the source image, in place.

    Undoes `letterbox`: subtracts the per-side padding, divides by the scale
    factor, then clamps the result.

    Args:
        boxes: float tensor whose last dimension holds four coordinates laid out
            as (x1, y1, x2, y2). It is modified in place — pass a clone to keep
            the original values.
        r: scale factor returned by `letterbox`.
        dwdh: the `(dw, dh)` per-side padding returned by `letterbox`. Any
            two-element sequence works (tuple, list, ndarray, tensor).
        max_coord: upper clamp applied to every coordinate (lower clamp is 0).
            Defaults to 6400, the bound this function has always used.

    Returns:
        The same `boxes` tensor after the in-place update.
    """
    # Repeat (dw, dh) to line up with the four box columns. Building a plain list
    # of floats first keeps `* 2` a repetition even when dwdh is an ndarray or a
    # tensor, where `dwdh * 2` would double the values instead.
    offsets = torch.tensor([float(v) for v in dwdh] * 2, dtype=boxes.dtype, device=boxes.device)
    boxes -= offsets
    boxes /= r
    return boxes.clip_(0, max_coord)

# Class labels; the drawing cell looks a label up by integer class id (names[c]).
names = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
    'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
    'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
    'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
    'hair drier', 'toothbrush',
]
# One random 3-channel colour (each channel 0..255) per label, used when drawing boxes.
# The module-level RNG is unseeded here, so colours differ from run to run.
colors = {label: [random.randint(0, 255) for _channel in range(3)] for label in names}

In [18]:
# origin_RGB keeps the untouched RGB frames (boxes are drawn on these later);
# resize_data keeps, per image, the network input plus what is needed to undo the letterbox.
origin_RGB = []
resize_data = []
for bgr_frame in imgList:
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    origin_RGB.append(rgb_frame)

    # auto=False: always pad out to the full default 640x640 target.
    padded, ratio, dwdh = letterbox(rgb_frame.copy(), auto=False)

    # HWC -> CHW, add a leading batch axis, and make the memory contiguous.
    chw = padded.transpose((2, 0, 1))
    nchw = np.ascontiguousarray(np.expand_dims(chw, 0))

    # Values stay in 0..255 here; the division by 255 happens when batches are built.
    resize_data.append((nchw.astype(np.float32), ratio, dwdh))

In [19]:
# Lookup from TensorRT data types to the corresponding torch dtypes.
# NOTE(review): no cell visible in this notebook reads DTYPE — getBindings derives
# its dtypes through trt.nptype instead. Confirm whether this table is still needed.
DTYPE = {
    trt.DataType.FLOAT : torch.float32,
    trt.DataType.INT32 : torch.int32,
}

In [20]:
def getBindings(model,context,shape=(1,3,640,640)):
    """Fix the input shape on `context` and allocate one device buffer per engine binding.

    Args:
        model: engine object; queried for `num_bindings`, binding names and dtypes.
        context: execution context created from `model`; binding 0 is given `shape`
            and every binding's resolved shape is then read back from it.
        shape: shape to set on binding 0 (this notebook passes (batch, 3, 640, 640)).

    Returns:
        OrderedDict mapping binding name -> Binding(name, dtype, shape, data, ptr),
        in binding-index order. `dtype` is the numpy dtype from `trt.nptype`,
        `data` is a tensor on the module-level `device`, and `ptr` is its
        address as an int.

    Raises:
        ValueError: if the context rejects `shape`.
    """
    # set_binding_shape reports a rejected shape by returning False. Ignoring it
    # leaves the output shapes unresolved and fails later in a much less obvious way.
    if context.set_binding_shape(0, shape) is False:
        raise ValueError(f'input shape {tuple(shape)} was rejected by the execution context')

    Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
    bindings = OrderedDict()
    for index in range(model.num_bindings):
        name = model.get_binding_name(index)
        dtype = trt.nptype(model.get_binding_dtype(index))
        # Named separately from the `shape` argument so the requested input
        # shape is not overwritten while iterating.
        binding_shape = tuple(context.get_binding_shape(index))
        # Contents are uninitialised; the buffers only provide storage for inference to fill.
        data = torch.from_numpy(np.empty(binding_shape, dtype=np.dtype(dtype))).to(device)
        bindings[name] = Binding(name, dtype, binding_shape, data, int(data.data_ptr()))
    return bindings

In [21]:
# Warm up: run the engine 10 times at batch size 4 before anything is timed.
bindings = getBindings(model,context,(4,3,640,640))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
# The input values are irrelevant for a warm-up, so allocate one random batch
# directly on the GPU instead of building it on the CPU and copying it over on
# every iteration.
tmp = torch.randn(4,3,640,640, device=device)
binding_addrs['images'] = int(tmp.data_ptr())
warmup_ptrs = list(binding_addrs.values())
for _ in range(10):
    context.execute_v2(warmup_ptrs)

In [22]:
# Join the per-image arrays (each already carries a leading batch axis of 1)
# along axis 0 into a single batch array.
np_batch = np.concatenate([data[0] for data in resize_data])
# Cell output: the shape of the stacked batch.
np_batch.shape

In [23]:
# Batch of 1: move the first image to the GPU and scale pixel values to 0..1.
batch_1 = torch.from_numpy(np_batch[0:1]).to(device)/255
bindings = getBindings(model,context,(1,3,640,640))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
# Point the engine's 'images' input at our batch; done before the timer starts
# so only the inference call itself is measured.
binding_addrs['images'] = int(batch_1.data_ptr())
batch_1_ptrs = list(binding_addrs.values())

print("batch==1")
# CUDA work queued by torch (the copy / division above) is asynchronous; wait for
# it to finish so it is not billed to the inference time.
torch.cuda.synchronize(device)
start = time.perf_counter()
context.execute_v2(batch_1_ptrs)
print(f'Cost {time.perf_counter()-start} s')

In [24]:
# Batch of 16: move the first 16 images to the GPU and scale pixel values to 0..1.
batch_16 = torch.from_numpy(np_batch[0:16]).to(device)/255
bindings = getBindings(model,context,(16,3,640,640))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
# Point the engine's 'images' input at our batch; done before the timer starts
# so only the inference call itself is measured.
binding_addrs['images'] = int(batch_16.data_ptr())
batch_16_ptrs = list(binding_addrs.values())

print("batch==16")
# CUDA work queued by torch (the copy / division above) is asynchronous; wait for
# it to finish so it is not billed to the inference time.
torch.cuda.synchronize(device)
start = time.perf_counter()
context.execute_v2(batch_16_ptrs)
print(f'Cost {time.perf_counter()-start} s')

In [25]:
# Batch of 32: move all 32 images to the GPU and scale pixel values to 0..1.
batch_32 = torch.from_numpy(np_batch[0:32]).to(device)/255
bindings = getBindings(model,context,(32,3,640,640))
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
# Point the engine's 'images' input at our batch; done before the timer starts
# so only the inference call itself is measured.
binding_addrs['images'] = int(batch_32.data_ptr())
batch_32_ptrs = list(binding_addrs.values())

print("batch==32")
# CUDA work queued by torch (the copy / division above) is asynchronous; wait for
# it to finish so it is not billed to the inference time.
torch.cuda.synchronize(device)
start = time.perf_counter()
context.execute_v2(batch_32_ptrs)
print(f'Cost {time.perf_counter()-start} s')

In [26]:
# Grab the output buffers of the most recent run (the batch-32 cell above, since
# `bindings` was last assigned there). Only the first 6 images are visualised below.
nums = bindings['num_dets'].data
boxes = bindings['det_boxes'].data
scores = bindings['det_scores'].data
classes = bindings['det_classes'].data
# Cell output: the shape of each output tensor.
nums.shape,boxes.shape,scores.shape,classes.shape

In [27]:
# Draw the detections onto the first six source frames — the only ones displayed below.
# Drawing happens in place on origin_RGB.
for batch,(num,box,score,cls) in enumerate(zip(nums.flatten(),boxes,scores,classes)):
    # `>= 6` stops after indices 0..5; the previous `> 6` also processed a seventh
    # image that is never shown.
    if batch >= 6:
        break
    RGB = origin_RGB[batch]
    ratio,dwdh = resize_data[batch][1:]
    # Keep only the first `num` boxes, on a clone because postprocess works in place,
    # then map them back to source-image pixels.
    box = postprocess(box[:num].clone(),ratio,dwdh).round().int()
    # zip stops at the shortest input, so only the first `num` scores/classes are read.
    for b,s,c in zip(box,score,cls):
        b,s,c = b.tolist(),round(float(s),3),int(c)
        name = names[c]
        color = colors[name]
        label = name + ' ' + str(s)
        # Pass the corners as tuples, the point type cv2 drawing calls expect.
        top_left, bottom_right = tuple(b[:2]), tuple(b[2:])
        cv2.rectangle(RGB,top_left,bottom_right,color,2)
        cv2.putText(RGB,label,(b[0], b[1] - 2),cv2.FONT_HERSHEY_SIMPLEX,0.75,color,thickness=2)

In [28]:
# Show source frame 0 (horses.jpg) with the boxes drawn by the cell above.
Image.fromarray(origin_RGB[0])

In [29]:
# Show source frame 1 (bus.jpg) with the boxes drawn by the cell above.
Image.fromarray(origin_RGB[1])

In [30]:
# Show source frame 2 (zidane.jpg) with the boxes drawn by the cell above.
Image.fromarray(origin_RGB[2])

In [31]:
# Show source frame 3 (image1.jpg) with the boxes drawn by the cell above.
Image.fromarray(origin_RGB[3])

In [32]:
# Show source frame 4 (image2.jpg) with the boxes drawn by the cell above.
Image.fromarray(origin_RGB[4])

In [33]:
# Show source frame 5 (image3.jpg) with the boxes drawn by the cell above.
Image.fromarray(origin_RGB[5])