In [36]:
import time
import cv2
import torch
from utils.tinyyolov2 import TinyYoloV2Fused
from utils.camera import CameraDisplay, CameraDetectionDisplay
from utils.yolo import nms, filter_boxes
from utils.viz import display_result
from utils.dataloader import num_to_class
from torchvision.transforms.functional import to_tensor
import onnx
import onnxruntime as ort
import numpy as np

In [38]:
# NOTE(review): this install fails in the recorded output below (pip resolves to the
# `torch-tensorrt-0.0.0.post1` sdist and its setup.py errors out), and nothing in the
# visible part of this notebook imports torch_tensorrt. Consider removing this cell or
# installing a matching build outside the notebook.
!pip install torch-tensorrt -f https://github.com/NVIDIA/Torch-TensorRT/releases

Defaulting to user installation because normal site-packages is not writeable
Looking in links: https://github.com/NVIDIA/Torch-TensorRT/releases
Collecting torch-tensorrt
  Using cached torch-tensorrt-0.0.0.post1.tar.gz (9.0 kB)
  Preparing metadata (setup.py) ... [?25lerror
[31m  ERROR: Command errored out with exit status 1:
   command: /usr/bin/python3 -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-install-4l9xwjel/torch-tensorrt_f2f82d6d22fe453196952714f74e2fbc/setup.py'"'"'; __file__='"'"'/tmp/pip-install-4l9xwjel/torch-tensorrt_f2f82d6d22fe453196952714f74e2fbc/setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' egg_info --egg-base /tmp/pip-pip-egg-info-qn7gh6tk
       cwd: /tmp/pip-install-4l9xwjel/torch-tensorrt_f2f82d6d22fe45319695

In [2]:
# Prefer the GPU whenever PyTorch can see one; otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [27]:
# Back-end switch:
#   True  -> build the PyTorch model, (re-)export it to "<load_path>.onnx" and run it with PyTorch
#   False -> run the previously exported "<load_path>.onnx" through ONNX Runtime
run_with_pytorch = False

load_path = "runs/taylor-new/voc_pruned_7_finetuned.pt"

if run_with_pytorch:

    net = TinyYoloV2Fused(num_classes=1)
    # map_location="cpu" lets a checkpoint that was saved from CUDA tensors load on a
    # machine without a GPU; the model is moved to `device` explicitly further down.
    net.load_state_dict(torch.load(load_path, map_location="cpu"), strict=False)
    net.fuse_after_loading_sd()
    net.eval()

    # The export runs on the CPU with a fixed 1x3x320x320 dummy input. The tensor names
    # 'input' / 'output' are the ones the ONNX Runtime branch binds to.
    torch.onnx.export(
        net,
        torch.zeros(1, 3, 320, 320),
        f"{load_path}.onnx",
        opset_version=11,
        input_names=['input'],
        output_names=['output'],
    )

    net.to(device)
    print(net)

else:

    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

    # Requires "<load_path>.onnx" to exist already (written by the PyTorch branch above).
    session = ort.InferenceSession(f"{load_path}.onnx", sess_options, providers=['CUDAExecutionProvider'])
    binding = session.io_binding()

    # Device type used when binding the input buffer in do_inference(). It is derived from
    # `device` (the device the input tensor is moved to) instead of being hard-coded to
    # 'cuda', so the binding stays correct when CUDA is unavailable and `device` is the CPU.
    device_name = device.type

    # Pre-allocated host buffer that ONNX Runtime fills with the 'output' tensor on every run.
    # Shape is (1, 5, 10, 10, 6); presumably (batch, anchors, grid_h, grid_w, 5 + num_classes)
    # for a 320x320 input -- TODO confirm against TinyYoloV2Fused.
    output_orig = torch.empty((1, 5, 10, 10, 5 + 1), dtype=torch.float32, device=torch.device("cpu"))
    binding.bind_output(
        name='output',
        device_type="cpu",
        device_id=0,
        element_type=np.float32,
        shape=output_orig.shape,
        buffer_ptr=output_orig.data_ptr(),
    )

    print(session.get_providers())

['CUDAExecutionProvider', 'CPUExecutionProvider']


In [28]:
def do_inference(net, image):
    """Run the detector on a single frame and return the post-processed boxes.

    Depending on the notebook-level ``run_with_pytorch`` flag the frame is evaluated either
    by the PyTorch model ``net`` or by the ONNX Runtime ``session`` through its IO binding.

    Args:
        net: PyTorch model; only used when ``run_with_pytorch`` is true (callers pass
            ``None`` on the ONNX Runtime path).
        image: Frame in a format accepted by ``to_tensor``.

    Returns:
        The result of ``nms(filter_boxes(raw_output, 0.3), 0.25)``. The two numbers are
        presumably the confidence and IoU thresholds -- see ``utils.yolo`` to confirm.
    """
    # `.contiguous()` is a no-op for an already dense tensor and guarantees that the raw
    # pointer handed to ONNX Runtime below addresses one dense block of memory.
    input_tensor = to_tensor(image).unsqueeze(0).to(device).contiguous()

    if run_with_pytorch:

        # torch.inference_mode() doesn't work here, so no_grad() is used instead.
        with torch.no_grad():
            output = net(input_tensor)
        output = output.cpu()

    else:

        # Describe the buffer with the device the tensor actually lives on rather than a
        # hard-coded name: a CPU pointer bound as 'cuda' (or vice versa) is invalid.
        binding.bind_input(
            name='input',
            device_type=input_tensor.device.type,
            device_id=input_tensor.device.index or 0,
            element_type=np.float32,
            shape=tuple(input_tensor.shape),
            buffer_ptr=input_tensor.data_ptr(),
        )
        # `input_tensor` stays referenced until this function returns, so the bound
        # pointer remains valid for the whole run.
        session.run_with_iobinding(binding)
        # The output was bound once to the pre-allocated `output_orig` buffer.
        output = output_orig

    output = filter_boxes(output, 0.3)
    output = nms(output, 0.25)

    return output

In [29]:
def apply_bboxes(image, output):
    """Draw the detections in ``output`` onto ``image`` (in place) and return ``image``.

    ``output`` is a sequence of tensors that gets stacked along a new leading dimension;
    only the first stacked entry is drawn. Each row is read as
    ``[x_center, y_center, width, height, score, ..., last]`` with the first four values
    scaled by a fixed side length of 320 pixels. Rows whose last value is not ``>= 0``
    are skipped.
    """
    side = 320

    detections = torch.stack(output, dim=0)

    for idx in range(detections.shape[1]):
        box = detections[0, idx]

        # To show only the "person" class, additionally skip rows with int(box[5]) != 14.

        if not (box[-1] >= 0):
            continue

        # Centre/size representation -> top-left corner plus extent, in pixels.
        left = int(box[0]*side - box[2]*side/2)
        top = int(box[1]*side - box[3]*side/2)
        width = int(box[2]*side)
        height = int(box[3]*side)

        cv2.rectangle(image, (left, top), (left + width, top + height), color=(255,0,0), thickness=2)

        # The label is the score with two decimals; prepend num_to_class(int(box[5]))
        # to include the class name as well.
        label = f"{float(box[4]):.2f}"
        cv2.putText(image, label, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 1, (100, 255, 0), 2, cv2.LINE_AA)

    return image

In [30]:
# Timestamp of the previously annotated frame; apply_fps() refreshes it on every call.
# perf_counter() is a monotonic, high-resolution clock intended for measuring intervals,
# so the reading cannot go backwards when the wall clock is adjusted.
now = time.perf_counter()

def apply_fps(image):
    """Overlay the current frame rate on ``image`` (in place) and return ``image``.

    The rate is the truncated reciprocal of the time elapsed since the previous call
    (or since this cell was run, for the first call). The module-level ``now`` is
    updated as a side effect.
    """
    global now

    # Sample the clock once so that the time spent formatting is not lost between
    # the measurement and the update of `now`.
    current = time.perf_counter()
    elapsed = current - now
    now = current

    # Two calls can land on the same clock tick; show 0 instead of dividing by zero.
    fps = int(1 / elapsed) if elapsed > 0 else 0

    cv2.putText(image, f"{fps}fps", (2, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (100, 255, 0), 2, cv2.LINE_AA)

    return image

In [31]:
def callback(net, image):
    """Per-frame hook: crop, detect, then draw boxes and the FPS counter.

    The frame is cut down to its top-left region of at most 320x320 pixels before
    inference. ``net`` is forwarded to ``do_inference`` only when ``run_with_pytorch``
    is true; otherwise ``None`` is passed. Returns the annotated crop.
    """
    crop = image[:320, :320, :]
    model = net if run_with_pytorch else None
    detections = do_inference(model, crop)

    return apply_fps(apply_bboxes(crop, detections))

In [32]:
# The display only receives the PyTorch model on the PyTorch path; on the ONNX Runtime
# path there is no `net`, so None is passed and `callback` uses the ONNX session instead.
cam = CameraDetectionDisplay(None if not run_with_pytorch else net, callback)

Initializing camera...


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x0…

In [33]:
# Start the camera display; presumably this begins feeding captured frames through
# `callback` -- see utils.camera to confirm.
cam.start()

In [34]:
# The camera should always be stopped and released before a new camera is instantiated
# (i.e. before calling CameraDisplay(callback) / CameraDetectionDisplay(...) again).
cam.stop()
cam.release()

Camera released
