# Embedded ML Lab - Challenge (Camera example)

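This example notebook shows how to use the camera: it runs the pruned TinyYOLOv2 detector on the live camera stream, draws the detections on each frame, and reports the average frame rate.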

In [1]:
from utils.camera import CameraDisplay
from utils.yolo import nms, filter_boxes
from utils.viz import display_result
import torch
import time
import cv2
import onnxruntime as ort
from models.pruned_my_tinyyolo2 import PrunedMyTinyYoloV2
import tqdm

In [2]:
def dummy(image):
    """Pass-through callback: hand the camera frame back unchanged."""
    return image


# Smoke test: open the camera with the pass-through callback, start and stop
# the stream once, then free the device again.
cam = CameraDisplay(dummy)
try:
    cam.start()
    cam.stop()
finally:
    # Release even if start()/stop() raised, so the camera is not left held
    # when the real pipeline instantiates CameraDisplay later on.
    cam.release()

Initializing camera...


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x0…

Camera released


In [3]:
# Load the pruned weights onto the CPU first so that loading does not depend on
# the device the checkpoint was saved from; the model is moved to the GPU below.
# NOTE(review): torch.load unpickles the file - only load checkpoints that come
# from a trusted source.
sd = torch.load("models/configs/voc_pruned_10.pt", map_location="cpu")
net = PrunedMyTinyYoloV2(num_classes=1)
net.load_state_dict(sd)
net.cuda()
# eval() returns the module, so leaving it as the last expression also prints
# the layer summary as the cell output.
net.eval()

PrunedMyTinyYoloV2(
  (pad): ReflectionPad2d((0, 1, 0, 1))
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 54, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(54, 98, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(98, 193, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6): Conv2d(193, 382, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv7): Conv2d(382, 760, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv8): Conv2d(760, 760, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv9): Conv2d(760, 30, kernel_size=(1, 1), stride=(1, 1))
)

In [4]:
# Warm-up: push a random 1 x 3 x 320 x 320 input through the network five times
# before the camera is started (the recorded run needed about 85 s for this).
torch_input = torch.randn(1, 3, 320, 320).cuda()
# Inference only - do not record an autograd graph for the warm-up passes.
with torch.no_grad():
    for warmup_pass in tqdm.tqdm(range(5)):
        output = net(torch_input)

100%|██████████| 5/5 [01:25<00:00, 17.06s/it]


In [5]:
# Define a callback function (your detection pipeline)
# Make sure to first load all your pipeline code and only at the end init the camera

FRAME_SIZE = 320      # side length (pixels) of the square crop fed to the network
CONF_THRESHOLD = 0.4  # threshold handed to filter_boxes
NMS_THRESHOLD = 0     # threshold handed to nms


def callback(image):
    """Run the detection pipeline on one camera frame and return the frame to display.

    The pipeline is switched on stage by stage, ten frames apart, and a message
    is printed the first time each stage runs:

    * frames 1-9:    the frame is returned untouched
    * from frame 10: crop to the top-left FRAME_SIZE x FRAME_SIZE region
    * from frame 20: convert the crop to the network input tensor
    * from frame 30: forward pass through ``net``
    * from frame 40: ``filter_boxes`` followed by ``nms``
    * from frame 50: draw boxes, scores and the current fps onto the crop

    The fps values of frames 61-200 are collected in ``fpss`` and their mean is
    printed at frame 200.

    The function keeps its state in the notebook globals ``counter`` (frames
    seen), ``now`` (timestamp of the previous processed frame) and ``fpss``
    (collected fps samples); all three must be initialised before the camera
    is started.

    Args:
        image: camera frame as an H x W x 3 array.
            NOTE(review): assumed to be uint8 in BGR channel order and at least
            FRAME_SIZE pixels in both dimensions - confirm against CameraDisplay.

    Returns:
        The untouched frame during the first nine calls, afterwards the
        (possibly annotated) FRAME_SIZE x FRAME_SIZE crop.
    """
    global now, counter, fpss

    counter += 1

    if counter < 10:
        return image
    if counter == 10:
        print("Begin Cropping")

    # Frame rate from the time since the previous processed frame. The guard
    # avoids a ZeroDivisionError when two calls fall on the same clock tick.
    current = time.time()
    elapsed = current - now
    fps = int(1 / elapsed) if elapsed > 0 else 0
    now = current

    image = image[0:FRAME_SIZE, 0:FRAME_SIZE, :]

    if counter < 20:
        return image
    if counter == 20:
        print("Begin Conversion")

    # H x W x 3 (BGR) -> 1 x 3 x H x W (RGB) float tensor: reverse the channel
    # axis, move it to the front and add the batch dimension.
    # NOTE(review): the divisor 256 is kept from the original code - confirm it
    # matches the normalisation used during training.
    torch_image = (
        torch.from_numpy(image)
        .flip(-1)
        .permute(2, 0, 1)
        .unsqueeze(0)
        .contiguous()
        .float()
        / 256
    )

    if counter < 30:
        return image
    if counter == 30:
        print("Begin NN")

    # Run the network on whatever device it lives on. This is inference only,
    # so no autograd graph is recorded.
    device = next(net.parameters()).device
    with torch.no_grad():
        output = net(torch_image.to(device)).cpu()

    if counter < 40:
        return image
    if counter == 40:
        print("Begin Filter")

    # NOTE(review): the previous comment described the output as a
    # 32 x 125 x 10 x 10 tensor, which does not match a single-image batch or
    # the 30-channel last layer of the loaded model - confirm the real shape.
    # filter boxes based on confidence score (class_score*confidence)
    output = filter_boxes(output, CONF_THRESHOLD)
    # filter boxes based on overlap
    output = nms(output, NMS_THRESHOLD)

    if counter < 50:
        return image
    if counter == 50:
        print("Begin Drawing")

    # draw result on camera image
    for boxes in output:
        for box in boxes:
            # box[0:4] are used as centre x, centre y, width and height
            # relative to the crop; box[4] is drawn as a percentage label
            # (presumably the confidence - verify against utils.yolo).
            w = int(box[2] * FRAME_SIZE)
            h = int(box[3] * FRAME_SIZE)
            # convert the centre point to the upper left corner
            x = int(box[0] * FRAME_SIZE - int(w / 2))
            y = int(box[1] * FRAME_SIZE - int(h / 2))
            cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2)
            cv2.putText(image, f"{int(box[4]*100)}", (x, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2, cv2.LINE_AA)

    cv2.putText(image, f"fps={fps}", (2, 25), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (100, 255, 0), 2, cv2.LINE_AA)

    # Collect the fps of frames 61..200 and report their mean once.
    if 60 < counter <= 200:
        fpss.append(fps)
    if counter == 200:
        # Divide by the number of samples actually collected instead of a
        # hard-coded count.
        print("avg fps: ", sum(fpss) / len(fpss))

    return image

In [6]:
# Initialize the camera with the detection callback.
# NOTE: callback() reads the globals counter, now and fpss, which are only
# assigned in the next cell - run that cell before frames are processed.
cam = CameraDisplay(callback)

Initializing camera...


Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x02\x01\x0…

In [7]:
# cam.start() begins the camera stream; from then on the callback is invoked
# asynchronously until cam.stop() is called.
# The globals the callback works with are reset right before starting.
fpss = []
times = [[] for _ in range(5)]
counter = 0
now = time.time()
cam.start()

Begin Cropping
Begin Conversion
Begin NN
Begin Filter
Begin Drawing
avg fps:  11.9


In [8]:
# The camera should always be stopped and released before a new camera is
# instantiated (i.e. before calling CameraDisplay(callback) again)
cam.stop()
cam.release()

Camera released
