In [None]:
import sys
import os
import subprocess
import shlex
import threading
import numpy as np

import cv2

In [None]:
# Constants shared by the detection cells below.
INPUT_WIDTH = 640               # Width of the blob fed to the network (see pre_process)
INPUT_HEIGHT = 640              # Height of the blob fed to the network (see pre_process)
ORIGINAL_WIDTH = 1280           # Width of the raw camera frames read from the ffmpeg pipe
ORIGINAL_HEIGHT = 1280          # Height of the raw camera frames read from the ffmpeg pipe
SCORE_THRESHOLD = 0.5			# Minimum best-class score (max of row[5:]) for a detection to be kept
# IoU-style overlap threshold handed to cv2.dnn.NMSBoxes.
NMS_THRESHOLD = 0.45
CONFIDENCE_THRESHOLD = 0.45		# Minimum objectness (row[4]); also passed to NMSBoxes as its score threshold

# Text parameters used by draw_label (and for the box line thickness in post_process).
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 1

In [None]:
# Load the class names, one per line; post_process indexes this list with the
# predicted class id to build the label text.
# NOTE(review): "coco.names" is resolved relative to the kernel's working
# directory - confirm the notebook is always started from the right folder.
classes = None
with open("coco.names", 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Drawing colours in OpenCV's BGR channel order (as the names indicate).
# BLACK/YELLOW are used by draw_label and BLUE by post_process; RED and WHITE
# are not referenced by the cells visible here.
BLACK  = (0,0,0)
BLUE   = (255,178,50)
YELLOW = (0,255,255)
RED = (0,0,255)
WHITE = (255,255,255)

# One random colour per class. NOTE(review): unseeded, so the colours change
# on every run, and nothing in the visible cells reads ClassColors yet.
ClassColors = np.random.randint(0, 255, size=(len(classes), 3), dtype=np.uint8)

In [None]:
def draw_label(input_image, label, left, top):
    """Render `label` on `input_image` over a filled black background box.

    The box has its top-left corner at (left, top) and is sized from the
    measured text extent plus its baseline. The image is modified in place.
    """
    # Measure the rendered text so the background box fits it exactly.
    (text_width, text_height), baseline = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)

    # Filled background box behind the text.
    box_top_left = (left, top)
    box_bottom_right = (left + text_width, top + text_height + baseline)
    cv2.rectangle(input_image, box_top_left, box_bottom_right, BLACK, cv2.FILLED)

    # putText anchors at the text's bottom-left corner, hence the height offset.
    text_origin = (left, top + text_height)
    cv2.putText(input_image, label, text_origin, FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)


In [None]:
def pre_process(input_image, net):
    """Run one forward pass of `net` on `input_image` and return the raw outputs.

    The image is scaled by 1/255, resized (without cropping) to
    INPUT_WIDTH x INPUT_HEIGHT and has its first and last channels swapped
    before being fed to the network.

    Returns:
        The outputs of all unconnected output layers, as returned by
        ``net.forward``.
    """
    # Build the 4D input blob from the frame.
    blob = cv2.dnn.blobFromImage(
        input_image,
        scalefactor=1/255,
        size=(INPUT_WIDTH, INPUT_HEIGHT),
        mean=[0, 0, 0],
        swapRB=True,
        crop=False,
    )
    net.setInput(blob)

    # Forward through the network and collect every unconnected output layer.
    return net.forward(net.getUnconnectedOutLayersNames())

In [None]:
def post_process(input_image, outputs):
    """Filter raw detections, apply NMS, and draw the surviving boxes.

    Args:
        input_image: Image to draw on (modified in place). Its own height and
            width are used to map network coordinates back to pixels.
        outputs: Network outputs as returned by ``pre_process``. Each row of
            ``outputs[0][0]`` is read as
            ``[cx, cy, w, h, objectness, class_score_0, class_score_1, ...]``
            with the box presumably expressed in network-input pixels
            (INPUT_WIDTH x INPUT_HEIGHT) - confirm against the exported model.

    Returns:
        ``(input_image, boxes, class_ids, confidences, cls_confidences)``.
        The four lists hold every candidate that passed the two score
        thresholds, i.e. they are NOT reduced to the NMS survivors; only the
        drawing is restricted to the NMS result.
    """
    class_ids = []
    confidences = []
    cls_confidences = []
    boxes = []

    # Scale factors from network-input coordinates to the image being drawn
    # on. Derived from the image itself so frames of any size are handled.
    image_height, image_width = input_image.shape[:2]
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    for row in outputs[0][0]:
        # Objectness gate.
        confidence = float(row[4])
        if confidence < CONFIDENCE_THRESHOLD:
            continue

        # Best class and its score.
        classes_scores = row[5:]
        class_id = int(np.argmax(classes_scores))
        cls_confidence = float(classes_scores[class_id])
        if cls_confidence <= SCORE_THRESHOLD:
            continue

        confidences.append(confidence)
        class_ids.append(class_id)
        cls_confidences.append(cls_confidence)

        # Convert centre/size to a top-left/size box in image pixels.
        cx, cy, w, h = row[0], row[1], row[2], row[3]
        left = int((cx - w / 2) * x_factor)
        top = int((cy - h / 2) * y_factor)
        width = int(w * x_factor)
        height = int(h * y_factor)
        boxes.append(np.array([left, top, width, height]))

    # Non-maximum suppression removes overlapping boxes with lower confidence.
    # Skipped when there are no candidates at all.
    if boxes:
        indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
    else:
        indices = ()

    # Depending on the OpenCV version the indices come back flat, as an
    # (N, 1) array, or as an empty tuple; normalise to a flat sequence.
    for i in np.array(indices, dtype=int).reshape(-1):
        i = int(i)
        # Plain Python ints: some OpenCV builds reject NumPy scalars as points.
        left, top, width, height = (int(v) for v in boxes[i])
        cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3*THICKNESS)
        label = "{}:{:.2f}:{:.2f}".format(classes[class_ids[i]], confidences[i], cls_confidences[i])
        draw_label(input_image, label, left, top)

    return input_image, boxes, class_ids, confidences, cls_confidences

In [None]:
# Load the DETR ONNX model into an OpenCV DNN network.
# NOTE(review): the following cell rebinds `modelFile` and `net` to the YOLOv6
# model, so on a top-to-bottom run this network is discarded immediately -
# presumably only one of the two loader cells is meant to be executed; confirm.
# NOTE(review): absolute, machine-specific path.
modelFile = '/home/daniel/Projects/Drone/LocalDetection/detr.onnx'
net = cv2.dnn.readNet(modelFile)

In [None]:
# Load the YOLOv6-L ONNX model; this rebinds `net`, which is the network the
# detection loop further down passes to pre_process.
# NOTE(review): absolute, machine-specific path.
modelFile = '/home/daniel/Public/YOLOv6/deploy/models/yolov6l.onnx'
net = cv2.dnn.readNet(modelFile)

In [None]:
# Ask OpenCV DNN to run inference with the CUDA backend on the CUDA target.
# NOTE(review): presumably requires an OpenCV build with CUDA support - confirm
# what happens on a CPU-only build.
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

In [None]:
# ffmpeg command line that receives the H.264 stream from tcp://gabriel.local:5001
# and writes undecorated rgb24 frames to stdout ("pipe:").
#
# The command is executed as an argument list without a shell, so a shell
# redirection such as "2>/dev/null" would be handed to ffmpeg as a literal
# argument instead of silencing it. ffmpeg's own "-loglevel quiet" is used to
# suppress its diagnostics instead.
recGabriel = (
    "ffmpeg -nostdin -loglevel quiet -probesize 32 -flags low_delay -fflags nobuffer "
    "-codec:v h264_cuvid -r 25 -i tcp://gabriel.local:5001 "
    "-pix_fmt rgb24 -an -vcodec rawvideo -f rawvideo pipe:"
)
recGabriel = shlex.split(recGabriel)

class CameraBufferCleanerThread(threading.Thread):
    """Background reader that keeps only the newest raw frame from a pipe.

    The thread continuously reads fixed-size 3-bytes-per-pixel frames from
    ``camera_process.stdout`` and publishes the most recent one as
    ``last_frame`` (``None`` until the first complete frame arrives), so a
    slower consumer always sees the latest image instead of a backlog.

    Args:
        camera_process: Object whose ``stdout`` supports ``read(n)``, e.g. a
            ``subprocess.Popen`` created with ``stdout=subprocess.PIPE``.
        name: Thread name.
        width: Frame width in pixels; defaults to ORIGINAL_WIDTH.
        height: Frame height in pixels; defaults to ORIGINAL_HEIGHT.

    The thread starts itself on construction and stops at the first short
    read (end of stream or a truncated frame).
    """

    def __init__(self, camera_process, name='camera-buffer-cleaner-thread', width=None, height=None):
        # Daemon thread: a reader blocked on the pipe must never keep the
        # interpreter/kernel alive on shutdown.
        super(CameraBufferCleanerThread, self).__init__(name=name, daemon=True)
        self.camera = camera_process
        self.last_frame = None
        self.frame_width = ORIGINAL_WIDTH if width is None else width
        self.frame_height = ORIGINAL_HEIGHT if height is None else height
        # Size of one frame in bytes (3 bytes per pixel), computed once.
        self.frame_bytes = self.frame_width * self.frame_height * 3
        self.start()

    def run(self):
        """Read frames until the stream ends, publishing each one as `last_frame`."""
        frame_shape = (self.frame_height, self.frame_width, 3)
        while True:
            raw_frame = self.camera.stdout.read(self.frame_bytes)

            # A short read means the stream ended or a frame was truncated.
            if len(raw_frame) != self.frame_bytes:
                print('Error reading frame!!!')
                break

            # View the bytes as an (height, width, 3) uint8 image. The array
            # wraps the immutable bytes object, so it is read-only.
            self.last_frame = np.frombuffer(raw_frame, np.uint8).reshape(frame_shape)

In [None]:
# Launch ffmpeg with its stdout captured as a pipe, then hand the process to
# the reader thread. The thread starts itself in its constructor and keeps
# CameraCleaner.last_frame pointing at the newest frame.
process = subprocess.Popen(recGabriel, stdout=subprocess.PIPE)
CameraCleaner = CameraBufferCleanerThread(process)

In [None]:
# Detection loop: run the network on the newest camera frame and show the
# annotated result until 'q' is pressed in the window.
try:
    while True:
        # Always take the most recent frame published by the reader thread.
        frame = CameraCleaner.last_frame

        if frame is None:
            # No frame has arrived yet. Give up if the reader already stopped,
            # otherwise wait briefly (join returns early if the thread exits).
            if not CameraCleaner.is_alive():
                print('Camera stream ended before any frame was received.')
                break
            CameraCleaner.join(timeout=0.01)
            continue

        # ffmpeg delivers rgb24. Convert once to BGR, which is what OpenCV's
        # drawing/imshow expect and what pre_process assumes when it asks
        # blobFromImage to swap the R and B channels for the network.
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        detections = pre_process(bgr_frame, net)
        img, boxes, cls_ids, confidences, cls_confidences = post_process(bgr_frame, detections)

        cv2.imshow('frame', img)

        # Press Q on keyboard to exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Close the preview window on every exit path, including interrupts.
    cv2.destroyAllWindows()

In [None]:
# Stop ffmpeg and reap it so no zombie process is left behind. Ending the
# process also closes the pipe, which makes the reader thread's next read come
# up short and lets it exit.
process.terminate()
try:
    process.wait(timeout=5)
except subprocess.TimeoutExpired:
    # ffmpeg ignored the terminate request; force it down.
    process.kill()
    process.wait()