In [None]:
import numpy as np
from pathlib import Path
import shutil
from natsort import natsorted
import cv2
import imutils

In [None]:
def YOLOToCOCOBox(box, imgXY):
    """Convert a normalised YOLO box into a pixel-space COCO box.

    Args:
        box: Sequence ``(xc, yc, w, h)`` holding the box centre and size,
            each expressed as a fraction of the image dimensions.
        imgXY: Sequence ``(imgX, imgY)`` with the image width and height.

    Returns:
        list: ``[x1, y1, w, h]`` where ``(x1, y1)`` is the box corner with the
        smallest coordinates and ``w`` / ``h`` are the box size, all scaled by
        the image dimensions.
    """
    centreX, centreY, boxW, boxH = box
    imgW, imgH = imgXY

    # Shift from the centre to the corner before scaling to pixels.
    cornerX = (centreX - boxW/2)*imgW
    cornerY = (centreY - boxH/2)*imgH
    return [cornerX, cornerY, boxW*imgW, boxH*imgH]

def helper(frameCount, framesWithLabels, labelPaths, frame, displayFlag):
    """Export one labelled video frame and optionally draw its boxes on it.

    When ``frameCount`` appears in ``framesWithLabels`` the matching label file
    is copied into the notebook-level ``labelRootDir`` and ``frame`` is written
    as ``<label stem>.jpg`` into the notebook-level ``imgRootDir``. Frames
    without a label are returned untouched and nothing is written.

    Args:
        frameCount: Index of the current frame in the video.
        framesWithLabels: List of frame indices that have a label file, in the
            same order as ``labelPaths``.
        labelPaths: List of ``Path`` objects pointing at the label files.
        frame: Image array for the current frame.
        displayFlag: When true, the boxes from the label file are drawn on
            ``frame`` (in place) after the clean image has been saved.

    Returns:
        The frame that was passed in (with boxes drawn on it if requested).
    """
    if frameCount not in framesWithLabels:
        return frame

    labelPath = labelPaths[framesWithLabels.index(frameCount)]
    shutil.copy2(labelPath, labelRootDir/labelPath.name)
    # Written before any drawing so the exported image carries no overlay.
    cv2.imwrite(str(imgRootDir/f"{labelPath.stem}.jpg"), frame)

    if displayFlag:
        # Context manager closes the handle; the original leaked it.
        with open(labelPath, "r") as f:
            labelLines = f.read().splitlines()

        # Drop the leading class id of each line; skip blank lines so an empty
        # or whitespace-only label file simply produces no boxes.
        boxes = [[float(num) for num in line.split()[1:]] for line in labelLines if line.strip()]

        for box in boxes:
            # shape[-2::-1] turns (rows, cols, channels) into (cols, rows).
            x, y, w, h = YOLOToCOCOBox(box, frame.shape[-2::-1])
            cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), (0,0,255), 3)
    return frame

# Prepare Data

In [None]:
# Source folder: the video plus one label file per annotated frame.
dirPath = Path("track")
labelPaths = list(dirPath.glob("*.txt"))
# NOTE: str.strip("frame_") trims a *set of characters* from both ends, not a
# prefix. It yields the frame number as long as the remainder of the stem is
# digits only (e.g. "frame_12" -> 12).
framesWithLabels = [int(path.stem.strip("frame_")) for path in labelPaths]

# Destination folders for the extracted frames and their labels.
saveDir = Path("detect")
imgRootDir = saveDir/"images"/"root"
labelRootDir = saveDir/"labels"/"root"
imgRootDir.mkdir(parents=True, exist_ok=True)
labelRootDir.mkdir(parents=True, exist_ok=True)

# Set to True to preview every frame (with boxes) while extracting.
DISPLAY_FLAG = False

vidPath = dirPath/"video.mp4"

frameCount = 0
cap = cv2.VideoCapture(str(vidPath))
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if frameCount in framesWithLabels:
            frame = helper(frameCount, framesWithLabels, labelPaths, frame, DISPLAY_FLAG)

        if DISPLAY_FLAG:
            frame = imutils.resize(frame, height=500)
            cv2.imshow("Ground Truth", frame)
            key = cv2.waitKey(1)
            if key == ord("q") or key == ord("Q"):
                break

        frameCount += 1
finally:
    # Release the video handle even if extraction fails part-way through;
    # the original never released it.
    cap.release()
    cv2.destroyAllWindows()

# Check Conversion

In [None]:
dirPath = Path("detect")
imgPaths = natsorted(dirPath.glob("images/root/*"))

# The image and label folders must hold the same number of files.
assert len(imgPaths) == len(list(dirPath.glob("labels/root/*")))

for imgPath in imgPaths:
    img = cv2.imread(str(imgPath))
    labelPath = str(imgPath).replace("images", "labels").replace(".jpg", ".txt").replace(".png", ".txt")

    # Context manager closes the handle; the original leaked one per image.
    with open(labelPath, "r") as f:
        labelLines = f.read().splitlines()

    # Drop the leading class id of each line; skip blank lines so an empty or
    # whitespace-only label file simply produces no boxes.
    boxes = [[float(num) for num in line.split()[1:]] for line in labelLines if line.strip()]

    for box in boxes:
        # shape[-2::-1] turns (rows, cols, channels) into (cols, rows).
        x, y, w, h = YOLOToCOCOBox(box, img.shape[-2::-1])
        cv2.rectangle(img, (int(x), int(y)), (int(x + w), int(y + h)), (0,0,255), 3)

    img = imutils.resize(img, height=500)
    cv2.imshow("Ground Truth", img)
    key = cv2.waitKey(1)
    if key == ord("q") or key == ord("Q"):
        break

cv2.destroyAllWindows()

# Split Training, Validation, Test

In [None]:
def getLabelPaths(imagePaths):
    """Map image paths to the paths of their label files.

    Every occurrence of ``"images"`` in a path is replaced with ``"labels"``
    and a trailing ``.jpg`` / ``.png`` extension is replaced with ``.txt``.
    Only the extension at the very end of the path is rewritten, so "jpg" or
    "png" appearing elsewhere in a file or folder name is left alone. Paths
    with any other extension keep it.

    Args:
        imagePaths: Array-like of image paths (strings or ``Path`` objects).
            May be empty.

    Returns:
        numpy.ndarray: String array with the same shape as ``imagePaths``.
    """
    paths = np.asarray(imagePaths).astype(str)

    labelPaths = []
    for path in paths.ravel().tolist():
        path = path.replace("images", "labels")
        if path.endswith((".jpg", ".png")):
            # Both extensions are four characters long including the dot.
            path = path[:-4] + ".txt"
        labelPaths.append(path)

    return np.array(labelPaths, dtype=str).reshape(paths.shape)

def copyFiles(currPaths, datasetType):
    """Copy files from the ``root`` split folder into another split folder.

    The destination of each file is its source path with every ``"root"``
    substring replaced by ``datasetType``. The destination directory is derived
    from the first path and created if missing.

    Args:
        currPaths: Sequence of source paths as strings. An empty sequence is a
            no-op (a split can legitimately be empty on small datasets).
        datasetType: Name of the target split, e.g. ``"train"``.
    """
    if len(currPaths) == 0:
        # Nothing to copy; indexing currPaths[0] below would raise IndexError.
        return

    saveDir = Path(currPaths[0].replace("root", datasetType)).parent
    saveDir.mkdir(parents=True, exist_ok=True)

    for src in currPaths:
        shutil.copy2(src, src.replace("root", datasetType))

In [None]:
# Fractions of the full image set assigned to validation and test.
validProp = 0.2
testProp = 0.01

rootDir = Path("detect/images/root")

imagePaths = np.array(natsorted(rootDir.glob("*")))
# Fixed seed so the split is reproducible across runs.
np.random.seed(314159)
# Draw the combined hold-out set first, then carve validation out of it;
# whatever remains of the hold-out set becomes the test set.
validAndTest = np.random.choice(imagePaths, int(len(imagePaths)*(validProp+testProp)), replace=False)
valid = np.random.choice(validAndTest, int(len(imagePaths)*validProp), replace=False)
test = validAndTest[~np.isin(validAndTest, valid)]
train = imagePaths[~np.isin(imagePaths, validAndTest)]

trainLabels = getLabelPaths(train)
validLabels = getLabelPaths(valid)
testLabels = getLabelPaths(test)

trainDir = Path("detect/images/train")
validDir = Path("detect/images/valid")
testDir = Path("detect/images/test")
# Clear the previous split - images *and* labels - so files from an earlier
# run (different seed or proportions) cannot linger in the wrong split.
for imgSplitDir in (trainDir, validDir, testDir):
    shutil.rmtree(imgSplitDir, ignore_errors=True)
    shutil.rmtree(Path("detect/labels")/imgSplitDir.name, ignore_errors=True)

# Labels first, then images. Empty splits are skipped: with a small dataset
# the test split can contain no files at all.
for splitPaths, splitName in (
    (trainLabels, "train"),
    (validLabels, "valid"),
    (testLabels, "test"),
    (train.astype(str), "train"),
    (valid.astype(str), "valid"),
    (test.astype(str), "test"),
):
    if len(splitPaths) > 0:
        copyFiles(splitPaths, splitName)


print("Total:")
print(f"""\
    Test Size: {len(test)}
    Validation Size: {len(valid)}
    Training Size: {len(train)}
    """)

# Export

In [None]:
from ultralytics import YOLO
import cv2
import imutils

# Name of the trained checkpoint (without extension) under weights/.
modelName = "yolov8n_010923_1"

# Load the checkpoint, then export it to ONNX with opset 12.
weightsPath = f'weights/{modelName}.pt'
model = YOLO(weightsPath)
model.export(opset=12, format='onnx')

# Inference using OpenCV

In [None]:
import cv2
import numpy as np
import imutils

In [None]:
# --- Detection settings ---
# Size (pixels) the frame is resized to when the network input blob is built.
INPUT_WIDTH = 640
INPUT_HEIGHT = 640
# Minimum best-class score for a detection row to be kept.
SCORE_THRESHOLD = 0.5
# Overlap and score thresholds handed to cv2.dnn.NMSBoxes.
NMS_THRESHOLD = 0.45
CONFIDENCE_THRESHOLD = 0.45

# --- Text rendering ---
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 1

# --- Colours (OpenCV channel order is B, G, R) ---
BLACK = (0, 0, 0)
BLUE = (255, 178, 50)
YELLOW = (0, 255, 255)

# Class names indexed by class id.
# NOTE(review): order presumably matches the training class ids - confirm.
classes = [
    "red",
    "green",
    "black",
    "turntable",
]

In [None]:
def draw_label(im, label, x, y):
    """Render ``label`` on ``im`` in yellow over a filled black box.

    Args:
        im: Image to draw on; it is modified in place.
        label: Text to render.
        x: Horizontal pixel coordinate of the background box's first corner.
        y: Vertical pixel coordinate of the background box's first corner.
    """
    # getTextSize returns ((text_width, text_height), baseline).
    (text_w, text_h), baseline = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)

    # Background box tall enough for the text plus its baseline.
    box_end = (x + text_w, y + text_h + baseline)
    cv2.rectangle(im, (x, y), box_end, (0,0,0), cv2.FILLED)

    # putText anchors at the text's bottom-left, hence the height offset.
    text_origin = (x, y + text_h)
    cv2.putText(im, label, text_origin, FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)

def pre_process(input_image, net):
    """Run one forward pass of ``net`` on ``input_image``.

    The image is converted to a blob: pixel values scaled by 1/255, resized
    (without cropping) to ``INPUT_WIDTH`` x ``INPUT_HEIGHT``, with the first
    and last colour channels swapped.

    Args:
        input_image: Image array to feed to the network.
        net: Network object exposing ``setInput``, ``forward`` and
            ``getUnconnectedOutLayersNames``.

    Returns:
        The outputs of the network's unconnected output layers, as returned by
        ``net.forward``.
    """
    blob = cv2.dnn.blobFromImage(
        input_image,
        scalefactor=1/255,
        size=(INPUT_WIDTH, INPUT_HEIGHT),
        mean=[0,0,0],
        swapRB=True,
        crop=False,
    )
    net.setInput(blob)

    output_names = net.getUnconnectedOutLayersNames()
    return net.forward(output_names)

def post_process(input_image, outputs):
    """Decode network output, apply NMS and draw the surviving detections.

    Args:
        input_image: Frame the detections refer to. Boxes and labels are drawn
            onto it in place.
        outputs: Network outputs as returned by ``pre_process``.
            ``outputs[0][0]`` is transposed so that each row is read as
            ``[cx, cy, w, h, score_0, score_1, ...]`` in network-input pixels.
            NOTE(review): presumably the YOLOv8 ONNX output layout - confirm.

    Returns:
        tuple: ``(boxes, labels, input_image)``. ``boxes`` (arrays of
        ``[left, top, width, height]`` in image pixels) and ``labels`` cover
        every candidate whose best class score exceeds ``SCORE_THRESHOLD``,
        i.e. *before* NMS; only the NMS survivors are drawn.
    """
    confidences = []
    boxes = []
    labels = []

    detections = outputs[0][0].T
    image_height, image_width = input_image.shape[:2]
    # Scale from network-input coordinates back to the original frame.
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    for row in detections:
        classes_scores = row[4:]
        class_id = np.argmax(classes_scores)
        score = classes_scores[class_id]
        if score > SCORE_THRESHOLD:
            cx, cy, w, h = row[0], row[1], row[2], row[3]
            left = int((cx - w/2) * x_factor)
            top = int((cy - h/2) * y_factor)
            width = int(w * x_factor)
            height = int(h * y_factor)
            boxes.append(np.array([left, top, width, height]))
            confidences.append(float(score))
            labels.append("{}".format(classes[class_id]))

    # Feed the real class scores to NMS so that, among overlapping boxes, the
    # highest-scoring one is kept. The original passed a constant 1.0 for every
    # box, which left the choice of survivor arbitrary.
    indices = cv2.dnn.NMSBoxes(
        [box.tolist() for box in boxes],
        confidences,
        CONFIDENCE_THRESHOLD,
        NMS_THRESHOLD,
    )

    # Some OpenCV versions return the indices as an N x 1 array (or an empty
    # tuple when nothing survives); flatten to plain indices either way.
    for i in np.array(indices, dtype=int).flatten():
        # Plain Python ints for the drawing calls.
        left, top, width, height = (int(v) for v in boxes[i])
        cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3*THICKNESS)
        draw_label(input_image, labels[i], left, top)
    return boxes, labels, input_image

In [None]:
# Load the exported ONNX model and request the CUDA backend/target.
net = cv2.dnn.readNetFromONNX("weights/yolov8n_010923_1.onnx")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

cap = cv2.VideoCapture("track/video.mp4")
try:
    ret, frame = cap.read()
    while ret:
        outputs = pre_process(frame, net)
        boxes, labels, im = post_process(frame, outputs)

        # getPerfProfile reports the inference time in ticks; convert to ms.
        t, _ = net.getPerfProfile()
        label = 'Inference time: %.2f ms' % (t * 1000.0 /  cv2.getTickFrequency())
        cv2.putText(im, label, (20, 40), FONT_FACE, FONT_SCALE,  (0, 0, 255), THICKNESS, cv2.LINE_AA)
        cv2.imshow('Output', imutils.resize(im, width=1000))

        key = cv2.waitKey(1)
        # Accept either case, matching the other preview loops in the notebook.
        if key == ord("q") or key == ord("Q"):
            break

        ret, frame = cap.read()
finally:
    # Release the video handle even if inference fails part-way through;
    # the original never released it.
    cap.release()
    cv2.destroyAllWindows()
