In [8]:
import cv2
import numpy as np
import math

In [9]:
class ObjectDetection:
    def __init__(
        self, weights_path="dnn_model/yolov4.weights", cfg_path="dnn_model/yolov4.cfg"
    ):
        print("Loading Object Detection")
        print("Running opencv dnn with YOLOv4")
        self.nmsThreshold = 0.3
        self.confThreshold = 0.6
        self.image_size = 224  # Reduced for speed, change as needed

        # Load Network
        net = cv2.dnn.readNet(weights_path, cfg_path)

        # Enable GPU CUDA
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        self.model = cv2.dnn_DetectionModel(net)

        self.model.setInputParams(size=(self.image_size, self.image_size), scale=1 / 255)

    def detect(self, frame):
        # Detect objects, only retrieve boxes and scores
        _, scores, boxes = self.model.detect(frame, nmsThreshold=self.nmsThreshold, confThreshold=self.confThreshold)
        return scores, boxes

In [10]:
def select_object(event, x, y, flags, param):
    global tracking_object_id, selected_box, tracking_locked
    if event == cv2.EVENT_LBUTTONDOWN and not tracking_locked:
        frame_number = param["frame_number"]
        print(f"Click recorded at frame {frame_number}, coordinates: ({x}, {y})")

        min_distance = float("inf")
        closest_box = None

        for i, box in enumerate(param["boxes"]):
            (bx, by, bw, bh) = box
            # Calculate the center of the box
            box_center_x = bx + bw / 2
            box_center_y = by + bh / 2
            # Calculate the Euclidean distance from click point to box center
            distance = math.sqrt((x - box_center_x) ** 2 + (y - box_center_y) ** 2)
            # Check if this is the closest box
            if distance < min_distance:
                min_distance = distance
                closest_box = box
                tracking_object_id = i

        # Update selected box with the closest box found
        if closest_box is not None:
            selected_box = closest_box
            tracking_locked = True  # Lock tracking to this object
            print(f"Selected box: {selected_box}")


In [11]:
# Initialize Object Detection
od = ObjectDetection()
cap = cv2.VideoCapture("los_angeles.mp4")
if not cap.isOpened():
    print("Error: Could not open video.")

Loading Object Detection
Running opencv dnn with YOLOv4


In [12]:
# Set up mouse callback
cv2.namedWindow("Frame")
# cv2.setMouseCallback("Frame", select_object, param={"frame_number": 0, "boxes": []})

In [13]:

# Initialize tracking variables
tracking_object_id = None
selected_box = None
tracking_locked = False  # Flag to lock tracking to a selected object

frame_number = 0  # Initialize frame number for tracking clicks

In [None]:
while True:
    print(f"frame_number = {frame_number}")
    frame_number += 1

    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.resize(frame, (192 * 3, 144 * 3))

    # Detect objects in frame
    scores, boxes = od.detect(frame)

    # Update mouse callback with the current frame's data
    cv2.setMouseCallback("Frame", select_object, param={"frame_number": frame_number, "boxes": boxes})

    # Track the selected box if one has been chosen and locked
    if selected_box is not None:
        min_distance = float("inf")
        closest_box = None

        # Find the box in the current frame closest to the last known position
        (prev_x, prev_y, prev_w, prev_h) = selected_box
        prev_center_x = prev_x + prev_w / 2
        prev_center_y = prev_y + prev_h / 2

        for box in boxes:
            (bx, by, bw, bh) = box
            box_center_x = bx + bw / 2
            box_center_y = by + bh / 2
            distance = math.sqrt(
                (prev_center_x - box_center_x) ** 2
                + (prev_center_y - box_center_y) ** 2
            )

            if distance < min_distance:
                min_distance = distance
                closest_box = box

        # Update the selected box with the closest matching box from the detections
        if closest_box is not None:
            selected_box = closest_box

        # Draw the updated selected box
        (x, y, w, h) = selected_box
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

    else:
        print("No box selected")
    cv2.imshow("Frame", frame)

    # Increment the frame number for click tracking

    # key = cv2.waitKey(1)
    # # Press 'r' to reset selection and unlock tracking
    # if key == ord("r"):
    #     tracking_locked = False
    #     selected_box = None
    #     print("Tracking reset")


cap.release()
cv2.destroyAllWindows()

frame_number = 0


[ WARN:0@20.848] global net_impl.cpp:178 setUpNet DNN module was not built with CUDA backend; switching to CPU


KeyboardInterrupt: 