In [None]:
import cv2
import numpy as np
import pyrealsense2 as rs

# Load the YOLO model and class labels before touching the camera, so that a
# missing weights/config/labels file fails here instead of leaving the
# RealSense device streaming with nothing to stop it.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids. Depending on the OpenCV
# build the result is either a flat (N,) array or an (N, 1) array; flattening
# makes the lookup below work with both.
output_layers = [
    layer_names[i - 1]
    for i in np.asarray(net.getUnconnectedOutLayers()).flatten()
]

# Trackers currently following an object (one CSRT tracker per object)
tracked_objects = []

# Last known position of each tracked object, keyed by its tracker and stored
# as ((x1, y1), (x2, y2)) corner points
last_known_positions = {}

# Initialize the RealSense pipeline
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)  # Enable depth stream
pipeline.start(config)

# Two boxes whose intersection-over-union exceeds this value are treated as
# covering the same object.
DUPLICATE_IOU = 0.4


def _iou(box_a, box_b):
    """Return the intersection-over-union of two corner-format boxes.

    Each box is ((x1, y1), (x2, y2)) with (x1, y1) the top-left and (x2, y2)
    the bottom-right corner, the same layout stored in last_known_positions.
    Returns 0.0 when the boxes do not intersect or have no area.
    """
    (ax1, ay1), (ax2, ay2) = box_a
    (bx1, by1), (bx2, by2) = box_b
    inter_w = min(ax2, bx2) - max(ax1, bx1)
    inter_h = min(ay2, by2) - max(ay1, by1)
    if inter_w <= 0 or inter_h <= 0:
        return 0.0
    inter = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
    return inter / union if union > 0 else 0.0


try:
    while True:
        # Wait for the next set of frames
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        depth_frame = frames.get_depth_frame()  # Retrieve depth frame

        if color_frame and depth_frame:
            # Convert the color frame to a numpy array
            color_image = np.asanyarray(color_frame.get_data())
            frame_h, frame_w = color_image.shape[:2]

            # Detect objects using YOLO
            blob = cv2.dnn.blobFromImage(color_image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
            net.setInput(blob)
            outs = net.forward(output_layers)

            # Process YOLO output
            boxes = []
            confidences = []
            for out in outs:
                for detection in out:
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > 0.5:
                        # Object detected: YOLO reports centre/size relative to the frame
                        center_x = int(detection[0] * frame_w)
                        center_y = int(detection[1] * frame_h)
                        w = int(detection[2] * frame_w)
                        h = int(detection[3] * frame_h)
                        x = int(center_x - w / 2)
                        y = int(center_y - h / 2)
                        # Detections can extend past the frame edge; clip them so
                        # the tracker is always seeded with a region inside the image.
                        left, top = max(x, 0), max(y, 0)
                        right, bottom = min(x + w, frame_w), min(y + h, frame_h)
                        if right > left and bottom > top:
                            boxes.append((left, top, right - left, bottom - top))
                            confidences.append(float(confidence))

            # Start a tracker only for detections that are not already covered.
            # Without this check a fresh CSRT tracker would be created for every
            # detection on every frame and tracked_objects would grow without bound.
            occupied = [
                last_known_positions[t]
                for t in tracked_objects
                if t in last_known_positions
            ]
            by_confidence = sorted(
                range(len(boxes)), key=lambda i: confidences[i], reverse=True
            )
            for i in by_confidence:
                x, y, w, h = boxes[i]
                corners = ((x, y), (x + w, y + h))
                if any(_iou(corners, taken) > DUPLICATE_IOU for taken in occupied):
                    continue
                tracker = cv2.TrackerCSRT_create()
                tracker.init(color_image, (x, y, w, h))
                tracked_objects.append(tracker)
                occupied.append(corners)

            # Update every tracker on the untouched frame. The list is rebuilt
            # instead of calling remove() while iterating, which would skip the
            # tracker that follows each removed one.
            still_tracked = []
            for tracker in tracked_objects:
                ok, bbox = tracker.update(color_image)
                if ok:
                    p1 = (int(bbox[0]), int(bbox[1]))
                    p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
                    # Store the last known position of the tracked object
                    last_known_positions[tracker] = (p1, p2)
                    still_tracked.append(tracker)
                else:
                    # Tracking failed: forget the tracker and its stored position
                    last_known_positions.pop(tracker, None)

            # Filter out overlapping bounding boxes: when two trackers end up on
            # the same object keep the older one and retire the newer one
            # (both the tracker and its stored position).
            tracked_objects = []
            for tracker in still_tracked:
                position = last_known_positions[tracker]
                if any(
                    _iou(position, last_known_positions[kept]) > DUPLICATE_IOU
                    for kept in tracked_objects
                ):
                    del last_known_positions[tracker]
                else:
                    tracked_objects.append(tracker)

            # Draw on a copy so the rectangles never leak into the pixels the
            # trackers are updated with.
            annotated = color_image.copy()
            for tracker in tracked_objects:
                p1, p2 = last_known_positions[tracker]
                cv2.rectangle(annotated, p1, p2, (0, 255, 0), 2)

            # Display the color image
            cv2.imshow('Object Tracking', annotated)

        # Check for key press
        key = cv2.waitKey(1) & 0xFF
        if key == 27:
            # Exit program when 'ESC' is pressed
            break
finally:
    # Stop the pipeline and close all OpenCV windows, even when the loop is
    # left through an exception or a kernel interrupt.
    pipeline.stop()
    cv2.destroyAllWindows()


In [1]:
import cv2
import numpy as np
import pyrealsense2 as rs

# Load the YOLO model and class labels before touching the camera, so that a
# missing weights/config/labels file fails here instead of leaving the
# RealSense device streaming with nothing to stop it.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids. Depending on the OpenCV
# build the result is either a flat (N,) array or an (N, 1) array; flattening
# makes the lookup below work with both.
output_layers = [
    layer_names[i - 1]
    for i in np.asarray(net.getUnconnectedOutLayers()).flatten()
]

# Trackers currently following an object (one CSRT tracker per object)
tracked_objects = []

# Last known position of each tracked object, keyed by its tracker and stored
# as ((x1, y1), (x2, y2)) corner points
last_known_positions = {}

# Initialize the RealSense pipeline
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)  # Enable depth stream
pipeline.start(config)

# A detection whose intersection-over-union with an already tracked box
# exceeds this value is treated as the same object and gets no new tracker.
DUPLICATE_IOU = 0.4


def _iou(box_a, box_b):
    """Return the intersection-over-union of two corner-format boxes.

    Each box is ((x1, y1), (x2, y2)) with (x1, y1) the top-left and (x2, y2)
    the bottom-right corner, the same layout stored in last_known_positions.
    Returns 0.0 when the boxes do not intersect or have no area.
    """
    (ax1, ay1), (ax2, ay2) = box_a
    (bx1, by1), (bx2, by2) = box_b
    inter_w = min(ax2, bx2) - max(ax1, bx1)
    inter_h = min(ay2, by2) - max(ay1, by1)
    if inter_w <= 0 or inter_h <= 0:
        return 0.0
    inter = inter_w * inter_h
    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter
    return inter / union if union > 0 else 0.0


try:
    while True:
        # Wait for the next set of frames
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        depth_frame = frames.get_depth_frame()  # Retrieve depth frame

        if color_frame and depth_frame:
            # Convert the color frame to a numpy array
            color_image = np.asanyarray(color_frame.get_data())
            frame_h, frame_w = color_image.shape[:2]

            # Detect objects using YOLO. blobFromImage already resizes the
            # frame to 416x416, so no separate cv2.resize pass is needed.
            blob = cv2.dnn.blobFromImage(color_image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
            net.setInput(blob)
            outs = net.forward(output_layers)

            # Process YOLO output
            boxes = []
            confidences = []
            for out in outs:
                for detection in out:
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > 0.5:
                        # Object detected: YOLO reports centre/size relative to the frame
                        center_x = int(detection[0] * frame_w)
                        center_y = int(detection[1] * frame_h)
                        w = int(detection[2] * frame_w)
                        h = int(detection[3] * frame_h)
                        x = int(center_x - w / 2)
                        y = int(center_y - h / 2)
                        # Detections can extend past the frame edge; clip them so
                        # the tracker is always seeded with a region inside the image.
                        left, top = max(x, 0), max(y, 0)
                        right, bottom = min(x + w, frame_w), min(y + h, frame_h)
                        if right > left and bottom > top:
                            boxes.append((left, top, right - left, bottom - top))
                            confidences.append(float(confidence))

            # Apply non-max suppression to remove overlapping bounding boxes.
            # The result is normalised to a flat index array because its
            # container type/shape is not the same across OpenCV builds
            # (including an empty result when nothing survives).
            indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
            kept_indices = np.asarray(indices, dtype=int).reshape(-1)

            # Start a tracker only for detections that are not already covered.
            # Without this check a fresh CSRT tracker would be created for every
            # detection on every frame and tracked_objects would grow without bound.
            occupied = [
                last_known_positions[t]
                for t in tracked_objects
                if t in last_known_positions
            ]
            for i in kept_indices:
                x, y, w, h = boxes[i]
                corners = ((x, y), (x + w, y + h))
                if any(_iou(corners, taken) > DUPLICATE_IOU for taken in occupied):
                    continue
                tracker = cv2.TrackerCSRT_create()
                tracker.init(color_image, (x, y, w, h))
                tracked_objects.append(tracker)
                occupied.append(corners)

            # Draw on a copy so the rectangles never leak into the pixels the
            # remaining trackers are updated with.
            annotated = color_image.copy()

            # Update and draw tracked objects. The list is rebuilt instead of
            # calling remove() while iterating, which would skip the tracker
            # that follows each removed one.
            still_tracked = []
            for tracker in tracked_objects:
                ok, bbox = tracker.update(color_image)
                if ok:
                    # Tracking successful, draw bounding box
                    p1 = (int(bbox[0]), int(bbox[1]))
                    p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
                    cv2.rectangle(annotated, p1, p2, (0, 255, 0), 2)
                    # Store the last known position of the tracked object
                    last_known_positions[tracker] = (p1, p2)
                    still_tracked.append(tracker)
                else:
                    # Tracking failed: forget the tracker and its stored position
                    last_known_positions.pop(tracker, None)
            tracked_objects = still_tracked

            # Display the color image
            cv2.imshow('Object Tracking', annotated)

        # Check for key press
        key = cv2.waitKey(1) & 0xFF
        if key == 27:
            # Exit program when 'ESC' is pressed
            break
finally:
    # Stop the pipeline and close all OpenCV windows, even when the loop is
    # left through an exception or a kernel interrupt.
    pipeline.stop()
    cv2.destroyAllWindows()


