In [None]:
from ultralytics import YOLO

# Load the YOLO weights once at notebook level.
# NOTE(review): main() further down constructs its own YOLO instance, so this
# module-level `model` is not used by the tracking pipeline.
model = YOLO("yolov8n.pt")  # pretrained "yolov8n.pt" checkpoint (recommended starting point for training)

In [1]:
# Working YOLOv8 person counter restricted to roi2; it does not compare pixel distances between disappeared and reappeared objects.
import numpy as np
import cv2
from scipy.optimize import linear_sum_assignment

class CentroidTracker:
    """Assign persistent integer IDs to centroids across successive frames.

    Existing objects are matched to new detections by solving a minimum-cost
    assignment over the Euclidean distances between their centroids.

    Args:
        max_disappeared: Number of consecutive updates an object may go
            unmatched before it is dropped. It is dropped once its miss
            counter becomes strictly greater than this value.
        max_distance: Optional upper bound on the distance of an accepted
            match. A pairing farther apart than this is rejected: the existing
            object counts as missing for this update and the detection is
            registered as a new object. ``None`` (the default) accepts every
            assigned pairing regardless of distance.

    Attributes:
        next_object_id: ID that the next registered object will receive
            (starts at 1 and is never reused).
        objects: Mapping of object ID to its most recent centroid, stored
            exactly as it was passed in.
        disappeared: Mapping of object ID to its consecutive-miss counter.
        used_ids: Set of IDs that are currently registered.
    """

    def __init__(self, max_disappeared=8, max_distance=None):
        self.next_object_id = 1
        self.objects = {}
        self.disappeared = {}
        self.max_disappeared = max_disappeared
        self.max_distance = max_distance
        self.used_ids = set()

    def register(self, centroid):
        """Start tracking ``centroid`` under a fresh ID and return that ID."""
        object_id = self.next_object_id
        self.objects[object_id] = centroid
        self.disappeared[object_id] = 0
        self.used_ids.add(object_id)
        self.next_object_id += 1
        return object_id

    def deregister(self, object_id):
        """Stop tracking ``object_id``; raises ``KeyError`` if it is unknown."""
        del self.objects[object_id]
        del self.disappeared[object_id]
        self.used_ids.remove(object_id)

    def _mark_missing(self, object_id):
        """Count one missed update for ``object_id`` and drop it if overdue."""
        self.disappeared[object_id] += 1
        if self.disappeared[object_id] > self.max_disappeared:
            self.deregister(object_id)

    def update(self, centroids):
        """Match the current detections against the tracked objects.

        Args:
            centroids: Sequence of centroids detected in the current frame.

        Returns:
            The live ``objects`` dict (not a copy) mapping ID to centroid.
            Objects that were not matched in this update remain in the dict
            with their last known centroid until they are dropped.
        """
        # len() rather than truthiness so a NumPy array is accepted as well.
        if len(centroids) == 0:
            # Snapshot the keys because _mark_missing may delete entries.
            for object_id in list(self.disappeared):
                self._mark_missing(object_id)
            return self.objects

        if not self.objects:
            for centroid in centroids:
                self.register(centroid)
            return self.objects

        object_ids = list(self.objects)
        # Flatten each centroid to a row vector so one broadcasted subtraction
        # yields every object-to-detection distance at once.
        known = np.asarray(list(self.objects.values()), dtype=float)
        known = known.reshape(len(object_ids), -1)
        incoming = np.asarray(centroids, dtype=float)
        incoming = incoming.reshape(len(centroids), -1)
        distances = np.linalg.norm(
            known[:, np.newaxis, :] - incoming[np.newaxis, :, :], axis=2
        )

        # The assignment is one-to-one, so no row or column can repeat here.
        row_ind, col_ind = linear_sum_assignment(distances)

        matched_rows = set()
        matched_cols = set()
        for row, col in zip(row_ind, col_ind):
            row, col = int(row), int(col)
            if (self.max_distance is not None
                    and distances[row, col] > self.max_distance):
                # Too far apart to be the same object; treat both as unmatched.
                continue
            object_id = object_ids[row]
            self.objects[object_id] = centroids[col]
            self.disappeared[object_id] = 0
            matched_rows.add(row)
            matched_cols.add(col)

        for row, object_id in enumerate(object_ids):
            if row not in matched_rows:
                self._mark_missing(object_id)

        for col, centroid in enumerate(centroids):
            if col not in matched_cols:
                self.register(centroid)

        return self.objects

from ultralytics import YOLO

def yolo_detect_objects(frame, model, confidence_threshold=0.6, nms_threshold=0.9):
    """Run the detector on one frame and return the confident person boxes.

    Args:
        frame: Image to analyse. It is handed to ``model`` unchanged:
            Ultralytics documents NumPy image input as BGR, which is the
            channel order OpenCV frames already use, so no colour conversion
            is applied.
        model: Callable that takes the frame and returns an iterable of
            result objects, each exposing ``boxes``. Every box must provide
            ``xyxy[0].tolist()``, ``conf.item()`` and ``cls.item()``.
        confidence_threshold: Detections are kept only when their confidence
            is strictly greater than this value.
        nms_threshold: Accepted for backward compatibility; it is not
            forwarded to the model and has no effect.

    Returns:
        A list of ``(x1, y1, x2, y2, 'person')`` tuples with the coordinates
        truncated to ``int``.
    """
    # Index of 'person' in the COCO label order this function is written for.
    person_class_id = 0

    detected_boxes = []
    for res in model(frame) or ():
        boxes = res.boxes
        if boxes is None:
            # A result without any box container contributes nothing.
            continue
        for box in boxes:
            if box.conf.item() <= confidence_threshold:
                continue
            if int(box.cls.item()) != person_class_id:
                continue
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            detected_boxes.append((int(x1), int(y1), int(x2), int(y2), 'person'))

    return detected_boxes

def detect_persons(frame, model, confidence_threshold=0.65, roi=None):
    """Return the centre points of the persons detected in ``frame``.

    Args:
        frame: Image passed on to ``yolo_detect_objects``.
        model: Detector passed on to ``yolo_detect_objects``.
        confidence_threshold: Confidence cut-off forwarded to the detector.
        roi: Optional ``(x, y, width, height)`` rectangle. When given, only
            boxes whose centre lies strictly inside the rectangle are kept
            (centres exactly on the border are excluded).

    Returns:
        A list of ``(center_x, center_y)`` integer tuples, one per kept box.
    """
    detected_boxes = yolo_detect_objects(
        frame, model, confidence_threshold=confidence_threshold
    )

    # The frame dimensions are not needed here, so frame.shape is not read;
    # this keeps the function usable with frames of any channel layout.
    persons = []
    for x1, y1, x2, y2, _label in detected_boxes:
        center_x = int((x1 + x2) / 2)
        center_y = int((y1 + y2) / 2)

        if roi is not None:
            roi_x, roi_y, roi_w, roi_h = roi
            inside = (roi_x < center_x < roi_x + roi_w
                      and roi_y < center_y < roi_y + roi_h)
            if not inside:
                continue

        persons.append((center_x, center_y))

    return persons

def draw_boxes(img, objects, roi1, roi2, counts):
    """Annotate ``img`` with both ROIs and every tracked object.

    Args:
        img: Image to draw on; it is modified in place.
        objects: Mapping of object ID to its ``(x, y)`` centroid.
        roi1: ``(x, y, width, height)`` of the counting region (drawn blue).
        roi2: ``(x, y, width, height)`` of the second region (drawn green).
        counts: Dict updated in place. For each object whose centroid lies
            strictly inside ``roi1`` the entry for its ID is incremented by
            one per call, so the number of keys is the number of distinct
            IDs seen inside ``roi1``.

    Returns:
        The same ``img`` object, after drawing.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Outline the two regions of interest: roi1 first, then roi2.
    roi1_top_left = (roi1[0], roi1[1])
    roi1_bottom_right = (roi1[0] + roi1[2], roi1[1] + roi1[3])
    cv2.rectangle(img, roi1_top_left, roi1_bottom_right, (255, 0, 0), 2)

    roi2_top_left = (roi2[0], roi2[1])
    roi2_bottom_right = (roi2[0] + roi2[2], roi2[1] + roi2[3])
    cv2.rectangle(img, roi2_top_left, roi2_bottom_right, (0, 255, 0), 2)

    for object_id, centroid in objects.items():
        cx, cy = centroid[0], centroid[1]

        # Mark the object and label it with its tracker ID.
        cv2.circle(img, centroid, 4, (0, 255, 0), -1)
        cv2.putText(img, f'ID: {object_id}', (cx - 10, cy - 10),
                    font, 0.5, (0, 255, 0), 2)

        left, top, roi_w, roi_h = roi1
        in_counting_region = left < cx < left + roi_w and top < cy < top + roi_h
        if not in_counting_region:
            continue

        # A missing ID starts at zero before this frame is counted.
        counts[object_id] = counts.get(object_id, 0) + 1

        cv2.putText(img, 'Passed ROI', (cx - 10, cy + 20),
                    font, 0.5, (0, 0, 255), 2)

    cv2.putText(img, f'Count: {len(counts)}', (10, 30), font, 1, (0, 255, 0), 2)

    return img

def main(video_path=r"C:\Users\Chandhana\Downloads\Test video half.mp4"):
    """Count the persons that enter the counting ROI of a video.

    Each frame is run through the YOLO model, detections inside the second
    ROI are fed to a ``CentroidTracker``, and the annotated frame is shown in
    a window. Playback stops at the end of the video or when ``q`` is
    pressed. The number of distinct tracked IDs seen inside the first ROI is
    printed at the end.

    Args:
        video_path: Path of the video file to process. Defaults to the sample
            video this script was written against.
    """
    tracker = CentroidTracker()

    # Initialize YOLOv8 model
    model = YOLO("yolov8n.pt")

    # ROI coordinates are (x, y, width, height).
    roi1 = (0, 200, 550, 70)   # counting region
    roi2 = (0, 60, 550, 360)   # region in which detections are tracked
    counts = {}  # object ID -> number of frames spent inside roi1

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Report the problem instead of silently printing a count of 0;
            # the loop below then ends on the first failed read.
            print(f"Could not open video: {video_path}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            centroids = detect_persons(frame, model, roi=roi2)
            tracked_objects = tracker.update(centroids)
            frame = draw_boxes(frame, tracked_objects, roi1, roi2, counts)

            cv2.imshow('Frame', frame)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()

    print("Total persons crossed ROI:", len(counts))

# Run the counting demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()



0: 384x640 (no detections), 85.5ms
Speed: 3.4ms preprocess, 85.5ms inference, 135.0ms postprocess per image at shape (1, 3, 384, 640)


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)



0: 384x640 (no detections), 79.5ms
Speed: 2.0ms preprocess, 79.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 80.1ms
Speed: 2.4ms preprocess, 80.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 83.7ms
Speed: 2.0ms preprocess, 83.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 80.2ms
Speed: 2.0ms preprocess, 80.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 74.6ms
Speed: 2.0ms preprocess, 74.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 76.4ms
Speed: 1.0ms preprocess, 76.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 83.7ms
Speed: 2.0ms preprocess, 83.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 85.4ms
Speed: 2.0ms preprocess, 85.4ms i