### Deep SORT

In [1]:
import cv2
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

# Person tracking: YOLOv8 detections are fed to a Deep SORT tracker, and every
# confirmed track is drawn (box + ID) onto the frame, which is both displayed
# and written to an output video.
INPUT_VIDEO = 'entry_exit_input.mp4'
OUTPUT_VIDEO = 'entry_exit_output.mp4'
PERSON_CLASS_ID = 0      # detections with this class id are labelled 'person' below
MIN_CONFIDENCE = 0.6     # detections at or below this score are discarded
FALLBACK_FPS = 30.0      # used when the container reports no frame rate

model = YOLO("yolov8l.pt")
# max_age is forwarded to DeepSort unchanged.
# NOTE(review): presumably the number of missed frames before a track is
# dropped — confirm against the deep_sort_realtime documentation.
tracker = DeepSort(max_age=30)

cap = cv2.VideoCapture(INPUT_VIDEO)
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output file.
    raise OSError(f"Could not open input video: {INPUT_VIDEO}")

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the output
# play at the wrong speed. Some sources report 0, so fall back to a default.
fps = cap.get(cv2.CAP_PROP_FPS) or FALLBACK_FPS
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(OUTPUT_VIDEO, fourcc, fps, (width, height))

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or read failure).
            break

        results = model(frame)[0]

        # Convert YOLO boxes into the ([left, top, width, height], conf, label)
        # tuples passed to the tracker, keeping only confident person boxes.
        detections = []
        for box in results.boxes:
            cls_id = int(box.cls[0])
            conf = float(box.conf[0])
            if cls_id == PERSON_CLASS_ID and conf > MIN_CONFIDENCE:
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                detections.append(([x1, y1, x2 - x1, y2 - y1], conf, 'person'))

        tracks = tracker.update_tracks(detections, frame=frame)

        for track in tracks:
            # Skip tracks the tracker has not confirmed yet.
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            x1, y1, x2, y2 = map(int, track.to_ltrb())

            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 1)
            cv2.putText(frame, f'ID: {track_id}', (x1, y1 - 10),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255), 1)

        cv2.imshow("Person Tracking", frame)

        out.write(frame)

        # Mask to the low byte so the key comparison matches the style used by
        # the other tracking cell in this notebook.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release, even if detection/tracking raises, so the output file is
    # finalised and the display window is closed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

  import pkg_resources



0: 480x640 1 person, 2 chairs, 161.2ms
Speed: 3.0ms preprocess, 161.2ms inference, 107.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 40.4ms
Speed: 2.3ms preprocess, 40.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 1 dining table, 40.1ms
Speed: 2.4ms preprocess, 40.1ms inference, 2.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 1 dining table, 39.8ms
Speed: 1.6ms preprocess, 39.8ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 1 dining table, 41.2ms
Speed: 1.7ms preprocess, 41.2ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 41.2ms
Speed: 1.5ms preprocess, 41.2ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 2 chairs, 1 dining table, 40.3ms
Speed: 1.9ms preprocess, 40.3ms inference, 2.7ms postprocess per image at sha

### Built-in YOLO.track() (runs Ultralytics' default tracker, BoT-SORT, since no `tracker=` is passed; use `tracker="bytetrack.yaml"` for ByteTrack)

In [4]:
import cv2
from ultralytics import YOLO

model = YOLO("yolov8l.pt")

# Pixel rectangle that is blacked out in every frame before detection runs.
IGNORE_REGION = (400, 0, 640, 150)  # (x1, y1, x2, y2)

cap = cv2.VideoCapture("entry_exit_input.mp4")

# Read one frame to learn the frame size, then rewind to the first frame.
ret, frame = cap.read()
if not ret:
    # Without this guard a missing/unreadable file surfaces as an opaque
    # AttributeError on `frame.shape` (frame is None).
    cap.release()
    raise OSError("Could not read a frame from 'entry_exit_input.mp4'")
H, W = frame.shape[:2]
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Some sources report an FPS of 0; fall back to 30 rather than handing the
# writer an invalid frame rate.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
out = cv2.VideoWriter("entry_exit_output.mp4", fourcc, fps, (W, H))

# detection region (bottom-right corner)
# The left edge is an x coordinate, so it is derived from the frame WIDTH
# (it was previously computed from the height, which shifts the region on any
# non-square frame).
region_x1 = int(W * 0.5)
region_y1 = int(H * 0.65)
region_x2 = W
region_y2 = H

# Tracking info
# track_memory maps a track id to {"inside": bool, "counted": bool}.
track_memory = {}
in_count = 0
out_count = 0

# Point-in-rectangle test against the module-level detection region
def is_inside_region(x, y):
    """Return True when (x, y) lies within the detection region.

    The region is the rectangle spanned by the module-level ``region_x1``,
    ``region_y1``, ``region_x2`` and ``region_y2``; all four edges are
    inclusive.
    """
    within_columns = region_x1 <= x <= region_x2
    within_rows = region_y1 <= y <= region_y2
    return within_columns and within_rows

# Main loop: mask the ignore region, run the tracker on each frame, count
# tracks whose centre point crosses the detection-region boundary, and
# display/write the annotated frame.
ignore_x1, ignore_y1, ignore_x2, ignore_y2 = IGNORE_REGION  # loop-invariant

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or read failure).
            break

        # Mask ignore region before detection
        frame[ignore_y1:ignore_y2, ignore_x1:ignore_x2] = 0

        results = model.track(source=frame, persist=True, stream=False, classes=[0])
        result = results[0]

        annotated = result.plot(line_width=1, font_size=0.4)

        # Draw the detection region as a translucent green fill: paint it solid
        # on `annotated`, then blend with the untouched copy.
        overlay = annotated.copy()
        cv2.rectangle(annotated, (region_x1, region_y1), (region_x2, region_y2), (0, 255, 0), -1)  # -1 -> fill
        alpha = 0.8  # weight of the unfilled copy; the fill shows at 1 - alpha = 20% opacity
        cv2.addWeighted(overlay, alpha, annotated, 1 - alpha, 0, annotated)

        # `id` is None when the tracker assigned no ids for this frame.
        if result.boxes.id is not None:
            ids = result.boxes.id.int().tolist()
            boxes = result.boxes.xyxy.cpu().numpy()

            current_ids = set(ids)

            for id_, box in zip(ids, boxes):
                bx1, by1, bx2, by2 = box
                # Centre of the bounding box is the point tested against the region.
                cx, cy = int((bx1 + bx2) // 2), int((by1 + by2) // 2)
                cv2.circle(annotated, (cx, cy), radius=3, color=(0, 0, 255), thickness=-1)

                curr_inside = is_inside_region(cx, cy)
                # A track seen for the first time starts with its current state,
                # so its first frame can never register as a crossing.
                prev_data = track_memory.get(id_, {"inside": curr_inside, "counted": False})

                prev_inside = prev_data["inside"]
                counted = prev_data["counted"]

                # Each remembered track contributes to at most one counter.
                if not counted:
                    if not prev_inside and curr_inside:
                        in_count += 1
                        counted = True
                    elif prev_inside and not curr_inside:
                        out_count += 1
                        counted = True

                track_memory[id_] = {"inside": curr_inside, "counted": counted}

            # Forget tracks that are absent from this frame.
            for old_id in list(track_memory):
                if old_id not in current_ids:
                    del track_memory[old_id]

        cv2.putText(annotated, f"IN: {in_count}", (10, 25), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 0), 1)
        cv2.putText(annotated, f"OUT: {out_count}", (10, 50), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 0, 255), 1)
        cv2.putText(annotated, f"INSIDE: {abs(in_count-out_count)}", (10, 75), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 255, 255), 1)

        cv2.imshow("Entry Exit Tracking", annotated)
        out.write(annotated)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release, even if tracking raises, so the output video is finalised
    # and the display window is closed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 480x640 1 person, 43.5ms
Speed: 2.0ms preprocess, 43.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 38.3ms
Speed: 1.5ms preprocess, 38.3ms inference, 4.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 41.8ms
Speed: 2.0ms preprocess, 41.8ms inference, 2.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 41.4ms
Speed: 1.2ms preprocess, 41.4ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 40.6ms
Speed: 1.2ms preprocess, 40.6ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 41.9ms
Speed: 1.6ms preprocess, 41.9ms inference, 4.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 40.2ms
Speed: 1.6ms preprocess, 40.2ms inference, 2.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 41.0ms
Speed: 1.7ms preprocess, 41.0ms inference, 3.4ms postprocess per image at shape (1, 3, 4