In [1]:
from ultralytics import YOLO
model = YOLO("yolov8n.pt")

In [5]:
import sys
import os
sys.path.append(os.path.abspath("ByteTrack"))


In [2]:
class ByteTrackArgument:
    track_thresh = 0.5 # High_threshold
    track_buffer = 50 # Number of frame lost tracklets are kept
    match_thresh = 0.8 # Matching threshold for first stage linear assignment
    aspect_ratio_thresh = 10.0 # Minimum bounding box aspect ratio
    min_box_area = 1.0 # Minimum bounding box area
    mot20 = False # If used, bounding boxes are not clipped.
MIN_THRESHOLD = 0.001

In [6]:
from yolox.tracker.byte_tracker import BYTETracker
import cv2
import numpy as np
from collections import deque

np.float = float #fix

In [7]:
# Load YOLO model
model = YOLO("yolov8n.pt")

# Initialize tracker
tracker = BYTETracker(ByteTrackArgument)

# Open video
video_path = "Behaviour survey (online-video-cutter.com).mp4"
cap = cv2.VideoCapture(video_path)

# Output writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter("output.mp4", fourcc, 30.0,
                      (int(cap.get(3)), int(cap.get(4))))

while True:
    ret, frame = cap.read()
    if not ret:
        break
    height, width = frame.shape[:2]

    # Run YOLO detection
    results = model(frame)[0]

    # Prepare detections for ByteTrack
    dets = []
    for box in results.boxes:
        if box.conf.item() > 0.3:
            x1, y1, x2, y2 = box.xyxy[0]
            score = box.conf.item()
            dets.append([x1.item(), y1.item(), x2.item(), y2.item(), score])
    dets = np.array(dets)

    # Run tracking
    tracks = tracker.update(dets, (height, width), (height, width))

    # Draw tracks
    for track in tracks:
        x, y, w, h = track.tlwh
        track_id = track.track_id

        x1, y1, x2, y2 = int(x), int(y), int(x + w), int(y + h)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f'ID: {track_id}', (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # Write output
    out.write(frame)


cap.release()
out.release()



0: 384x640 3 persons, 5 cars, 3 traffic lights, 118.3ms
Speed: 14.0ms preprocess, 118.3ms inference, 314.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 5 cars, 3 traffic lights, 18.4ms
Speed: 2.5ms preprocess, 18.4ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 4 cars, 3 traffic lights, 9.3ms
Speed: 1.5ms preprocess, 9.3ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 4 cars, 3 traffic lights, 13.1ms
Speed: 1.6ms preprocess, 13.1ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 5 cars, 3 traffic lights, 1 potted plant, 11.1ms
Speed: 1.8ms preprocess, 11.1ms inference, 5.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 4 cars, 3 traffic lights, 9.3ms
Speed: 1.2ms preprocess, 9.3ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 4 cars, 3 traffic lights, 11.8ms
Speed: 1.

In [8]:


# Create a VideoCapture object and read from input file
cap = cv2.VideoCapture('output.mp4')

# Check if camera opened successfully
if (cap.isOpened()== False):
    print("Error opening video file")

# Read until video is completed
while(cap.isOpened()):
    
# Capture frame-by-frame
    ret, frame = cap.read()
    if ret == True:
    # Display the resulting frame
        cv2.imshow('Frame', frame)
        
    # Press Q on keyboard to exit
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break

# Break the loop
    else:
        break

# When everything done, release
# the video capture object
cap.release()

# Closes all the frames
cv2.destroyAllWindows()