In [12]:
import cv2

# Path of the source video whose dimensions are probed below.
VIDEO_PATH = r"..\videos\vehicle-counting-low.mp4"

cap = cv2.VideoCapture(VIDEO_PATH)
try:
    # cv2.VideoCapture does not raise on a missing/unreadable file; failure is
    # only visible through isOpened(), so check it explicitly instead of
    # silently reading 0.0 for both dimensions.
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {VIDEO_PATH}")

    # Get the video's frame width and height (cap.get returns floats)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
finally:
    # release the VideoCapture object, even when the checks above fail
    cap.release()

In [15]:
import numpy as np
import supervision as sv
from ultralytics import YOLO

# Alternative checkpoints (disabled):
# model = YOLO(r"..\models\yolov8s-worldv2.pt")
# model = YOLO(r"..\models\custom_yolov8s-worldv2.pt")
model = YOLO(r"..\models\yolov10m.pt")
model.to("cuda")  # run inference on the GPU; requires a CUDA-capable device
model.fuse()

# Tracker that assigns persistent ids to detections, plus the two annotators
# used to draw boxes and text labels on each frame.
tracker = sv.ByteTrack()
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

# Height in pixels of the preview window; its width follows the video's
# aspect ratio.
PREVIEW_HEIGHT = 600

# `width` and `height` are read from the video in an earlier cell. If that
# read failed they are 0, which would otherwise surface here as an opaque
# ZeroDivisionError.
if width <= 0 or height <= 0:
    raise ValueError(
        f"Invalid video dimensions: width={width}, height={height}"
    )

# Calculate aspect ratio
aspect_ratio = width / height

# Create a named window with the ability to resize
cv2.namedWindow('frame', cv2.WINDOW_NORMAL)

# Resize the window so it keeps the video's aspect ratio at PREVIEW_HEIGHT
cv2.resizeWindow('frame', int(PREVIEW_HEIGHT * aspect_ratio), PREVIEW_HEIGHT)

def callback(frame: np.ndarray, _: int) -> np.ndarray:
    """Detect, track and annotate objects in a single video frame.

    Runs the module-level ``model`` on ``frame``, feeds the detections through
    the module-level ``tracker``, draws boxes and ``#<tracker_id> <class name>``
    labels on a copy of the frame, and shows the result in the OpenCV window
    named ``"frame"``.

    Args:
        frame: The input frame to process.
        _: Frame index supplied by the caller; unused.

    Returns:
        The annotated copy of ``frame``, so that the caller writes the boxes
        and labels to the output video. Returns ``None`` when the user presses
        ``q`` in the preview window.
        NOTE(review): returning ``None`` presumably makes the caller's frame
        writer fail, which is what ends processing early — confirm against
        ``sv.process_video``.
    """
    results = model(frame)[0]
    detections = sv.Detections.from_ultralytics(results)
    detections = tracker.update_with_detections(detections)

    labels = [
        f"#{tracker_id} {results.names[class_id]}"
        for class_id, tracker_id
        in zip(detections.class_id, detections.tracker_id)
    ]

    # Annotate a copy so the caller's input frame is left untouched.
    annotated_frame = box_annotator.annotate(
        frame.copy(), detections=detections)
    annotated_frame = label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels)

    # display the frame to the user using OpenCV
    cv2.imshow("frame", annotated_frame)

    # waitKey(1) also gives the OpenCV window time to repaint.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        return None

    # Return the annotated frame (not the raw input) so the saved video
    # matches what is shown in the preview window.
    return annotated_frame

# Run the callback over every frame of the source video and write the
# returned frames to the target file.
try:
    sv.process_video(
        source_path=r"..\videos\vehicle-counting-low.mp4",
        target_path="result_111.mp4",
        callback=callback
    )
finally:
    # Always close the preview window, including when processing stops with
    # an exception (e.g. a kernel interrupt), so no OpenCV window is left open.
    cv2.destroyAllWindows()

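# Alternative (disabled): process the video with a manual frame loop instead of
# sv.process_video. Note: `stop_processing` is not defined in the visible cells
# and must be set up before this loop is re-enabled.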
# generator = sv.get_video_frames_generator(r"..\videos\vehicle-counting-low.mp4")
# iterator = iter(generator)

# while not stop_processing:
#     try:
#         frame = next(iterator)
#         callback(frame, 0)
#     except StopIteration:
#         break

# cv2.destroyAllWindows()

YOLOv10m summary (fused): 369 layers, 16,543,024 parameters, 0 gradients, 63.9 GFLOPs

0: 384x640 4 cars, 1 truck, 22.1ms
Speed: 1.4ms preprocess, 22.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 truck, 27.5ms
Speed: 1.0ms preprocess, 27.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 truck, 24.9ms
Speed: 2.7ms preprocess, 24.9ms inference, 5.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 truck, 17.9ms
Speed: 1.0ms preprocess, 17.9ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 cars, 1 truck, 21.5ms
Speed: 0.0ms preprocess, 21.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 truck, 23.0ms
Speed: 0.0ms preprocess, 23.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 truck, 18.5ms
Speed: 0.0ms preprocess, 18.5ms inference, 9.5ms postprocess per image at shape 