In [5]:
import cv2
from ultralytics import YOLO
import torch, torchvision

print(torch.__version__)
print(torchvision.__version__)
print(torch.cuda.is_available())

# Load the YOLOv8 model
model = YOLO('yolov8x.pt')

# Open the video file
video_path = "C2024-04-08-14-04-59-01.mp4"
cap = cv2.VideoCapture(video_path)

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        frame = cv2.GaussianBlur(frame,(21,21),0)
        
        
        results = model.track(frame, persist=True)

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()

2.2.2
0.17.2
True

0: 384x640 2 cows, 72.2ms
Speed: 4.4ms preprocess, 72.2ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cow, 55.4ms
Speed: 3.5ms preprocess, 55.4ms inference, 6.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 53.0ms
Speed: 3.5ms preprocess, 53.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cow, 59.2ms
Speed: 1.5ms preprocess, 59.2ms inference, 4.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cow, 57.5ms
Speed: 3.0ms preprocess, 57.5ms inference, 4.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 cow, 58.0ms
Speed: 3.5ms preprocess, 58.0ms inference, 4.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cows, 59.8ms
Speed: 3.1ms preprocess, 59.8ms inference, 4.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cows, 56.9ms
Speed: 3.0ms preprocess, 56.9ms inference, 6.5ms postprocess per image at shape (1

In [22]:
import cv2
import numpy as np
from ultralytics import YOLO

from ultralytics.utils.checks import check_imshow
from ultralytics.utils.plotting import Annotator, colors

from collections import defaultdict

track_history = defaultdict(lambda: [])
model = YOLO("yolov8x.pt")
names = model.model.names

video_path = "T2024-04-08-14-00-37-01.mp4"
cap = cv2.VideoCapture(video_path)
assert cap.isOpened(), "Error reading video file"

w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

# result = cv2.VideoWriter("object_tracking.avi",
#                        cv2.VideoWriter_fourcc(*'mp4v'),
#                        fps,
#                        (w, h))

while cap.isOpened():
    success, frame = cap.read()
    if success:
        results = model.track(frame, persist=True, verbose=False)
        boxes = results[0].boxes.xyxy.cpu()

        if results[0].boxes.id is not None:

            # Extract prediction results
            clss = results[0].boxes.cls.cpu().tolist()
            track_ids = results[0].boxes.id.int().cpu().tolist()
            confs = results[0].boxes.conf.float().cpu().tolist()
            # Annotator Init
            annotator = Annotator(frame, line_width=2)

            for box, cls, track_id in zip(boxes, clss, track_ids):
                print(box)
                annotator.box_label(box, color=colors(int(cls), True), label=names[int(cls)])

                # Store tracking history
                track = track_history[track_id]
                track.append((int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)))
                if len(track) > 30:
                    track.pop(0)

                # Plot tracks
                points = np.array(track, dtype=np.int32).reshape((-1, 1, 2))
                cv2.circle(frame, (track[-1]), 7, colors(int(cls), True), -1)
                cv2.polylines(frame, [points], isClosed=False, color=colors(int(cls), True), thickness=2)

        cv2.imshow("YOLOv8 Tracking", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        break

# result.release()
cap.release()
cv2.destroyAllWindows()

tensor([533.4163, 458.4791, 593.6075, 483.1598])
tensor([533.2224, 458.4853, 593.6731, 483.1558])
tensor([536.7592, 458.4494, 593.2349, 482.3391])
tensor([537.3197, 458.4841, 592.5217, 482.1379])
tensor([540.2350, 457.4671, 588.0181, 481.9239])
tensor([539.2317, 457.3469, 584.7028, 482.1064])
tensor([423.1012, 457.7221, 482.7539, 482.2251])
tensor([421.9347, 456.9661, 481.0877, 482.8191])
tensor([420.8401, 456.8573, 479.8260, 483.1710])
tensor([419.1371, 456.8183, 478.1742, 483.1808])
tensor([417.9751, 456.7407, 477.0536, 483.0748])
tensor([416.8511, 456.4559, 476.2647, 482.4674])
tensor([415.7463, 456.3163, 475.4063, 482.1689])
tensor([414.3743, 456.8715, 473.1111, 481.6491])
tensor([413.3582, 457.0805, 471.7474, 481.4365])
tensor([412.1672, 457.1913, 469.9752, 481.2689])
tensor([410.7731, 457.4697, 468.7724, 480.9523])
tensor([410.3418, 457.5703, 468.4208, 480.8438])
tensor([409.3003, 457.4789, 467.6855, 480.7207])
tensor([408.2510, 457.4523, 466.7485, 480.7091])
tensor([407.2982, 45