In [79]:
from ultralytics import YOLO
import numpy as np
import cv2
from mss import mss
from PIL import Image, ImageDraw
from collections import defaultdict
import time
import torch

# Detection checkpoint used by the tracking cells below.
# Alternative checkpoints (swap in by editing the path):
#   runs/detect/train8/weights/best.pt
#   yolov8n.pt
TRACK_WEIGHTS = "runs/detect/train11/weights/best.pt"
model_track = YOLO(TRACK_WEIGHTS)

In [80]:
# Use the first CUDA device when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Move the tracking model's weights to the selected device.
# Left as the cell's last expression so the notebook still shows the model repr.
model_track.to(device)

<ultralytics.models.yolo.model.YOLO at 0x164b0ac8ed0>

In [18]:
video_path = "tenis2.mp4"

# Seconds a tracked centre point stays part of the drawn trail.
TRAIL_SECONDS = 3

cap = cv2.VideoCapture(video_path)

# ((center_x, center_y), timestamp) pairs, oldest first.
tracked_points = []

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # No frame could be read (end of the video or a read failure).
            break

        results = model_track.track(frame, persist=True, conf=0.50)
        annotated_frame = results[0].plot()

        if results[0].boxes.shape[0] > 0:
            # Only the first detection is followed; the first four columns of
            # its row are used as x1, y1, x2, y2.
            x1, y1, x2, y2 = results[0].boxes.data[0][:4]

            center_x = int((x1 + x2) / 2)
            center_y = int((y1 + y2) / 2)

            tracked_points.append(((center_x, center_y), time.time()))

        # Forget points older than the trail window (wall-clock time).
        current_time = time.time()
        tracked_points = [
            (pt, t) for (pt, t) in tracked_points if current_time - t < TRAIL_SECONDS
        ]

        # Join consecutive remaining points into a polyline.
        for (pt1, _), (pt2, _) in zip(tracked_points, tracked_points[1:]):
            cv2.line(annotated_frame, pt1, pt2, (0, 255, 0), 2)

        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Poll the keyboard exactly once per frame: a second waitKey() call would
        # never see a key press that the first call already consumed.
        key = cv2.waitKey(1) & 0xFF
        if key in (ord("q"), ord("Q")):
            break
finally:
    # Release the capture and close the window even if inference or drawing raised.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 (no detections), 13.0ms
Speed: 1.0ms preprocess, 13.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 13.0ms
Speed: 2.0ms preprocess, 13.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 14.0ms
Speed: 1.0ms preprocess, 14.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.0ms
Speed: 2.0ms preprocess, 12.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 13.0ms
Speed: 1.0ms preprocess, 13.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.0ms
Speed: 1.0ms preprocess, 12.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.0ms
Speed: 2.0ms preprocess, 12.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 13.0ms
Speed: 1.0ms preprocess, 13.0ms i

In [59]:
# Scratch inspection: column 1 of the first detection row.
# Relies on `results` left in the kernel by the tracking-loop cell and raises
# IndexError when that last result contains no detections.
results[0].boxes.data[0, 1]

tensor(71.7820)

In [89]:
# Segmentation checkpoint used by track_video_or_camera().
# NOTE(review): this loads last.pt while the tracker loads a best.pt checkpoint - confirm that is intentional.
SEG_WEIGHTS = "runs/segment/train2/weights/last.pt"
model_seg = YOLO(SEG_WEIGHTS)

In [90]:
# Same device choice as for the tracking model, recomputed here so this cell
# does not depend on the earlier one having been run.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Move the segmentation model's weights to the selected device.
# Left as the cell's last expression so the notebook still shows the model repr.
model_seg.to(device)

<ultralytics.models.yolo.model.YOLO at 0x165d6fd0190>

In [115]:
from collections import deque

def track_video_or_camera(video_source, device='cpu', trail_seconds=3, max_trail_points=10):
    """
    Segment each frame, tint the first segmented region, and draw a short trail
    behind the first tracked detection.

    Uses the notebook-level models `model_seg` and `model_track`. Tracking only
    runs on frames where the segmentation model returned at least one mask;
    other frames are shown unmodified.

    Parameters:
    - video_source: Path to the video file or integer for camera index.
    - device: Device passed to both the segmentation and tracking calls (default 'cpu').
    - trail_seconds: Maximum age, in wall-clock seconds, of a point kept in the trail.
    - max_trail_points: Maximum number of points kept in the trail.

    Returns:
    - None. Frames are displayed in an OpenCV window titled "YOLOv8 Tracking".

    Press 'q' to exit the loop and close the window.
    """

    cap = cv2.VideoCapture(video_source)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

    # ((center_x, center_y), timestamp) pairs, oldest first. This stays a deque
    # for the whole run so the length cap keeps applying; rebinding the name to
    # a filtered list would silently drop the cap.
    tracked_points = deque(maxlen=max_trail_points)

    # Structuring element for the dilation that produces the band drawn around the mask.
    kernel = np.ones((100, 100), np.uint8)

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # No frame could be read (end of the video or a read failure).
                break

            # NOTE(review): show_labels and vid_stride are passed through unchanged;
            # they look irrelevant for a single in-memory frame - confirm and drop.
            segmentation_results = model_seg(frame, conf=0.5, device=device, show_labels=False, vid_stride=10)
            masks = segmentation_results[0].masks

            if masks is not None and masks.shape[0] > 0:
                overlay = frame.copy()

                # Rasterise the outline of the first mask into a single-channel stencil.
                outline = masks.xy[0]
                stencil = np.zeros(overlay.shape[:-1], dtype=np.uint8)
                cv2.fillPoly(stencil, [np.int32([outline])], 255)

                # Band around the mask = dilated mask minus the mask itself.
                dilated_stencil = cv2.dilate(stencil, kernel, iterations=1)
                padding_mask = cv2.subtract(dilated_stencil, stencil)

                overlay[padding_mask == 255] = (0, 0, 255)
                overlay[stencil == 255] = (0, 255, 0)

                # Blend the coloured overlay into the frame at 15% opacity.
                frame = cv2.addWeighted(overlay, 0.15, frame, 0.85, 0)

                # NOTE(review): the tracker receives the tinted frame, not the raw
                # capture - confirm this is intended.
                tracking_results = model_track.track(frame, persist=True, conf=0.60, device=device)
                boxes = tracking_results[0].boxes

                if boxes.shape[0] > 0:
                    # Only the first detection is followed; the first four columns
                    # of its row are used as x1, y1, x2, y2.
                    first_box = boxes.data[0]

                    center_x = int((first_box[0] + first_box[2]) / 2)
                    center_y = int((first_box[1] + first_box[3]) / 2)

                    tracked_points.append(((center_x, center_y), time.time()))

                # Points are appended in time order, so expired ones are always at
                # the left end and can be dropped without rebuilding the container.
                current_time = time.time()
                while tracked_points and current_time - tracked_points[0][1] >= trail_seconds:
                    tracked_points.popleft()

                # Join consecutive remaining points into a polyline.
                trail = [point for point, _ in tracked_points]
                for start, end in zip(trail, trail[1:]):
                    cv2.line(frame, start, end, (0, 255, 0), 2)

            cv2.imshow("YOLOv8 Tracking", frame)

            if cv2.waitKey(1) & 0xFF in (ord("q"), ord("Q")):
                break
    finally:
        # Release the capture and close the window even if inference or drawing raised.
        cap.release()
        cv2.destroyAllWindows()

In [116]:
# Capture source handed to cv2.VideoCapture: a video file path or an integer
# camera index. Index 1 is used here; use 0 for the webcam.
video_path_or_camera_index = 1

track_video_or_camera(video_source=video_path_or_camera_index, device=device)


0: 384x640 (no detections), 11.5ms
Speed: 3.5ms preprocess, 11.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.1ms
Speed: 2.5ms preprocess, 12.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 13.5ms
Speed: 1.0ms preprocess, 13.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.5ms
Speed: 2.0ms preprocess, 12.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.5ms
Speed: 1.0ms preprocess, 12.5ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 10.6ms
Speed: 1.0ms preprocess, 10.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 9.5ms
Speed: 2.0ms preprocess, 9.5ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 9.5ms
Speed: 2.0ms preprocess, 9.5ms infer