In [1]:
from ultralytics import YOLO
import numpy as np
import cv2
from mss import mss
from PIL import Image, ImageDraw
from collections import defaultdict
import time
import torch

# Load the detection weights produced by a local training run (path is relative
# to the notebook's working directory). This model is used with `.track(...)`
# in the cells below.
model_track = YOLO("runs/detect/train8/weights/best.pt")

In [2]:
# Prefer the first CUDA GPU when one is visible; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Kept as the last expression of the cell so the notebook echoes the model.
model_track.to(device)

<ultralytics.models.yolo.model.YOLO at 0x1ee67596490>

In [18]:
# Run the tracker on every frame of a video file and draw a green trail through
# the centres of the first detected box, keeping roughly the last 3 seconds
# (wall-clock) of history. Press 'q' or 'Q' in the preview window to stop early.
video_path = "tenis2.mp4"

cap = cv2.VideoCapture(video_path)

# Each entry is ((center_x, center_y), wall-clock timestamp of the detection).
tracked_points = []

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # End of the stream or a failed read: nothing more to process.
            break

        results = model_track.track(frame, persist=True, conf=0.50)
        annotated_frame = results[0].plot()

        if results[0].boxes.shape[0] > 0:
            # Only the first box is followed; its first four values are read
            # as the corner coordinates x1, y1, x2, y2.
            first_box = results[0].boxes.data[0]
            x1, y1, x2, y2 = first_box[:4]

            center_x = int((x1 + x2) / 2)
            center_y = int((y1 + y2) / 2)

            tracked_points.append(((center_x, center_y), time.time()))

        # Forget trail points that are 3 seconds old or older.
        current_time = time.time()
        tracked_points = [(pt, t) for (pt, t) in tracked_points if current_time - t < 3]

        # Join consecutive trail points with line segments.
        for (pt1, _), (pt2, _) in zip(tracked_points, tracked_points[1:]):
            cv2.line(annotated_frame, pt1, pt2, (0, 255, 0), 2)

        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Poll the keyboard exactly once per frame. Calling waitKey twice in the
        # same condition made the second call wait for a *new* key event, so a
        # key press could be consumed by one call and missed by the other.
        key = cv2.waitKey(1) & 0xFF
        if key in (ord("q"), ord("Q")):
            break
finally:
    # Always free the capture and close the window, even if inference or
    # drawing raised or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 (no detections), 13.0ms
Speed: 1.0ms preprocess, 13.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 13.0ms
Speed: 2.0ms preprocess, 13.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 14.0ms
Speed: 1.0ms preprocess, 14.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.0ms
Speed: 2.0ms preprocess, 12.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 13.0ms
Speed: 1.0ms preprocess, 13.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.0ms
Speed: 1.0ms preprocess, 12.0ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.0ms
Speed: 2.0ms preprocess, 12.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 13.0ms
Speed: 1.0ms preprocess, 13.0ms i

In [59]:
# Scratch inspection of the `results` object left behind by the tracking loop:
# element [1] of the first box row, i.e. the value that loop reads as y1.
# NOTE(review): this relies on leftover kernel state (the loop must have run
# first) and presumably fails when the last frame had no detections — confirm,
# and consider removing this cell from the final notebook.
results[0].boxes.data[0][1]

tensor(71.7820)

In [3]:
# Load the segmentation weights produced by a local training run (path is
# relative to the notebook's working directory). The cells below call this
# model on each frame and read `.masks` from its first result.
model_seg = YOLO("runs/segment/train/weights/best.pt")

In [4]:
# Prefer the first CUDA GPU when one is visible; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Kept as the last expression of the cell so the notebook echoes the model.
model_seg.to(device)

<ultralytics.models.yolo.model.YOLO at 0x1ee3cf02910>

In [117]:
# Two-stage pipeline over a video file: segment each frame first, and only when
# at least one mask is found run the tracker on the segmentation overlay and
# draw a green trail through the centres of the first tracked box (roughly the
# last 3 seconds, wall-clock). Frames without a mask are shown unmodified.
# Press 'q' or 'Q' in the preview window to stop early.
video_path = "tenis2.mp4"

cap = cv2.VideoCapture(video_path)

# Each entry is ((center_x, center_y), wall-clock timestamp of the detection).
tracked_points = []

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            # End of the stream or a failed read: nothing more to process.
            break

        segmentation_results = model_seg(frame, conf=0.99, device=device)
        court_mask = segmentation_results[0].masks

        if court_mask is not None and court_mask.shape[0] > 0:
            # The tracker runs on the frame with the segmentation drawn on it.
            segment_frame = segmentation_results[0].plot()

            tracking_results = model_track.track(segment_frame, persist=True, conf=0.25, device=device)
            annotated_frame = tracking_results[0].plot()

            if tracking_results[0].boxes.shape[0] > 0:
                # Only the first box is followed; its first four values are
                # read as the corner coordinates x1, y1, x2, y2.
                first_box = tracking_results[0].boxes.data[0]
                x1, y1, x2, y2 = first_box[:4]

                center_x = int((x1 + x2) / 2)
                center_y = int((y1 + y2) / 2)

                tracked_points.append(((center_x, center_y), time.time()))

            # Forget trail points that are 3 seconds old or older.
            current_time = time.time()
            tracked_points = [(pt, t) for (pt, t) in tracked_points if current_time - t < 3]

            # Join consecutive trail points with line segments.
            for (pt1, _), (pt2, _) in zip(tracked_points, tracked_points[1:]):
                cv2.line(annotated_frame, pt1, pt2, (0, 255, 0), 2)
        else:
            # No mask on this frame: show the raw frame. Previously
            # `annotated_frame` was left unset here, which raised NameError on
            # a fresh kernel and otherwise re-displayed a stale earlier frame.
            annotated_frame = frame

        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        if cv2.waitKey(1) & 0xFF in (ord("q"), ord("Q")):
            break
finally:
    # Always free the capture and close the window, even if inference or
    # drawing raised or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()




0: 384x640 (no detections), 260.2ms
Speed: 14.0ms preprocess, 260.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 17.0ms
Speed: 1.0ms preprocess, 17.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 tennis-court, 15.0ms
Speed: 2.0ms preprocess, 15.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 12.0ms
Speed: 2.0ms preprocess, 12.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 tennis-court, 16.0ms
Speed: 2.0ms preprocess, 16.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 13.0ms
Speed: 1.0ms preprocess, 13.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 15.0ms
Speed: 1.0ms preprocess, 15.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 tennis-court, 18.0ms
Speed: 1.0ms preprocess, 18.0ms in

In [24]:
def track_video_or_camera(video_source, device='cpu', seg_conf=0.99,
                          track_conf=0.35, trail_seconds=3.0):
    """
    Segment and track a video file or live camera feed, showing a live preview.

    Each frame is first passed to the notebook-level ``model_seg``. When at
    least one mask is returned, the segmentation overlay is passed to the
    notebook-level ``model_track`` and a green trail is drawn through the
    centres of the first tracked box. Frames without a mask are displayed
    unmodified.

    Parameters:
    - video_source: Path to the video file or integer for camera index.
    - device: Device to run the segmentation and tracking models (default 'cpu').
    - seg_conf: Confidence threshold passed to the segmentation model
      (default 0.99).
    - track_conf: Confidence threshold passed to the tracker (default 0.35).
    - trail_seconds: How long, in seconds of elapsed real time, a point stays
      in the drawn trail (default 3.0).

    Returns:
    - None. The function only displays frames in an OpenCV window.

    Press 'q' (or 'Q') to exit the loop and close the window. The capture and
    the window are released even if an error occurs while processing.
    """
    window_name = "YOLOv8 Tracking"

    cap = cv2.VideoCapture(video_source)
    # Request 1920x1080 from the source; whether the request is honoured
    # depends on the capture backend and device.
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

    # Each entry is ((center_x, center_y), monotonic timestamp of the detection).
    tracked_points = []

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of the stream or a failed read: nothing more to process.
                break

            segmentation_results = model_seg(frame, conf=seg_conf, device=device)
            masks = segmentation_results[0].masks

            if masks is not None and masks.shape[0] > 0:
                # The tracker runs on the frame with the segmentation drawn on it.
                segment_frame = segmentation_results[0].plot()
                tracking_results = model_track.track(
                    segment_frame, persist=True, conf=track_conf, device=device
                )
                annotated_frame = tracking_results[0].plot()

                # The monotonic clock keeps trail ageing immune to system
                # clock adjustments while the loop is running.
                now = time.monotonic()

                if tracking_results[0].boxes.shape[0] > 0:
                    # Only the first box is followed; its first four values
                    # are read as the corner coordinates x1, y1, x2, y2.
                    x1, y1, x2, y2 = tracking_results[0].boxes.data[0][:4]
                    center = (int((x1 + x2) / 2), int((y1 + y2) / 2))
                    tracked_points.append((center, now))

                # Forget trail points that have aged out.
                tracked_points = [
                    (pt, t) for (pt, t) in tracked_points if now - t < trail_seconds
                ]

                # Join consecutive trail points with line segments.
                for (pt1, _), (pt2, _) in zip(tracked_points, tracked_points[1:]):
                    cv2.line(annotated_frame, pt1, pt2, (0, 255, 0), 2)

                cv2.imshow(window_name, annotated_frame)
            else:
                cv2.imshow(window_name, frame)

            if cv2.waitKey(1) & 0xFF in (ord("q"), ord("Q")):
                break
    finally:
        # Always free the capture and close the window, even if inference or
        # drawing raised or the kernel was interrupted.
        cap.release()
        cv2.destroyAllWindows()

In [25]:
# Run the pipeline on camera index 1; a video file path may be given instead.
# `device` is the value chosen in the earlier device-selection cell, so that
# cell (and the model-loading cells) must have been run first.
video_path_or_camera_index = 1  # or 0 for webcam
track_video_or_camera(video_path_or_camera_index, device=device)


0: 384x640 (no detections), 17.6ms
Speed: 2.0ms preprocess, 17.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.0ms
Speed: 2.0ms preprocess, 21.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 19.6ms
Speed: 3.0ms preprocess, 19.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 16.5ms
Speed: 2.5ms preprocess, 16.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 17.0ms
Speed: 2.0ms preprocess, 17.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 18.1ms
Speed: 2.5ms preprocess, 18.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 18.0ms
Speed: 2.5ms preprocess, 18.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 17.5ms
Speed: 2.5ms preprocess, 17.5ms i