In [2]:
from collections import defaultdict
import cv2
import numpy as np

from ultralytics import YOLO

!yolo checks

  from .autonotebook import tqdm as notebook_tqdm


[2K
[2K
Ultralytics 8.3.29 🚀 Python-3.10.2 torch-2.1.2+cpu CPU (Intel Core(TM) i3-8145U 2.10GHz)
Setup complete ✅ (4 CPUs, 7.9 GB RAM, 367.0/1816.4 GB disk)

OS                  Windows-10-10.0.19045-SP0
Environment         Windows
Python              3.10.2
Install             pip
RAM                 7.90 GB
Disk                367.0/1816.4 GB
CPU                 Intel Core(TM) i3-8145U 2.10GHz
CPU count           4
GPU                 None
GPU count           None
CUDA                None

numpy               ✅ 1.26.1>=1.23.0
matplotlib          ✅ 3.8.1>=3.3.0
opencv-python       ✅ 4.9.0.80>=4.6.0
pillow              ✅ 10.1.0>=7.1.2
pyyaml              ✅ 6.0.1>=5.3.1
requests            ✅ 2.31.0>=2.23.0
scipy               ✅ 1.13.0>=1.4.1
torch               ✅ 2.1.2>=1.8.0
torchvision         ✅ 0.16.2>=0.9.0
tqdm                ✅ 4.66.1>=4.64.0
psutil              ✅ 5.9.5
py-cpuinfo          ✅ 9.0.0
pandas              ✅ 2.1.1>=1.1.4
seaborn             ✅ 0.13.2>=0.11.0
ultralytics

In [None]:
def tracking(model, video, plot_bbox=False, save=False, save_as='output.avi', mot16=False):
    """
    Run YOLO-detection-based tracking on the input video and display every processed frame.

    Each frame is passed to ``YOLO.track`` with the 'bytetrack.yaml' tracker and the
    annotated result is shown in a display window; pressing 'q' stops early.
    The result can optionally be saved as a video and/or a MOT16-style annotation file.

        : model (string): path to the YOLOv8n weight file. The weights must correspond to YOLOv8n
        : video (string): path of the input video on which detection and tracking are run
        : plot_bbox (bool): whether to draw bounding boxes on the visualised result
        : save (bool): whether to save the annotated tracking video
        : save_as (string): (when save=True) path of the saved video. Must use the .avi extension
        : mot16 (bool): whether to write MOT16-format annotations to a text file in the
          current directory (mot16_tracking_results.txt)

    """

    # The weight path arrives in the `model` parameter.
    detector = YOLO(model)

    # Open the video file
    cap = cv2.VideoCapture(video)

    # Optional outputs; created below and released in the `finally` clause
    out = None
    mot16_file = None

    # Store the track history: track id -> list of recent (x, y) points
    track_history = defaultdict(list)

    # Frame number (1-based) written into the MOT16 annotation file
    fnum = 1

    try:
        # Video writer, only if 'save' is true
        if save:
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)

            fourcc = cv2.VideoWriter_fourcc(*'DIVX')
            out = cv2.VideoWriter(save_as, fourcc, fps, (width, height))

        # MOT16-format output file, only if 'mot16' is true
        if mot16:
            mot16_file_path = 'mot16_tracking_results.txt'
            mot16_file = open(mot16_file_path, 'w')

        # Loop through the video frames
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of the video (or a read failure)
                break

            # Run tracking on the frame, persisting tracks between frames
            results = detector.track(frame, persist=True, tracker='bytetrack.yaml')
            detections = results[0].boxes

            # Visualize the results on the frame
            # https://docs.ultralytics.com/reference/engine/results/#ultralytics.engine.results.Results.numpy
            annotated_frame = results[0].plot(boxes=plot_bbox)

            if detections.id is not None:
                boxes = detections.xywh.cpu()
                track_ids = detections.id.int().cpu().tolist()
                confs = detections.conf
                clss = detections.cls
            else:
                # No track ids on this frame: nothing to record or draw,
                # but the frame is still shown and saved.
                boxes, track_ids, confs, clss = [], [], [], []

            # Write MOT16-format annotation lines
            if mot16_file is not None:
                # expansion_factor defined in 'update' method of BYTETracker class (byte_tracker.py)
                # NOTE(review): presumably a project-specific tracker modification - confirm.
                expansion_factor = 3

                for box, track_id, conf, cls in zip(boxes, track_ids, confs, clss):
                    x, y, w, h = box
                    if cls == 1:
                        # Undo the box expansion for class 1 before writing
                        w, h = (w / expansion_factor), (h / expansion_factor)
                    # Convert centre-based xywh to top-left-based xywh
                    x, y = x - (w / 2), y - (h / 2)
                    mot16_file.write(f"{fnum},{track_id},{x},{y},{w},{h},{conf},{int(cls)},-1,-1\n")

            # Plot the tracks
            for box, track_id, cls in zip(boxes, track_ids, clss):
                x, y, w, h = box
                track = track_history[track_id]

                if cls == 0:
                    # Players: trail of foot points (slightly below the box centre)
                    track.append((float(x), float(y + 0.4*h)))
                    if len(track) > 50:  # retain the last 50 frames
                        track.pop(0)
                    for i, (x_, y_) in enumerate(track):
                        cv2.ellipse(annotated_frame, center=(int(x_), int(y_)), axes=(int(0.1*w), int(0.05*w)),
                                    angle=0, startAngle=0, endAngle=360, color=(255 - 2*i, 200 - 5*i, 0), thickness=int(i/5))
                else:  # elif cls == 1:
                    # Tennis ball: trail of box-centre points
                    track.append((float(x), float(y)))
                    if len(track) > 20:  # retain the last 20 frames
                        track.pop(0)
                    for i, (x_, y_) in enumerate(track):
                        cv2.circle(annotated_frame, center=(int(x_), int(y_)), radius=int(i/3),
                                   color=(255 - 10*i, 250, 250), thickness=2)

            # Video write
            if out is not None:
                out.write(annotated_frame)

            # Display the annotated frame
            cv2.imshow("YOLOv8 Tracking", annotated_frame)

            fnum += 1

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Always release the capture, writer, annotation file and window,
        # even when tracking raises part-way through the video.
        cap.release()
        if out is not None:
            out.release()
        if mot16_file is not None:
            mot16_file.close()
        cv2.destroyAllWindows()

#### 트래킹 완료본

In [None]:
import cv2
import os

# Paths to video and frame_label directory
video_path = 'D:/Tennis_Video/Tennis_MP4_5.mp4'
output_video_path = 'D:/Tennis_Video/Tennis_Output_with_Frame_Label.mp4'
frame_label_dir = 'D:/Tennis_Video/frame_label'

# Class id -> display name, and class id -> box colour (OpenCV colours are BGR)
class_mapping = {0: "ball", 1: "player", 2: "tennis racket", 3: "referee"}
class_colors = {
    0: (255, 0, 0),    # Blue for ball
    1: (0, 255, 0),    # Green for player
    2: (0, 0, 255),    # Red for tennis racket
    3: (255, 255, 0)   # Cyan for referee in BGR (yellow would be (0, 255, 255))
}

# Video capture and writer setup
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes the
# playback speed and duration of the output video.
fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

frame_index = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Label file for this frame; frames without one are written unchanged
        label_file = os.path.join(frame_label_dir, f'frame_{frame_index:04d}.txt')

        if os.path.exists(label_file):
            with open(label_file, 'r') as f:
                lines = f.readlines()

            for line in lines:
                # Each line: class_id x_center y_center width height,
                # with coordinates normalised to the frame size.
                values = line.strip().split()
                if len(values) < 5:
                    # Skip blank or truncated lines instead of raising IndexError
                    continue
                class_id = int(values[0])
                x_center, y_center = float(values[1]) * width, float(values[2]) * height
                box_width, box_height = float(values[3]) * width, float(values[4]) * height

                # Convert centre/size to corner coordinates in pixels
                x1 = int(x_center - box_width / 2)
                y1 = int(y_center - box_height / 2)
                x2 = int(x_center + box_width / 2)
                y2 = int(y_center + box_height / 2)

                # Draw bounding box and label (white / "Unknown" for unmapped ids)
                color = class_colors.get(class_id, (255, 255, 255))
                label = class_mapping.get(class_id, "Unknown")
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

        # Write the frame to output video
        out.write(frame)
        frame_index += 1
finally:
    # Release the capture and finalise the output file even if a frame fails
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print("Video processing with frame labels completed.")

#### 화살표 경로 예측 코드

In [None]:
import cv2
import os

# Paths to video and frame_label directory
# Input video, annotated output video, and directory of per-frame label files
video_path = 'D:/Tennis_Video/Tennis_MP4_5.mp4'
output_video_path = 'D:/Tennis_Video/Predict_path_Tennis.mp4'
frame_label_dir = 'D:/Tennis_Video/frame_label'

# Class id -> display name, and class id -> box colour (OpenCV colours are BGR)
class_mapping = {0: "ball", 1: "player", 2: "tennis racket", 3: "referee"}
class_colors = {
    0: (255, 0, 0),    # Blue for ball
    1: (0, 255, 0),    # Green for player
    2: (0, 0, 255),    # Red for tennis racket
    3: (255, 255, 0)   # Referee: renders cyan in BGR; yellow would be (0, 255, 255) - NOTE(review): confirm intended colour
}

# Opponent court corners (top-left, top-right, bottom-left, bottom-right).
# Presumably pixel coordinates in the input video's frame - TODO confirm.
opponent_court_bounds = [(733, 374), (1221, 372), (692, 483), (1266, 487)]

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two (x, y) points."""
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    squared_length = delta_x**2 + delta_y**2
    return squared_length**0.5

def find_optimal_targets(opponent_player, court_bounds):
    """Return up to three court corners, ordered farthest-first from the opponent."""
    farthest_first = sorted(
        court_bounds,
        key=lambda corner: calculate_distance(opponent_player, corner),
        reverse=True,
    )
    return farthest_first[:3]

def bounding_boxes_overlap(box1, box2):
    """Return True when two (x_min, y_min, x_max, y_max) boxes intersect or touch."""
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Boxes are disjoint only if one lies strictly beyond the other on some axis.
    apart_horizontally = right_a < left_b or right_b < left_a
    apart_vertically = bottom_a < top_b or bottom_b < top_a
    return not (apart_horizontally or apart_vertically)

def draw_arrows(frame, ball_position, targets):
    """Draw a red arrow from the ball to each target, with a filled dot at the target."""
    arrow_color = (0, 0, 255)
    for corner in targets:
        origin = (int(ball_position[0]), int(ball_position[1]))
        tip = (int(corner[0]), int(corner[1]))
        # Arrow with a reduced tip size, then a small marker at the target location
        cv2.arrowedLine(frame, origin, tip, arrow_color, 2, tipLength=0.2)
        cv2.circle(frame, tip, 5, arrow_color, -1)

# Video capture and writer setup
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes the
# playback speed and duration of the output video.
fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

frame_index = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Label file for this frame; frames without one are written unchanged
        label_file = os.path.join(frame_label_dir, f'frame_{frame_index:04d}.txt')

        # Per-frame detections; reset so nothing carries over from earlier frames
        ball_position = None
        ball_box = None
        racket_position = None
        racket_box = None
        players = []

        if os.path.exists(label_file):
            with open(label_file, 'r') as f:
                lines = f.readlines()

            for line in lines:
                # Each line: class_id x_center y_center width height,
                # with coordinates normalised to the frame size.
                values = line.strip().split()
                if len(values) < 5:
                    # Skip blank or truncated lines instead of raising IndexError
                    continue
                class_id = int(values[0])
                x_center, y_center = float(values[1]) * width, float(values[2]) * height
                box_width, box_height = float(values[3]) * width, float(values[4]) * height

                # Convert centre/size to corner coordinates in pixels
                x1 = int(x_center - box_width / 2)
                y1 = int(y_center - box_height / 2)
                x2 = int(x_center + box_width / 2)
                y2 = int(y_center + box_height / 2)

                # Remember the ball, racket and player detections.
                # If a class appears more than once, the last ball/racket wins.
                if class_id == 0:  # Ball
                    ball_position = (x_center, y_center)
                    ball_box = (x1, y1, x2, y2)
                elif class_id == 2:  # Tennis racket
                    racket_position = (x_center, y_center)
                    racket_box = (x1, y1, x2, y2)
                elif class_id == 1:  # Player
                    players.append((x_center, y_center))

                # Draw bounding box and label (white / "Unknown" for unmapped ids)
                color = class_colors.get(class_id, (255, 255, 255))
                label = class_mapping.get(class_id, "Unknown")
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

        # Arrows are drawn only when the ball box and racket box overlap
        ball_on_racket = (
            ball_box is not None
            and racket_box is not None
            and bounding_boxes_overlap(ball_box, racket_box)
        )
        if ball_on_racket and players:
            # The player farthest from the racket is treated as the opponent
            opponent_player = max(players, key=lambda p: calculate_distance(racket_position, p))
            # Predict target positions
            targets = find_optimal_targets(opponent_player, opponent_court_bounds)
            # Draw arrows
            draw_arrows(frame, ball_position, targets)

        # Write the frame to output video
        out.write(frame)
        frame_index += 1
finally:
    # Release the capture and finalise the output file even if a frame fails
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print("Video processing with predictions completed.")

#### YOLOv8n

In [None]:
import ultralytics
# Run the Ultralytics environment check and print its report
ultralytics.checks()

In [None]:
from ultralytics import YOLO

# Load the YOLO model
model = YOLO("yolov8n.pt")  # use the pretrained Nano model

# Train on the dataset
model.train(data="D:/data.yaml", epochs=5, imgsz=640)  # dataset described by data.yaml


In [None]:
import cv2
import os

# Paths to video and frame_label directory
# Input video, annotated output video, and directory of per-frame label files
video_path = 'D:/Tennis_Video/Tennis_MP4_5.mp4'
output_video_path = 'D:/Tennis_Video/Predict_path_Tennis.mp4'
frame_label_dir = 'D:/Tennis_Video/frame_label'

# Class id -> display name, and class id -> box colour (OpenCV colours are BGR)
class_mapping = {0: "ball", 1: "player", 2: "tennis racket", 3: "referee"}
class_colors = {
    0: (255, 0, 0),    # Blue for ball
    1: (0, 255, 0),    # Green for player
    2: (0, 0, 255),    # Red for tennis racket
    3: (255, 255, 0)   # Referee: renders cyan in BGR; yellow would be (0, 255, 255) - NOTE(review): confirm intended colour
}

# Opponent court corners (top-left, top-right, bottom-left, bottom-right).
# Presumably pixel coordinates in the input video's frame - TODO confirm.
opponent_court_bounds = [(733, 374), (1221, 372), (692, 483), (1266, 487)]

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two (x, y) points."""
    delta_x = point1[0] - point2[0]
    delta_y = point1[1] - point2[1]
    squared_length = delta_x**2 + delta_y**2
    return squared_length**0.5

def find_optimal_targets(opponent_player, court_bounds):
    """Return up to three court corners, ordered farthest-first from the opponent."""
    farthest_first = sorted(
        court_bounds,
        key=lambda corner: calculate_distance(opponent_player, corner),
        reverse=True,
    )
    return farthest_first[:3]

def bounding_boxes_overlap(box1, box2):
    """Return True when two (x_min, y_min, x_max, y_max) boxes intersect or touch."""
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Boxes are disjoint only if one lies strictly beyond the other on some axis.
    apart_horizontally = right_a < left_b or right_b < left_a
    apart_vertically = bottom_a < top_b or bottom_b < top_a
    return not (apart_horizontally or apart_vertically)

def draw_arrows(frame, ball_position, targets):
    """Draw a red arrow from the ball to each target, with a filled dot at the target."""
    arrow_color = (0, 0, 255)
    for corner in targets:
        origin = (int(ball_position[0]), int(ball_position[1]))
        tip = (int(corner[0]), int(corner[1]))
        # Arrow with a reduced tip size, then a small marker at the target location
        cv2.arrowedLine(frame, origin, tip, arrow_color, 2, tipLength=0.2)
        cv2.circle(frame, tip, 5, arrow_color, -1)

# Video capture and writer setup
cap = cv2.VideoCapture(video_path)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes the
# playback speed and duration of the output video.
fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

frame_index = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Label file for this frame; frames without one are written unchanged
        label_file = os.path.join(frame_label_dir, f'frame_{frame_index:04d}.txt')

        # Per-frame detections; reset so nothing carries over from earlier frames
        ball_position = None
        ball_box = None
        racket_position = None
        racket_box = None
        players = []

        if os.path.exists(label_file):
            with open(label_file, 'r') as f:
                lines = f.readlines()

            for line in lines:
                # Each line: class_id x_center y_center width height,
                # with coordinates normalised to the frame size.
                values = line.strip().split()
                if len(values) < 5:
                    # Skip blank or truncated lines instead of raising IndexError
                    continue
                class_id = int(values[0])
                x_center, y_center = float(values[1]) * width, float(values[2]) * height
                box_width, box_height = float(values[3]) * width, float(values[4]) * height

                # Convert centre/size to corner coordinates in pixels
                x1 = int(x_center - box_width / 2)
                y1 = int(y_center - box_height / 2)
                x2 = int(x_center + box_width / 2)
                y2 = int(y_center + box_height / 2)

                # Remember the ball, racket and player detections.
                # If a class appears more than once, the last ball/racket wins.
                if class_id == 0:  # Ball
                    ball_position = (x_center, y_center)
                    ball_box = (x1, y1, x2, y2)
                elif class_id == 2:  # Tennis racket
                    racket_position = (x_center, y_center)
                    racket_box = (x1, y1, x2, y2)
                elif class_id == 1:  # Player
                    players.append((x_center, y_center))

                # Draw bounding box and label (white / "Unknown" for unmapped ids)
                color = class_colors.get(class_id, (255, 255, 255))
                label = class_mapping.get(class_id, "Unknown")
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

        # Arrows are drawn only when the ball box and racket box overlap
        ball_on_racket = (
            ball_box is not None
            and racket_box is not None
            and bounding_boxes_overlap(ball_box, racket_box)
        )
        if ball_on_racket and players:
            # The player farthest from the racket is treated as the opponent
            opponent_player = max(players, key=lambda p: calculate_distance(racket_position, p))
            # Predict target positions
            targets = find_optimal_targets(opponent_player, opponent_court_bounds)
            # Draw arrows
            draw_arrows(frame, ball_position, targets)

        # Write the frame to output video
        out.write(frame)
        frame_index += 1
finally:
    # Release the capture and finalise the output file even if a frame fails
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print("Video processing with predictions completed.")

#### 평가 지표

In [None]:
import pandas as pd
import numpy as np

# Paths to the predicted and ground-truth detection tables (CSV)
predicted_data_path = "D:/Tennis_Video/predicted_data.csv"
ground_truth_path = "D:/Tennis_Video/ground_truth_data.csv"

# Load both tables. The evaluation below reads the columns
# "Frame", "Class", "X", "Y", "Width" and "Height" from each of them.
predicted_df = pd.read_csv(predicted_data_path)
ground_truth_df = pd.read_csv(ground_truth_path)

# IoU calculation function
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two (center_x, center_y, width, height) boxes.

    Returns 0 when the union area is not positive.
    """
    def to_corners(box):
        # Centre/size -> (x_min, y_min, x_max, y_max)
        return (
            box[0] - box[2] / 2,
            box[1] - box[3] / 2,
            box[0] + box[2] / 2,
            box[1] + box[3] / 2,
        )

    a_left, a_top, a_right, a_bottom = to_corners(box1)
    b_left, b_top, b_right, b_bottom = to_corners(box2)

    # Overlap extents are clamped at zero for disjoint boxes
    overlap_w = max(0, min(a_right, b_right) - max(a_left, b_left))
    overlap_h = max(0, min(a_bottom, b_bottom) - max(a_top, b_top))
    inter_area = overlap_w * overlap_h

    area_a = (a_right - a_left) * (a_bottom - a_top)
    area_b = (b_right - b_left) * (b_bottom - b_top)
    union_area = area_a + area_b - inter_area

    if union_area > 0:
        return inter_area / union_area
    return 0

# Detection counters, IoUs of accepted matches, and coordinate errors
tp, fp, fn = 0, 0, 0
ious = []
# Signed (predicted - ground truth) differences of X, Y, Width and Height for
# every ground-truth box that was compared against a prediction.
coord_errors = []

box_columns = ["X", "Y", "Width", "Height"]

for frame in ground_truth_df["Frame"].unique():
    gt_frame = ground_truth_df[ground_truth_df["Frame"] == frame]
    pred_frame = predicted_df[predicted_df["Frame"] == frame]

    for _, gt_row in gt_frame.iterrows():
        # Candidate predictions: same frame and same class as the ground truth
        pred_row = pred_frame[pred_frame["Class"] == gt_row["Class"]]
        if pred_row.empty:
            # No prediction of this class in the frame
            fn += 1
            continue

        # NOTE(review): only the first same-class prediction is compared, and
        # predictions with no ground-truth counterpart are never counted as FP.
        first_pred = pred_row.iloc[0]
        gt_box = [gt_row[column] for column in box_columns]
        pred_box = [first_pred[column] for column in box_columns]
        coord_errors.extend(p - g for p, g in zip(pred_box, gt_box))

        iou = calculate_iou(gt_box, pred_box)
        if iou > 0.5:
            tp += 1
            ious.append(iou)
        else:
            fp += 1

precision = tp / (tp + fp) if (tp + fp) > 0 else 0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0
average_iou = np.mean(ious) if ious else 0

# MAE / RMSE over the box-coordinate errors of all compared pairs.
# These were printed but never computed, which raised NameError.
errors = np.asarray(coord_errors, dtype=float)
mae = float(np.mean(np.abs(errors))) if errors.size else 0.0
rmse = float(np.sqrt(np.mean(errors ** 2))) if errors.size else 0.0

# Results
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Average IoU: {average_iou:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")