In [None]:
import cv2

def extract_frame(video_path, frame_number, output_path):
    """Save one frame of a video as an image file.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        frame_number: Value assigned to ``cv2.CAP_PROP_POS_FRAMES`` before reading.
        output_path: Destination handed to ``cv2.imwrite``.

    Returns:
        ``output_path``, whether or not a frame was written. Failures are
        reported on stdout so that a leftover file from an earlier run is not
        silently mistaken for a freshly extracted frame.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error: Failed to open video at {video_path}")
            return output_path
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
        if not ret:
            print(f"Error: Failed to read frame {frame_number} from {video_path}")
        elif not cv2.imwrite(output_path, frame):
            print(f"Error: Failed to write frame to {output_path}")
    finally:
        # Release the capture even if seeking, reading or writing raised.
        cap.release()
    return output_path

# Example usage: write frame 250 of the sample video to dataset_maker/.
# The names defined here are reused by later cells.
video_path = 'input_videos/antwerp_angle.mp4'
frame_number = 250
output_image_path = 'dataset_maker/frame_250.jpg'
extract_frame(video_path, frame_number, output_image_path)


In [None]:
import torch
from ultralytics import YOLO

# Load the YOLOv8 model
model_path = 'models/Field_Key_Points.pt'
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLO(model_path).to(device)

def predict_keypoints(image_path, model, visibility_threshold=0.90):
    """Run the keypoint model on one image and return normalized annotations.

    Args:
        image_path: Image passed to ``model.predict``.
        model: Object exposing ``predict(image, device=..., conf=...)``.
        visibility_threshold: A keypoint whose confidence is not above this
            value is marked invisible and its coordinates are zeroed.

    Returns:
        A tuple ``(bbox, keypoints_data, result)``:
        * ``bbox`` - first box as a list taken from ``boxes.xywhn``, or
          ``[0, 0, 0, 0]`` when there is no box.
        * ``keypoints_data`` - flat list ``[x, y, visibility, ...]`` built from
          ``keypoints.xyn``; visibility is 2 (visible) or 0 (not visible).
          Empty when the model returned no keypoints.
        * ``result`` - the first element of the prediction list.
    """
    # Uses the notebook-level `device` chosen when the model was loaded.
    results = model.predict(image_path, device=device, conf=0.5)
    result = results[0]

    # Bounding box of the first detection (normalized x_center, y_center, w, h)
    if result.boxes:
        bbox = result.boxes.xywhn[0].tolist()
    else:
        bbox = [0, 0, 0, 0]

    keypoints_data = []
    kpts = result.keypoints
    # A frame without detections has nothing to index; return an empty list
    # instead of failing on `[0]`.
    if kpts is not None and len(kpts) > 0 and kpts.conf is not None:
        keypoints = kpts.xyn[0].tolist()
        confidences = kpts.conf[0].tolist()
        for (x, y), conf in zip(keypoints, confidences):
            visibility = 2 if conf > visibility_threshold else 0
            if visibility == 0:
                x, y = 0, 0
            keypoints_data.extend([x, y, visibility])

    return bbox, keypoints_data, result

def format_output(bbox, keypoints_data):
    """Build one flat annotation row.

    The row starts with the class index 0 (pitch), followed by the bounding box
    values and then the already-flattened keypoint values, in that order.
    """
    return [0, *bbox, *keypoints_data]

# Example usage: predict on the frame saved by the extraction cell.
# Note: `results` holds the single result object returned by predict_keypoints,
# not the full prediction list.
output_image_path = 'dataset_maker/frame_250.jpg'
bounding_box, predicted_keypoints, results = predict_keypoints(output_image_path, model)

# Flatten class index, bounding box and keypoints into one row
formatted_output = format_output(bounding_box, predicted_keypoints)

# Print the formatted output for verification
print("Formatted Output:", formatted_output)


In [None]:
def draw_on_image(results):
    """Render a prediction with explicit plot options and display it.

    The overlay is displayed by the same ``plot`` call that configures it
    (``show=True``). Previously the configured render was discarded and a
    second render from ``results.show()`` was what appeared on screen, so the
    options below had no visible effect.

    Args:
        results: Object exposing ``plot(...)``; the notebook passes the single
            result object returned by ``predict_keypoints``.
    """
    results.plot(
        conf=True,         # Include detection confidence scores
        line_width=None,   # Line width of bounding boxes (None = library default)
        font_size=None,    # Text font size (None = library default)
        font='Arial.ttf',  # Font name for text annotations
        pil=False,         # Do not return the image as a PIL Image object
        img=None,          # Alternative image for plotting
        im_gpu=None,       # GPU-accelerated image for faster mask plotting
        kpt_radius=5,      # Radius for drawn keypoints
        kpt_line=True,     # Connect keypoints with lines
        labels=True,       # Include class labels in annotations
        boxes=True,        # Overlay bounding boxes on the image
        masks=False,       # Do not overlay masks on the image
        probs=True,        # Include classification probabilities
        show=True,         # Display the annotated image using the default image viewer
        save=False,        # Do not save the annotated image to a file
        filename=None      # Only used when save is True
    )



# Show the prediction overlay; `results` is produced by the predict_keypoints cell.
draw_on_image(results)

In [None]:

def draw_formatted_output(image_path, formatted_output):
    """Draw a flat annotation row on its image and show it in an OpenCV window.

    Args:
        image_path: Image loaded with ``cv2.imread``.
        formatted_output: Flat row ``[class, x_center, y_center, width, height,
            x1, y1, v1, x2, y2, v2, ...]`` with coordinates normalized to the
            image size. Only keypoints whose visibility is 2 are drawn.

    Returns:
        None. Prints an error and returns early when the image cannot be loaded
        or the row is too short to contain a bounding box. Otherwise blocks in
        ``cv2.waitKey(0)`` until a key is pressed.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image at {image_path}")
        return

    if len(formatted_output) < 5:
        print("Error: formatted_output must contain a class index and four bounding-box values")
        return

    # Index 0 is the class index, which is not needed for drawing.
    x_center, y_center, bbox_width, bbox_height = formatted_output[1:5]
    keypoints = formatted_output[5:]

    # Only the first two dimensions are needed to scale coordinates.
    h, w = image.shape[:2]

    # Convert the normalized centre/size box to pixel corner coordinates
    x_center, y_center = int(x_center * w), int(y_center * h)
    bbox_width, bbox_height = int(bbox_width * w), int(bbox_height * h)
    x1, y1 = x_center - bbox_width // 2, y_center - bbox_height // 2
    x2, y2 = x_center + bbox_width // 2, y_center + bbox_height // 2
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # Walk complete (x, y, visibility) triplets; a trailing partial triplet is ignored.
    for i in range(0, len(keypoints) - 2, 3):
        x, y, vis = keypoints[i], keypoints[i + 1], keypoints[i + 2]
        if vis == 2:
            cv2.circle(image, (int(x * w), int(y * h)), 5, (0, 0, 255), -1)

    # Display the image until a key is pressed
    cv2.imshow('Annotated Image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage: `formatted_output` comes from the prediction cell above
output_image_path = 'dataset_maker/frame_250.jpg'


# Draw the formatted output on the image
draw_formatted_output(output_image_path, formatted_output)


In [None]:
import cv2
import numpy as np
import torch
from ultralytics import YOLO

# Load the YOLOv8 model
model_path = 'models/Field_Key_Points.pt'
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLO(model_path).to(device)

def predict_keypoints(image_path, model, visibility_threshold=0.90):
    """Run the keypoint model on one image and return pixel-space annotations.

    Args:
        image_path: Image passed to ``model.predict``.
        model: Object exposing ``predict(image, device=..., conf=...)``.
        visibility_threshold: A keypoint whose confidence is not above this
            value is marked invisible and its coordinates are zeroed.

    Returns:
        A tuple ``(bbox, keypoints_data, result)``:
        * ``bbox`` - first box as a list taken from ``boxes.xyxy``, or
          ``[0, 0, 0, 0]`` when there is no box.
        * ``keypoints_data`` - list of ``[x, y, visibility]`` built from
          ``keypoints.xy``; visibility is 2 (visible) or 0 (not visible).
          Empty when the model returned no keypoints.
        * ``result`` - the first element of the prediction list.
    """
    # Uses the notebook-level `device` chosen when the model was loaded.
    results = model.predict(image_path, device=device, conf=0.5)
    result = results[0]

    # Bounding box of the first detection in xyxy form (not normalized)
    if result.boxes:
        bbox = result.boxes.xyxy[0].tolist()
    else:
        bbox = [0, 0, 0, 0]

    keypoints_data = []
    kpts = result.keypoints
    # A frame without detections has nothing to index; return an empty list
    # instead of failing on `[0]`.
    if kpts is not None and len(kpts) > 0 and kpts.conf is not None:
        keypoints = kpts.xy[0].tolist()
        confidences = kpts.conf[0].tolist()
        for (x, y), conf in zip(keypoints, confidences):
            visibility = 2 if conf > visibility_threshold else 0
            if visibility == 0:
                x, y = 0, 0
            keypoints_data.append([x, y, visibility])

    return bbox, keypoints_data, result

def format_output(bbox, keypoints_data, image_shape):
    """Convert a corner-format box and keypoint triplets to one normalized row.

    Args:
        bbox: Indexable ``[x1, y1, x2, y2]``.
        keypoints_data: Iterable of ``(x, y, visibility)`` triplets.
        image_shape: ``(height, width)`` used as the normalization divisors.

    Returns:
        ``[0, x_center, y_center, width, height, x, y, visibility, ...]`` where
        the leading 0 is the class index for the pitch and every coordinate is
        divided by the image width or height.
    """
    img_h, img_w = image_shape

    # Keypoints first, then the box, mirroring the order values are computed in.
    flat_kpts = [
        value
        for kp_x, kp_y, kp_vis in keypoints_data
        for value in (kp_x / img_w, kp_y / img_h, kp_vis)
    ]

    box_row = [
        (bbox[0] + bbox[2]) / 2 / img_w,   # x_center
        (bbox[1] + bbox[3]) / 2 / img_h,   # y_center
        (bbox[2] - bbox[0]) / img_w,       # width
        (bbox[3] - bbox[1]) / img_h,       # height
    ]
    return [0] + box_row + flat_kpts

def adjust_keypoints(image_path, bbox, keypoints_data):
    """Let the user drag keypoints and bounding-box corners in an OpenCV window.

    Left-click within 10 pixels of a visible keypoint (or, failing that, of a
    bounding-box corner) to grab it, move the mouse to drag it, and release to
    drop it. Any key press closes the window.

    Args:
        image_path: Image loaded with ``cv2.imread``.
        bbox: Mutable ``[x1, y1, x2, y2]`` list; edited in place.
        keypoints_data: List of mutable ``[x, y, visibility]`` lists; edited in
            place. Only entries with visibility 2 are drawn and selectable.

    Returns:
        The same ``bbox`` and ``keypoints_data`` objects after editing.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Failed to load image at {image_path}")

    window_name = 'Adjust Keypoints'
    grab_radius = 10  # pixels
    # (x index, y index) into bbox for each corner: TL, TR, BR, BL
    corner_slots = ((0, 1), (2, 1), (2, 3), (0, 3))

    selected_keypoint = None
    selected_bbox_corner = None

    def draw_keypoints(img, keypoints, bbox):
        # Draw bounding box
        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
        # Draw visible keypoints with their 1-based index
        for i, (x, y, vis) in enumerate(keypoints):
            if vis == 2:
                cv2.circle(img, (int(x), int(y)), 5, (0, 255, 0), -1)
                cv2.putText(img, str(i+1), (int(x) + 5, int(y) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
        return img

    def refresh():
        # Always draw on a fresh copy so earlier positions leave no trail.
        cv2.imshow(window_name, draw_keypoints(image.copy(), keypoints_data, bbox))

    def click_event(event, x, y, flags, param):
        nonlocal selected_keypoint, selected_bbox_corner
        if event == cv2.EVENT_LBUTTONDOWN:
            # Start from a clean selection in case a button-up event was missed.
            selected_keypoint = None
            selected_bbox_corner = None

            min_dist = float('inf')
            min_index = -1
            for i, (kp_x, kp_y, vis) in enumerate(keypoints_data):
                if vis == 2:
                    dist = np.sqrt((kp_x - x) ** 2 + (kp_y - y) ** 2)
                    if dist < min_dist:
                        min_dist = dist
                        min_index = i
            if min_dist < grab_radius:
                selected_keypoint = min_index
            else:
                # No keypoint close enough: check the bounding-box corners
                for i, (ix, iy) in enumerate(corner_slots):
                    if np.sqrt((bbox[ix] - x) ** 2 + (bbox[iy] - y) ** 2) < grab_radius:
                        selected_bbox_corner = i
                        break

        elif event == cv2.EVENT_MOUSEMOVE:
            if selected_keypoint is not None:
                keypoints_data[selected_keypoint][0] = x
                keypoints_data[selected_keypoint][1] = y
            elif selected_bbox_corner is not None:
                ix, iy = corner_slots[selected_bbox_corner]
                bbox[ix], bbox[iy] = x, y
            else:
                return  # nothing is being dragged, so nothing changed on screen
            refresh()

        elif event == cv2.EVENT_LBUTTONUP:
            selected_keypoint = None
            selected_bbox_corner = None

    # Draw initial keypoints and bounding box
    refresh()
    cv2.setMouseCallback(window_name, click_event)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return bbox, keypoints_data

# Example usage: predict on the previously extracted frame
output_image_path = 'dataset_maker/frame_250.jpg'
bounding_box, predicted_keypoints, result = predict_keypoints(output_image_path, model)

# Adjust keypoints interactively (blocks until a key is pressed in the window)
adjusted_bbox, adjusted_keypoints = adjust_keypoints(output_image_path, bounding_box, predicted_keypoints)

# Normalize with the (height, width) recorded on the prediction result
formatted_output = format_output(adjusted_bbox, adjusted_keypoints, result.orig_shape)

# Print the formatted output for verification
print("Formatted Output:", formatted_output)


In [None]:

import os
import cv2
import numpy as np
import torch
from ultralytics import YOLO
import cv2

# Load the YOLOv8 model
model_path = 'models/Field_Key_Points.pt'
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLO(model_path).to(device)

# Text drawn next to keypoint i is labels[i] (see draw_keypoints).
# NOTE(review): "14" and "19" sit at the end rather than in numeric order -
# presumably this mirrors the model's keypoint order; confirm.
labels = [
    "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
    "11", "12", "13", "15", "16", "17", "18", "20", "21", "22",
    "23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
    "14", "19"
]


def extract_frame(video_path, frame_number, output_path):
    """Save one frame of a video as an image file.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        frame_number: Value assigned to ``cv2.CAP_PROP_POS_FRAMES`` before reading.
        output_path: Destination handed to ``cv2.imwrite``.

    Returns:
        ``output_path``, whether or not a frame was written. Failures are
        reported on stdout so that a leftover file from an earlier run is not
        silently mistaken for a freshly extracted frame.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error: Failed to open video at {video_path}")
            return output_path
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
        if not ret:
            print(f"Error: Failed to read frame {frame_number} from {video_path}")
        elif not cv2.imwrite(output_path, frame):
            print(f"Error: Failed to write frame to {output_path}")
    finally:
        # Release the capture even if seeking, reading or writing raised.
        cap.release()
    return output_path




def predict_keypoints(image_path, model, visibility_threshold=0.05):
    """Run the keypoint model on one image and return pixel-space annotations.

    Args:
        image_path: Image passed to ``model.predict``.
        model: Object exposing ``predict(image, device=..., conf=...)``.
        visibility_threshold: A keypoint whose confidence is not above this
            value is marked invisible and its coordinates are zeroed.

    Returns:
        A tuple ``(bbox, keypoints_data, result)``:
        * ``bbox`` - first box as a list taken from ``boxes.xyxy``, or
          ``[0, 0, 0, 0]`` when there is no box.
        * ``keypoints_data`` - list of ``[x, y, visibility]`` built from
          ``keypoints.xy``; visibility is 2 (visible) or 0 (not visible).
          Empty when the model returned no keypoints.
        * ``result`` - the first element of the prediction list.
    """
    # Uses the notebook-level `device` chosen when the model was loaded.
    results = model.predict(image_path, device=device, conf=0.5)
    result = results[0]

    # Bounding box of the first detection in xyxy form (not normalized)
    if result.boxes:
        bbox = result.boxes.xyxy[0].tolist()
    else:
        bbox = [0, 0, 0, 0]

    keypoints_data = []
    kpts = result.keypoints
    # A frame without detections has nothing to index; return an empty list
    # instead of failing on `[0]`.
    if kpts is not None and len(kpts) > 0 and kpts.conf is not None:
        keypoints = kpts.xy[0].tolist()
        confidences = kpts.conf[0].tolist()
        for (x, y), conf in zip(keypoints, confidences):
            visibility = 2 if conf > visibility_threshold else 0
            if visibility == 0:
                x, y = 0, 0
            keypoints_data.append([x, y, visibility])

    return bbox, keypoints_data, result

def format_output(bbox, keypoints_data, image_shape):
    """Convert a corner-format box and keypoint triplets to one normalized row.

    Args:
        bbox: Indexable ``[x1, y1, x2, y2]``.
        keypoints_data: Iterable of ``(x, y, visibility)`` triplets.
        image_shape: ``(height, width)`` used as the normalization divisors.

    Returns:
        ``[0, x_center, y_center, width, height, x, y, visibility, ...]`` where
        the leading 0 is the class index for the pitch and every coordinate is
        divided by the image width or height.
    """
    img_h, img_w = image_shape

    # Keypoints first, then the box, mirroring the order values are computed in.
    flat_kpts = [
        value
        for kp_x, kp_y, kp_vis in keypoints_data
        for value in (kp_x / img_w, kp_y / img_h, kp_vis)
    ]

    box_row = [
        (bbox[0] + bbox[2]) / 2 / img_w,   # x_center
        (bbox[1] + bbox[3]) / 2 / img_h,   # y_center
        (bbox[2] - bbox[0]) / img_w,       # width
        (bbox[3] - bbox[1]) / img_h,       # height
    ]
    return [0] + box_row + flat_kpts

def adjust_keypoints(image_path, bbox, keypoints_data):
    """Let the user drag keypoints and bounding-box corners in an OpenCV window.

    Left-click within 10 pixels of a visible keypoint (or, failing that, of a
    bounding-box corner) to grab it, move the mouse to drag it, and release to
    drop it. Any key press closes the window.

    Args:
        image_path: Image loaded with ``cv2.imread``.
        bbox: Mutable ``[x1, y1, x2, y2]`` list; edited in place.
        keypoints_data: List of mutable ``[x, y, visibility]`` lists; edited in
            place. Only entries with visibility 2 are drawn and selectable.

    Returns:
        The same ``bbox`` and ``keypoints_data`` objects after editing.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    # Close any window left over from a previous run of this cell.
    cv2.destroyAllWindows()

    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Failed to load image at {image_path}")

    window_name = 'Adjust Keypoints'
    grab_radius = 10  # pixels
    # (x index, y index) into bbox for each corner: TL, TR, BR, BL
    corner_slots = ((0, 1), (2, 1), (2, 3), (0, 3))

    selected_keypoint = None
    selected_bbox_corner = None

    def draw_keypoints(img, keypoints, bbox):
        # Draw bounding box
        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
        # Draw visible keypoints in red, with their label when one exists
        for i, (x, y, vis) in enumerate(keypoints):
            if vis == 2:
                cv2.circle(img, (int(x), int(y)), 5, (0, 0, 255), -1)
                if i < len(labels):
                    cv2.putText(img, labels[i], (int(x) + 7, int(y) - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
        return img

    def refresh():
        # Always draw on a fresh copy so earlier positions leave no trail.
        cv2.imshow(window_name, draw_keypoints(image.copy(), keypoints_data, bbox))

    def click_event(event, x, y, flags, param):
        nonlocal selected_keypoint, selected_bbox_corner
        if event == cv2.EVENT_LBUTTONDOWN:
            # Start from a clean selection in case a button-up event was missed.
            selected_keypoint = None
            selected_bbox_corner = None

            min_dist = float('inf')
            min_index = -1
            for i, (kp_x, kp_y, vis) in enumerate(keypoints_data):
                if vis == 2:
                    dist = np.sqrt((kp_x - x) ** 2 + (kp_y - y) ** 2)
                    if dist < min_dist:
                        min_dist = dist
                        min_index = i
            if min_dist < grab_radius:
                selected_keypoint = min_index
            else:
                # No keypoint close enough: check the bounding-box corners
                for i, (ix, iy) in enumerate(corner_slots):
                    if np.sqrt((bbox[ix] - x) ** 2 + (bbox[iy] - y) ** 2) < grab_radius:
                        selected_bbox_corner = i
                        break

        elif event == cv2.EVENT_MOUSEMOVE:
            if selected_keypoint is not None:
                keypoints_data[selected_keypoint][0] = x
                keypoints_data[selected_keypoint][1] = y
            elif selected_bbox_corner is not None:
                ix, iy = corner_slots[selected_bbox_corner]
                bbox[ix], bbox[iy] = x, y
            else:
                return  # nothing is being dragged, so nothing changed on screen
            refresh()

        elif event == cv2.EVENT_LBUTTONUP:
            selected_keypoint = None
            selected_bbox_corner = None

    # Draw initial keypoints and bounding box
    refresh()
    cv2.setMouseCallback(window_name, click_event)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return bbox, keypoints_data

def save_output(image_path, formatted_output, output_dir='dataset_maker'):
    """Write an annotation row to a ``.txt`` file named after the image.

    Args:
        image_path: Image path; only its base name without extension is used.
        formatted_output: Iterable of values, written space-separated on one line.
        output_dir: Directory that receives the file. Created when missing.
            Defaults to ``'dataset_maker'``.
    """
    # Get the base name of the image file without the extension
    file_name, _ = os.path.splitext(os.path.basename(image_path))

    # Make sure the target directory exists before opening the file
    os.makedirs(output_dir, exist_ok=True)
    output_txt_path = os.path.join(output_dir, f"{file_name}.txt")

    # Write the formatted output to the txt file
    with open(output_txt_path, 'w') as f:
        f.write(' '.join(map(str, formatted_output)))

    print(f"Saved formatted output to {output_txt_path}")

def draw_on_image(results, filename='dataset_maker/annotated_image.jpg'):
    """Display the prediction overlay and save it to disk.

    Args:
        results: Object exposing ``show()`` and ``save(filename)``; the notebook
            passes the result object returned by ``predict_keypoints``.
        filename: Path of the annotated image to write. Its directory is
            created when missing.
    """
    # The separate plot() call was dropped: its image was never used, and
    # show()/save() each produce their own render.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Show results to screen (in supported environments)
    results.show()

    # Save results to disk
    results.save(filename)


# Example usage: full single-frame pipeline

# 1. Extract the frame to annotate
video_path = 'input_videos/antwerp_angle.mp4'
frame_number = 250
output_image_path = f'dataset_maker/frame_{frame_number}.jpg'
extract_frame(video_path, frame_number, output_image_path)


bounding_box, predicted_keypoints, result = predict_keypoints(output_image_path, model)

# Adjust keypoints interactively (blocks until a key is pressed in the window)
adjusted_bbox, adjusted_keypoints = adjust_keypoints(output_image_path, bounding_box, predicted_keypoints)

# Normalize with the (height, width) recorded on the prediction result
formatted_output = format_output(adjusted_bbox, adjusted_keypoints, result.orig_shape)

# Print the formatted output for verification
print("Formatted Output:", formatted_output)

# Save the formatted output to a .txt file
save_output(output_image_path, formatted_output)

# Show and save the model's own overlay (this reflects the raw prediction,
# not the manual adjustments made above)
draw_on_image(result)



In [7]:
import os
import cv2
import numpy as np
import torch
from ultralytics import YOLO

# Load the YOLOv8 model
model_path = 'models/Field_Key_Points.pt'
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLO(model_path).to(device)

# Text drawn next to keypoint i is labels[i]; adjust_keypoints also uses
# labels.index(...) to map a typed label back to a keypoint slot.
# NOTE(review): "14" and "19" sit at the end rather than in numeric order -
# presumably this mirrors the model's keypoint order; confirm.
labels = [
    "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
    "11", "12", "13", "15", "16", "17", "18", "20", "21", "22",
    "23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
    "14", "19"
]

def extract_frame(video_path, frame_number, output_path):
    """Save one frame of a video as an image file.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        frame_number: Value assigned to ``cv2.CAP_PROP_POS_FRAMES`` before reading.
        output_path: Destination handed to ``cv2.imwrite``.

    Returns:
        ``output_path``, whether or not a frame was written. Failures are
        reported on stdout so that a leftover file from an earlier run is not
        silently mistaken for a freshly extracted frame.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error: Failed to open video at {video_path}")
            return output_path
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
        if not ret:
            print(f"Error: Failed to read frame {frame_number} from {video_path}")
        elif not cv2.imwrite(output_path, frame):
            print(f"Error: Failed to write frame to {output_path}")
    finally:
        # Release the capture even if seeking, reading or writing raised.
        cap.release()
    return output_path

def predict_keypoints(image_path, model, visibility_threshold=0.05):
    """Run the keypoint model on one image and return pixel-space annotations.

    Args:
        image_path: Image passed to ``model.predict``.
        model: Object exposing ``predict(image, device=..., conf=...)``.
        visibility_threshold: A keypoint whose confidence is not above this
            value is marked invisible and its coordinates are zeroed.

    Returns:
        A tuple ``(bbox, keypoints_data, result)``:
        * ``bbox`` - first box as a list taken from ``boxes.xyxy``, or
          ``[0, 0, 0, 0]`` when there is no box.
        * ``keypoints_data`` - list of ``[x, y, visibility]`` built from
          ``keypoints.xy``; visibility is 2 (visible) or 0 (not visible).
          Empty when the model returned no keypoints.
        * ``result`` - the first element of the prediction list.
    """
    # Uses the notebook-level `device` chosen when the model was loaded.
    results = model.predict(image_path, device=device, conf=0.5)
    result = results[0]

    if result.boxes:
        bbox = result.boxes.xyxy[0].tolist()
    else:
        bbox = [0, 0, 0, 0]

    keypoints_data = []
    kpts = result.keypoints
    # A frame without detections has nothing to index; return an empty list
    # instead of failing on `[0]`.
    if kpts is not None and len(kpts) > 0 and kpts.conf is not None:
        keypoints = kpts.xy[0].tolist()
        confidences = kpts.conf[0].tolist()
        for (x, y), conf in zip(keypoints, confidences):
            visibility = 2 if conf > visibility_threshold else 0
            if visibility == 0:
                x, y = 0, 0
            keypoints_data.append([x, y, visibility])

    return bbox, keypoints_data, result

def format_output(bbox, keypoints_data, image_shape):
    """Convert a corner-format box and keypoint triplets to one normalized row.

    Args:
        bbox: Indexable ``[x1, y1, x2, y2]``.
        keypoints_data: Iterable of ``(x, y, visibility)`` triplets.
        image_shape: ``(height, width)`` used as the normalization divisors.

    Returns:
        ``[0, x_center, y_center, width, height, x, y, visibility, ...]`` where
        the leading 0 is the class index and every coordinate is divided by the
        image width or height.
    """
    img_h, img_w = image_shape

    # Keypoints first, then the box, mirroring the order values are computed in.
    flat_kpts = [
        value
        for kp_x, kp_y, kp_vis in keypoints_data
        for value in (kp_x / img_w, kp_y / img_h, kp_vis)
    ]

    box_row = [
        (bbox[0] + bbox[2]) / 2 / img_w,   # x_center
        (bbox[1] + bbox[3]) / 2 / img_h,   # y_center
        (bbox[2] - bbox[0]) / img_w,       # width
        (bbox[3] - bbox[1]) / img_h,       # height
    ]
    return [0] + box_row + flat_kpts

def adjust_keypoints(image_path, bbox, keypoints_data):
    """Interactively correct keypoints, the bounding box, and add missing points.

    Controls:
        * Left-click within 10 pixels of a visible keypoint, an added point or
          (failing those) a bounding-box corner, then drag to move it.
        * Right-click adds a new, unlabeled point at the cursor.
        * Digit keys build a label; Enter assigns it to the most recently
          added point.
        * ESC closes the window and applies the labeled points.

    Args:
        image_path: Image loaded with ``cv2.imread``.
        bbox: Mutable ``[x1, y1, x2, y2]`` list; edited in place.
        keypoints_data: List of mutable ``[x, y, visibility]`` lists; edited in
            place. Labeled new points overwrite the slot whose position in
            ``labels`` matches the label, with visibility 2.

    Returns:
        The same ``bbox`` and ``keypoints_data`` objects after editing. Added
        points that were never labeled are not returned.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    # Close any window left over from a previous run of this cell.
    cv2.destroyAllWindows()

    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Failed to load image at {image_path}")

    window_name = 'Adjust Keypoints'
    grab_radius = 10  # pixels
    # (x index, y index) into bbox for each corner: TL, TR, BR, BL
    corner_slots = ((0, 1), (2, 1), (2, 3), (0, 3))

    selected_keypoint = None
    selected_bbox_corner = None
    new_points = []  # (x, y, label) tuples added with right-click

    def draw_keypoints(img, keypoints, bbox):
        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
        # Model keypoints: red dot plus label
        for i, (x, y, vis) in enumerate(keypoints):
            if vis == 2:
                cv2.circle(img, (int(x), int(y)), 5, (0, 0, 255), -1)
                if i < len(labels):
                    cv2.putText(img, labels[i], (int(x) + 7, int(y) - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
        # Manually added points: blue dot plus whatever label has been typed
        for (x, y, label) in new_points:
            cv2.circle(img, (int(x), int(y)), 5, (255, 0, 0), -1)
            cv2.putText(img, label, (int(x) + 7, int(y) - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
        return img

    def refresh():
        # Always draw on a fresh copy so earlier positions leave no trail.
        cv2.imshow(window_name, draw_keypoints(image.copy(), keypoints_data, bbox))

    def click_event(event, x, y, flags, param):
        nonlocal selected_keypoint, selected_bbox_corner
        if event == cv2.EVENT_LBUTTONDOWN:
            # Start from a clean selection in case a button-up event was missed.
            selected_keypoint = None
            selected_bbox_corner = None

            min_dist = float('inf')
            min_index = -1
            for i, (kp_x, kp_y, vis) in enumerate(keypoints_data):
                if vis == 2:
                    dist = np.sqrt((kp_x - x) ** 2 + (kp_y - y) ** 2)
                    if dist < min_dist:
                        min_dist = dist
                        min_index = i
            # Added points share one index space, offset by len(keypoints_data).
            for i, (kp_x, kp_y, _) in enumerate(new_points):
                dist = np.sqrt((kp_x - x) ** 2 + (kp_y - y) ** 2)
                if dist < min_dist:
                    min_dist = dist
                    min_index = i + len(keypoints_data)
            if min_dist < grab_radius:
                selected_keypoint = min_index
            else:
                for i, (ix, iy) in enumerate(corner_slots):
                    if np.sqrt((bbox[ix] - x) ** 2 + (bbox[iy] - y) ** 2) < grab_radius:
                        selected_bbox_corner = i
                        break

        elif event == cv2.EVENT_RBUTTONDOWN:
            new_points.append((x, y, ""))
            refresh()

        elif event == cv2.EVENT_MOUSEMOVE:
            if selected_keypoint is not None:
                if selected_keypoint < len(keypoints_data):
                    keypoints_data[selected_keypoint][0] = x
                    keypoints_data[selected_keypoint][1] = y
                else:
                    slot = selected_keypoint - len(keypoints_data)
                    new_points[slot] = (x, y, new_points[slot][2])
            elif selected_bbox_corner is not None:
                ix, iy = corner_slots[selected_bbox_corner]
                bbox[ix], bbox[iy] = x, y
            else:
                return  # nothing is being dragged, so nothing changed on screen
            refresh()

        elif event == cv2.EVENT_LBUTTONUP:
            selected_keypoint = None
            selected_bbox_corner = None

    refresh()
    cv2.setMouseCallback(window_name, click_event)

    current_label = ""
    while True:
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC to exit
            break
        elif key in (13, 10):  # Enter to confirm label (some backends report 10)
            if new_points and current_label:
                new_points[-1] = (new_points[-1][0], new_points[-1][1], current_label)
                print(f"Labeled point {len(new_points)} as {current_label}")
                current_label = ""
                refresh()
        elif ord('0') <= key <= ord('9'):
            current_label += chr(key)
            print(f"Current label: {current_label}")

    cv2.destroyAllWindows()

    # Copy the labeled points into their keypoint slots.
    for (x, y, label) in new_points:
        if not label:
            continue
        # `labels` holds zero-padded strings, so a typed "1" must match "01".
        padded = label.zfill(2)
        if padded not in labels:
            # An unknown label must not raise and discard the whole session.
            print(f"Warning: label {label} is not a known keypoint label; point ignored")
            continue
        index = labels.index(padded)
        # Pad with invisible entries when there are fewer keypoints than the slot index.
        while len(keypoints_data) <= index:
            keypoints_data.append([0, 0, 0])
        keypoints_data[index] = [x, y, 2]

    return bbox, keypoints_data

def save_output(image_path, formatted_output, output_dir='dataset_maker'):
    """Write an annotation row to a ``.txt`` file named after the image.

    Args:
        image_path: Image path; only its base name without extension is used.
        formatted_output: Iterable of values, written space-separated on one line.
        output_dir: Directory that receives the file. Created when missing.
            Defaults to ``'dataset_maker'``.
    """
    file_name, _ = os.path.splitext(os.path.basename(image_path))

    # Make sure the target directory exists before opening the file.
    os.makedirs(output_dir, exist_ok=True)
    output_txt_path = os.path.join(output_dir, f"{file_name}.txt")

    with open(output_txt_path, 'w') as f:
        f.write(' '.join(map(str, formatted_output)))
    print(f"Saved formatted output to {output_txt_path}")

def draw_on_image(results, filename='dataset_maker/annotated_image.jpg'):
    """Display the prediction overlay and save it to disk.

    Args:
        results: Object exposing ``show()`` and ``save(filename)``; the notebook
            passes the result object returned by ``predict_keypoints``.
        filename: Path of the annotated image to write. Its directory is
            created when missing.
    """
    # The separate plot() call was dropped: its image was never used, and
    # show()/save() each produce their own render.
    out_dir = os.path.dirname(filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    results.show()
    results.save(filename)

# Single-frame pipeline: extract -> predict -> adjust by hand -> format -> save
video_path = 'input_videos/antwerp_angle.mp4'
frame_number = 250
output_image_path = f'dataset_maker/frame_{frame_number}.jpg'
extract_frame(video_path, frame_number, output_image_path)
bounding_box, predicted_keypoints, result = predict_keypoints(output_image_path, model)
# Blocks until ESC is pressed in the OpenCV window
adjusted_bbox, adjusted_keypoints = adjust_keypoints(output_image_path, bounding_box, predicted_keypoints)
# Normalize with the (height, width) recorded on the prediction result
formatted_output = format_output(adjusted_bbox, adjusted_keypoints, result.orig_shape)
print("Formatted Output:", formatted_output)
save_output(output_image_path, formatted_output)
# Shows/saves the model's own overlay, not the manually adjusted annotations
draw_on_image(result)



image 1/1 c:\Users\Dokutsu\Documents\Project\usg\football_analysis\dataset_maker\frame_250.jpg: 384x640 1 pitch, 31.0ms
Speed: 3.0ms preprocess, 31.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Current label: 1
Current label: 18
Labeled point 1 as 18
Formatted Output: [0, 0.4986979166666667, 0.5, 0.9963541666666667, 1.0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0.096875, 0.07777777777777778, 2, 0.10885416666666667, 0.24166666666666667, 2, 0.12291666666666666, 0.43425925925925923, 2, 0.1515625, 0.9175925925925926, 2, 0.6046875, 0.09907407407407408, 2, 0.6713541666666667, 0.17777777777777778, 2, 0.7703125, 0.2953703703703704, 2, 0.8958333333333334, 0.4398148148148148, 2, 0.796875, 0.21851851851851853, 2, 0.809375, 0.14166666666666666, 2, 0.9661458333333334, 0.2796296296296296, 2, 0.725, 0.009259259259259259, 2, 0.7994791666666666, 0.06759

In [3]:
import os
import cv2
import numpy as np
import torch
from ultralytics import YOLO

# Load the YOLOv8 model (this cell uses a different weights file from the cells above)
model_path = 'models/key_points_2.pt'
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLO(model_path).to(device)

# Text drawn next to keypoint i is labels[i] (see draw_keypoints).
# NOTE(review): "14" and "19" sit at the end rather than in numeric order -
# presumably this mirrors the model's keypoint order; confirm.
labels = [
    "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
    "11", "12", "13", "15", "16", "17", "18", "20", "21", "22",
    "23", "24", "25", "26", "27", "28", "29", "30", "31", "32",
    "14", "19"
]

def create_directories():
    """Create the ``dataset_maker/images`` and ``dataset_maker/labels`` folders.

    Safe to call repeatedly: existing directories are left untouched.
    """
    for folder in ('dataset_maker/images', 'dataset_maker/labels'):
        # exist_ok avoids the separate exists-then-create check.
        os.makedirs(folder, exist_ok=True)

def extract_frame(video_path, frame_number):
    """Read one frame from a video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        frame_number: Value assigned to ``cv2.CAP_PROP_POS_FRAMES`` before reading.

    Returns:
        The ``(ret, frame)`` pair produced by ``cap.read()``; callers should
        check ``ret`` before using ``frame``.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
    finally:
        # Release the capture even if seeking or reading raised.
        cap.release()
    return ret, frame

def save_frame(output_path, frame):
    """Write a frame to disk with ``cv2.imwrite``.

    Args:
        output_path: Destination file path.
        frame: Image passed to ``cv2.imwrite``.

    Returns:
        The value returned by ``cv2.imwrite``. A falsy result is also reported
        on stdout so that a failed write does not go unnoticed.
    """
    written = cv2.imwrite(output_path, frame)
    if not written:
        print(f"Error: Failed to write frame to {output_path}")
    return written

def predict_keypoints(image_path, model):
    """Run the keypoint model on one image and return pixel-space annotations.

    Returns:
        A tuple ``(bbox, keypoints_data, result)``:
        * ``bbox`` - first box as a list taken from ``boxes.xyxy``, or
          ``[0, 0, 0, 0]`` when there is no box.
        * ``keypoints_data`` - list of ``[x, y, 2]`` for keypoints whose
          confidence exceeds 0.70 and ``[0, 0, 0]`` for the others; empty when
          there are no keypoints or no confidences.
        * ``result`` - the first element of the prediction list.
    """
    # Uses the notebook-level `device` chosen when the model was loaded.
    prediction = model.predict(image_path, device=device, conf=0.5)[0]

    bbox = prediction.boxes.xyxy[0].tolist() if prediction.boxes else [0, 0, 0, 0]

    # Coordinates and confidences are guarded separately.
    points = []
    if len(prediction.keypoints) > 0:
        points = prediction.keypoints.xy[0].tolist()

    scores = []
    if prediction.keypoints.conf is not None and len(prediction.keypoints.conf) > 0:
        scores = prediction.keypoints.conf[0].tolist()

    keypoints_data = [
        [px, py, 2] if score > 0.70 else [0, 0, 0]
        for (px, py), score in zip(points, scores)
    ]
    return bbox, keypoints_data, prediction

def format_output(bbox, keypoints_data, image_shape):
    """Convert a corner-format box and keypoint triplets to one normalized row.

    Args:
        bbox: Indexable ``[x1, y1, x2, y2]``.
        keypoints_data: Iterable of ``(x, y, visibility)`` triplets.
        image_shape: ``(height, width)`` used as the normalization divisors.

    Returns:
        ``[0, x_center, y_center, width, height, x, y, visibility, ...]`` where
        the leading 0 is the class index and every coordinate is divided by the
        image width or height.
    """
    img_h, img_w = image_shape

    # Keypoints first, then the box, mirroring the order values are computed in.
    flat_kpts = [
        value
        for kp_x, kp_y, kp_vis in keypoints_data
        for value in (kp_x / img_w, kp_y / img_h, kp_vis)
    ]

    box_row = [
        (bbox[0] + bbox[2]) / 2 / img_w,   # x_center
        (bbox[1] + bbox[3]) / 2 / img_h,   # y_center
        (bbox[2] - bbox[0]) / img_w,       # width
        (bbox[3] - bbox[1]) / img_h,       # height
    ]
    return [0] + box_row + flat_kpts

def adjust_keypoints(image_path, bbox, keypoints_data, current_frame, total_frames):
    """Interactively correct a bounding box and keypoints in an OpenCV window.

    Controls, as implemented below:
      * Left-button drag: moves the nearest visible keypoint or added point if
        it is within 10 px of the click, otherwise a box corner within 10 px.
      * Right click: adds a new, unlabeled point at the cursor.
      * Digit keys, then Enter: type a label and assign it to the most recently
        added point.
      * d: deletes the currently selected point (a point is only selected while
        the left button is held).
      * ESC: accept the frame, s: skip the frame, e: exit.

    Args:
        image_path: Path of the image to display.
        bbox: Mutable ``[x1, y1, x2, y2]`` in pixels; edited in place.
        keypoints_data: List of mutable ``[x, y, visibility]`` entries in
            pixels; edited in place.
        current_frame: Frame counter shown in the overlay text.
        total_frames: Total shown in the overlay text.

    Returns:
        ``(bbox, keypoints_data, False)`` when the frame is accepted,
        ``(None, None, False)`` when it is skipped and ``(None, None, True)``
        when the user asked to exit.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    cv2.destroyAllWindows()
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising; fail with a clear message
        # rather than an AttributeError on image.shape.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]
    image_copy = image.copy()
    selected_keypoint = None
    selected_bbox_corner = None
    new_points = []  # (x, y, label) tuples added with right click
    skip_frame = False
    exit_program = False

    def draw_keypoints(img, keypoints, bbox):
        """Draw the box, visible keypoints, added points and help text onto img."""
        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
        for i, (x, y, vis) in enumerate(keypoints):
            if vis == 2:
                cv2.circle(img, (int(x), int(y)), 5, (0, 0, 255), -1)
                if i < len(labels):
                    cv2.putText(img, labels[i], (int(x) + 7, int(y) - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
        for (x, y, label) in new_points:
            cv2.circle(img, (int(x), int(y)), 5, (255, 0, 0), -1)
            cv2.putText(img, label, (int(x) + 7, int(y) - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
        cv2.putText(img, f'Frame {current_frame}/{total_frames}', (10, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(img, 'Press ESC to save and go to next frame, S to skip frame, E to exit', (10, height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        return img

    def click_event(event, x, y, flags, param):
        """Mouse callback: select, drag and add points; drag box corners."""
        nonlocal selected_keypoint, selected_bbox_corner, keypoints_data, image_copy, bbox, new_points
        if event == cv2.EVENT_LBUTTONDOWN:
            # Find the closest candidate among visible keypoints and added points.
            # Added points are addressed with indices offset by len(keypoints_data).
            min_dist = float('inf')
            min_index = -1
            for i, (kp_x, kp_y, vis) in enumerate(keypoints_data):
                if vis == 2:
                    dist = np.sqrt((kp_x - x) ** 2 + (kp_y - y) ** 2)
                    if dist < min_dist:
                        min_dist = dist
                        min_index = i
            for i, (kp_x, kp_y, _) in enumerate(new_points):
                dist = np.sqrt((kp_x - x) ** 2 + (kp_y - y) ** 2)
                if dist < min_dist:
                    min_dist = dist
                    min_index = i + len(keypoints_data)
            if min_dist < 10:
                selected_keypoint = min_index
            else:
                # No point close enough: try the four box corners instead.
                selected_keypoint = None
                corners = [(bbox[0], bbox[1]), (bbox[2], bbox[1]), (bbox[2], bbox[3]), (bbox[0], bbox[3])]
                for i, (cx, cy) in enumerate(corners):
                    if np.sqrt((cx - x) ** 2 + (cy - y) ** 2) < 10:
                        selected_bbox_corner = i
                        break
        elif event == cv2.EVENT_RBUTTONDOWN:
            new_points.append((x, y, ""))
            draw_keypoints(image_copy, keypoints_data, bbox)
            cv2.imshow('Adjust Keypoints', image_copy)
        elif event == cv2.EVENT_MOUSEMOVE:
            if selected_keypoint is not None:
                if selected_keypoint < len(keypoints_data):
                    keypoints_data[selected_keypoint][0] = x
                    keypoints_data[selected_keypoint][1] = y
                else:
                    new_points[selected_keypoint - len(keypoints_data)] = (x, y, new_points[selected_keypoint - len(keypoints_data)][2])
            elif selected_bbox_corner is not None:
                # Corner order: 0 top-left, 1 top-right, 2 bottom-right, 3 bottom-left.
                if selected_bbox_corner == 0:
                    bbox[0], bbox[1] = x, y
                elif selected_bbox_corner == 1:
                    bbox[2], bbox[1] = x, y
                elif selected_bbox_corner == 2:
                    bbox[2], bbox[3] = x, y
                elif selected_bbox_corner == 3:
                    bbox[0], bbox[3] = x, y
            # Redraw from the untouched image so old overlays do not accumulate.
            image_copy = image.copy()
            draw_keypoints(image_copy, keypoints_data, bbox)
            cv2.imshow('Adjust Keypoints', image_copy)
        elif event == cv2.EVENT_LBUTTONUP:
            selected_keypoint = None
            selected_bbox_corner = None

    image_copy = draw_keypoints(image_copy, keypoints_data, bbox)
    cv2.imshow('Adjust Keypoints', image_copy)
    cv2.setMouseCallback('Adjust Keypoints', click_event)

    current_label = ""
    while True:
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC to exit and save
            break
        elif key == 13:  # Enter to confirm label
            if new_points and current_label:
                new_points[-1] = (new_points[-1][0], new_points[-1][1], current_label)
                print(f"Labeled point {len(new_points)} as {current_label}")
                current_label = ""
                image_copy = image.copy()
                draw_keypoints(image_copy, keypoints_data, bbox)
                cv2.imshow('Adjust Keypoints', image_copy)
        elif ord('0') <= key <= ord('9'):
            current_label += chr(key)
            print(f"Current label: {current_label}")
        elif key == ord('d') and selected_keypoint is not None:
            if selected_keypoint < len(keypoints_data):
                keypoints_data[selected_keypoint] = [0, 0, 0]
            else:
                new_points.pop(selected_keypoint - len(keypoints_data))
            selected_keypoint = None
            image_copy = image.copy()
            draw_keypoints(image_copy, keypoints_data, bbox)
            cv2.imshow('Adjust Keypoints', image_copy)
        elif key == ord('s'):  # Skip frame
            skip_frame = True
            break
        elif key == ord('e'):  # Exit program
            exit_program = True
            break

    cv2.destroyAllWindows()

    if skip_frame:
        return None, None, False
    if exit_program:
        return None, None, True

    # Merge labeled added points into the keypoint slot their label refers to.
    for (x, y, label) in new_points:
        if not label:
            continue
        # Accept "1" for "01": the typed digits are free-form, and a mistyped
        # label must not raise and discard every edit made on this frame.
        resolved = label if label in labels else label.zfill(2)
        if resolved not in labels:
            print(f"Ignoring added point with unknown label '{label}'")
            continue
        index = labels.index(resolved)
        if index >= len(keypoints_data):
            print(f"Ignoring added point '{label}': no keypoint slot {index}")
            continue
        keypoints_data[index] = [x, y, 2]

    return bbox, keypoints_data, False

def save_output(image_path, formatted_output, video_name):
    """Write one label row next to the dataset images.

    The label file is named after the image (extension replaced by ``.txt``)
    and placed in ``dataset_maker/labels``. Values are written space-separated
    on a single line.

    Args:
        image_path: Path of the image the label belongs to; only its base name
            is used.
        formatted_output: Iterable of values making up the label row.
        video_name: Not used by this function.
    """
    stem = os.path.splitext(os.path.basename(image_path))[0]
    output_txt_path = os.path.join('dataset_maker/labels', f"{stem}.txt")
    with open(output_txt_path, 'w') as label_file:
        label_file.write(' '.join(str(value) for value in formatted_output))
    print(f"Saved formatted output to {output_txt_path}")

def draw_on_image(results, filename='dataset_maker/annotated_image.jpg'):
    """Render, display and save an annotated prediction.

    Args:
        results: Object exposing ``plot``, ``show`` and ``save``
            (presumably a single prediction result; confirm against callers).
        filename: Destination handed to ``results.save``.
    """
    # The rendered image returned by plot() is not used afterwards; the call
    # is kept so the sequence of calls on `results` stays the same.
    results.plot(conf=True, kpt_line=True)
    results.show()
    results.save(filename)

def main(video_path, target_frames=50):
    """Sample frames from a video and annotate them interactively.

    Frames are visited at a fixed stride across the video. A frame is offered
    for annotation only if it can be read and the model predicts keypoints on
    it; its image is written to ``dataset_maker/images`` and, once accepted in
    the editor, its label row to ``dataset_maker/labels``. Skipping a frame or
    exiting removes the image that was just written.

    Args:
        video_path: Path of the input video.
        target_frames: Number of accepted frames to collect; also used to
            derive the sampling stride. Defaults to 50.

    Raises:
        ValueError: If ``target_frames`` is not positive.
    """
    if target_frames <= 0:
        raise ValueError("target_frames must be a positive integer")

    create_directories()

    # The capture is only needed for the frame count; extract_frame() opens its
    # own capture per frame, so release this one right away.
    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        cap.release()

    # For videos shorter than target_frames the integer division yields 0,
    # which would re-sample the same frame forever; always advance by >= 1.
    frames_to_sample = max(1, frame_count // target_frames)
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    frame_number = 0
    valid_frame_count = 0

    while frame_number < frame_count and valid_frame_count < target_frames:
        print(f"Frame {valid_frame_count + 1}/{target_frames}")

        # Advance until a frame both decodes and yields predicted keypoints.
        while True:
            ret, frame = extract_frame(video_path, frame_number)
            if ret:
                bounding_box, predicted_keypoints, result = predict_keypoints(frame, model)
                if predicted_keypoints:
                    output_image_path = f'dataset_maker/images/{video_name}_frame_{frame_number}.jpg'
                    save_frame(output_image_path, frame)
                    break

            frame_number += frames_to_sample
            if frame_number >= frame_count:
                return

        adjusted_bbox, adjusted_keypoints, exit_program = adjust_keypoints(
            output_image_path, bounding_box, predicted_keypoints, valid_frame_count + 1, target_frames
        )
        if exit_program:
            os.remove(output_image_path)  # Remove the image if exiting
            return
        if adjusted_bbox is None and adjusted_keypoints is None:
            os.remove(output_image_path)  # Remove the image if skipping
            frame_number += frames_to_sample
            continue

        formatted_output = format_output(adjusted_bbox, adjusted_keypoints, result.orig_shape)
        save_output(output_image_path, formatted_output, video_name)
        valid_frame_count += 1
        frame_number += frames_to_sample

# Example usage: start the interactive annotation loop for one video.
# main() and its helpers read the notebook globals `model`, `device` and `labels`,
# so the cells defining them must have been run first.
main('input_videos/riga.mp4')


Frame 1/50

0: 384x640 1 pitch, 808.0ms
Speed: 3.0ms preprocess, 808.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)
Current label: 1
Current label: 11
Labeled point 1 as 11
Current label: 0
Current label: 02
Labeled point 2 as 02
Current label: 0
Current label: 01
Labeled point 3 as 01
Saved formatted output to dataset_maker/labels\riga_frame_0.txt
Frame 2/50

0: 384x640 1 pitch, 776.2ms
Speed: 9.0ms preprocess, 776.2ms inference, 5.0ms postprocess per image at shape (1, 3, 384, 640)
Current label: 0
Current label: 02
Labeled point 1 as 02
Current label: 0
Current label: 01
Labeled point 2 as 01
Current label: 1
Current label: 10
Labeled point 3 as 10
Current label: 1
Current label: 11
Labeled point 4 as 11
Saved formatted output to dataset_maker/labels\riga_frame_15.txt
Frame 3/50

0: 384x640 1 pitch, 770.0ms
Speed: 44.0ms preprocess, 770.0ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)
Current label: 0
Current label: 01
Labeled point 1 as 01


In [31]:
import os
import random
import shutil

def create_split_directories(base_dir):
    """Create the ``<split>/images`` and ``<split>/labels`` folders under base_dir.

    Splits are ``train``, ``valid`` and ``test``. Folders that already exist
    are left untouched.

    Args:
        base_dir: Root directory of the split dataset.

    Raises:
        FileExistsError: If one of the paths exists but is not a directory.
    """
    for split in ['train', 'valid', 'test']:
        for folder in ['images', 'labels']:
            # exist_ok=True replaces the separate exists() check and its
            # check-then-create window.
            os.makedirs(os.path.join(base_dir, split, folder), exist_ok=True)

def copy_existing_split(existing_dir, output_dir, split):
    """Copy one split of an existing dataset into the output dataset.

    Every ``.jpg`` in ``<existing_dir>/<split>/images`` is copied together with
    the same-named ``.txt`` from ``<existing_dir>/<split>/labels`` into the
    matching folders under ``<output_dir>/<split>``. Other files are ignored.

    Args:
        existing_dir: Root of the dataset to copy from.
        output_dir: Root of the dataset to copy into; the split folders must
            already exist.
        split: Split name, e.g. ``'train'``.
    """
    source_root = os.path.join(existing_dir, split)
    target_root = os.path.join(output_dir, split)

    for file_name in os.listdir(os.path.join(source_root, 'images')):
        if not file_name.endswith('.jpg'):
            continue
        label_name = os.path.splitext(file_name)[0] + '.txt'

        # Image first, then its label, same order as the files are paired.
        shutil.copy(
            os.path.join(source_root, 'images', file_name),
            os.path.join(target_root, 'images', file_name),
        )
        shutil.copy(
            os.path.join(source_root, 'labels', label_name),
            os.path.join(target_root, 'labels', label_name),
        )

def split_and_copy_new_data(new_data_dir, output_dir, train_ratio=0.7, valid_ratio=0.2, test_ratio=0.1, seed=None):
    """Shuffle newly annotated images and copy them into train/valid/test.

    Only ``.jpg`` images in ``<new_data_dir>/images`` that have a same-named
    ``.txt`` in ``<new_data_dir>/labels`` are used. Images without a label
    (for example from an interrupted annotation session) are reported and left
    out instead of aborting the copy halfway through.

    Args:
        new_data_dir: Directory containing ``images`` and ``labels`` folders.
        output_dir: Root of the split dataset; the split folders must exist.
        train_ratio: Fraction of pairs assigned to ``train`` (rounded down).
        valid_ratio: Fraction of pairs assigned to ``valid`` (rounded down).
        test_ratio: Accepted for interface compatibility but not read;
            ``test`` receives every pair not assigned to train or valid.
        seed: Optional seed for a reproducible shuffle. With ``None`` the
            module-level ``random`` state is used, so each run can differ.
    """
    images_dir = os.path.join(new_data_dir, 'images')
    labels_dir = os.path.join(new_data_dir, 'labels')

    # Sort the listing so that a given seed always produces the same split,
    # independent of the order the file system returns entries in.
    pairs = []
    for image in sorted(os.listdir(images_dir)):
        if not image.endswith('.jpg'):
            continue
        label = os.path.splitext(image)[0] + '.txt'
        if os.path.isfile(os.path.join(labels_dir, label)):
            pairs.append((image, label))
        else:
            print(f"Skipping {image}: no label file {label}")

    if seed is None:
        random.shuffle(pairs)
    else:
        random.Random(seed).shuffle(pairs)

    train_count = int(len(pairs) * train_ratio)
    valid_count = int(len(pairs) * valid_ratio)

    for i, (image, label) in enumerate(pairs):
        if i < train_count:
            split = 'train'
        elif i < train_count + valid_count:
            split = 'valid'
        else:
            split = 'test'

        shutil.copy(os.path.join(images_dir, image), os.path.join(output_dir, split, 'images', image))
        shutil.copy(os.path.join(labels_dir, label), os.path.join(output_dir, split, 'labels', label))

def merge_datasets(old_data_dir, new_data_dir, output_dir):
    """Build the merged dataset in output_dir.

    Creates the split folders, copies each existing split of the old dataset
    unchanged, then distributes the new data across the splits.

    Args:
        old_data_dir: Dataset that is already divided into train/valid/test.
        new_data_dir: Unsplit data with ``images`` and ``labels`` folders.
        output_dir: Root of the merged dataset.
    """
    create_split_directories(output_dir)

    # Old data keeps the split it already belongs to.
    for split_name in ('train', 'valid', 'test'):
        copy_existing_split(old_data_dir, output_dir, split_name)

    # New data is shuffled and divided with the default ratios.
    split_and_copy_new_data(new_data_dir, output_dir)

# Example usage: merge the existing dataset in 'old_data' with the frames
# annotated into 'dataset_maker' and write the result to 'splitted_dataset'.
# NOTE: the output folder is not cleared and the new data is shuffled without a
# fixed seed, so re-running this cell can copy the same new image into a
# different split than on the previous run.
output_dir = 'splitted_dataset'
merge_datasets('old_data', 'dataset_maker', output_dir)


In [23]:
import torch
print(torch.cuda.is_available())  # True if a CUDA device can be used
print(torch.cuda.device_count())  # Number of CUDA devices visible to torch
# Only ask for the device name when a device exists; get_device_name(0)
# raises on a machine without CUDA instead of returning a placeholder.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # Name of the first GPU
else:
    print("No CUDA device available")


True
1
NVIDIA GeForce GTX 1080


In [1]:
from ultralytics import YOLO

# Load the existing field-keypoint weights as the starting point for training.
# This rebinds the notebook-global `model` that main() passes to predict_keypoints().
# NOTE(review): the device is hard-coded to 'cuda', unlike the earlier cell that
# falls back to 'cpu' when CUDA is unavailable; presumably this fails on a
# CPU-only machine, so confirm before running there.
model = YOLO('models/Field_Key_Points.pt').to('cuda')

# Train on the dataset described by data.yaml for 2 epochs at image size 640.
model.train(data='data.yaml', epochs=2, imgsz=640)

# Save under a new file name so the original weights file is left untouched.
# This line is only reached if training runs to completion.
model.save('models/Field_Key_Points_Trained.pt')


New https://pypi.org/project/ultralytics/8.2.36 available  Update with 'pip install -U ultralytics'
[34m[1mengine\trainer: [0mtask=pose, mode=train, model=models/Field_Key_Points.pt, data=data.yaml, epochs=2, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cuda:0, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=Tr

[34m[1mtrain: [0mScanning C:\Users\Dokutsu\Documents\Project\usg\football_analysis\splitted_dataset\train\labels... 281 images, 0 backgrounds, 0 corrupt: 100%|██████████| 281/281 [00:01<00:00, 210.63it/s]


[34m[1mtrain: [0mNew cache created: C:\Users\Dokutsu\Documents\Project\usg\football_analysis\splitted_dataset\train\labels.cache


[34m[1mval: [0mScanning C:\Users\Dokutsu\Documents\Project\usg\football_analysis\splitted_dataset\valid\labels... 47 images, 0 backgrounds, 0 corrupt: 100%|██████████| 47/47 [00:00<00:00, 114.36it/s]

[34m[1mval: [0mNew cache created: C:\Users\Dokutsu\Documents\Project\usg\football_analysis\splitted_dataset\valid\labels.cache





Plotting labels to C:\Users\Dokutsu\Documents\Project\usg\football_analysis\runs\pose\train2\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 103 weight(decay=0.0), 113 weight(decay=0.0005), 112 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mC:\Users\Dokutsu\Documents\Project\usg\football_analysis\runs\pose\train2[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return F.conv2d(input, weight, bias, self.stride,
        1/2      10.7G     0.3687      5.756     0.3285     0.3609     0.9925         18        640: 100%|██████████| 18/18 [13:21<00:00, 44.53s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:59<00:00, 30.00s/it]

                   all         47         47      0.952      0.979      0.993      0.915      0.407      0.404      0.257     0.0568






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


        2/2      10.7G      0.356      5.393      0.324     0.3322     0.9484         41        640:  11%|█         | 2/18 [01:17<10:18, 38.64s/it]


KeyboardInterrupt: 

In [8]:

def draw_formatted_output(image_path, formatted_output):
    """Show an image with a normalized label row drawn on top of it.

    The row is read as ``[class, x_center, y_center, width, height]`` followed
    by ``x, y, visibility`` triplets, all coordinates relative to the image
    size. The box is drawn in green and every keypoint with visibility 2 as a
    red dot. The window stays open until a key is pressed.

    Args:
        image_path: Path of the image to load.
        formatted_output: Sequence holding the label row described above.

    Returns:
        None. If the image cannot be loaded an error is printed and nothing is
        displayed.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image at {image_path}")
        return

    # Split the row into its parts; the class index is read but not drawn.
    _class_id = formatted_output[0]
    box_fields = formatted_output[1:5]
    flat_keypoints = formatted_output[5:]

    cx_norm, cy_norm, bw_norm, bh_norm = box_fields
    img_h, img_w, _ = image.shape

    # Convert the normalized centre/size box into integer pixel corners.
    cx, cy = int(cx_norm * img_w), int(cy_norm * img_h)
    half_w = int(bw_norm * img_w) // 2
    half_h = int(bh_norm * img_h) // 2
    cv2.rectangle(image, (cx - half_w, cy - half_h), (cx + half_w, cy + half_h), (0, 255, 0), 2)

    # Walk over complete (x, y, visibility) triplets only; a trailing partial
    # triplet is ignored.
    for start in range(0, len(flat_keypoints) - 2, 3):
        kx, ky, vis = flat_keypoints[start], flat_keypoints[start + 1], flat_keypoints[start + 2]
        px, py = int(kx * img_w), int(ky * img_h)
        if vis == 2:
            cv2.circle(image, (px, py), 5, (0, 0, 255), -1)

    cv2.imshow('Annotated Image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage: overlay a label row on the frame extracted earlier.
output_image_path = 'dataset_maker/frame_250.jpg'


# Draw the formatted output on the image.
# NOTE: `formatted_output` (and `cv2`) are not defined in this cell; they must
# already exist in the kernel from a previously executed cell.
draw_formatted_output(output_image_path, formatted_output)
