In [4]:
from collections import defaultdict
import cv2
import json
import numpy as np
from ultralytics import YOLO

# Load the YOLOv8 model
model = YOLO('LargeBumperModel.pt')

# Path to video file
video_path = 'C:/Users/antho/Desktop/frCV/Q90.mp4'

# Path to transformation JSON:
json_file_path = 'C:/Users/antho/Desktop/frCV/Q90.json'

# Field overlay image 2560 x 1240
overlay_path = 'C:/Users/antho/Desktop/frCV/fieldS.png'
overlay = cv2.imread(overlay_path)
# cv2.imread signals failure by returning None rather than raising, so fail
# here with a clear message instead of crashing later on a None image.
if overlay is None:
    raise FileNotFoundError(f"Could not read field overlay image: {overlay_path}")

# Open the video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

# Retrieve video properties for setting up the VideoWriter
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Create VideoWriter object with full path.
# The frames written to this file are the field overlay image, not the camera
# frames, and VideoWriter discards frames whose size differs from the size it
# was opened with. The writer therefore has to use the overlay's dimensions
# (numpy shape is rows x cols, i.e. height x width).
overlay_height, overlay_width = overlay.shape[:2]
output_video_path = 'C:/Users/antho/Desktop/frCV/Q90t.avi'
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video = cv2.VideoWriter(output_video_path, fourcc, fps, (overlay_width, overlay_height))

# Store the track history: track ID -> list of (x, y) box centers
track_history = defaultdict(list)

with open(json_file_path, 'r', encoding='utf-8') as json_file:
    points = json.load(json_file)

# setup variables read from json
src_points = points['src_points']
output_width = points['output_width']
output_height = points['output_height']

def calculate_matrix(src_points, output_width, output_height):
    """Compute the perspective transform from ``src_points`` to an output rectangle.

    Args:
        src_points: Source points; they are paired, in order, with the
            destination corners (0, 0), (w - 1, 0), (w - 1, h - 1), (0, h - 1).
        output_width: Width ``w`` of the destination rectangle.
        output_height: Height ``h`` of the destination rectangle.

    Returns:
        The matrix returned by ``cv2.getPerspectiveTransform``.
    """
    # Largest valid coordinate along each axis of the destination rectangle
    right = output_width - 1
    bottom = output_height - 1

    corners_out = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype=np.float32,
    )
    corners_in = np.array(src_points, dtype=np.float32)

    return cv2.getPerspectiveTransform(corners_in, corners_out)

# Perspective matrix built once from the values loaded from the JSON file;
# apply_transform() reads this module-level name on every call.
transformation_matrix = calculate_matrix(src_points, output_width, output_height)

def apply_transform(absolute_coords):
    """Map points through the module-level ``transformation_matrix``.

    Args:
        absolute_coords: Sequence of ``[x, y]`` points (a list such as
            ``[[1520, 343]]`` or an array with one point per row).

    Returns:
        Array of transformed points, one row per input point.
    """
    # Add a leading axis so the points are handed to OpenCV as one batch
    batched = np.asarray(absolute_coords, dtype=np.float32)[np.newaxis, ...]

    projected = cv2.perspectiveTransform(batched, transformation_matrix)

    # Strip the leading axis again so callers get one row per point
    return projected[0]

# Example usage: transform a single sample point and print the result as a
# quick check that the transformation matrix was built.
absolute_coords = [[1520, 343]]
result = apply_transform(absolute_coords)
print(result)

# Loop through the video frames.
# NOTE: cv2.VideoWriter only stores frames that have the same size it was
# opened with, so `overlay` must have exactly that size.
frames_submitted = 0
try:
    while cap.isOpened():
        # Read a frame from the video
        success, frame = cap.read()
        if not success:
            # Break the loop if the end of the video is reached
            break

        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        results = model.track(frame, persist=True)
        detections = results[0].boxes

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # `id` is None on frames where the tracker has no tracked objects;
        # skip the path update then instead of crashing on None.int().
        if detections.id is not None:
            boxes = detections.xywh.cpu()
            track_ids = detections.id.int().cpu().tolist()

            # Plot the tracks
            for box, track_id in zip(boxes, track_ids):
                # xywh boxes: the first two values are the center point
                track = track_history[track_id]
                track.append((float(box[0]), float(box[1])))
                if len(track) > 30:  # keep only the 30 most recent points
                    track.pop(0)

                # Transform the float centers first and round once at the
                # end; cv2.polylines needs int32 points.
                field_points = apply_transform(track)
                field_points = np.rint(field_points).astype(np.int32)
                cv2.polylines(overlay, [field_points], isClosed=False, color=(0, 0, 0), thickness=5)

        # Save the current overlay frame to the video.
        output_video.write(overlay)
        frames_submitted += 1

        # Display the annotated frame
        cv2.imshow("YOLOv8 Tracking", annotated_frame)

        # Display the field overlay w/ path
        cv2.imshow("Walmart Zebra Motionworks:", overlay)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Read the writer state BEFORE releasing it: once release() has run,
    # isOpened() is always False and would always report a failure.
    writer_was_open = output_video.isOpened()

    # Release the video writer and capture objects, even if the loop raised
    output_video.release()
    cap.release()
    cv2.destroyAllWindows()

# Check if the video was saved successfully
if not writer_was_open or frames_submitted == 0:
    print("Error: Video not written successfully.")
else:
    print("Video saved successfully.")

[[     3133.2      430.91]]

0: 576x1024 3 BLUs, 3 REDs, 143.7ms
Speed: 6.2ms preprocess, 143.7ms inference, 1.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 146.2ms
Speed: 4.6ms preprocess, 146.2ms inference, 0.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 145.9ms
Speed: 5.0ms preprocess, 145.9ms inference, 1.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 148.8ms
Speed: 5.0ms preprocess, 148.8ms inference, 1.5ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 149.8ms
Speed: 4.6ms preprocess, 149.8ms inference, 0.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 148.5ms
Speed: 5.0ms preprocess, 148.5ms inference, 0.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 144.6ms
Speed: 5.2ms preprocess, 144.6ms inference, 1.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs,

Speed: 5.3ms preprocess, 146.6ms inference, 1.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 145.4ms
Speed: 5.0ms preprocess, 145.4ms inference, 0.8ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 143.3ms
Speed: 4.2ms preprocess, 143.3ms inference, 0.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 143.1ms
Speed: 4.0ms preprocess, 143.1ms inference, 0.5ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 145.8ms
Speed: 4.2ms preprocess, 145.8ms inference, 0.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 151.0ms
Speed: 5.6ms preprocess, 151.0ms inference, 1.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 148.5ms
Speed: 4.5ms preprocess, 148.5ms inference, 1.0ms postprocess per image at shape (1, 3, 576, 1024)

0: 576x1024 3 BLUs, 3 REDs, 152.3ms
Speed: 6.0ms preprocess, 152.3ms inference, 1.5m