In [13]:
import cv2
from ultralytics import YOLO
import math # Used for rounding confidence scores

In [14]:
import torch

# Check if CUDA is available
print("CUDA Available:", torch.cuda.is_available())

# Get GPU details
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))

CUDA Available: True
GPU Name: NVIDIA GeForce GTX 1070


## Save the results to a file

In [18]:
# --- Configuration ---
DEFAULT_MODEL = "yolo11l.pt"
TRAINED_MODEL = 'models/weights/best.pt'
VIDEO_PATH = 'sample_data/08fd33_4.mp4'

In [19]:
# --- Load the Model ---
print(f"Loading model: {TRAINED_MODEL}")
try:
    model = YOLO(TRAINED_MODEL)
    class_names = model.names # Get class names dictionary
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    exit()

Loading model: models/weights/best.pt
Model loaded successfully.


In [20]:
results = model.predict(VIDEO_PATH, save=True, stream=True)
for r in results: # Results is a generator when stream=True
    print(results)


video 1/1 (frame 1/750) /home/bbrzyski/studia/DFL_football/PitchAnalyzer_AI/sample_data/08fd33_4.mp4: 384x640 1 ball, 1 goalkeeper, 19 players, 3 referees, 23.1ms
<generator object BasePredictor.stream_inference at 0x7f93f45df400>
video 1/1 (frame 2/750) /home/bbrzyski/studia/DFL_football/PitchAnalyzer_AI/sample_data/08fd33_4.mp4: 384x640 1 goalkeeper, 19 players, 3 referees, 25.5ms
<generator object BasePredictor.stream_inference at 0x7f93f45df400>
video 1/1 (frame 3/750) /home/bbrzyski/studia/DFL_football/PitchAnalyzer_AI/sample_data/08fd33_4.mp4: 384x640 1 goalkeeper, 18 players, 3 referees, 23.7ms
<generator object BasePredictor.stream_inference at 0x7f93f45df400>
video 1/1 (frame 4/750) /home/bbrzyski/studia/DFL_football/PitchAnalyzer_AI/sample_data/08fd33_4.mp4: 384x640 1 ball, 1 goalkeeper, 21 players, 3 referees, 21.6ms
<generator object BasePredictor.stream_inference at 0x7f93f45df400>
video 1/1 (frame 5/750) /home/bbrzyski/studia/DFL_football/PitchAnalyzer_AI/sample_data/08f

## Live render the inference

In [None]:
CONFIDENCE_THRESHOLD = 0.4
BOX_COLOR = (255, 0, 255) # BGR color for the bounding box (Magenta)
TEXT_COLOR = (255, 255, 255) # BGR color for the text (White)
FONT = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.6
FONT_THICKNESS = 2
BOX_THICKNESS = 2

In [None]:
# --- Initialize Video Capture ---
print(f"Opening video source: {VIDEO_PATH}")
cap = cv2.VideoCapture(VIDEO_PATH)

# Check if video opened successfully
if not cap.isOpened():
    print(f"Error: Could not open video source: {VIDEO_PATH}")
    exit()

In [None]:
# --- Processing Loop ---
print("Starting video processing... Press 'q' to quit.")
while True:
    # Read a frame from the video
    ret, frame = cap.read()

    # If frame reading was not successful (end of video or error)
    if not ret:
        print("Reached end of video or failed to read frame.")
        break

    # --- Perform YOLO Prediction ---
    # Using stream=True is generally more efficient for video/sequential inputs
    results = model.predict(frame, stream=True, verbose=False) # Set verbose=False to avoid console spam

    # --- Process Detections and Draw on Frame ---
    for r in results: # Results is a generator when stream=True
        boxes = r.boxes
        for box in boxes:
            # 1. Get Confidence Score
            confidence = math.ceil(box.conf[0] * 100) / 100
            # print(f"Confidence: {confidence}") # Optional: print confidence

            # 2. Filter detections by confidence
            if confidence >= CONFIDENCE_THRESHOLD:
                # 3. Get Bounding Box Coordinates
                x1, y1, x2, y2 = box.xyxy[0] # Format: [xmin, ymin, xmax, ymax]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # Convert to integers

                # 4. Get Class ID and Name
                cls_id = int(box.cls[0])
                cls_name = class_names.get(cls_id, "Unknown") # Use .get for safety
                # print(f"Detected: {cls_name} ({confidence:.2f}) at [{x1},{y1},{x2},{y2}]") # Optional

                # 5. Draw Bounding Box
                cv2.rectangle(frame, (x1, y1), (x2, y2), BOX_COLOR, BOX_THICKNESS)

                # 6. Create Label Text
                label = f"{cls_name} {confidence:.2f}"

                # 7. Calculate text size and position for label background
                (text_width, text_height), baseline = cv2.getTextSize(label, FONT, FONT_SCALE, FONT_THICKNESS)
                label_bg_y1 = max(y1 - text_height - baseline, 0) # Position above the box, ensure it's not off-screen top
                label_bg_y2 = y1 # Base of the background rectangle

                # 8. Draw a filled rectangle as the background for the label
                cv2.rectangle(frame, (x1, label_bg_y1), (x1 + text_width, label_bg_y2), BOX_COLOR, cv2.FILLED)

                # 9. Put Label Text on the background
                cv2.putText(frame, label, (x1, y1 - baseline), FONT, FONT_SCALE, TEXT_COLOR, FONT_THICKNESS)


    # --- Display the Frame ---
    cv2.imshow("YOLO Real-Time Detection", frame)

    # --- Exit Condition ---
    # Wait for 1 millisecond and check if the 'q' key was pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        print("Exit key ('q') pressed. Stopping...")
        break

# --- Cleanup ---
print("Releasing video capture and closing windows.")
cap.release() # Release the video capture object
cv2.destroyAllWindows() # Close all OpenCV display windows

print("Video processing finished.")