In [1]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load the YOLO model
model = YOLO("yolov8n-seg.pt")  # load an official model

# Open the webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    if not ret:
        print("Error: Failed to capture image.")
        break

    # Perform prediction
    results = model(frame)

    # Get the image with predictions
    img = results[0].plot()

    # Create a heatmap
    heatmap = np.zeros((img.shape[0], img.shape[1]), dtype=np.float32)

    # Overlay the prediction masks onto the heatmap
    if results[0].masks is not None:
        for mask in results[0].masks.data:
            mask = mask.cpu().numpy().squeeze()  # Convert mask to numpy array and remove single-dimensional entries
            mask_resized = cv2.resize(mask, (heatmap.shape[1], heatmap.shape[0]))  # Resize mask to match heatmap size
            heatmap = cv2.add(heatmap, mask_resized.astype(np.float32))

    # Normalize the heatmap
    heatmap = cv2.normalize(heatmap, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    # Convert heatmap to RGB
    heatmap_img = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)

    # Combine the original image with the heatmap
    overlay_img = cv2.addWeighted(img, 0.6, heatmap_img, 0.4, 0)

    # Display the result
    cv2.imshow('Original Image with Predictions', img)
    cv2.imshow('Heatmap Overlay', overlay_img)

    # Press 'q' to quit the webcam stream
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close windows
cap.release()
cv2.destroyAllWindows()


0: 384x640 (no detections), 104.0ms
Speed: 5.3ms preprocess, 104.0ms inference, 6.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 110.1ms
Speed: 2.0ms preprocess, 110.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 70.1ms
Speed: 2.1ms preprocess, 70.1ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 70.4ms
Speed: 1.6ms preprocess, 70.4ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 136.8ms
Speed: 14.0ms preprocess, 136.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 88.4ms
Speed: 2.1ms preprocess, 88.4ms inference, 0.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 82.5ms
Speed: 1.9ms preprocess, 82.5ms inference, 0.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 70.3ms
Speed: 1.6ms preprocess, 7

KeyboardInterrupt: 