In [3]:
import cv2
import numpy as np

# !! READ ME !!
# If you run this program it turns on your webcam

# NOTES
# TODO: Data Augmentation to create a very large dataset
# Use https://www.makesense.ai/ to annotate our images once we have them
# We will annotate 3 images per card, each with slightly different rotations
# Focus on realistic variations based  on our set up
# We only need to label a few key images (the cards on the setup), then use those labeled images to generate others with data augmentation
# Since the environment is controlled, we don’t need to label every single variatio. Just the original images and a maybe few augmentations

# Paths for easy access
cfg_path = "yolov4.cfg"
weights_path = "yolov4.weights"
names_path = "coco.names"

# Load YOLO Model
net = cv2.dnn.readNet(weights_path, cfg_path) # maybe use yolo8?
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# Load class labels
with open(names_path, "r") as f:
    classes = [line.strip() for line in f.readlines()]

# IMPORTANT: Update classes and filters in the config file (TODO)

print("Loading YOLO model...")
try:
    net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")
    print("YOLO model loaded successfully!")
except Exception as e:
    print("Error loading YOLO:", e)

# Initialize video capture
cap = cv2.VideoCapture(0)  # Use 0 for webcam but we can also use a video path

while True:
    ret, frame = cap.read()
    if not ret:
        break

    height, width, channels = frame.shape

    # Prepare input image for YOLO
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    detections = net.forward(output_layers)

    results = []  # Store object data

    for output in detections:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > 0.5:  # Confidence threshold
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                results.append({
                    "id": class_id,  # Object class ID
                    "label": classes[class_id],  # Class name
                    "x": center_x,
                    "y": center_y,
                    "confidence": float(confidence)
                })

                # Draw detection
                cv2.circle(frame, (center_x, center_y), 5, (0, 255, 0), -1)
                cv2.putText(frame, f"{classes[class_id]} ({center_x}, {center_y})",
                            (center_x - 10, center_y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, (255, 0, 0), 2)

    # Display the processed frame
    cv2.imshow("YOLO Object Detection", frame)

    # TODO: Send data to Godot (could use networking with sockets or file writing based solution)

    if cv2.waitKey(1) & 0xFF == ord('q'):  # q to exit
        break

cap.release()
cv2.destroyAllWindows()

Loading YOLO model...
YOLO model loaded successfully!


IndexError: list index out of range