In [1]:
# import numpy as np
# import cv2
# import time

# # Load the COCO class labels our YOLO model was trained on
# labelsPath = 'yolo-coco/coco.names'
# LABELS = open(labelsPath).read().strip().split("\n")

# # Initialize a list of colors to represent each possible class label
# COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# # Paths to the YOLO weights and model configuration
# weightsPath = 'yolo-coco/yolov3.weights'
# configPath = 'yolo-coco/yolov3.cfg'

# # Load YOLO object detector
# net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

# # Get output layer indices
# output_layer_indices = net.getUnconnectedOutLayers()

# # Get output layer names
# layer_names = net.getLayerNames()
# output_layers = [layer_names[i - 1] for i in output_layer_indices]

# # Confidence threshold and non-maximum suppression threshold
# confidence_threshold = 0.5
# nms_threshold = 0.3

# # Initialize video capture from the default camera (0)
# cap = cv2.VideoCapture(0)
# # Set frame resolution (adjust as needed)
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)


# # Allow the camera to warm up
# time.sleep(2.0)

# while True:
#     # Capture frame-by-frame
#     ret, frame = cap.read()

#     # If frame is not captured properly, break the loop
#     if not ret:
#         break

#     # Get frame dimensions
#     (H, W) = frame.shape[:2]

#     # Create blob from frame
#     blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)

#     # Set input blob for the network
#     net.setInput(blob)

#     # Forward pass
#     layer_outputs = net.forward(output_layers)

#     # Initialize lists for detected bounding boxes, confidences, and class IDs
#     boxes = []
#     confidences = []
#     class_ids = []

#     # Process each layer output
#     for output in layer_outputs:
#         # Process each detection
#         for detection in output:
#             scores = detection[5:]
#             class_id = np.argmax(scores)
#             confidence = scores[class_id]

#             # Filter weak detections
#             if confidence > confidence_threshold:
#                 # Scale the bounding box coordinates back relative to the
#                 # size of the frame
#                 box = detection[0:4] * np.array([W, H, W, H])
#                 (centerX, centerY, width, height) = box.astype("int")

#                 # Use the center (x, y)-coordinates to derive the top
#                 # and left corner of the bounding box
#                 x = int(centerX - (width / 2))
#                 y = int(centerY - (height / 2))

#                 # Update the bounding box coordinates, confidences, and
#                 # class IDs lists
#                 boxes.append([x, y, int(width), int(height)])
#                 confidences.append(float(confidence))
#                 class_ids.append(class_id)

#     # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
#     idxs = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)

#     # Ensure at least one detection exists
#     if len(idxs) > 0:
#         # Loop over the indexes we are keeping
#         for i in idxs.flatten():
#             # Extract the bounding box coordinates
#             (x, y) = (boxes[i][0], boxes[i][1])
#             (w, h) = (boxes[i][2], boxes[i][3])

#             # Draw a bounding box rectangle and label on the frame
#             color = [int(c) for c in COLORS[class_ids[i]]]
#             cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
#             text = "{}: {:.4f}".format(LABELS[class_ids[i]], confidences[i])
#             cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

#     # Display the resulting frame
#     cv2.imshow('Object Detection', frame)

#     # Break the loop if 'q' key is pressed
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# # Release the video capture object and close all windows
# cap.release()
# cv2.destroyAllWindows()


In [None]:
import numpy as np
import cv2
import time
import threading

# Load the COCO class labels our YOLO model was trained on
labelsPath = 'yolo-coco/coco.names'
# Read through a context manager so the file handle is closed right away
# instead of lingering for the lifetime of the kernel.
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# Load YOLO object detector
weightsPath = 'yolo-coco/yolov3.weights'
configPath = 'yolo-coco/yolov3.cfg'
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

# Confidence threshold and non-maximum suppression threshold
confidence_threshold = 0.5
nms_threshold = 0.3

# Get output layer names
layer_names = net.getLayerNames()
# getUnconnectedOutLayers() gives 1-based layer ids (hence the `- 1`).
# The ids may arrive either as a flat array or as an Nx1 array (the `i[0]`
# indexing this notebook used previously); flattening accepts both shapes.
output_layers = [
    layer_names[i - 1]
    for i in np.asarray(net.getUnconnectedOutLayers()).flatten()
]

# Function to perform object detection on a frame
def detect_objects(frame):
    """Run the YOLO network on ``frame``, draw the kept detections and show it.

    Reads the module-level ``net``, ``output_layers``, ``LABELS``,
    ``confidence_threshold`` and ``nms_threshold``.

    Args:
        frame: Image array whose first two dimensions are (height, width).
            It is modified in place: rectangles and labels are drawn onto it.

    Returns:
        None. The annotated frame is displayed in the 'Object Detection'
        window via ``cv2.imshow``.
    """
    # Get frame dimensions
    frame_height, frame_width = frame.shape[:2]

    # Create blob from frame, feed it to the network and run a forward pass
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(output_layers)

    # Detected bounding boxes, confidences, and class IDs
    boxes = []
    confidences = []
    class_ids = []

    # Factors that scale the network's box values up to the frame size
    # (loop-invariant, so built once).
    scale = np.array([frame_width, frame_height, frame_width, frame_height])

    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])

            # Filter weak detections
            if confidence <= confidence_threshold:
                continue

            # The first two box values are the box *centre*, the last two its
            # size, so shift by half the size to obtain the top-left corner
            # that NMSBoxes and cv2.rectangle work with.
            center_x, center_y, width, height = (detection[0:4] * scale).astype("int")
            x = int(center_x - (width / 2))
            y = int(center_y - (height / 2))

            # Store plain Python numbers rather than NumPy scalars
            boxes.append([x, y, int(width), int(height)])
            confidences.append(confidence)
            class_ids.append(class_id)

    # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)

    # Draw bounding boxes on the frame
    color = (0, 255, 0)
    if len(idxs) > 0:
        # np.asarray lets this work whether the indices come back as an
        # array (flat or Nx1) or as a plain sequence.
        for i in np.asarray(idxs).flatten():
            (x, y, w, h) = boxes[i]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.4f}".format(LABELS[class_ids[i]], confidences[i])
            cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Display the resulting frame
    cv2.imshow('Object Detection', frame)

# Function to capture frames from the camera and process them
def process_frames(source=0):
    """Read frames from a video source and run ``detect_objects`` on each one.

    The loop ends when a frame cannot be read or when 'q' is pressed while
    the display window has focus.

    Args:
        source: Value handed to ``cv2.VideoCapture``. Defaults to 0, the
            device index this notebook has always used.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(source)
    try:
        if not cap.isOpened():
            # Say why nothing happens instead of exiting silently.
            print("Could not open video source {!r}".format(source))
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            detect_objects(frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs on every exit path, including an exception raised by
        # detect_objects, so the capture device is never left open.
        cap.release()
        cv2.destroyAllWindows()

# Run the capture/detection loop on a background daemon thread
# NOTE(review): process_frames makes OpenCV window calls from this worker
# thread; confirm the GUI backend in use supports that.
thread = threading.Thread(target=process_frames, daemon=True)
thread.start()

# Block this cell until the loop ends, i.e. until 'q' is pressed or a frame
# can no longer be read
thread.join()
