In [15]:
import cv2
import numpy as np
import time

# --- Tunable settings --------------------------------------------------------
# Detections whose best class score does not exceed this value are ignored
confidence_threshold = 0.3

# Upper bound on how many boxes are kept for display
max_boxes = 10

# --- Model and labels --------------------------------------------------------
# Network loaded from the yolov3.weights / yolov3.cfg pair
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

# Class labels, one per line of coco.names
with open("coco.names", "r") as f:
    classes = f.read().strip().split('\n')

# --- Video source ------------------------------------------------------------
# Index 0 selects the default camera; use another index for a different device
cap = cv2.VideoCapture(0)

# --- Per-run state -----------------------------------------------------------
# Boxes kept from the most recent detection pass
bounding_boxes = list()

# Identifiers of the objects already recorded during the current pass
unique_objects = dict()

# Timestamp of the most recent detection pass
last_check_time = time.time()

# Main capture / detect / draw loop.
#
# Every frame is displayed; the YOLO network only runs once per second and the
# boxes from the latest pass are redrawn on the frames in between.
#
# The whole loop runs inside try/finally so the camera and the preview window
# are released even if an exception or a kernel interrupt stops the cell.
try:
    # Resolve the output layer names once instead of on every detection pass
    layer_names = net.getUnconnectedOutLayersNames()

    while True:
        ret, frame = cap.read()

        # Stop as soon as the camera yields no frame
        if not ret:
            break

        # Calculate the time since the last object check
        elapsed_time = time.time() - last_check_time

        if elapsed_time >= 1.0:  # Check for objects every 1 second
            frame_height, frame_width = frame.shape[:2]

            # Prepare the frame for detection (resize to 416x416, scale to [0, 1], BGR -> RGB)
            blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
            net.setInput(blob)
            detections = net.forward(layer_names)

            # Start the pass with empty results
            bounding_boxes = []
            unique_objects = {}

            for detection in detections:
                for obj in detection:
                    scores = obj[5:]
                    class_id = int(np.argmax(scores))
                    confidence = float(scores[class_id])

                    if not confidence > confidence_threshold:
                        continue

                    # Box centre and size are multiplied by the frame size to get pixels
                    center_x = int(obj[0] * frame_width)
                    center_y = int(obj[1] * frame_height)
                    width = int(obj[2] * frame_width)
                    height = int(obj[3] * frame_height)

                    # Very small detections truncate to a zero-sized box; skip them so
                    # the aspect-ratio division below cannot raise ZeroDivisionError.
                    if width <= 0 or height <= 0:
                        continue

                    # Top-left corner used for drawing
                    x = int(center_x - width / 2)
                    y = int(center_y - height / 2)

                    label_name = classes[class_id]
                    is_person = label_name == 'person'

                    # Square-like objects (aspect ratio within 10% of 1) are treated as boxes
                    is_square = abs(1 - width / height) < 0.1

                    # Same class at the same top-left corner counts as the same object
                    obj_id = f"{label_name}_{x}_{y}"
                    if obj_id in unique_objects:
                        continue
                    unique_objects[obj_id] = True

                    # Persons are kept whatever their shape; anything else only
                    # when it is square-like (i.e. qualifies as a box).
                    if is_person or is_square:
                        bounding_boxes.append((x, y, width, height, class_id, confidence))

            # Sort the bounding boxes by confidence (highest to lowest)
            bounding_boxes.sort(key=lambda box: box[5], reverse=True)

            # Keep at most max_boxes entries for display
            bounding_boxes = bounding_boxes[:max_boxes]

            # Update the last check time
            last_check_time = time.time()

        # Every stored non-person entry passed the square-like test above, so it
        # counts as a box regardless of its class label.
        person_detected = any(classes[box[4]] == 'person' for box in bounding_boxes)
        box_detected = any(classes[box[4]] != 'person' for box in bounding_boxes)

        # Determine if a person and a box are present together
        person_and_box_detected = person_detected and box_detected

        # Draw bounding boxes on the frame
        for x, y, width, height, class_id, confidence in bounding_boxes:
            label = f"{classes[class_id]}: {confidence:.2f}"
            cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 255, 0), 2)
            cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the status of person, box, and person-and-box together at the top of the frame
        status = f"Person: {person_detected}, Box: {box_detected}, Person and Box: {person_and_box_detected}"
        cv2.putText(frame, status, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the result
        cv2.imshow("Webcam Object Detection", frame)

        if cv2.waitKey(1) & 0xFF == 27:  # Press 'Esc' key to exit
            break
finally:
    # Always free the camera and close the preview window
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np
import time

# --- Tunable settings --------------------------------------------------------
# Detections whose best class score does not exceed this value are ignored
confidence_threshold = 0.3

# Upper bound on how many boxes are kept for display
max_boxes = 30

# --- Model and labels --------------------------------------------------------
# Network loaded from the yolov3.weights / yolov3.cfg pair
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

# Class labels, one per line of coco.names
with open("coco.names", "r") as f:
    classes = f.read().strip().split('\n')

# --- Video source ------------------------------------------------------------
# Index 0 selects the default camera; use another index for a different device
cap = cv2.VideoCapture(0)

# --- Per-run state -----------------------------------------------------------
# Boxes kept from the most recent detection pass
bounding_boxes = list()

# Identifiers of the objects already recorded during the current pass
unique_objects = dict()

# Timestamp of the most recent detection pass
last_check_time = time.time()

# Main capture / detect / draw loop for the person + phone check.
#
# Every frame is displayed; the YOLO network only runs once per second and the
# boxes from the latest pass are redrawn on the frames in between.
#
# The whole loop runs inside try/finally so the camera and the preview window
# are released even if an exception or a kernel interrupt stops the cell.
try:
    # Resolve the output layer names once instead of on every detection pass
    layer_names = net.getUnconnectedOutLayersNames()

    # The standard COCO label is 'cell phone'; 'phone' is still accepted in case
    # a customised names file is in use.
    phone_labels = ('phone', 'cell phone')

    while True:
        ret, frame = cap.read()

        # Stop as soon as the camera yields no frame
        if not ret:
            break

        # Calculate the time since the last object check
        elapsed_time = time.time() - last_check_time

        if elapsed_time >= 1.0:  # Check for objects every 1 second
            frame_height, frame_width = frame.shape[:2]

            # Prepare the frame for detection (resize to 416x416, scale to [0, 1], BGR -> RGB)
            blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
            net.setInput(blob)
            detections = net.forward(layer_names)

            # Start the pass with empty results
            bounding_boxes = []
            unique_objects = {}

            for detection in detections:
                for obj in detection:
                    scores = obj[5:]
                    class_id = int(np.argmax(scores))
                    confidence = float(scores[class_id])

                    if not confidence > confidence_threshold:
                        continue

                    # Box centre and size are multiplied by the frame size to get pixels
                    center_x = int(obj[0] * frame_width)
                    center_y = int(obj[1] * frame_height)
                    width = int(obj[2] * frame_width)
                    height = int(obj[3] * frame_height)

                    # Very small detections truncate to a zero-sized box; skip them so
                    # the aspect-ratio division below cannot raise ZeroDivisionError.
                    if width <= 0 or height <= 0:
                        continue

                    # Top-left corner used for drawing
                    x = int(center_x - width / 2)
                    y = int(center_y - height / 2)

                    label_name = classes[class_id]
                    is_person = label_name == 'person'
                    is_phone = label_name in phone_labels

                    # Aspect ratio within 10% of 1.
                    # NOTE(review): phones are still required to be square-like, as in
                    # the original code; a phone held upright will usually fail this
                    # test - confirm the heuristic is intended.
                    is_square = abs(1 - width / height) < 0.1

                    # Same class at the same top-left corner counts as the same object
                    obj_id = f"{label_name}_{x}_{y}"
                    if obj_id in unique_objects:
                        continue
                    unique_objects[obj_id] = True

                    # Persons must be stored as well, otherwise the person status
                    # below could never become True.
                    if is_person or (is_square and is_phone):
                        bounding_boxes.append((x, y, width, height, class_id, confidence))

            # Sort the bounding boxes by confidence (highest to lowest)
            bounding_boxes.sort(key=lambda box: box[5], reverse=True)

            # Keep at most max_boxes entries for display
            bounding_boxes = bounding_boxes[:max_boxes]

            # Update the last check time
            last_check_time = time.time()

        # Check if a person and a phone are present together
        person_detected = any(classes[box[4]] == 'person' for box in bounding_boxes)
        phone_detected = any(classes[box[4]] in phone_labels for box in bounding_boxes)
        person_and_phone_detected = person_detected and phone_detected

        # Draw bounding boxes on the frame
        for x, y, width, height, class_id, confidence in bounding_boxes:
            label = f"{classes[class_id]}: {confidence:.2f}"
            cv2.rectangle(frame, (x, y), (x + width, y + height), (0, 255, 0), 2)
            cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the status of person, phone, and person-and-phone together at the top of the frame
        status = f"Person: {person_detected}, Phone: {phone_detected}, Person and Phone: {person_and_phone_detected}"
        cv2.putText(frame, status, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the result
        cv2.imshow("Webcam Object Detection", frame)

        if cv2.waitKey(1) & 0xFF == 27:  # Press 'Esc' key to exit
            break
finally:
    # Always free the camera and close the preview window
    cap.release()
    cv2.destroyAllWindows()
