In [None]:
import cv2
import numpy as np

# Motion-triggered object classification.
#
# Each captured frame is run through a MOG2 background subtractor; every
# sufficiently large foreground contour is cropped and passed to a Caffe
# detection network. Detections above the confidence threshold are drawn,
# printed and counted, and the script stops once `max_objects` detections
# have been counted, the video source ends, or the user presses 'q'.

# Load the pre-trained model and class labels.
# `classes` is indexed by the class id the network reports for a detection.
net = cv2.dnn.readNetFromCaffe('deploy.prototxt', 'mobilenet_iter_73000.caffemodel')
classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
           'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor']

# Initialize the video capture object
cap = cv2.VideoCapture(0)  # 0 for webcam, or provide a video file path

# Initialize the background subtractor
fgbg = cv2.createBackgroundSubtractorMOG2()

# Counter for detected objects
object_count = 0
max_objects = 10  # Number of objects to detect before stopping

try:
    while True:
        ret, frame = cap.read()

        # A failed read means the stream ended or the device is unavailable.
        if not ret:
            break

        # Apply the background subtractor to get the foreground mask
        fgmask = fgbg.apply(frame)

        # Find contours in the mask
        contours, _ = cv2.findContours(fgmask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # Draw on a copy: `frame[y:y+h, x:x+w]` is a view into `frame`, so
        # annotating `frame` itself would paint boxes and text into the pixels
        # of any later ROI that overlaps an earlier one.
        annotated = frame.copy()

        for contour in contours:
            # Ignore small foreground blobs (area below 500 px).
            if cv2.contourArea(contour) < 500:
                continue

            x, y, w, h = cv2.boundingRect(contour)

            # Extract the region of interest (ROI) from the untouched frame
            roi = frame[y:y+h, x:x+w]

            # Prepare the ROI for classification: resized to 300x300, mean
            # 127.5 subtracted, then scaled by 0.007843 (~1/127.5).
            blob = cv2.dnn.blobFromImage(roi, 0.007843, (300, 300), 127.5)
            net.setInput(blob)
            detections = net.forward()

            # Loop through the detected objects in the ROI
            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                if confidence <= 0.5:  # Confidence threshold
                    continue

                idx = int(detections[0, 0, i, 1])
                label = classes[idx]
                label_text = f"{label}: {confidence:.2f}"

                # Draw the contour's bounding box and the label
                cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(annotated, label_text, (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                # Print the detected object
                print(f"Detected object: {label} with confidence {confidence:.2f}")

                # Increment the object counter
                object_count += 1

                # Check if the object count has reached the maximum
                if object_count >= max_objects:
                    print(f"Detected {max_objects} objects, stopping execution...")
                    break  # Leaves only the per-detection loop

            # Also leave the contour loop; otherwise the remaining contours
            # are still classified and counted after the limit was hit.
            if object_count >= max_objects:
                break

        # Check if we should break the outer loop as well
        if object_count >= max_objects:
            break

        # Display the resulting frame
        cv2.imshow('Frame', annotated)
        cv2.imshow('Foreground Mask', fgmask)

        # Exit on pressing 'q'
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
finally:
    # Release the capture device and close all windows even if an error
    # interrupted the loop.
    cap.release()
    cv2.destroyAllWindows()

print("Program execution stopped.")


Detected object: person with confidence 0.98
Detected object: cat with confidence 0.52
Detected object: bottle with confidence 0.75
Detected object: person with confidence 0.85
Detected object: sofa with confidence 0.66
Detected object: cat with confidence 0.74
Detected object: cat with confidence 0.59
Detected object: car with confidence 0.52
Detected object: cat with confidence 0.54
Detected object: bottle with confidence 0.51
Detected 10 objects, stopping execution...
Detected object: tvmonitor with confidence 0.87
Detected 10 objects, stopping execution...
Detected object: bottle with confidence 0.58
Detected 10 objects, stopping execution...
Program execution stopped.
