In [1]:
!pip install opencv-python tensorflow

Collecting numpy<2.3.0,>=2 (from opencv-python)
  Downloading numpy-2.1.3-cp311-cp311-win_amd64.whl.metadata (60 kB)
Downloading numpy-2.1.3-cp311-cp311-win_amd64.whl (12.9 MB)
   ---------------------------------------- 0.0/12.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/12.9 MB ? eta -:--:--
    --------------------------------------- 0.3/12.9 MB ? eta -:--:--
   - -------------------------------------- 0.5/12.9 MB 4.2 MB/s eta 0:00:03
   -- ------------------------------------- 0.8/12.9 MB 1.2 MB/s eta 0:00:11
   ---- ----------------------------------- 1.3/12.9 MB 1.8 MB/s eta 0:00:07
   ---- ----------------------------------- 1.6/12.9 MB 1.6 MB/s eta 0:00:08
   ----- ---------------------------------- 1.8/12.9 MB 1.5 MB/s eta 0:00:08
   ----- ---------------------------------- 1.8/12.9 MB 1.5 MB/s eta 0:00:08
   ----- ---------------------------------- 1.8/12.9 MB 1.5 MB/s eta 0:00:08
   ------ --------------------------------- 2.1/12.9 MB 1.1 MB/s eta 0:00:

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.16.1 requires ml-dtypes~=0.3.1, but you have ml-dtypes 0.5.1 which is incompatible.
tensorflow-intel 2.16.1 requires numpy<2.0.0,>=1.23.5; python_version <= "3.11", but you have numpy 2.1.3 which is incompatible.
tensorflow-intel 2.16.1 requires tensorboard<2.17,>=2.16, but you have tensorboard 2.19.0 which is incompatible.


In [3]:
import cv2
from ultralytics import YOLO
import datetime
import os

# Live webcam object detection with YOLOv8.
#
# Reads frames from the default webcam, runs the YOLO tracker on each frame,
# draws a box + label for every detection above CONF_THRESHOLD, overlays a
# per-class count, shows the annotated frame in an OpenCV window, and writes
# every SAVE_EVERY_N_FRAMES-th frame that contains detections into `save_dir`.
# Press 'q' in the video window to stop.

# --- Configuration ----------------------------------------------------------
save_dir = "detections"      # Folder for saved detection snapshots
CONF_THRESHOLD = 0.4         # Minimum confidence for a box to be drawn/counted
SAVE_EVERY_N_FRAMES = 5      # Save one snapshot per this many detection frames
WINDOW_NAME = "YOLOv8 Object Detection"
BOX_COLOR = (0, 255, 0)      # BGR colour for boxes and labels
COUNT_COLOR = (0, 255, 255)  # BGR colour for the per-class count overlay

os.makedirs(save_dir, exist_ok=True)

# Load YOLOv8 model
model = YOLO("yolov8s.pt")  # Change to yolov8n.pt, yolov8m.pt, etc.

# Open webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    # Raise instead of calling exit(): inside a notebook exit() shuts the
    # kernel down rather than just reporting the problem for this cell.
    raise RuntimeError("Error: Could not open webcam.")

# Number of frames seen so far that contained at least one detection.
frame_count = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Camera disconnected or stream ended.
            break

        # YOLO object detection. persist=True tells the tracker that this
        # frame continues the previous one, so tracker state is kept between
        # calls instead of being rebuilt for every frame.
        results = model.track(frame, stream=True, persist=True)
        counts = {}

        for result in results:
            class_names = result.names

            for box in result.boxes:
                confidence = float(box.conf[0])
                if confidence <= CONF_THRESHOLD:
                    continue

                x1, y1, x2, y2 = map(int, box.xyxy[0])
                class_name = class_names[int(box.cls[0])]
                counts[class_name] = counts.get(class_name, 0) + 1

                # Draw bounding box and label. The label baseline is clamped
                # so it stays visible when the box touches the top edge.
                cv2.rectangle(frame, (x1, y1), (x2, y2), BOX_COLOR, 2)
                cv2.putText(frame, f"{class_name} {confidence:.2f}",
                            (x1, max(y1 - 10, 20)), cv2.FONT_HERSHEY_SIMPLEX,
                            0.9, BOX_COLOR, 2)

        # Show object counts in frame, one line per class.
        y_offset = 30
        for cls_name, count in counts.items():
            cv2.putText(frame, f"{cls_name}: {count}", (10, y_offset),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, COUNT_COLOR, 2)
            y_offset += 30

        # Save every SAVE_EVERY_N_FRAMES-th frame that has detections.
        if counts:
            frame_count += 1
            if frame_count % SAVE_EVERY_N_FRAMES == 0:
                # Microseconds (%f) keep names unique when several snapshots
                # are written within the same second.
                timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
                filename = os.path.join(save_dir, f"detection_{timestamp}.jpg")
                # imwrite signals failure through its return value.
                if cv2.imwrite(filename, frame):
                    print(f"Saved: {filename}")
                else:
                    print(f"Error: Could not save {filename}")

        # Show the live video feed
        cv2.imshow(WINDOW_NAME, frame)

        # Exit when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release webcam & close window even if the loop is interrupted
    # (e.g. kernel interrupt) or an exception is raised mid-frame.
    cap.release()
    cv2.destroyAllWindows()


0: 480x640 6 persons, 5 chairs, 230.7ms
Speed: 2.8ms preprocess, 230.7ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 8 persons, 5 chairs, 185.9ms
Speed: 2.1ms preprocess, 185.9ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 9 persons, 5 chairs, 199.5ms
Speed: 1.8ms preprocess, 199.5ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 7 persons, 5 chairs, 1 laptop, 230.1ms
Speed: 1.9ms preprocess, 230.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 7 persons, 5 chairs, 1 laptop, 223.1ms
Speed: 2.0ms preprocess, 223.1ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)
Saved: detections\detection_20250812_114257.jpg

0: 480x640 8 persons, 5 chairs, 1 tv, 1 laptop, 237.8ms
Speed: 3.9ms preprocess, 237.8ms inference, 1.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 8 persons, 5 chairs, 1 tv, 233.4ms
Speed: 2.7ms preprocess, 233.4ms in