In [1]:
pip install ultralytics

Collecting ultralyticsNote: you may need to restart the kernel to use updated packages.

  Downloading ultralytics-8.2.92-py3-none-any.whl.metadata (41 kB)
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting seaborn>=0.11.0 (from ultralytics)
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.6-py3-none-any.whl.metadata (9.1 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Downloading torch-2.4.1-cp39-cp39-win_amd64.whl.metadata (27 kB)
INFO: pip is looking at multiple versions of torchvision to determine which version is compatible with other requirements. This could take a while.
Collecting torchvision>=0.9.0 (from ultralytics)
  Downloading torchvision-0.19.1-cp39-cp39-win_amd64.whl.metadata (6.1 kB)
Downloading ultralytics-8.2.92-py3-none-any.whl (871 kB)
   ---------------------------------------- 0.0/871.9 kB ? 

In [2]:
import cv2
from ultralytics import YOLO

In [3]:
# Load the YOLOv8 model (YOLOv8 Nano - fastest and lightest version)
model = YOLO('yolov8n.pt')  # 'n' stands for nano, optimized for speed and efficiency

# Function to start the webcam and perform people counting
def start_people_counter():
    # Open the webcam (0 is the default index for the primary camera)
    cap = cv2.VideoCapture(0)

    # Check if the webcam is opened correctly
    if not cap.isOpened():
        print("Error: Could not open the webcam.")
        return

    while True:
        # Capture a frame from the webcam
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture image.")
            break

        # Perform object detection on the frame
        results = model(frame)

        # Extract detections
        detections = results[0].boxes.data.numpy()  # Get bounding boxes as numpy array
        person_count = 0

        # Iterate through detections
        for det in detections:
            class_id = int(det[5])  # Class ID
            confidence = det[4]     # Confidence score

            # Check if the detected object is a person (YOLO class ID for person is 0)
            if class_id == 0 and confidence > 0.5:
                person_count += 1
                # Extract coordinates of the bounding box
                x1, y1, x2, y2 = int(det[0]), int(det[1]), int(det[2]), int(det[3])
                # Draw the bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'Person', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the count on the frame
        cv2.putText(frame, f'Count: {person_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Show the frame with the detections
        cv2.imshow('YOLOv8 People Counter', frame)

        # Press 'q' to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release the webcam and close all OpenCV windows
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Run the people counter
start_people_counter()



0: 480x640 1 person, 351.6ms
Speed: 23.0ms preprocess, 351.6ms inference, 29.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 176.1ms
Speed: 3.0ms preprocess, 176.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 141.5ms
Speed: 2.0ms preprocess, 141.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 140.1ms
Speed: 3.0ms preprocess, 140.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 133.8ms
Speed: 3.0ms preprocess, 133.8ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 121.1ms
Speed: 4.0ms preprocess, 121.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 123.5ms
Speed: 3.0ms preprocess, 123.5ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 126.1ms
Speed: 3.0ms preprocess, 126.1ms inference, 2.0ms postprocess per image 