In [1]:
!pip install ultralytics



In [2]:
from ultralytics import YOLO

In [3]:
model = YOLO('yolov10m.pt')  # load a pretrained model

In [4]:
import cv2
from ultralytics import YOLO

# Load the YOLO model
model = YOLO('yolov10m.pt')  # Using YOLOv8x as YOLOv10 is not officially released

# Open the webcam
cap = cv2.VideoCapture(0)  # 0 is usually the default webcam

# Define the objects we want to alert for
alert_objects = ['suitcase', 'handbag', 'backpack']  # YOLO uses 'backpack' instead of 'bag'

# Define the scale factor for resizing the result frame
scale_factor = 1.5  # Increase this value to make the frame larger

while True:
    # Read a frame from the webcam
    ret, frame = cap.read()
    if not ret:
        break

    # Run YOLO inference on the frame
    results = model(frame)

    # Initialize counters
    person_count = 0
    alert_items = []

    # Process the results
    for r in results:
        boxes = r.boxes
        for box in boxes:
            c = box.cls
            class_name = model.names[int(c)]
            
            if class_name == 'person':
                person_count += 1
            elif class_name in alert_objects:
                alert_items.append(class_name)

    # Visualize the results on the frame
    annotated_frame = results[0].plot()

    # Add person count to the frame
    cv2.putText(annotated_frame, f'Total Persons: {person_count}', (10, 30), 
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Add alerts to the frame
    if alert_items:
        alert_text = 'ALERT: ' + ', '.join(set(alert_items))
        cv2.putText(annotated_frame, alert_text, (10, 70), 
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # Resize the frame
    height, width = annotated_frame.shape[:2]
    new_height, new_width = int(height * scale_factor), int(width * scale_factor)
    resized_frame = cv2.resize(annotated_frame, (new_width, new_height))

    # Display the resized annotated frame
    cv2.imshow("RESULT", resized_frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Release the webcam and close the window
cap.release()
cv2.destroyAllWindows()


0: 480x640 1 person, 1217.4ms
Speed: 4.1ms preprocess, 1217.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1265.7ms
Speed: 3.1ms preprocess, 1265.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1103.5ms
Speed: 3.1ms preprocess, 1103.5ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1061.6ms
Speed: 3.6ms preprocess, 1061.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1153.6ms
Speed: 4.1ms preprocess, 1153.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1093.0ms
Speed: 3.1ms preprocess, 1093.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1055.4ms
Speed: 2.0ms preprocess, 1055.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1076.9ms
Speed: 3.3ms preprocess, 1076.9ms inference, 0.0ms postproc