In [10]:
%pip install ultralytics


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
%pip install ultralytics opencv-python

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
%pip install ultralytics opencv-python pillow


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


YOLOV8

In [23]:
from ultralytics import YOLO
import cv2

# ---------- 1. Load pretrained YOLOv8 COCO model ----------

# If you previously had a corrupted yolov8n.pt, delete it once manually.
# Then this will download a fresh one.
model = YOLO("yolov8n.pt")   # you can change to yolov8s.pt / m / l / x


# ---------- 2. Define COCO classes treated as "Animal" ----------

ANIMAL_CLASSES = {
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
    "bear", "zebra", "giraffe"
}

def map_to_three_classes(class_name: str) -> str:
    """Convert COCO class name -> Human / Animal / Object."""
    if class_name == "person":
        return "Human"
    elif class_name in ANIMAL_CLASSES:
        return "Animal"
    else:
        return "Object"


# ---------- 3. Open webcam ----------

CAMERA_INDEX = 0  # change to 1 or 2 if you have multiple cameras
cap = cv2.VideoCapture(CAMERA_INDEX)

if not cap.isOpened():
    print(f"Error: Could not open webcam (index {CAMERA_INDEX}).")
    raise SystemExit

print("Webcam opened. Press 'q' to quit.")

CONF_THRESH = 0.3  # confidence threshold

while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Failed to read frame from webcam.")
        break

    # Run inference on the current frame
    results = model(frame)[0]

    # Draw only Human / Animal / Object
    for box in results.boxes:
        conf = float(box.conf[0])
        if conf < CONF_THRESH:
            continue

        x1, y1, x2, y2 = map(int, box.xyxy[0])
        cls_id = int(box.cls[0])

        original_label = model.names[cls_id]      # e.g., 'person'
        three_label = map_to_three_classes(original_label)  # Human/Animal/Object

        label = three_label  # ONLY 3 classes

        # Draw rectangle
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Label background
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        cv2.rectangle(frame, (x1, y1 - th - 6), (x1 + tw, y1), (0, 255, 0), -1)

        # Label text
        cv2.putText(
            frame,
            label,
            (x1, y1 - 4),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 0, 0),
            2,
            cv2.LINE_AA,
        )

    # Show the frame
    cv2.imshow("YOLOv8 - Human / Animal / Object (Webcam)", frame)

    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()


Webcam opened. Press 'q' to quit.

0: 480x640 1 person, 1 toothbrush, 61.5ms
Speed: 1.8ms preprocess, 61.5ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 60.0ms
Speed: 2.1ms preprocess, 60.0ms inference, 1.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 50.0ms
Speed: 1.3ms preprocess, 50.0ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 58.3ms
Speed: 1.3ms preprocess, 58.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 52.9ms
Speed: 1.1ms preprocess, 52.9ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 56.1ms
Speed: 1.2ms preprocess, 56.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 60.7ms
Speed: 1.6ms preprocess, 60.7ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 cat, 1 dog, 51.8ms
Speed: 1.3ms preprocess, 51