In [1]:
pip install ultralytics opencv-python numpy

Collecting ultralytics
  Downloading ultralytics-8.3.92-py3-none-any.whl.metadata (35 kB)
Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.92-py3-none-any.whl (949 kB)
   ---------------------------------------- 0.0/949.3 kB ? eta -:--:--
    --------------------------------------- 20.5/949.3 kB ? eta -:--:--
    --------------------------------------- 20.5/949.3 kB ? eta -:--:--
   - ------------------------------------- 30.7/949.3 kB 217.9 kB/s eta 0:00:05
   -- ------------------------------------ 71.7/949.3 kB 435.7 kB/s eta 0:00:03
   --- ----------------------------------- 81.9/949.3 kB 353.1 kB/s eta 0:00:03
   --- ----------------------------------- 81.9/949.3 kB 353.1 kB/s eta 0:00:03
   ---- --------------------------------- 102.4/949.3 kB 328.0 kB/s eta 0:00:03
   ----

In [1]:
from ultralytics import YOLO
import cv2
import numpy as np
from IPython.display import display , clear_output
from PIL import Image


model = YOLO("yolov8n.pt")


"""
The "yolov8n.pt" model file contains:
✅ A Convolutional Neural Network (CNN) architecture optimized for real-time object detection.
✅ Pretrained weights trained on COCO dataset (or custom datasets if fine-tuned).
✅ The full pipeline for bounding box regression + class predictions.
"""

cap = cv2.VideoCapture(0) #Here 0 is for default webcame , we can use 0 or 1 or 2 if we have multiple webcams 

while True:
    ret , frame = cap.read()

    if not ret:
        break
    results = model(frame)

    """
    results is the output of the YOLOv8 model when it processes the frame.
    Data Type: It is a list containing ultralytics.engine.results.Results objects.
    Since the model may detect multiple objects, each frame will have one Results object in the list.
    """
    
    """
    Since results is a list (with 1 item), r refers to the only Results object in the list.
    Data Type: r is an instance of ultralytics.engine.results.Results.
    """
    
    """
    results is a list of Results objectsobjects (even (even though it usually contains just one object per frame).
    Each Results object corresponds to one frame and holds all detection details for that frame.
    r refers to a single Results object, meaning it contains the detection data for one specific frame.
    """     
    for r in results:
        for box in r.boxes:
            x1,y1,x2,y2 = map(int , box.xyxy[0])
            conf = box.conf[0].item()
            class_id = int(box.cls[0].item())
            class_name = r.names[class_id]
            cv2.rectangle(frame , (x1 , y1) , (x2,y2) , (0,255 , 0) , 2)
            label_text = f"{class_name}: {conf:.2f}"
            cv2.putText(frame, label_text , (x1, y1 - 10),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imshow("Hand Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()


0: 480x640 1 person, 302.4ms
Speed: 12.5ms preprocess, 302.4ms inference, 9.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 259.2ms
Speed: 2.7ms preprocess, 259.2ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 254.4ms
Speed: 5.6ms preprocess, 254.4ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 192.5ms
Speed: 2.4ms preprocess, 192.5ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 umbrella, 150.0ms
Speed: 2.6ms preprocess, 150.0ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 umbrella, 90.6ms
Speed: 1.9ms preprocess, 90.6ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 umbrella, 113.4ms
Speed: 3.4ms preprocess, 113.4ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 1 umbrella, 124.1ms
Speed: 1.9ms preprocess, 12