In [3]:
import torch
import cv2 as cv2
from ultralytics import YOLO
import numpy as np
from PIL import Image
from supervision.detection.core import Detections

In [4]:
model = YOLO('yolov8n.pt')

In [5]:
def count_objects(predictions, threshold=0.5):
    count = 0
    filtered_boxes = []
    
    for pred in predictions:
        for bbox in pred.boxes:
            if bbox.conf > threshold:
                count += 1
                filtered_boxes.append(bbox.xyxy.cpu().numpy()[0])
                
    return count, filtered_boxes

In [6]:
def draw_boxes(frame, boxes):
    for box in boxes:
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        

In [7]:
def detect_an_image(image_path):
    image = Image.open(image_path)
    
    results = model(image)
    
    num_objects, filtered_boxes = count_objects(results, threshold=0.5)
    
    image_with_boxes = np.array(image)
    
    draw_boxes(image_with_boxes, filtered_boxes)
    
    cv2.imshow("Object Detection on Image", cv2.cvtColor(image_with_boxes, cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    print(f"Total objects detected in image: {num_objects}")

In [8]:
def detect_on_videos(video_path=0):
    cap = cv2.VideoCapture(video_path)
    
    total_objects_detected = 0
    
    while cap.isOpened():
        ret, frame = cap.read()
        
        if not ret:
            break
        
        results = model(frame)
        
        num_objects, filtered_boxes = count_objects(results, threshold=0.5)

        # Update the total object count
        total_objects_detected += num_objects

        # Draw bounding boxes on the frame
        draw_boxes(frame, filtered_boxes)

        # Display the object count and the frame
        cv2.putText(frame, f"Objects in frame: {num_objects}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("Object Detection on Video", frame)

        # Press 'q' to quit the video window
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # When the video ends, print the final total object count
    print(f"Total objects detected in the video: {total_objects_detected}")
    
    cap.release()
    cv2.destroyAllWindows()

In [9]:


def convert_yolov8_to_supervision(results):
    bboxes = []
    confidences = []
    class_ids = []
    
    # Loop through YOLO results and extract bounding box info
    for result in results[0].boxes:
        # Check if the detection is valid (confidence score)
        if result.conf.item() > 0:  # Use a small threshold if needed
            bboxes.append(result.xyxy[0].cpu().numpy())  # [x1, y1, x2, y2]
            confidences.append(result.conf.cpu().numpy())  # Confidence score
            class_ids.append(int(result.cls.cpu().numpy()))  # Class ID
    
    # Convert lists to numpy arrays
    bboxes = np.array(bboxes)
    confidences = np.array(confidences).ravel()  # Ensure it's a 1D array
    class_ids = np.array(class_ids)
    
    # Check if any bounding boxes were detected
    if bboxes.shape[0] == 0:
        # Return an empty Detections object if no boxes were found
        return Detections(xyxy=np.empty((0, 4)), confidence=np.empty((0,)), class_id=np.empty((0,)))

    # Create supervision.Detections object
    detections = Detections(
        xyxy=bboxes,
        confidence=confidences,
        class_id=class_ids
    )
    
    return detections


In [10]:
def draw_boxes(frame, detections):
    for bbox, class_id in zip(detections.xyxy, detections.class_id):
        x1, y1, x2, y2 = map(int, bbox)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Green box
        label = model.names[class_id]
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

In [11]:
def detect_objects_in_frame(frame):
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = model(rgb_frame)
    
    # Convert YOLOv8 results to supervision.Detections format
    detections = convert_yolov8_to_supervision(results)
    
    # Draw bounding boxes on the frame
    draw_boxes(frame, detections)
    
    return frame

In [12]:
cap = cv2.VideoCapture(0)  # 0 is the default camera index

if not cap.isOpened():
    print("Error: Unable to access the webcam.")
    exit()

# Main loop to capture and process webcam frames
while cap.isOpened():
    success, frame = cap.read()
    
    if not success:
        print("Failed to capture image")
        break

    # Detect objects and draw bounding boxes on the frame
    frame_with_boxes = detect_objects_in_frame(frame)

    # Display the frame with bounding boxes
    cv2.imshow('Webcam Object Detection', frame_with_boxes)

    # Exit on pressing 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close OpenCV windows
cap.release()
cv2.destroyAllWindows()


0: 384x640 1 person, 1 kite, 72.0ms
Speed: 5.3ms preprocess, 72.0ms inference, 7.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 61.0ms
Speed: 1.2ms preprocess, 61.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 58.1ms
Speed: 1.1ms preprocess, 58.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 54.7ms
Speed: 1.2ms preprocess, 54.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 47.8ms
Speed: 1.4ms preprocess, 47.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 42.2ms
Speed: 1.2ms preprocess, 42.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 38.3ms
Speed: 1.2ms preprocess, 38.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)



2024-10-09 11:53:54.802 python[6091:115248] +[IMKClient subclass]: chose IMKClient_Legacy
2024-10-09 11:53:54.802 python[6091:115248] +[IMKInputSession subclass]: chose IMKInputSession_Legacy


0: 384x640 1 person, 1 kite, 39.7ms
Speed: 1.2ms preprocess, 39.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 45.2ms
Speed: 1.4ms preprocess, 45.2ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 42.3ms
Speed: 1.2ms preprocess, 42.3ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 41.9ms
Speed: 1.1ms preprocess, 41.9ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 46.8ms
Speed: 1.1ms preprocess, 46.8ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 44.0ms
Speed: 1.1ms preprocess, 44.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 35.7ms
Speed: 1.2ms preprocess, 35.7ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 kite, 35.9ms
Speed: 1.4ms preprocess, 3

In [13]:
# def count_live():
#     cap = cv2.VideoCapture(0)
    
#     while True:
#         ret, frame = cap.read()
#         result = model(frame)[0]
#         detections = sv.Detections.from_yolov5(result)
        
        
#         cv2.imshow('Object-Counting', frame)

#         if(cv2.waitKey(30) == 27):
#             break

In [14]:
def main():
    # video_path = input("Enter video file path or press Enter for webcam: ")
    # if video_path == "":
    #     video_path = 0  # Default to webcam
    # detect_an_image(video_path)
    count_live()

In [15]:
if __name__ == "__main__":
    main()

NameError: name 'count_live' is not defined

In [None]:
def convert_yolov8_to_supervision(results):
    bboxes = []
    confidences = []
    class_ids = []

    for result in results[0].boxes:
        if result.conf.item() > 0:  # Only consider detections with confidence > 0
            bboxes.append(result.xyxy[0].cpu().numpy())  # [x1, y1, x2, y2]
            confidences.append(result.conf.cpu().numpy())  # Confidence score
            class_ids.append(int(result.cls.cpu().numpy()))  # Class ID

    bboxes = np.array(bboxes)
    confidences = np.array(confidences).ravel()  # Ensure it's a 1D array
    class_ids = np.array(class_ids)

    if bboxes.shape[0] == 0:
        return Detections(xyxy=np.empty((0, 4)), confidence=np.empty((0,)), class_id=np.empty((0,)))

    detections = Detections(xyxy=bboxes, confidence=confidences, class_id=class_ids)
    return detections

In [None]:
def draw_boxes(frame, detections):
    for bbox, class_id in zip(detections.xyxy, detections.class_id):
        x1, y1, x2, y2 = map(int, bbox)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Green box
        label = model.names[class_id]
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

In [None]:
def detect_objects_in_frame(frame):
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = model(rgb_frame)  # Detect objects

    detections = convert_yolov8_to_supervision(results)  # Convert to supervision format
    draw_boxes(frame, detections)  # Draw bounding boxes
    
    # Debugging: Check the type of detections
    print(f"Detections Type: {type(detections)}")  # Should be a Detections object
    
    return detections  