In [None]:
import cv2
import torch
import numpy as np

# Pick the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the pretrained YOLOv5-nano model from PyTorch Hub onto the chosen device
model = torch.hub.load('ultralytics/yolov5', 'yolov5n', pretrained=True).to(device)

# Half precision is a GPU optimisation: on CPU, FP16 kernels are missing or
# slow in many PyTorch builds, so the model stays in FP32 there.
if device.type == 'cuda':
    model = model.half()

def detect_objects(image):
    """Detect objects in a BGR frame with the module-level YOLOv5 ``model``.

    The frame is passed to the hub model as an RGB NumPy array rather than as
    a hand-built tensor. For array inputs the model's AutoShape wrapper does
    the resizing, 0-1 normalisation and dtype/device placement itself and
    returns decoded detections whose boxes are already expressed in the
    pixel grid of ``image`` (so they can be drawn on the frame directly).

    Args:
        image: ``H x W x 3`` frame in OpenCV's BGR channel order.

    Returns:
        A ``(boxes, confidences, class_ids, indices)`` tuple where

        * ``boxes`` is an ``(n, 4)`` float array of ``x1, y1, x2, y2`` corners,
        * ``confidences`` is an ``(n,)`` float array of detection scores,
        * ``class_ids`` is an ``(n,)`` int array of class indices,
        * ``indices`` is a 1-D int array of the rows kept after a score
          threshold of 0.5 and an NMS IoU threshold of 0.4.
    """
    # The hub model expects RGB for array inputs; OpenCV delivers BGR.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Perform object detection (inference size 640 keeps processing fast)
    with torch.no_grad():
        results = model(rgb_image, size=640)

    # One row per detection: x1, y1, x2, y2, confidence, class.
    # Cast to float32 first so an FP16 model still yields ordinary floats.
    detections = results.xyxy[0].float().cpu().numpy()
    boxes = detections[:, :4]
    confidences = detections[:, 4]
    class_ids = detections[:, 5].astype(int)

    # Nothing detected: skip NMS and hand back an empty integer index array.
    if len(detections) == 0:
        return boxes, confidences, class_ids, np.empty(0, dtype=int)

    # cv2.dnn.NMSBoxes wants [x, y, width, height], not corner coordinates.
    xywh = boxes.copy()
    xywh[:, 2:] -= xywh[:, :2]

    # Apply Non-Maximum Suppression (NMS)
    keep = cv2.dnn.NMSBoxes(xywh.tolist(), confidences.tolist(), score_threshold=0.5, nms_threshold=0.4)
    # Older OpenCV returns shape (k, 1), newer (k,), and () when nothing is kept.
    indices = np.asarray(keep, dtype=int).reshape(-1)

    return boxes, confidences, class_ids, indices


# Open camera index 1 (a secondary camera); use 0 for the default webcam
cap = cv2.VideoCapture(1)

# Check if the webcam is opened successfully
if not cap.isOpened():
    print("Error: Could not open webcam")
else:
    # Drawing settings are constant, so set them up once outside the loop
    font = cv2.FONT_HERSHEY_PLAIN
    color = (0, 255, 0)  # Green color for bounding box

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture frame")
                break

            boxes, confidences, class_ids, indices = detect_objects(frame)
            for i in indices:
                i = int(i)
                box = boxes[i]
                # Skip NaN *and* infinite coordinates: int() raises on both
                if not np.all(np.isfinite(box)):
                    continue
                x1, y1, x2, y2 = (int(v) for v in box)
                label = model.names[int(class_ids[i])]  # Get class name
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                # Keep the label inside the frame for boxes touching the top edge
                cv2.putText(frame, label, (x1, max(y1 - 10, 10)), font, 1, color, 2)

            cv2.imshow("Webcam", frame)

            # Quit when the user presses 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even when the loop is
        # left through an exception or a kernel interrupt.
        cap.release()
        cv2.destroyAllWindows()


Using cache found in C:\Users\Hare Krishna/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-5-28 Python-3.11.4 torch-2.3.0+cpu CPU

Fusing layers... 
YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients, 4.5 GFLOPs
Adding AutoShape... 
