In [2]:
#!/usr/bin/env python3

import sys
import rclpy
from rclpy.node import Node

from sensor_msgs.msg import Image
from cv_bridge import CvBridge
import cv2

import numpy as np
import time
from pathlib import Path
import torch
import torch.backends.cudnn as cudnn

# FILE = Path(__file__).absolute()
# sys.path.append(FILE.parents[0].as_posix())

from yolobot_recognition.models.common import DetectMultiBackend
from yolobot_recognition.utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from yolobot_recognition.utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
                           increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
from yolobot_recognition.utils.plots import Annotator, colors, save_one_box
from yolobot_recognition.utils.torch_utils import select_device, smart_inference_mode

In [3]:
# Single-image YOLOv5 inference: load the model, letterbox one image to the
# network input size, run detection + NMS, draw the boxes and show the result.

# Parameters
weights = 'yolov5s.pt'  # model.pt path(s)
imgsz = (640, 480)  # network input size in pixels, ordered (width, height) as consumed below
conf_thres = 0.25  # confidence threshold
iou_thres = 0.45  # NMS IOU threshold
max_det = 1000  # maximum detections per image
device_num = ''  # cuda device, i.e. 0 or 0,1,2,3 or cpu
dnn = False
data = 'data/coco128.yaml'  # dataset.yaml path
line_thickness = 3  # bounding box thickness (pixels)
hide_labels = False  # hide labels
hide_conf = False  # hide confidences
image_path = 'images/test_humans_up.png'  # path to your image
pad_color = (114, 114, 114)  # border colour used to fill the letterbox padding

# Initialize
device = select_device(device_num)

# Load model
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=False)
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride)  # check image size
net_w, net_h = imgsz  # the resize below has always treated imgsz as (width, height)

# Load image
img0 = cv2.imread(image_path)  # BGR
if img0 is None:
    # cv2.imread returns None instead of raising when the file is missing/unreadable.
    raise FileNotFoundError('Image not found ' + image_path)

# Padded resize (letterbox): scale with a single gain so the aspect ratio is
# kept, then pad symmetrically up to the network size. scale_boxes() below
# undoes exactly this transform (uniform gain + centred padding); a plain
# stretch-resize would make the mapped-back boxes land in the wrong place
# whenever the source aspect ratio differs from the network input.
src_h, src_w = img0.shape[:2]
gain = min(net_h / src_h, net_w / src_w)
new_w = max(1, int(round(src_w * gain)))
new_h = max(1, int(round(src_h * gain)))
pad_w, pad_h = (net_w - new_w) / 2, (net_h - new_h) / 2  # padding per side
if (new_w, new_h) != (src_w, src_h):
    img = cv2.resize(img0, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
else:
    img = img0
# The +/-0.1 nudge splits an odd total padding into floor/ceil halves.
top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=pad_color)

# Convert HWC BGR to CHW RGB
img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3 x net_h x net_w
img = np.ascontiguousarray(img)

# Prepare the input tensor
img = torch.from_numpy(img).to(device)
img = img.float()  # uint8 to fp32 (model is loaded with fp16=False)
img /= 255.0  # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
    img = img.unsqueeze(0)  # add batch dimension

# Run inference; no autograd bookkeeping is needed for a forward-only pass.
with torch.no_grad():
    # Warm up with the same (batch, channel, height, width) the real input has.
    model.warmup(imgsz=(1, 3, net_h, net_w))
    pred = model(img, augment=False, visualize=False)

# Apply NMS
pred = non_max_suppression(pred, conf_thres, iou_thres, None, False, max_det=max_det)

# Process detections
annotated = img0  # shown as-is if the loop below produces no annotator output
for i, det in enumerate(pred):  # detections per image
    s = f'{i}: '
    s += '%gx%g ' % img.shape[2:]  # print string

    annotator = Annotator(img0, line_width=line_thickness, example=str(names))
    if len(det):
        # Map boxes from the letterboxed network input back to img0 pixels.
        det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], img0.shape).round()

        # Per-class counts for the summary line
        for c in det[:, -1].unique():
            n = int((det[:, -1] == c).sum())  # detections per class
            s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

        # Draw boxes
        for *xyxy, conf, cls in reversed(det):
            c = int(cls)  # integer class
            label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
            annotator.box_label(xyxy, label, color=colors(c, True))
    else:
        s += '(no detections), '

    # Report what was found; previously this string was built but never emitted.
    LOGGER.info(s)

    # Take the drawn image from the annotator instead of assuming it painted
    # directly into img0.
    annotated = annotator.result()

# Display the image in a native OpenCV window (blocks until a key is pressed;
# requires a GUI-capable environment).
cv2.imshow("IMAGE", annotated)
cv2.waitKey(0)  # Press any key to close the window
cv2.destroyAllWindows()


YOLOv5 🚀 2024-6-10 Python-3.10.12 torch-2.3.1+cu121 CUDA:0 (NVIDIA GeForce RTX 2060, 5919MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs


In [5]:
# Sample set with new values: the value 2 is supplied twice on purpose,
# and the resulting set holds it only once alongside 3.
new_values = set()
new_values.update((2, 2, 3))