In [41]:
import torch
import numpy as np
from ultralytics.yolo.engine.results import Results
from ultralytics.yolo.utils import ops, DEFAULT_CFG
from ultralytics.yolo.cfg import get_cfg
from ultralytics import YOLO
import cv2
import time
import torchvision.transforms as T
from torch import nn
from utils.meter import AverageMeter, Timer


In [42]:
# Prefer the GPU when one is usable; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [43]:
IMAGE_PATH = 'data/test.jpg'
img = cv2.imread(IMAGE_PATH)
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise only surface later as an AttributeError.
if img is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")
# Colour conversion, resizing and tensor conversion are done inside
# ObjectDetector.image_preprocess, so the raw BGR array is kept here.

In [44]:
# Inspect the loaded array's dimensions; for this image: (640, 960, 3).
img.shape

(640, 960, 3)

In [54]:
class ObjectDetector(nn.Module):
    """YOLO detector wrapper: BGR numpy image in, NMS-filtered ``Results`` out.

    The image is resized to ``self.reshape_shape`` before inference, so the
    returned boxes are expressed in the resized image's pixel coordinates.
    ``forward`` also returns a ``(1, 4)`` scale tensor; multiplying
    ``boxes.xyxy`` by it maps the boxes back to the original image size.

    Args:
        model_name: Path to the weights file handed to ``YOLO``.
        cfg: Base configuration passed to ``get_cfg``.
        overrides: Optional dict of config overrides (e.g. ``device``,
            ``half``, ``conf``) passed to ``get_cfg``.
    """

    def __init__(self, model_name="weights/yolov8n.pt", cfg=DEFAULT_CFG, overrides=None):
        super().__init__()
        # Network input size, interpreted as (height, width).
        self.reshape_shape = (224, 224)
        self.model = YOLO(model_name).model

        self.args = get_cfg(cfg, overrides)
        # Refreshed by image_preprocess on every forward pass.
        self.scale = None

        # Half precision is only applied together with a CUDA device.
        if self._on_cuda():
            self.model = self.model.to(self.args.device)
            if self.args.half:
                self.model = self.model.half()

    def _on_cuda(self):
        """Return True when the configured device names a CUDA device ('cuda', 'cuda:0', ...)."""
        return str(self.args.device).startswith('cuda')

    def forward(self, x):
        """Detect objects in one BGR numpy image.

        Args:
            x: Image as a numpy array, as returned by ``cv2.imread``.

        Returns:
            Tuple ``(results, scale)``: a ``Results`` object holding the
            detections kept by NMS (in resized-image coordinates) and the
            ``(1, 4)`` tensor that rescales xyxy boxes to the original image.
        """
        x, org_shape = self.image_preprocess(x)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            preds = self.model(x)
            # Batch size is always 1 here, so keep only the first image's detections.
            detections = ops.non_max_suppression(preds,
                                                 self.args.conf,
                                                 self.args.iou,
                                                 agnostic=self.args.agnostic_nms,
                                                 max_det=self.args.max_det,
                                                 classes=self.args.classes)[0]

        return Results(boxes=detections, orig_shape=org_shape), self.scale

    def image_preprocess(self, img):
        """Convert a BGR numpy image into a batched model input tensor.

        Also stores in ``self.scale`` the per-coordinate factors
        (original size / resized size) for xyxy boxes, on the same device as
        the returned tensor so it can be multiplied with the detections.

        Args:
            img: Image as a numpy array whose first two dimensions are
                (height, width).

        Returns:
            Tuple ``(img_tensor, org_shape)``: the ``(1, C, H, W)`` tensor
            produced by ``ToTensor`` plus ``unsqueeze(0)``, and the original
            ``(height, width)``.
        """
        org_shape = img.shape[:2]
        org_h, org_w = org_shape
        target_h, target_w = self.reshape_shape

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # cv2.resize expects dsize as (width, height), the reverse of numpy's shape order.
        img = cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
        img_tensor = T.ToTensor()(img).unsqueeze(0)

        if self._on_cuda():
            img_tensor = img_tensor.to(self.args.device)
            if self.args.half:
                img_tensor = img_tensor.half()

        # Build the scale on the input's device; a CPU scale cannot be
        # multiplied with CUDA detections.
        original_size = torch.tensor([[org_w, org_h, org_w, org_h]], device=img_tensor.device)
        resized_size = torch.tensor([[target_w, target_h, target_w, target_h]], device=img_tensor.device)
        self.scale = original_size / resized_size

        return img_tensor, org_shape



In [55]:
# ObjectDetector only applies `half` when the device is CUDA, so with 'cpu'
# no half-precision conversion takes place.
detector_overrides = {'device': 'cpu', 'half': True, 'conf': 0.25}
od = ObjectDetector(model_name="weights/yolov8n.pt", overrides=detector_overrides)

In [56]:
# One meter that accumulates per-call durations and one timer that measures them.
fps, timer = AverageMeter(), Timer()

In [57]:
# Time one end-to-end call (preprocessing, model, NMS) and report it as FPS.
timer.start()
result = od(img)
timer.stop()

o, scale = result  # Results object and the (1, 4) box-rescaling tensor
fps.update(timer.get_duration())
print(f"FPS :: {1/fps.avg:.2f}")

FPS :: 12.80


In [58]:
# Boxes are in resized-image coordinates; multiplying by `scale` maps them
# back onto the original image.
boxes_xyxy = o.boxes.xyxy
boxes_xyxy * scale

tensor([[644.01422, 351.05991, 797.71857, 539.85022],
        [293.81586,  85.65953, 442.31845, 521.66730],
        [438.73297, 116.15919, 603.28241, 524.45398]])

In [31]:
# Display the detector's module tree (the nn.Module repr).
od



ObjectDetector(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track