In [None]:
#1. import dsml base module
import json

import numpy as np

from dsml_s8e.module import DSMLModule

In [None]:
#2. specify parameters
# Parameters
# Run-level settings; handed to DSMLModule together with `parameters` in the next cell.
# NOTE(review): this looks like a papermill-style parameters cell, so the literal form is kept as is.
run_parameters = {
    "env_name": "user",
    "product_name": "cv_example",
    "stand_name": "YOLOX_mmdet",
    "docker_image": "cv-no-gpu:latest",
    "conda_env": "gpu",
    "business_report_repo": "",
    "infra": {},
    "comment": {},
}

# Module-level parameters; intentionally empty for this notebook.
parameters = {}

In [None]:
import os
import os.path as osp

# Build the DSML module wrapper from the two parameter dicts defined in the previous cell.
module = DSMLModule(parameters, run_parameters)

# Ask for cache URLs: no new cache entities, one existing entity named "cache_data".
# It is accessed below as `cache_urls.cache_data` and used as a directory path.
cache_urls = module.make_cache_urls(
    new_cache_entity_names=[],
    last_cache_entity_names=["cache_data"]
)

module.print_urls()

In [None]:
# Load the export settings stored as config.json inside the "cache_data" entity
import json

config_fn = osp.join(cache_urls.cache_data, 'config.json')
with open(config_fn) as f_id:
    CONFIG = json.load(f_id)

# Unpack the entries used by the rest of the notebook into upper-case constants.
MODEL_NAME = CONFIG["train_config_parameters"]["MODEL_NAME"]
WORK_DIR, ONNX_DIR = CONFIG['work_dir'], CONFIG["onnx_dir"]
CONFIG_DEPLOY, CONFIG_MODEL = CONFIG["config_deploy"], CONFIG["config_model"]
TORCH_MODEL, ONNX_MODEL = CONFIG["torch_model"], CONFIG["onnx_model"]
DEVICE = CONFIG["device"]

In [None]:
# TEST IMAGE
# Sample image expected inside the "cache_data" entity; used for both torch and ONNX inference below.
test_image_path = osp.join(cache_urls.cache_data, "test.png")
# Fail early if the image is missing.
assert osp.exists(test_image_path)

In [None]:
from mmdet.datasets import PIPELINES

# force=True lets this cell be re-run in the same kernel: without it the registry
# rejects a second registration of the same class name.
@PIPELINES.register_module(force=True)
class DataAsList:
    """Pipeline step that wraps every value of the results dict in a one-element list."""

    def __call__(self, results):
        """Return a new dict mapping each key of ``results`` to ``[value]``."""
        aug_data_dict = {key: [val] for key, val in results.items()}
        return aug_data_dict

## Конвертирование модели YOLOX через torch.onnx.export    
### препроцессинг и постпроцессинг отдельными модулями

In [None]:
from mmcv import Config, ConfigDict
from mmdet.apis import init_detector, inference_detector, show_result_pyplot
from mmdet.core.export import preprocess_example_input
import torch

Чтение конфигов модели и деплоя, через библиотеку mmcv

In [None]:
# Parse both config files with mmcv.
# cfg_deploy provides the ONNX export settings (cfg_deploy.onnx_config) read below;
# cfg_model provides the test pipeline (cfg_model.data.test.pipeline).
cfg_deploy = Config.fromfile(CONFIG_DEPLOY)
cfg_model = Config.fromfile(CONFIG_MODEL)

Инициализация torch модели через api библиотеки mmdetection

In [None]:
# Build the detector from the model config and the torch weights file, placed on DEVICE.
model = init_detector(CONFIG_MODEL, 
                      TORCH_MODEL, 
                      device=DEVICE)

Получение основных параметров для экспорта модели в формат onnx.    
Чтение тестового изображения и препроцессинг изображения для подготовки инференса модели

In [None]:
# Export settings taken from the deploy config.
input_names = cfg_deploy.onnx_config.input_names
opset_version = cfg_deploy.onnx_config.opset_version
dynamic_axes = cfg_deploy.onnx_config.dynamic_axes
# output_names = ['dets', 'labels']
# Destination path of the exported ONNX file.
output_file = os.path.join(ONNX_DIR, ONNX_MODEL)

from mmcv.parallel import collate
from mmdet.datasets.pipelines import Compose

# Run the model's own test pipeline on the single test image.
test_pipeline = Compose(cfg_model.data.test.pipeline)
datas = []
data = dict(img_info=dict(filename=test_image_path), img_prefix=None)
data = test_pipeline(data)
datas.append(data)

# Collate into a batch of one and unwrap the collated containers via `.data[0]`.
data = collate(datas, samples_per_gpu=1)
data['img_metas'] = [img_metas.data[0] for img_metas in data['img_metas']]
# Rename 'img' -> 'imgs' (the key later expanded into model.forward via **data) and move tensors to DEVICE.
data['imgs'] = data.pop('img')
data['imgs'] = [img.data[0].to(DEVICE) for img in data['imgs']]

Конвертирование torch модели в формат ONNX через torch.onnx.export   
Более подробно с параметрами экспорта в ONNX можно ознакомиться в документации torch: https://pytorch.org/docs/1.13/onnx.html#contributing-developing

In [None]:
# Export through forward_dummy, which is called here with a single image tensor.
# NOTE: this rebinds model.forward; the torch-inference cell below rebinds it to forward_test.
model.forward = model.forward_dummy

# Output names are not passed, so the names reported by the ONNX Runtime session are used later.
torch.onnx.export(
        model,
        data["imgs"][0],
        output_file,
        input_names=input_names,
        # output_names=output_names,
        export_params=True,
        keep_initializers_as_inputs=False,
        training= torch.onnx.TrainingMode.EVAL,
        do_constant_folding=True,
        verbose=False,
        opset_version=opset_version,
        dynamic_axes=dynamic_axes)

# Show where the ONNX file was written.
print(output_file)

Загрузка модели ONNX и запуск ее в сессии, для последующего инференса.    
Используется библиотека onnxruntime.
Загрузка модели ONNX на device (CPU или CUDA) происходит через явное указание параметра providers  (CUDAExecutionProvider или CPUExecutionProvider).

https://onnxruntime.ai/docs/api/python/api_summary.html

In [None]:
import onnxruntime

# Prefer CUDA but only request providers this onnxruntime build actually offers,
# so the cell also works in a CPU-only image. The original defined `providers`
# but then passed a hard-coded CUDA list to the session.
preferred_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
available_providers = onnxruntime.get_available_providers()
providers = [name for name in preferred_providers if name in available_providers]

ort_session = onnxruntime.InferenceSession(output_file, providers=providers)
# Names as stored in the exported graph; they replace the export-time `input_names`.
input_names = [inp.name for inp in ort_session.get_inputs()]
output_names = [out.name for out in ort_session.get_outputs()]

print(f"input_names: {input_names}")
print(f"output_names: {output_names}")

Выполняем инференс torch модели на тестовом изображении. На выходе получаем обработанные bbox (после NMS и других алгоритмов). Постпроцессинг скрыт "под капотом" библиотеки mmdetection

In [None]:
# Restore the test-time forward (it was rebound to forward_dummy for the ONNX export).
model.forward = model.forward_test

with torch.no_grad():
    # results = model(img = data['img'][0].to("cuda"))
    results = model.forward(rescale=True, **data)

# results[0] is iterated per class id; print the shape of each class's result array.
print(f"results of image {test_image_path}")
for id_cls, results_cls in enumerate(results[0]):
    print(f"class_id = {id_cls}, objects: {results_cls.shape}")

In [None]:
# Visualise the torch detections for the test image with a score threshold of 0.05.
show_result_pyplot(model, test_image_path, results[0], score_thr=0.05)

Выполняем инференс сконвертированной onnx модели на тестовом изображении

In [None]:
# Convert the preprocessed image tensor to a NumPy array on the host for ONNX Runtime.
np_input = data['imgs'][0].cpu().detach().numpy()
# np_input = np.concatenate([np_input, np_input], axis=0)
# Run the exported graph, requesting every output; the array is fed to the first input.
out_onnx = ort_session.run(output_names, {input_names[0]: np_input})
# Display each output name next to the shape of its array.
[[output_names[out_id], out.shape] for out_id, out in enumerate(out_onnx)]

Как видно из данных полученных из инференса onnx модели - необходима постобработка этих данных.  

### View graph onnx model on netron.app
### https://netron.app/

Как можно увидеть, результатом инференса модели в формате onnx, сконвертированной без mmdeploy (без включения постпроцессинга в сам граф модели) через torch.onnx.export, будет выход, состоящий из 9 массивов данных.    
Необходимо создать постпроцессинг для преобразования этого выхода (9 массивов) в соответствии с инференсом модели через фреймворк mmdetection   
   
```python
with torch.no_grad():   
    results = model.forward(rescale=True, **data) 
```

```out: class_id = 0, objects: (37, 5) ```

### Создание функционала постобработки на основе библиотеки mmdetection и mmcv

##### Шаг 1.   
С помощью библиотеки inspect (входящая в окружение python по умолчанию) выведем код используемый при инференсе модели mmdetection

In [None]:
import inspect

def getsource(func):
    """Print the path of the file defining ``func``, a blank line, then its source code."""
    print(inspect.getsourcefile(func), end="\n\n")
    print(inspect.getsource(func))

In [None]:
# Print the source of the currently bound forward (forward_test after the torch-inference cell).
getsource(model.forward)

В коде функции model.forward() видно, что также вызываются функции model.simple_test() и model.aug_test()   
при разных параметрах:   
model.simple_test() - вызов при одном изображении   
model.aug_test() - вызов при нескольких изображениях  

![step_1.drawio.png](attachment:a05d507b-e5bc-4bfa-93ee-f6fa5a602842.png)

##### Шаг 2.   
Рассмотрим код функций model.simple_test и model.aug_test

In [None]:
# Print the sources of the two test entry points, separated by a blank line.
getsource(model.simple_test)
print()
getsource(model.aug_test)

В функции model.simple_test вызываются следующие функции:    
model.extract_feat(), model.bbox_head.simple_test(), bbox2result 

![step_2_1.drawio.png](attachment:3c3cde72-257d-43ff-acca-4bf8090a2fd4.png)

В функции model.aug_test вызываются следующие функции:    
model.extract_feats(), model.bbox_head.aug_test(), bbox2result 

![step_2_2.drawio.png](attachment:7edc7436-1221-4eca-9173-7113d66bf517.png)


Рассмотрим вывод функций model.extract_feat() и model.bbox_head.simple_test() на тестовом изображении.    

In [None]:
# Extract features from the preprocessed test image and print the shape of each returned element.
feat = model.extract_feat(data['imgs'][0])
print([x_feat.shape for x_feat in feat])

# First element of the head's simple_test result for those features (shown as the cell output).
model.bbox_head.simple_test(feat, data['img_metas'][0], rescale=True)[0]

##### Шаг 3.   
Рассмотрим код функции model.bbox_head.simple_test

In [None]:
# Print the source of the head-level simple_test called by model.simple_test.
getsource(model.bbox_head.simple_test)

In [None]:
# Print the source of simple_test_bboxes, the function that simple_test delegates to.
getsource(model.bbox_head.simple_test_bboxes)

В функции model.bbox_head.simple_test вызывается функция model.bbox_head.simple_test_bboxes   
В ней уже вызываются следующие функции: model.bbox_head.forward и model.bbox_head.get_bboxes

Рассмотрим вывод функции model.bbox_head.forward. Можно заметить, что выход совпадает с выходом инференса onnx модели (разница в 3 знаке)   
Следовательно, постпроцессинг определяется в функционале model.bbox_head.get_bboxes

In [None]:
# Raw head outputs of the torch model for the features extracted above.
torch_outs = model.bbox_head.forward(feat)

# Walk the nested torch outputs in order, keeping a running index into the flat ONNX output list.
tensor_id = 0
for out_id, out in enumerate(torch_outs):
    print("out_id: ", out_id)
    for out_tensor in out:
        print(f"tensor_id: {tensor_id}, shape: {out_tensor.shape}")
        # Median of the signed element-wise difference between the torch and ONNX outputs.
        print(f"diff torch_out and onnx_out : {np.median(out_tensor.detach().cpu().numpy() - out_onnx[tensor_id])}")
        tensor_id += 1

##### Шаг 4.   
Рассмотрим код функции model.bbox_head.get_bboxes

In [None]:
# Print the source of get_bboxes, where the post-processing of the raw head outputs lives.
getsource(model.bbox_head.get_bboxes)

Функции используемые в model.bbox_head.get_bboxes:    
model.bbox_head.prior_generator.grid_priors   
model.bbox_head._bbox_decode   
model.bbox_head._bboxes_nms   

Инспектируем эти функции и функционал используемый в них

In [None]:
# model.bbox_head.prior_generator.grid_priors
# Print grid_priors together with the two prior-generator helpers inspected alongside it.
getsource(model.bbox_head.prior_generator.grid_priors)
getsource(model.bbox_head.prior_generator.single_level_grid_priors)
getsource(model.bbox_head.prior_generator._meshgrid)

In [None]:
# model.bbox_head._bbox_decode
# Print the box-decoding step used inside get_bboxes.
getsource(model.bbox_head._bbox_decode)

In [None]:
# model.bbox_head._bboxes_nms
# Print the score-filtering and NMS step used inside get_bboxes.
getsource(model.bbox_head._bboxes_nms)

##### Шаг 5.   
Рассмотрим код функции batched_nms,
которая, как следует из названия, должна включать в себя алгоритм NMS (Non-maximum Suppression)
    
Данная функция не принадлежит классу model и используется из другой библиотеки

In [None]:
# Scan the module that defines _bboxes_nms for the first member whose name contains "batched_nms",
# print it together with the absolute path of the file it comes from, then stop.
for member in inspect.getmembers(inspect.getmodule(model.bbox_head._bboxes_nms)):
    if "batched_nms" in member[0]:
        print(member)
        member_filepath = inspect.getabsfile(member[1])
        print(member_filepath)
        break

In [None]:
# !cat /opt/conda/lib/python3.9/site-packages/mmcv/ops/nms.py

In [None]:
# Import only the names inspected in the following cells instead of `import *`,
# so it is clear where each name comes from (numpy is imported explicitly at the top).
from mmcv.ops.nms import NMSop, batched_nms, ext_module, nms

In [None]:
# Print the mmcv implementation of batched_nms.
getsource(batched_nms)

In [None]:
# Display the model's test-time configuration.
model.test_cfg 

In [None]:
# Print the mmcv implementation of nms.
getsource(nms)

In [None]:
# Print the source of NMSop.
getsource(NMSop)

# Disabled alternative: locate `ext_module` by scanning the members of NMSop's module.
# for member in inspect.getmembers(inspect.getmodule(NMSop)):
#     if "ext_module" in member[0]:
#         print(member)
#         member_filepath = inspect.getabsfile(member[1])
#         print(member_filepath)
#         break

In [None]:
# Show the extension module object and its `nms` attribute.
print(ext_module)
print(ext_module.nms)

Из класса NMSop следует, что основной алгоритм NMS скомпилирован в библиотеку *.so,    
и сам python код функции nms не достать.   
Однако, если поискать в исходниках mmcv https://github.com/open-mmlab/mmcv, то можно найти используемый код NMS, но в реализации на C++

```
Tensor nms_cpu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  if (boxes.numel() == 0) {
    return at::empty({0}, boxes.options().dtype(at::kLong));
  }
  auto x1_t = boxes.select(1, 0).contiguous();
  auto y1_t = boxes.select(1, 1).contiguous();
  auto x2_t = boxes.select(1, 2).contiguous();
  auto y2_t = boxes.select(1, 3).contiguous();

  Tensor areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset);

  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto nboxes = boxes.size(0);
  Tensor select_t = at::ones({nboxes}, boxes.options().dtype(at::kBool));

  auto select = select_t.data_ptr<bool>();
  auto order = order_t.data_ptr<int64_t>();
  auto x1 = x1_t.data_ptr<float>();
  auto y1 = y1_t.data_ptr<float>();
  auto x2 = x2_t.data_ptr<float>();
  auto y2 = y2_t.data_ptr<float>();
  auto areas = areas_t.data_ptr<float>();

  for (int64_t _i = 0; _i < nboxes; _i++) {
    if (select[_i] == false) continue;
    auto i = order[_i];
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    for (int64_t _j = _i + 1; _j < nboxes; _j++) {
      if (select[_j] == false) continue;
      auto j = order[_j];
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      auto w = std::max(0.f, xx2 - xx1 + offset);
      auto h = std::max(0.f, yy2 - yy1 + offset);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr > iou_threshold) select[_j] = false;
    }
  }
  return order_t.masked_select(select_t);
}
```

Перенесем этот код на python, с учетом использования numpy вместо torch

    def nms_cpu(boxes, scores, iou_threshold, offset):
        if boxes.size == 0:
            return np.array([])
        x1_t = boxes[:, 0]
        y1_t = boxes[:, 1]
        x2_t = boxes[:, 2]
        y2_t = boxes[:, 3]

        areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset)
        order_t = np.argsort(scores)[::-1]

        nboxes = boxes.shape[0]
        select_t = np.ones(nboxes, dtype = np.bool_)

        for _i in range(nboxes):
            if not select_t[_i]:
                continue
            i = order_t[_i]
            ix1 = x1_t[i]
            iy1 = y1_t[i]
            ix2 = x2_t[i]
            iy2 = y2_t[i]
            iarea = areas_t[i]

            for _j in range(_i+1, nboxes):
                if not select_t[_j]:
                    continue
                j = order_t[_j];
                xx1 = max(ix1, x1_t[j])
                yy1 = max(iy1, y1_t[j])
                xx2 = min(ix2, x2_t[j])
                yy2 = min(iy2, y2_t[j])

                w = max(0, xx2 - xx1 + offset)
                h = max(0, yy2 - yy1 + offset)

                inter = w * h
                ovr = inter / (iarea + areas_t[j] - inter)
                if (ovr > iou_threshold):
                    select_t[_j] = False
        return order_t[select_t]

#### Шаг 6. Постпроцессинг для YOLOX_ONNX   
Следует также уточнить, что при инференсе onnx модели получаем numpy массивы, тогда как в функционале mmdetection используется torch.   
Поэтому отредактируем код функций для использования numpy


In [None]:
class YOLOXPostProcessing():
    r"""NumPy post-processing for the raw outputs of an exported YOLOX ONNX model.

    The methods are NumPy ports of the mmdetection functions named in the
    per-method comments. Given the per-level class scores, box regressions
    and objectness maps they generate the prior grid, decode the boxes,
    undo the input rescaling and run NMS.

    Args:
        strides (list[tuple[int, int]]): ``(stride_w, stride_h)`` for each
            feature level (``model.bbox_head.prior_generator.strides``).
        offset (float): Value added to the grid indices before they are
            multiplied by the stride (``...prior_generator.offset``).
        num_levels (int): Number of feature levels
            (``...prior_generator.num_levels``).
        cls_out_channels (int): Channels of the classification output
            (``model.bbox_head.cls_out_channels``).
        num_classes (int): Number of object classes
            (``model.bbox_head.num_classes``).
    """

    def __init__(self,
                strides = [(8, 8), (16, 16), (32, 32)],             # model.bbox_head.prior_generator.strides
                offset = 0,                                         # model.bbox_head.prior_generator.offset
                num_levels = 3,                                     # model.bbox_head.prior_generator.num_levels
                cls_out_channels = 1,                               # model.bbox_head.cls_out_channels
                num_classes = 1):                                   # model.bbox_head.num_classes
        # The default list is only read, never mutated.
        self.strides = strides
        self.offset = offset
        self.num_levels = num_levels
        self.cls_out_channels = cls_out_channels
        self.num_classes = num_classes

    @staticmethod
    def _sigmoid(x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def _meshgrid(x, y, row_major=True):   # model.bbox_head.prior_generator._meshgrid
        """Return the flattened coordinate grids spanned by ``x`` and ``y``.

        Equivalent of ``yy, xx = torch.meshgrid(y, x)``: both grids have
        shape ``(len(y), len(x))`` before flattening, so ``xx`` cycles
        through ``x`` fastest.

        Args:
            x (np.ndarray): 1-D x coordinates.
            y (np.ndarray): 1-D y coordinates.
            row_major (bool): If True return ``(xx, yy)``, else ``(yy, xx)``.

        Returns:
            tuple[np.ndarray, np.ndarray]: Two 1-D arrays of length
            ``len(x) * len(y)``.
        """
        # indexing="ij" matches torch.meshgrid. The default "xy" indexing of
        # np.meshgrid(y, x) put y values into xx, which only gave the right
        # answer when x and y were identical (square feature maps).
        yy, xx = np.meshgrid(y, x, indexing="ij")
        if row_major:
            return xx.reshape(-1), yy.reshape(-1)
        else:
            return yy.reshape(-1), xx.reshape(-1)

    @staticmethod
    def bbox2result(bboxes, labels, num_classes):
        """Convert detection results to a list of numpy arrays.

        Args:
            bboxes (np.ndarray): shape (n, 5)
            labels (np.ndarray): shape (n, )
            num_classes (int): class number, including background class

        Returns:
            list(ndarray): bbox results of each class
        """
        if bboxes.shape[0] == 0:
            # No detections: one empty (0, 5) array per class.
            return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)]
        else:
            return [bboxes[labels == i, :] for i in range(num_classes)]

    def _bbox_decode(self, priors, bbox_preds):   # model.bbox_head._bbox_decode
        """Decode regression outputs into corner-format boxes.

        Args:
            priors (np.ndarray): ``(N, 4)`` array whose columns are used as
                ``(x, y, stride_w, stride_h)``.
            bbox_preds (np.ndarray): ``(..., N, 4)`` array whose last axis is
                used as ``(dx, dy, log_w, log_h)``.

        Returns:
            np.ndarray: ``(..., N, 4)`` boxes as ``(tl_x, tl_y, br_x, br_y)``.
        """
        # Centres are offsets in stride units; sizes are exponentiated and scaled by the stride.
        xys = (bbox_preds[..., :2] * priors[:, 2:]) + priors[:, :2]
        whs = np.exp(bbox_preds[..., 2:]) * priors[:, 2:]

        tl_x = (xys[..., 0] - whs[..., 0] / 2)
        tl_y = (xys[..., 1] - whs[..., 1] / 2)
        br_x = (xys[..., 0] + whs[..., 0] / 2)
        br_y = (xys[..., 1] + whs[..., 1] / 2)

        decoded_bboxes = np.stack([tl_x, tl_y, br_x, br_y], -1)
        return decoded_bboxes

    def single_level_grid_priors(self,
                                 featmap_size,
                                 level_idx,
                                 dtype=np.float32,
                                 with_stride=False):           # model.bbox_head.prior_generator.single_level_grid_priors
        """Generate grid points of a single level.

        Note:
            This function is usually called by method ``self.grid_priors``.

        Args:
            featmap_size (tuple[int]): Size of the feature map, arranged as
                (h, w).
            level_idx (int): The index of the corresponding feature map level.
            dtype (:obj:`dtype`): Dtype of priors. Default: numpy.float32.
            with_stride (bool): Concatenate the stride to the last dimension
                of points.

        Return:
            np.ndarray: Points of a single feature level.
            The shape is (N, 2) when ``with_stride`` is ``False``, where
            N = width * height of the feature level and the last dimension
            is (coord_x, coord_y); otherwise the shape is (N, 4) and the
            last dimension is (coord_x, coord_y, stride_w, stride_h).
        """
        feat_h, feat_w = featmap_size
        stride_w, stride_h = self.strides[level_idx]
        shift_x = ((np.arange(0, feat_w) + self.offset) * stride_w).astype(dtype)
        shift_y = ((np.arange(0, feat_h) + self.offset) * stride_h).astype(dtype)
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        if not with_stride:
            shifts = np.stack([shift_xx, shift_yy], axis=-1)
        else:
            # Repeat the level's stride for every grid point.
            stride_w = np.full((shift_xx.shape[0], ), stride_w).astype(dtype)
            stride_h = np.full((shift_yy.shape[0], ), stride_h).astype(dtype)
            shifts = np.stack([shift_xx, shift_yy, stride_w, stride_h], axis=-1)
        all_points = shifts
        return all_points

    def grid_priors(self,
                    featmap_sizes,
                    dtype=np.float32,
                    with_stride=False):  # model.bbox_head.prior_generator.grid_priors
        """Generate grid points of multiple feature levels.

        Args:
            featmap_sizes (list[tuple]): List of feature map sizes in
                multiple feature levels, each arranged as (h, w).
            dtype (:obj:`dtype`): Dtype of priors. Default: numpy.float32.
            with_stride (bool): Whether to concatenate the stride to
                the last dimension of points.

        Return:
            list[np.ndarray]: Points of multiple feature levels; see
            ``single_level_grid_priors`` for the layout of each array.
        """
        assert self.num_levels == len(featmap_sizes)
        multi_level_priors = []
        for i in range(self.num_levels):
            priors = self.single_level_grid_priors(
                featmap_sizes[i],
                level_idx=i,
                dtype=dtype,
                with_stride=with_stride)
            multi_level_priors.append(priors)
        return multi_level_priors

    def get_bboxes(self,
                   cls_scores,
                   bbox_preds,
                   objectnesses,
                   scale_factor = None,
                   score_thr: float = 0.01,
                   iou_threshold: float = 0.5,
                   with_nms=True):
        """Transform network outputs of a batch into bbox results.

        Args:
            cls_scores (list[numpy.array]): Classification scores for all
                scale levels, each is a 4D array of shape
                (batch_size, num_priors * num_classes, H, W).
            bbox_preds (list[numpy.array]): Box energies / deltas for all
                scale levels, each is a 4D array of shape
                (batch_size, num_priors * 4, H, W).
            objectnesses (list[numpy.array]): Score factor for all scale
                levels, each is a 4D array of shape (batch_size, 1, H, W).
            scale_factor (numpy.array, Optional): Per-image rescale
                coefficients, one row of 4 values per image. Anything that
                is not a ``numpy.ndarray``, or an all-zero array, is
                replaced by ones (no rescaling). Default None.
            score_thr (float): Minimum ``objectness * class score`` for a
                box to be kept.
            iou_threshold (float): IoU threshold passed to NMS.
            with_nms (bool): Accepted for signature compatibility; it is
                not read, NMS is always applied.

        Returns:
            list[list[numpy.array]]: One entry per image; each entry is a
            list with one (n, 5) array per class whose columns are
            (tl_x, tl_y, br_x, br_y, score).
        """
        assert len(cls_scores) == len(bbox_preds) == len(objectnesses)
        num_imgs = cls_scores[0].shape[0]

        # Fall back to the identity scale when none (or an all-zero one) is supplied.
        if not isinstance(scale_factor, np.ndarray) or not scale_factor.any():
            scale_factor = np.ones((num_imgs, 4), dtype=np.float32)

        featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores]
        mlvl_priors = self.grid_priors(featmap_sizes,
                                       dtype=cls_scores[0].dtype,
                                       with_stride=True)

        # Flatten every level to (num_imgs, H * W, C) with channels last.
        flatten_cls_scores = [
            cls_score.transpose(0, 2, 3, 1).reshape(num_imgs, -1,
                                                    self.cls_out_channels)
            for cls_score in cls_scores
        ]
        flatten_bbox_preds = [
            bbox_pred.transpose(0, 2, 3, 1).reshape(num_imgs, -1, 4)
            for bbox_pred in bbox_preds
        ]
        flatten_objectness = [
            objectness.transpose(0, 2, 3, 1).reshape(num_imgs, -1)
            for objectness in objectnesses
        ]

        # Concatenate the levels; scores and objectness are logits, hence the sigmoid.
        flatten_cls_scores = self._sigmoid(np.concatenate(flatten_cls_scores, axis=1))
        flatten_bbox_preds = np.concatenate(flatten_bbox_preds, axis=1)
        flatten_objectness = self._sigmoid(np.concatenate(flatten_objectness, axis=1))
        flatten_priors = np.concatenate(mlvl_priors)

        flatten_bboxes = self._bbox_decode(flatten_priors, flatten_bbox_preds)

        # Rescale: return boxes in the original image space.
        flatten_bboxes = np.stack(
            [flatten_bboxes[img_id, ..., :4] / np.expand_dims(scale_factor[img_id], axis=0)
             for img_id in range(num_imgs)],
            axis=0)

        # Per-image score filtering and NMS.
        result_list = [
            self._bboxes_nms(flatten_cls_scores[img_id],
                             flatten_bboxes[img_id],
                             flatten_objectness[img_id],
                             score_thr=score_thr,
                             iou_thr=iou_threshold)
            for img_id in range(num_imgs)
        ]

        bbox_results = [self.bbox2result(det_bboxes, det_labels, self.num_classes)
                        for det_bboxes, det_labels in result_list]
        return bbox_results

    def _bboxes_nms(self, cls_scores, bboxes, score_factor, score_thr, iou_thr):  #model.bbox_head._bboxes_nms
        """Filter one image's boxes by score and apply class-aware NMS.

        Args:
            cls_scores (np.ndarray): ``(N, num_classes)`` class scores.
            bboxes (np.ndarray): ``(N, 4)`` boxes.
            score_factor (np.ndarray): ``(N,)`` objectness scores.
            score_thr (float): Minimum ``objectness * best class score``.
            iou_thr (float): IoU threshold for NMS.

        Returns:
            tuple[np.ndarray, np.ndarray]: The kept boxes and their labels.
            The boxes are ``(n, 5)`` with the score appended, except when
            nothing passes the score filter, in which case the empty
            ``(0, 4)`` box array is returned unchanged.
        """
        max_scores = np.max(cls_scores, 1)
        labels = np.argmax(cls_scores, 1)

        valid_mask = score_factor * max_scores >= score_thr

        bboxes = bboxes[valid_mask]
        scores = max_scores[valid_mask] * score_factor[valid_mask]
        labels = labels[valid_mask]

        if labels.size == 0:
            return bboxes, labels
        else:
            dets, keep = NMS.batched_nms(bboxes, scores, labels, iou_threshold = iou_thr, score_threshold=score_thr)
            return dets, labels[keep]

class NMS:
    """NumPy ports of the NMS helpers from ``mmcv.ops.nms``."""

    @staticmethod
    def nms_op(boxes,
               scores,
               iou_threshold: float = 0.5,
               score_threshold: float = 0.1,
               offset: int = 0,
               max_num: int = -1):
        """Drop low-score boxes, run NMS and return the kept detections.

        Args:
            boxes (np.ndarray): ``(N, 4)`` boxes as ``(x1, y1, x2, y2)``.
            scores (np.ndarray): ``(N,)`` scores.
            iou_threshold (float): Boxes overlapping a higher-scored kept
                box by more than this IoU are suppressed.
            score_threshold (float): Only boxes with ``score >`` this value
                take part in NMS.
            offset (int): 0 or 1; added to widths/heights when computing areas.
            max_num (int): If positive, keep at most this many boxes.

        Returns:
            tuple[np.ndarray, np.ndarray]: ``(n, 5)`` detections
            ``(x1, y1, x2, y2, score)`` and the indices of the kept boxes
            in the *input* arrays, ordered by descending score.
        """
        assert boxes.shape[-1] == 4
        assert boxes.shape[0] == scores.shape[0]
        assert offset in (0, 1)

        valid_mask = scores > score_threshold
        valid_inds = np.nonzero(valid_mask)[0]

        # NMS runs on the filtered subset. The inputs are not overwritten, because
        # the indices are mapped back to the unfiltered arrays before gathering.
        inds = NMS.nms_cpu(boxes[valid_mask], scores[valid_mask], iou_threshold, offset)

        if max_num > 0:
            inds = inds[:max_num]
        inds = valid_inds[inds]

        dets = np.concatenate([boxes[inds], scores[inds].reshape(-1, 1)], axis=-1)
        return dets, inds

    @staticmethod
    def nms_cpu(boxes, scores, iou_threshold, offset):
        """Greedy NMS, a line-by-line port of the mmcv C++ ``nms_cpu``.

        Args:
            boxes (np.ndarray): ``(N, 4)`` boxes as ``(x1, y1, x2, y2)``.
            scores (np.ndarray): ``(N,)`` scores.
            iou_threshold (float): Suppression threshold.
            offset (int): Added to widths/heights when computing areas.

        Returns:
            np.ndarray: Integer indices of the kept boxes, ordered by
            descending score. Empty integer array for empty input.
        """
        if boxes.size == 0:
            # Integer dtype so the result can be used as an index by the caller.
            return np.empty((0, ), dtype=np.intp)
        x1_t = boxes[:, 0]
        y1_t = boxes[:, 1]
        x2_t = boxes[:, 2]
        y2_t = boxes[:, 3]

        areas_t = (x2_t - x1_t + offset) * (y2_t - y1_t + offset)
        order_t = np.argsort(scores)[::-1]

        nboxes = boxes.shape[0]
        # select_t is indexed by rank (position in order_t), not by box index.
        select_t = np.ones(nboxes, dtype = np.bool_)

        for _i in range(nboxes):
            if not select_t[_i]:
                continue
            i = order_t[_i]
            ix1 = x1_t[i]
            iy1 = y1_t[i]
            ix2 = x2_t[i]
            iy2 = y2_t[i]
            iarea = areas_t[i]

            for _j in range(_i+1, nboxes):
                if not select_t[_j]:
                    continue
                j = order_t[_j]
                xx1 = max(ix1, x1_t[j])
                yy1 = max(iy1, y1_t[j])
                xx2 = min(ix2, x2_t[j])
                yy2 = min(iy2, y2_t[j])

                w = max(0, xx2 - xx1 + offset)
                h = max(0, yy2 - yy1 + offset)

                inter = w * h
                ovr = inter / (iarea + areas_t[j] - inter)
                if (ovr > iou_threshold):
                    select_t[_j] = False
        return order_t[select_t]

    @staticmethod
    def batched_nms(boxes,
                    scores,
                    idxs,
                    iou_threshold: float = 0.5,
                    score_threshold: float = 0.1,
                    class_agnostic: bool = False,
                    offset:int = 0):
        r"""Performs non-maximum suppression in a batched fashion.

        Modified from `torchvision/ops/boxes.py#L39
        <https://github.com/pytorch/vision/blob/
        505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39>`_.
        In order to perform NMS independently per class, we add an offset to all
        the boxes. The offset is dependent only on the class idx, and is large
        enough so that boxes from different classes do not overlap.

        Args:
            boxes (np.ndarray): boxes in shape (N, 4) or (N, 5).
            scores (np.ndarray): scores in shape (N, ).
            idxs (np.ndarray): each index value corresponds to a bbox cluster,
                and NMS will not be applied between elements of different idxs,
                shape (N, ).
            iou_threshold (float): IoU threshold used for NMS.
            score_threshold (float): Boxes with a score not above this value
                are dropped before NMS.
            class_agnostic (bool): if true, nms is class agnostic,
                i.e. IoU thresholding happens over all boxes,
                regardless of the predicted class. Defaults to False.
            offset (int): 0 or 1, forwarded to ``nms_op``.

        Returns:
            tuple: kept dets and indices.

            - boxes (np.ndarray): Bboxes with score after nms, has shape
              (num_bboxes, 5). last dimension 5 arranged as
              (x1, y1, x2, y2, score)
            - keep (np.ndarray): The indices of remaining boxes in input
              boxes.
        """
        if class_agnostic:
            boxes_for_nms = boxes
        else:
            # When using rotated boxes, only apply offsets on center.
            # NOTE: nms_op asserts 4-column boxes, so 5-column input is
            # still rejected there; this branch only mirrors mmcv.
            if boxes.shape[-1] == 5:
                # Strictly, the maximum coordinates of the rotating box
                # (x,y,w,h,a) should be calculated by polygon coordinates.
                # But the conversion from rotated box to polygon will
                # slow down the speed.
                # So we use max(x,y) + max(w,h) as max coordinate
                # which is larger than polygon max coordinate
                # max(x1, y1, x2, y2,x3, y3, x4, y4)
                max_coordinate = boxes[..., :2].max() + boxes[..., 2:4].max()
                # Multiply (not add): class k is shifted by k * (max + 1).
                offsets = idxs.astype(boxes.dtype) * (max_coordinate + np.array(1).astype(boxes.dtype))
                boxes_ctr_for_nms = boxes[..., :2] + offsets[:, None]
                boxes_for_nms = np.concatenate([boxes_ctr_for_nms, boxes[..., 2:5]], axis=-1)
            else:
                max_coordinate = boxes.max()
                # Multiply (not add): class k is shifted by k * (max + 1), so
                # boxes of different classes can never overlap.
                offsets = idxs.astype(boxes.dtype) * (max_coordinate + np.array(1).astype(boxes.dtype))
                boxes_for_nms = boxes + offsets[:, None]

        dets, keep = NMS.nms_op(boxes_for_nms, scores, iou_threshold, score_threshold, offset)
        # Gather the un-shifted boxes; scores come from the NMS result.
        boxes = boxes[keep]
        scores = dets[:, -1]
        boxes = np.concatenate([boxes, scores[:, None]], axis= -1)
        return boxes, keep
    



In [None]:
# Print the head parameters that YOLOXPostProcessing has to be configured with.
print(f"strides = {model.bbox_head.prior_generator.strides}")
print(f"offset = {model.bbox_head.prior_generator.offset}")
print(f"num_levels = {model.bbox_head.prior_generator.num_levels}")
print(f"cls_out_channels = {model.bbox_head.cls_out_channels}")
# The label previously read "num_channels" although the value printed is num_classes.
print(f"num_classes = {model.bbox_head.num_classes}")

In [None]:
# Display the NMS settings of the model's test config.
model.test_cfg.nms

In [None]:
# Split the ONNX outputs into three groups of three: 0-2 class scores, 3-5 box predictions, 6-8 objectness.
cls_scores = [out_onnx[i] for i in range(3)]
bbox_preds = [out_onnx[i] for i in range(3, 6)]
objectnesses =[out_onnx[i] for i in range(6, 9)]


In [None]:
# Number of object classes, read from the loaded model's head
cls_out_channels = model.bbox_head.cls_out_channels
num_classes = model.bbox_head.num_classes
print("num_classes", num_classes)

Важное замечание!   
При изменении параметров обучаемой модели - в данном случае при изменении числа классов объектов (по умолчанию = 1) -    
также требуется изменить и класс описания bentoservice (bento_service.py или bento_service_nms.py)    

```
self.yolox_postprocessing = YOLOXPostProcessing(strides = [(8, 8), (16, 16), (32, 32)],
                                                offset = 0, 
                                                num_levels = 3, 
                                                cls_out_channels = 1,
                                                num_classes=1) 
```

In [None]:
# Batch size, taken from the first class-score array.
num_imgs = cls_scores[0].shape[0]
# Collect the "scale_factor" entry of each image's meta dict into one array (one row per image).
scale_factors = np.array([data['img_metas'][img_id][0]["scale_factor"] for img_id in range(num_imgs)])
# scale_factors = np.array([np.array([1.0, 1.0, 1.0, 1.0])*(img_id+1) for img_id in range(num_imgs)], dtype=np.float32)    
scale_factors

In [None]:
# Configure the post-processor from the loaded model instead of repeating literals, so a model
# with different strides or a different number of classes works without editing this cell.
prior_generator = model.bbox_head.prior_generator
yolox_postprocessing = YOLOXPostProcessing(strides = prior_generator.strides,
                                           offset = prior_generator.offset,
                                           num_levels = prior_generator.num_levels,
                                           cls_out_channels = model.bbox_head.cls_out_channels,
                                           num_classes = model.bbox_head.num_classes)

# NOTE(review): the thresholds are literals; compare them with model.test_cfg if the
# NumPy results are expected to match the torch pipeline exactly.
bbox_results = yolox_postprocessing.get_bboxes(cls_scores, bbox_preds, objectnesses, scale_factor=scale_factors, score_thr=0.01, iou_threshold=0.5)
# Print, per image and per class, the shape and contents of the detection array.
for bbox_result in bbox_results:
    for class_bboxes in bbox_result:
        print(class_bboxes.shape)
        print(class_bboxes)

In [None]:
# Visualise the detections produced by the NumPy post-processing of the ONNX outputs.
show_result_pyplot(model, test_image_path, bbox_results[0], score_thr=0.05)

In [None]:
# Reference run: feed the same ONNX outputs, converted to torch tensors, through mmdetection's own get_bboxes.
cls_scores = [torch.Tensor(out_onnx[i]) for i in range(3)]
bbox_preds = [torch.Tensor(out_onnx[i]) for i in range(3, 6)]
objectnesses =[torch.Tensor(out_onnx[i]) for i in range(6, 9)]

results_list = model.bbox_head.get_bboxes(cls_scores, bbox_preds, objectnesses, img_metas= data['img_metas'][0], rescale=True)
# Split into per-class lists with the same helper as the NumPy path; note this overwrites bbox_results.
bbox_results = [yolox_postprocessing.bbox2result(det_bboxes, det_labels, model.bbox_head.num_classes) for det_bboxes, det_labels in results_list]
for bbox_result in bbox_results:
    for bboxes in bbox_result:
        print(bboxes.shape)
        print(bboxes)

In [None]:
# Visualise the reference detections computed by mmdetection's get_bboxes in the previous cell.
show_result_pyplot(model, test_image_path, bbox_results[0], score_thr=0.05)

In [None]:
#11 Stop the Spark session, if a Spark helper exists in this kernel.
# `SparkEnvironment` is never imported in this notebook, so an unconditional
# call would raise NameError on Restart & Run All.
if "SparkEnvironment" in globals():
    SparkEnvironment.stopSparkSession()