In [1]:
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
import os
import sys

sys.path.append("..")

import mmcv
from pyk4a import PyK4A
import cv2 as cv
import torch
import numpy as np
from mmdet.apis import inference_detector, init_detector
from mmrotate.registry import VISUALIZERS
from mmrotate.utils import register_all_modules
from mmcv.ops import nms_rotated
from mmdet.structures import DetDataSample
from mmengine.structures import InstanceData
import time

# Clip limits used to normalise raw sensor readings into [0, 1].
IR_MAX_CLIP = 3000
DEPTH_MAX_CLIP = 1500
DEPTH_MIN_CLIP = 800


def _normalize_ir(img):
    """Clip IR values to [0, IR_MAX_CLIP] and scale them into [0, 1]."""
    return np.clip(img, a_min=0, a_max=IR_MAX_CLIP) / float(IR_MAX_CLIP)


def _normalize_depth(img):
    """Clip depth to [DEPTH_MIN_CLIP, DEPTH_MAX_CLIP] and scale into [0, 1]."""
    depth_span = float(DEPTH_MAX_CLIP - DEPTH_MIN_CLIP)
    clipped = np.clip(img, a_min=DEPTH_MIN_CLIP, a_max=DEPTH_MAX_CLIP)
    return (clipped - DEPTH_MIN_CLIP) / depth_span


def get_3_channel_feature(ir_img, apply_padding=True, use_depth=False,
                          depth_img=None):
    """Build a 3-channel image from an IR frame (and optionally a depth frame).

    Without depth, the normalised IR image is repeated on all three channels.
    With depth, the channels are: normalised IR, normalised depth, and
    ``normalised_ir ** normalised_depth``.

    Args:
        ir_img: 2-D array of raw IR readings.
        apply_padding: When True, the result is placed in the top-left corner
            of a zero-filled square whose side is the larger image dimension.
        use_depth: When True, combine the IR frame with ``depth_img``.
        depth_img: 2-D array of raw depth readings with the same shape as
            ``ir_img``. Required when ``use_depth`` is True.

    Returns:
        A float array of shape ``(H, W, 3)``, or ``(S, S, 3)`` with
        ``S = max(H, W)`` when ``apply_padding`` is True. Values lie in
        [0, 1].

    Raises:
        ValueError: If ``use_depth`` is True but no ``depth_img`` is given.
    """
    ir_channel = _normalize_ir(ir_img)

    if use_depth:
        if depth_img is None:
            raise ValueError("depth_img is required when use_depth=True")
        depth_channel = _normalize_depth(depth_img)
        channels = [ir_channel, depth_channel, ir_channel ** depth_channel]
    else:
        channels = [ir_channel] * 3
    img = np.stack(channels, axis=-1)

    if not apply_padding:
        return img

    # Pad bottom/right with zeros so the output is square.
    h, w = img.shape[0], img.shape[1]
    big_size = max(h, w)
    result = np.zeros((big_size, big_size, 3))
    result[0:h, 0:w] = img
    return result



def parse_args():
    """Parse the demo's command-line options.

    Positional arguments are ``config`` and ``checkpoint``; optional ones are
    ``--out-folder``, ``--device``, ``--palette`` and ``--score-thr``.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    cli = ArgumentParser()

    # Required positional arguments.
    for arg_name, description in (
        ('config', 'Config file'),
        ('checkpoint', 'Checkpoint file'),
    ):
        cli.add_argument(arg_name, help=description)

    # Optional settings.
    cli.add_argument('--out-folder', default=None, help='Path to output file')
    cli.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    palette_choices = ['dota', 'sar', 'hrsc', 'random']
    cli.add_argument(
        '--palette',
        default='dota',
        choices=palette_choices,
        help='Color palette used for visualization')
    cli.add_argument(
        '--score-thr', type=float, default=0.3, help='bbox score threshold')

    return cli.parse_args()


def _nms_filter_instances(instances, iou_threshold):
    """Apply rotated NMS to predictions and return the survivors on CPU.

    Args:
        instances: Prediction container exposing ``bboxes``, ``labels`` and
            ``scores`` tensors.
        iou_threshold: IoU threshold passed to ``nms_rotated``.

    Returns:
        Tuple ``(boxes, labels, scores)`` of CPU tensors for the kept
        detections; all three are empty when there is nothing to keep.
    """
    # Move everything to the CPU once instead of once per kept detection.
    boxes = instances.bboxes.cpu()
    labels = instances.labels.cpu()
    scores = instances.scores.cpu()

    keep = None
    if boxes.shape[0] > 0:
        _, keep = nms_rotated(boxes, scores, iou_threshold)
    if keep is None:
        # No detections: use an empty index so downstream tensors stay valid
        # instead of crashing on iteration / torch.stack of an empty list.
        keep = torch.zeros(0, dtype=torch.long)
    keep = keep.reshape(-1)

    return boxes[keep], labels[keep], scores[keep].to(torch.float32)


def main(args):
    """Run live rotated-object detection on the Kinect IR stream.

    Frames are grabbed from the camera, converted to a 3-channel feature
    image, passed through the detector, filtered with rotated NMS and shown
    in an OpenCV window. Keys: ``Esc`` quits, ``a`` / ``d`` lower / raise the
    score threshold by 0.01.

    Args:
        args: Namespace as produced by ``parse_args`` (``config``,
            ``checkpoint``, ``device``, ``palette``, ``score_thr`` are read),
            or ``None`` to use the built-in model setup on ``cuda:0``.
    """
    # register all modules in mmrotate into the registries
    register_all_modules()

    model_setups = [
        (
            "../../models_360/custom_rretina_ir_512_360.py",
            "../../models_360/custom_rretina_ir_512_360.pth"
        ),
        (
            "../../models_360/custom_rretina_ir_512_360_tiny.py",
            "../../models_360/custom_rretina_ir_512_360_tiny.pth"
        ),
        (
            "../../models_360/custom_rretina_ir_340_360_tiny.py",
            "../../models_360/custom_rretina_ir_340_360_tiny.pth"
        ),
        ("../../models_360/new_lusee_rotated_rtmdet.py", "../../models_360/new_lusee_rotated_rtmdet.pth"),
        ("../../models_360/new_lusee_rotated_rtmdet_tiny.py", "../../models_360/new_lusee_rotated_rtmdet_tiny.pth"),
        ("../../models_360/total_ds_rotated_rtmdet.py", "../../models_360/total_ds_rotated_rtmdet.pth"),
        ("../../models_360/total_ds_big_rotated_rtmdet.py", "../../models_360/total_ds_big_rotated_rtmdet.pth"),
        ("../../models_360/total_ds_big_rotated_rtmdet_experiment.py", "../../models_360/total_ds_big_rotated_rtmdet.pth")
    ]

    if args is None:
        # Built-in defaults: last entry of the setup table.
        config_path, checkpoint_path = model_setups[-1]
        device = 'cuda:0'
        palette = 'dota'
        score_threshold = 0.05
    else:
        config_path, checkpoint_path = args.config, args.checkpoint
        device = args.device
        palette = args.palette
        score_threshold = args.score_thr

    # build the model from a config file and a checkpoint file
    model = init_detector(config_path, checkpoint_path, palette=palette, device=device)
    visualizer = VISUALIZERS.build(model.cfg.visualizer)
    visualizer.dataset_meta = model.dataset_meta

    font = cv.FONT_HERSHEY_SIMPLEX
    iou_threshold = 0.01

    # Load camera with the default config
    k4a = PyK4A()
    k4a.start()

    try:
        while True:
            capture = k4a.get_capture()
            ir_image = capture.ir
            if ir_image is None:
                # No IR data in this capture; wait for the next one.
                continue

            # Flip vertically, build the padded feature, scale to 0..255.
            feature = get_3_channel_feature(ir_image[::-1]) * 255
            # NOTE(review): this unpacking expects inference_detector to
            # return (result, inference_time); confirm the installed mmdet
            # variant does so.
            result, inf_time = inference_detector(model, feature)
            inf_time = round(inf_time, 1)
            img = feature.copy()

            kept_boxes, kept_labels, kept_scores = _nms_filter_instances(
                result.pred_instances, iou_threshold)

            print(len(kept_boxes))
            for i, box in enumerate(kept_boxes):
                print(i, box.shape)

            # Describe the frame actually being drawn rather than a fixed size.
            frame_hw = tuple(img.shape[:2])
            img_meta = dict(img_shape=frame_hw, pad_shape=frame_hw)
            instance_data = InstanceData(metainfo=img_meta)
            instance_data.bboxes = kept_boxes
            instance_data.labels = kept_labels
            instance_data.scores = kept_scores

            # NOTE(review): show=True with wait_time=0 may block until a key
            # is pressed in the visualizer's own window — confirm intended.
            visualizer.add_datasample(
                'result',
                img,
                data_sample=DetDataSample(pred_instances=instance_data),
                draw_gt=False,
                show=True,
                wait_time=0,
                pred_score_thr=score_threshold,
            )
            vis = visualizer.get_image()
            vis = cv.resize(vis, (1200, 1200))
            cv.putText(vis, f"Latency in MS: {inf_time}", (30, 30), font, 1, (0, 255, 0), 2, cv.LINE_AA)
            cv.putText(vis, f"Score threshold: {score_threshold}", (30, 60), font, 1, (0, 255, 0), 2, cv.LINE_AA)
            cv.imshow("Visualization", vis)

            key = cv.waitKey(1)
            if key == 27:  # Esc
                break
            elif key == ord("a"):
                # Round to avoid float drift; clamp to the valid score range.
                score_threshold = round(max(0.0, score_threshold - 0.01), 2)
            elif key == ord("d"):
                score_threshold = round(min(1.0, score_threshold + 0.01), 2)
    finally:
        # Always release the window and the camera, even if a frame fails.
        cv.destroyAllWindows()
        k4a.close()

# Start the demo only when run directly (script or notebook cell), so that
# importing this module does not open the camera or load the model.
if __name__ == "__main__":
    main(None)


Loads checkpoint by local backend from path: ../../models_360/total_ds_big_rotated_rtmdet.pth


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


4
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])




4
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])




3
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
6
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
4 torch.Size([5])
5 torch.Size([5])
4
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
5
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
4 torch.Size([5])
4
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
4
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
4
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
4
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
6
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
4 torch.Size([5])
5 torch.Size([5])
4
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
3
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
5
0 torch.Size([5])
1 torch.Size([5])
2 torch.Size([5])
3 torch.Size([5])
4 

: 

: 

: 