## video to imgs   

In [4]:
import cv2

# MP4 비디오 파일 경로
video_path = "nail_videos/source/vid3.mp4"

# 비디오 캡처 객체 생성
cap = cv2.VideoCapture(video_path)

# 저장할 이미지 프레임 카운터
frame_count = 0

while cap.isOpened():
    # 비디오 프레임 읽기
    ret, frame = cap.read()

    # 비디오 읽기가 끝났으면 종료
    if not ret:
        break

    # 프레임 크기 조정
    resized_frame = cv2.resize(frame, (frame.shape[1] // 2, frame.shape[0] // 2))

    # 5프레임 간격으로 이미지 저장
    if frame_count % 5 == 0:
        cv2.imwrite(f"nail_videos/output/vid3_frame_{frame_count}.jpg", resized_frame)

    frame_count += 1

# 비디오 캡처 객체 해제
cap.release()


In [41]:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import copy
import cv2
import numpy as np


class CV2NanoDetONNX(object):
    # NanoDet後処理用定義
    STRIDES = (8, 16, 32)
    REG_MAX = 7
    PROJECT = np.arange(REG_MAX + 1)

    # 標準化用定義
    MEAN = np.array([103.53, 116.28, 123.675], dtype=np.float32)
    MEAN = MEAN.reshape(1, 1, 3)
    STD = np.array([57.375, 57.12, 58.395], dtype=np.float32)
    STD = STD.reshape(1, 1, 3)

    def __init__(
        self,
        model_path='nanodet_m.onnx',
        input_shape=320,
        class_score_th=0.35,
        nms_th=0.6,
    ):
        # 入力サイズ
        self.input_shape = (input_shape, input_shape)

        # 閾値
        self.class_score_th = class_score_th
        self.nms_th = nms_th

        # モデル読み込み        
        self.net = cv2.dnn.readNet('nanodet_finger_v2_sim.onnx')
        self.output_names = [
             "cls_pred_stride_8",
             "dis_pred_stride_8",
             "cls_pred_stride_16",
             "dis_pred_stride_16",
             "cls_pred_stride_32",
             "dis_pred_stride_32"
        ]

        print(self.output_names)
        # ストライド毎のグリッド点を算出
        self.grid_points = []
        for index in range(len(self.STRIDES)):
            grid_point = self._make_grid_point(
                (int(self.input_shape[0] / self.STRIDES[index]),
                 int(self.input_shape[1] / self.STRIDES[index])),
                self.STRIDES[index],
            )
            self.grid_points.append(grid_point)

    def inference(self, image):
        temp_image = copy.deepcopy(image)
        image_height, image_width = image.shape[0], image.shape[1]

        # 前処理：標準化、リシェイプ
        resize_image, new_height, new_width, top, left = self._resize_image(
            temp_image)
        blob = self._pre_process(resize_image)
        self.net.setInput(blob)
        preds = self.net.forward(self.output_names)
        # 後処理：NMS、グリッド->座標変換
        bboxes, scores, class_ids = self._post_process(preds)

        print(bboxes)
        # 後処理：リサイズ前の座標に変換
        ratio_height = image_height / new_height
        ratio_width = image_width / new_width
        for i in range(bboxes.shape[0]):
            bboxes[i, 0] = max(int((bboxes[i, 0] - left) * ratio_width), 0)
            bboxes[i, 1] = max(int((bboxes[i, 1] - top) * ratio_height), 0)
            bboxes[i, 2] = min(
                int((bboxes[i, 2] - left) * ratio_width),
                image_width,
            )
            bboxes[i, 3] = min(
                int((bboxes[i, 3] - top) * ratio_height),
                image_height,
            )
        return bboxes, scores, class_ids

    def _make_grid_point(self, grid_size, stride):
        grid_height, grid_width = grid_size

        shift_x = np.arange(0, grid_width) * stride
        shift_y = np.arange(0, grid_height) * stride

        xv, yv = np.meshgrid(shift_x, shift_y)
        xv = xv.flatten()
        yv = yv.flatten()

        cx = xv + 0.5 * (stride - 1)
        cy = yv + 0.5 * (stride - 1)

        return np.stack((cx, cy), axis=-1)

    def _resize_image(self, image, keep_ratio=True):
        top, left = 0, 0
        new_height, new_width = self.input_shape[0], self.input_shape[1]

        if keep_ratio and image.shape[0] != image.shape[1]:
            hw_scale = image.shape[0] / image.shape[1]
            if hw_scale > 1:
                new_height = self.input_shape[0]
                new_width = int(self.input_shape[1] / hw_scale)

                resize_image = cv2.resize(
                    image,
                    (new_width, new_height),
                    interpolation=cv2.INTER_AREA,
                )

                left = int((self.input_shape[1] - new_width) * 0.5)

                resize_image = cv2.copyMakeBorder(
                    resize_image,
                    0,
                    0,
                    left,
                    self.input_shape[1] - new_width - left,
                    cv2.BORDER_CONSTANT,
                    value=0,
                )
            else:
                new_height = int(self.input_shape[0] * hw_scale)
                new_width = self.input_shape[1]

                resize_image = cv2.resize(
                    image,
                    (new_width, new_height),
                    interpolation=cv2.INTER_AREA,
                )

                top = int((self.input_shape[0] - new_height) * 0.5)

                resize_image = cv2.copyMakeBorder(
                    resize_image,
                    top,
                    self.input_shape[0] - new_height - top,
                    0,
                    0,
                    cv2.BORDER_CONSTANT,
                    value=0,
                )
        else:
            resize_image = cv2.resize(
                image,
                self.input_shape,
                interpolation=cv2.INTER_AREA,
            )

        return resize_image, new_height, new_width, top, left

    def _pre_process(self, image):
        # 標準化
        image = image.astype(np.float32)
        image = (image - self.MEAN) / self.STD

        # リシェイプ
        image = image.transpose(2, 0, 1).astype('float32')
        image = image.reshape(-1, 3, self.input_shape[0], self.input_shape[1])

        return image

    def _softmax(self, x, axis=1):
        x_exp = np.exp(x)
        x_sum = np.sum(x_exp, axis=axis, keepdims=True)
        s = x_exp / x_sum
        return s

    def _post_process(self, predict_results):
        class_scores = predict_results[::2]
        bbox_predicts = predict_results[1::2]
        bboxes, scores, class_ids = self._get_bboxes_single(
            class_scores,
            bbox_predicts,
            1,
            rescale=False,
        )

        return bboxes.astype(np.int32), scores, class_ids

    def _get_bboxes_single(
        self,
        class_scores,
        bbox_predicts,
        scale_factor,
        rescale=False,
        topk=1000,
    ):
        bboxes = []
        scores = []

        # ストライド毎にバウンディングボックスの座標を変換
        for stride, class_score, bbox_predict, grid_point in zip(
                self.STRIDES, class_scores, bbox_predicts, self.grid_points):
            # 次元調整
            if class_score.ndim == 3:
                class_score = class_score.squeeze(axis=0)
            if bbox_predict.ndim == 3:
                bbox_predict = bbox_predict.squeeze(axis=0)

            # バウンディングボックスを相対座標と相対距離に変換
            bbox_predict = bbox_predict.reshape(-1, self.REG_MAX + 1)
            bbox_predict = self._softmax(bbox_predict, axis=1)
            bbox_predict = np.dot(bbox_predict, self.PROJECT).reshape(-1, 4)
            bbox_predict *= stride

            # スコア降順で対象を絞る
            if 0 < topk < class_score.shape[0]:
                max_scores = class_score.max(axis=1)
                topk_indexes = max_scores.argsort()[::-1][0:topk]

                grid_point = grid_point[topk_indexes, :]
                bbox_predict = bbox_predict[topk_indexes, :]
                class_score = class_score[topk_indexes, :]

            # バウンディングボックスを絶対座標に変換
            x1 = grid_point[:, 0] - bbox_predict[:, 0]
            y1 = grid_point[:, 1] - bbox_predict[:, 1]
            x2 = grid_point[:, 0] + bbox_predict[:, 2]
            y2 = grid_point[:, 1] + bbox_predict[:, 3]
            x1 = np.clip(x1, 0, self.input_shape[1])
            y1 = np.clip(y1, 0, self.input_shape[0])
            x2 = np.clip(x2, 0, self.input_shape[1])
            y2 = np.clip(y2, 0, self.input_shape[0])
            bbox = np.stack([x1, y1, x2, y2], axis=-1)

            bboxes.append(bbox)
            scores.append(class_score)

        # スケール調整
        bboxes = np.concatenate(bboxes, axis=0)
        if rescale:
            bboxes /= scale_factor
        scores = np.concatenate(scores, axis=0)

        # Non-Maximum Suppression
        bboxes_wh = bboxes.copy()
        bboxes_wh[:, 2:4] = bboxes_wh[:, 2:4] - bboxes_wh[:, 0:2]
        class_ids = np.argmax(scores, axis=1)
        scores = np.max(scores, axis=1)

        indexes = cv2.dnn.NMSBoxes(
            bboxes_wh.tolist(),
            scores.tolist(),
            self.class_score_th,
            self.nms_th,
        )

        # NMS処理後の件数確認
        if len(indexes) > 0:
            bboxes = bboxes[indexes]
            scores = scores[indexes]
            class_ids = class_ids[indexes]
        else:
            bboxes = np.array([])
            scores = np.array([])
            class_ids = np.array([])

        return bboxes, scores, class_ids


## nanodet check

In [42]:
import cv2



net = CV2NanoDetONNX(model_path="nanodet_finger_v2_sim.onnx")
img = cv2.imread("22.jpg")
res = net.inference(img)
print(res)


['cls_pred_stride_8', 'dis_pred_stride_8', 'cls_pred_stride_16', 'dis_pred_stride_16', 'cls_pred_stride_32', 'dis_pred_stride_32']
[[ 56 189  71 205]
 [178 200 191 215]
 [225 141 234 150]
 [195 153 204 162]
 [252 146 260 155]
 [271 163 278 172]
 [121 185 136 210]
 [  8 139  17 149]
 [ 36 143  47 152]]
(array([[ 84, 223, 106, 247],
       [267, 240, 286, 262],
       [337, 151, 351, 165],
       [292, 169, 306, 183],
       [378, 159, 390, 172],
       [406, 184, 417, 198],
       [181, 217, 204, 255],
       [ 12, 148,  25, 163],
       [ 54, 154,  70, 168]]), array([0.91651934, 0.9011039 , 0.826799  , 0.69342405, 0.6870235 ,
       0.5996295 , 0.5972529 , 0.50495267, 0.4856481 ], dtype=float32), array([0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64))


## nanodet cv2 inference

In [5]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
import copy
import cv2
import cv2_nanodet



def draw_debug(image, elapsed_time, bboxes, scores, class_ids):
    debug_image = copy.deepcopy(image)

    for bbox, score, class_id in zip(bboxes, scores, class_ids):
        x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]

        debug_image = cv2.rectangle(
            debug_image,
            (x1, y1),
            (x2, y2),
            (0, 255, 0),
            thickness=2,
        )

        score = '%.2f' % score
        text = '%s:%s' % (str(class_id), score)
        debug_image = cv2.putText(
            debug_image,
            text,
            (bbox[0], bbox[1] - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0),
            thickness=2,
        )

    text = 'Elapsed time:' + '%.0f' % (elapsed_time * 1000)
    text = text + 'ms'
    debug_image = cv2.putText(
        debug_image,
        text,
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (0, 255, 0),
        thickness=2,
    )

    return debug_image


nanodet = cv2_nanodet.CV2NanoDetONNX(
    model_path="nanodet_finger_v3_sim.onnx",
    input_shape=320,
    class_score_th=0.3,
    nms_th=0.5
)

#rtsp_url = "rtsp://192.168.0.181:8080/video/h264"

#cap = cv2.VideoCapture(0)
#cap = cv2.VideoCapture(rtsp_url)
cap = cv2.VideoCapture("nanodet_nail_test_crop.mp4")

#roi = frame[y:y+h, x:x+w]

while True:
    start_time = time.time()
    ret, frame = cap.read()
    if not ret:
        break
    debug_image = copy.deepcopy(frame)

    bboxes, scores, class_ids = nanodet.inference(frame)
    elapsed_time = time.time() - start_time

    debug_image = draw_debug(
        debug_image,
        elapsed_time,
        bboxes,
        scores,
        class_ids,
    )

    key = cv2.waitKey(1)
    if key == 27:  # ESC
        break
    cv2.imshow('NanoDet Sample', debug_image)
cap.release()
cv2.destroyAllWindows()


## skip frames

In [20]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
import copy
import cv2
import cv2_nanodet


def draw_debug(image, elapsed_time, bboxes, scores, class_ids):
    debug_image = copy.deepcopy(image)

    for bbox, score, class_id in zip(bboxes, scores, class_ids):
        x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]

        debug_image = cv2.rectangle(
            debug_image,
            (x1, y1),
            (x2, y2),
            (0, 255, 0),
            thickness=2,
        )

        score = '%.2f' % score
        text = '%s:%s' % (str(class_id), score)
        debug_image = cv2.putText(
            debug_image,
            text,
            (bbox[0], bbox[1] - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0),
            thickness=2,
        )

    text = 'Elapsed time:' + '%.0f' % (elapsed_time * 1000)
    text = text + 'ms'
    debug_image = cv2.putText(
        debug_image,
        text,
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (0, 255, 0),
        thickness=2,
    )

    return debug_image


nanodet = cv2_nanodet.CV2NanoDetONNX(
    model_path="nanodet_finger_v3_sim.onnx",
    input_shape=320,
    class_score_th=0.3,
    nms_th=0.6
)

#cap = cv2.VideoCapture(0)
rtsp_url = "rtsp://192.168.0.181:8080/video/h264"
cap = cv2.VideoCapture(rtsp_url)
#cap = cv2.VideoCapture("nanodet_nail_test_crop.mp4")

fps = cap.get(cv2.CAP_PROP_FPS)
# 영상 재생 시간 조정
# 원래의 FPS에 맞게 딜레이를 계산하여 대기
delay = int(1000 / fps) / 1000

elapsed_time = 0
while True:
    ret, frame = cap.read()

    if elapsed_time > delay:
        elapsed_time = elapsed_time - delay
        if elapsed_time <= 0:
            pass
        else:
           continue

    if not ret:
        break

    start_time = cv2.getTickCount()

    debug_image = copy.deepcopy(frame)


    bboxes, scores, class_ids = nanodet.inference(frame)

    debug_image = draw_debug(
        debug_image,
        elapsed_time,
        bboxes,
        scores,
        class_ids,
    )


    elapsed_time = (cv2.getTickCount() - start_time) / cv2.getTickFrequency()  




    key = cv2.waitKey(1)
    if key == 27:  # ESC
        break
    cv2.imshow('NanoDet Sample', debug_image)
cap.release()
cv2.destroyAllWindows()


## inference hand roi

In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import time
import copy
import cv2
import cv2_nanodet

# 살색 영역의 범위 지정 (HSV 색 공간)
lower_skin = np.array([0, 20, 70], dtype=np.uint8)
upper_skin = np.array([20, 255, 255], dtype=np.uint8)

def get_color_filtered_boxes(image):
    # 이미지를 HSV 색 공간으로 변환
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # 살색 영역을 마스크로 만들기
    skin_mask = cv2.inRange(hsv_image, lower_skin, upper_skin)
    # 모폴로지 연산을 위한 구조 요소 생성
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    # 모폴로지 열림 연산 적용
    skin_mask = cv2.morphologyEx(skin_mask, cv2.MORPH_OPEN, kernel)

    # 마스크를 이용하여 살색 영역 추출
    skin_image = cv2.bitwise_and(image, image, mask=skin_mask)

    # 살색 영역에 대한 바운딩 박스 추출
    contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    bounding_boxes = [cv2.boundingRect(cnt) for cnt in contours]
        
    # 크기가 작은 박스와 큰 박스 제거
    color_boxes = []
    for (x, y, w, h) in bounding_boxes:
        if w * h > 100 * 100:
            # 약간 박스 더크게
            color_boxes.append((x - 10, y - 10, w + 20, h + 20))

    return color_boxes, skin_image

def draw_debug(image, elapsed_time, bboxes, scores, class_ids):
    debug_image = copy.deepcopy(image)

    for bbox, score, class_id in zip(bboxes, scores, class_ids):
        x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]

        debug_image = cv2.rectangle(
            debug_image,
            (x1, y1),
            (x2, y2),
            (0, 255, 0),
            thickness=2,
        )

        score = '%.2f' % score
        text = '%s:%s' % (str(class_id), score)
        debug_image = cv2.putText(
            debug_image,
            text,
            (bbox[0], bbox[1] - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0),
            thickness=2,
        )

    text = 'Elapsed time:' + '%.0f' % (elapsed_time * 1000)
    text = text + 'ms'
    debug_image = cv2.putText(
        debug_image,
        text,
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (0, 255, 0),
        thickness=2,
    )

    return debug_image

def draw_debug_roi(image, bboxes, scores, class_ids, x, y):
    #debug_image = copy.deepcopy(image)

    for bbox, score, class_id in zip(bboxes, scores, class_ids):
        x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]

        image = cv2.rectangle(
            image,
            (x1 + x, y1 + y),
            (x2 + x, y2 + y),
            (0, 255, 0),
            thickness=2,
        )

        score = '%.2f' % score
        text = '%s:%s' % (str(class_id), score)
        image = cv2.putText(
            image,
            text,
            (bbox[0] + x, bbox[1] - 10 + y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0),
            thickness=2,
        )
    
    return image


nanodet = cv2_nanodet.CV2NanoDetONNX(
    model_path="nanodet_finger_v3_sim_fp16.onnx",
    input_shape=320,
    class_score_th=0.3,
    nms_th=0.6
)

#cap = cv2.VideoCapture(0)
rtsp_url = "rtsp://192.168.0.181:8080/video/h264"
cap = cv2.VideoCapture(rtsp_url)
#cap = cv2.VideoCapture("nanodet_nail_test_crop.mp4")

fps = cap.get(cv2.CAP_PROP_FPS)
# 영상 재생 시간 조정
# 원래의 FPS에 맞게 딜레이를 계산하여 대기
delay = int(1000 / fps) / 1000

elapsed_time = 0
while True:
    ret, frame = cap.read()

    if elapsed_time > delay:
        elapsed_time = elapsed_time - delay
        if elapsed_time <= 0:
            pass
        else:
           continue

    if not ret:
        break

    start_time = cv2.getTickCount()

    debug_image = copy.deepcopy(frame)

    # 크기가 작은 박스와 큰 박스 제거
    color_boxes, skin_image = get_color_filtered_boxes(debug_image)

    bboxes, scores, class_ids = nanodet.inference(frame)

    debug_image = draw_debug(
        debug_image,
        elapsed_time,
        bboxes,
        scores,
        class_ids,
    )

    infer_time_start = time.time()
    for (x, y, w, h) in color_boxes:
        roi_img = skin_image[y:y+h, x:x+h]
        bboxes, scores, class_ids = nanodet.inference(roi_img)
        draw_debug_roi(skin_image, bboxes, scores, class_ids, x, y)
    infer_time_end = time.time()

    skin_image = cv2.putText(
        skin_image,
        f"one hand infer time : {infer_time_end - infer_time_start} s",
        (10, 50),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (0, 255, 0),
        thickness=2,
    )




    for (x, y, w, h) in color_boxes:
        cv2.rectangle(skin_image, (x, y), (x + w, y + h), (0, 255, 0), 2)


    elapsed_time = (cv2.getTickCount() - start_time) / cv2.getTickFrequency()  
    key = cv2.waitKey(1)
    if key == 27:  # ESC
        break

    debug_image = cv2.resize(debug_image, (480, 360))
    skin_image = cv2.resize(skin_image, (480, 360))

    cv2.imshow('NanoDet Sample', debug_image)
    cv2.imshow('skin_image', skin_image)

cap.release()
cv2.destroyAllWindows()


## simplify

In [3]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import time
import copy
import cv2
import cv2_nanodet


nanodet = cv2_nanodet.CV2NanoDetONNX(
    model_path="nanodet_finger_v3_sim_fp16.onnx",
    input_shape=320,
    class_score_th=0.3,
    nms_th=0.6
)

#cap = cv2.VideoCapture(0)
rtsp_url = "rtsp://192.168.0.181:8080/video/h264"
cap = cv2.VideoCapture(rtsp_url)
#cap = cv2.VideoCapture("nanodet_nail_test_crop.mp4")

fps = cap.get(cv2.CAP_PROP_FPS)
# 영상 재생 시간 조정
# 원래의 FPS에 맞게 딜레이를 계산하여 대기
delay = int(1000 / fps) / 1000

elapsed_time = 0
while True:
    ret, frame = cap.read()

    if elapsed_time > delay:
        elapsed_time = elapsed_time - delay
        if elapsed_time <= 0:
            pass
        else:
           continue

    if not ret:
        break

    debug_image = copy.deepcopy(frame)
    color_boxes, skin_image = cv2_nanodet.get_color_filtered_boxes(debug_image)

    infer_time_start = time.time()
    for (x, y, w, h) in color_boxes:
        roi_img = skin_image[y:y+h, x:x+h]
        bboxes, scores, class_ids = nanodet.inference(roi_img)
        cv2_nanodet.draw_debug_roi(skin_image, bboxes, scores, class_ids, x, y)
    infer_time_end = time.time()

    skin_image = cv2.putText(
        skin_image,
        f"one hand infer time : {infer_time_end - infer_time_start} s",
        (10, 50),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (0, 255, 0),
        thickness=2,
    )

    for (x, y, w, h) in color_boxes:
        cv2.rectangle(skin_image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    key = cv2.waitKey(1)
    if key == 27:  # ESC
        break

    debug_image = cv2.resize(debug_image, (480, 360))
    skin_image = cv2.resize(skin_image, (480, 360))

    cv2.imshow('NanoDet Sample', debug_image)
    cv2.imshow('skin_image', skin_image)

cap.release()
cv2.destroyAllWindows()


[[0.00733506]
 [0.00295116]
 [0.00442629]
 [0.00171057]
 [0.00158401]
 [0.00109863]
 [0.00068868]
 [0.00037091]
 [0.00029846]
 [0.00015663]]
[[0.00750954]
 [0.00297057]
 [0.00437735]
 [0.00167494]
 [0.00152141]
 [0.00103325]
 [0.00058418]
 [0.00027402]
 [0.00022169]
 [0.00013437]]
[[0.00771139]
 [0.0030244 ]
 [0.00444149]
 [0.00164013]
 [0.00148171]
 [0.00099715]
 [0.00057502]
 [0.00025396]
 [0.00017666]
 [0.00011535]]
[[7.7780918e-03]
 [3.1702737e-03]
 [4.7816606e-03]
 [1.7483699e-03]
 [1.5197913e-03]
 [9.8865188e-04]
 [5.8167404e-04]
 [2.7456434e-04]
 [1.5175754e-04]
 [4.9984221e-05]]
[[7.7708680e-03]
 [3.1589591e-03]
 [4.8009502e-03]
 [1.7664058e-03]
 [1.5341437e-03]
 [9.8712952e-04]
 [5.9562619e-04]
 [2.7510227e-04]
 [1.3616977e-04]
 [5.1548632e-05]]
[[7.4530561e-03]
 [3.0583446e-03]
 [4.7263182e-03]
 [1.7354377e-03]
 [1.5492834e-03]
 [1.0687057e-03]
 [6.9712067e-04]
 [3.0303060e-04]
 [1.8284995e-04]
 [6.6438763e-05]]
[[7.3200790e-03]
 [3.0866663e-03]
 [4.8983116e-03]
 [1.8134189e-

## merge 2 hands to 1 infer img

In [4]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import time
import copy
import cv2
import cv2_nanodet


nanodet = cv2_nanodet.CV2NanoDetONNX(
    model_path="nanodet_finger_v3_sim_fp16.onnx",
    input_shape=320,
    class_score_th=0.3,
    nms_th=0.6
)

#cap = cv2.VideoCapture(0)
rtsp_url = "rtsp://192.168.0.181:8080/video/h264"
cap = cv2.VideoCapture(rtsp_url)
#cap = cv2.VideoCapture("nanodet_nail_test_crop.mp4")

fps = cap.get(cv2.CAP_PROP_FPS)
# 영상 재생 시간 조정
# 원래의 FPS에 맞게 딜레이를 계산하여 대기
delay = int(1000 / fps) / 1000

elapsed_time = 0
while True:
    ret, frame = cap.read()
    start_time = cv2.getTickCount()

    if elapsed_time > delay:
        elapsed_time = elapsed_time - delay
        if elapsed_time <= 0:
            pass
        else:
           continue

    if not ret:
        break

    debug_image = copy.deepcopy(frame)
    color_boxes, skin_image = cv2_nanodet.get_color_filtered_boxes(debug_image)

    infer_time_start = time.time()
    for (x, y, w, h) in color_boxes:
        roi_img = skin_image[y:y+h, x:x+h]
        bboxes, scores, class_ids = nanodet.inference(roi_img)
        cv2_nanodet.draw_debug_roi(skin_image, bboxes, scores, class_ids, x, y)
    infer_time_end = time.time()

    skin_image = cv2.putText(
        skin_image,
        f"one hand infer time : {infer_time_end - infer_time_start} s",
        (10, 50),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.7,
        (0, 255, 0),
        thickness=2,
    )

    for (x, y, w, h) in color_boxes:
        cv2.rectangle(skin_image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    key = cv2.waitKey(1)
    if key == 27:  # ESC
        break

    debug_image = cv2.resize(debug_image, (480, 360))
    skin_image = cv2.resize(skin_image, (480, 360))



    elapsed_time = (cv2.getTickCount() - start_time) / cv2.getTickFrequency()  
    cv2.imshow('NanoDet Sample', debug_image)
    cv2.imshow('skin_image', skin_image)

cap.release()
cv2.destroyAllWindows()


ZeroDivisionError: division by zero

: 

## test for cpp conversion

In [4]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import time
import copy
import cv2
import cv2_nanodet


nanodet = cv2_nanodet.CV2NanoDetONNX(
    model_path="nanodet_finger_v3_sim_fp16.onnx",
    input_shape=320,
    class_score_th=0.3,
    nms_th=0.6
)

img = cv2.imread("test.jpg")

img = cv2.resize(img, (640, 480))


bboxes, scores, class_ids = nanodet.inference(img)
cv2_nanodet.draw_debug_roi(img, bboxes, scores, class_ids, 0, 0)



print(bboxes.shape)
print(scores.shape)
print(class_ids.shape)

cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()



[[0.00555824]
 [0.0037716 ]
 [0.005579  ]
 [0.00335624]
 [0.0045167 ]
 [0.00485691]
 [0.00501244]
 [0.0052529 ]
 [0.00587563]
 [0.00676904]]
(11, 4)
(11,)
(11,)
