In [9]:
from PIL import Image, ImageOps
import glob
import numpy as np
import onnxruntime as ort
import pandas as pd

In [10]:
target_size = 640
path = glob.glob('*.jpg')
img_list = []
for img in path:
    image = Image.open(path[0])
    image.thumbnail((target_size, target_size))
    padded_img = ImageOps.pad(image, (target_size, target_size))
    img_np = np.array(padded_img).astype(np.float32)/255.0
    img_np = img_np.transpose([2, 0, 1])
    img_np = np.expand_dims(img_np, axis=0)
    img_list.append(img_np)

np.array(img_list).shape

(1, 1, 3, 640, 640)

In [11]:
session = ort.InferenceSession('best.onnx')
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

result = session.run([output_name], {input_name: img_list[0]})

In [12]:
result[0].shape

(1, 25200, 7)

## 推論後の処理

In [13]:
# 推論結果
detections = result[0][0]  # [25200, 7]

# 信頼度スコアの閾値
confidence_threshold = 0.6

# クラス確率が最も高いクラスのインデックスを取得
class_ids = np.argmax(detections[:, 5:], axis=1)
class_confidences = np.max(detections[:, 5:], axis=1)

# 信頼度スコアが閾値以上の検出のみを選択
selected = detections[:, 4] > confidence_threshold

# 選択された検出を取得
final_detections = detections[selected]
final_class_ids = class_ids[selected]
final_class_confidences = class_confidences[selected]

# for det, class_id, class_confidence in zip(final_detections, final_class_ids, final_class_confidences):
#     print(det)

In [14]:
selected

array([False, False, False, ..., False, False, False])

In [15]:
class_conf = (class_confidences>0.5)

In [16]:
selected.shape

(25200,)

In [17]:
class_conf.shape

(25200,)

In [18]:
(selected*class_conf).sum()

77

In [19]:
def nms(bboxes, score_threshold, iou_threshold):
    """
    非最大抑制（NMS）を実行します。
    
    :param bboxes: バウンディングボックスのリスト。各ボックスは[x, y, width, height, confidence, class_id]の形式。
    :param score_threshold: 信頼度スコアの閾値。この値以上のボックスのみが考慮されます。
    :param iou_threshold: IoUの閾値。この値以上のボックスは排除されます。
    :return: 抑制後のバウンディングボックスのインデックスのリスト。
    """
    # 信頼度スコアに基づいてフィルタリング
    scores = bboxes[:, 4]
    keep = scores > score_threshold
    bboxes = bboxes[keep]
    scores = scores[keep]  # フィルタリングされたスコアも更新
    
    # バウンディングボックスの座標を[x1, y1, x2, y2]形式に変換（フィルタリング後のbboxesに対して）
    x1 = bboxes[:, 0] - bboxes[:, 2] / 2
    y1 = bboxes[:, 1] - bboxes[:, 3] / 2
    x2 = bboxes[:, 0] + bboxes[:, 2] / 2
    y2 = bboxes[:, 1] + bboxes[:, 3] / 2
    
    # エリアを計算（フィルタリング後のbboxesに対して）
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(iou <= iou_threshold)[0]
        order = order[inds + 1]
    
    return keep


In [20]:
selected_bboxs = np.concatenate((final_detections[:, :5], final_class_ids.reshape(-1, 1)),axis=1)

In [21]:
a = nms(selected_bboxs, 0.6, 0.45)

In [23]:
selected_bboxs[a]

array([[     506.57,      570.86,      45.328,      54.833,     0.96756,           0],
       [     411.23,      98.826,      43.501,      57.181,     0.96155,           0],
       [     452.92,      98.285,      42.485,      58.394,     0.95768,           1],
       [     494.58,      96.644,      42.293,      57.775,     0.94943,           1]])

In [326]:
np.unique(selected_bboxs[:, -1])

array([          0,           1])

In [322]:
bboxes = selected_bboxs.copy()
x1 = bboxes[:, 0] - bboxes[:, 2] / 2
y1 = bboxes[:, 1] - bboxes[:, 3] / 2
x2 = bboxes[:, 0] + bboxes[:, 2] / 2
y2 = bboxes[:, 1] + bboxes[:, 3] / 2


# エリアを計算（フィルタリング後のbboxesに対して）
areas = (x2 - x1) * (y2 - y1)
order = scores.argsort()[::-1]


i = order[0]
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])

w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)


inter = w * h
iou = inter / (areas[i] + areas[order[1:]] - inter)

inds = np.where(iou <= 0.45)[0]

In [323]:
inds

array([ 2,  5,  6,  8, 10, 11, 12, 13, 14, 16, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 36, 37, 39, 40, 41, 42, 44, 45, 46, 48, 49, 50, 52, 53, 54, 55, 56, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 73, 74, 75])

In [324]:
iou

array([    0.95553,     0.96329,           0,     0.97505,     0.96247,           0,           0,      0.9611,           0,     0.96389,           0,           0,           0,           0,           0,     0.96023,           0,      0.9446,           0,           0,           0,           0,           0,     0.95197,
                 0,           0,           0,           0,           0,           0,     0.98044,           0,           0,           0,           0,      0.9659,           0,           0,     0.97687,           0,           0,           0,           0,     0.94639,           0,           0,           0,     0.94655,
                 0,           0,           0,     0.94876,           0,           0,           0,           0,           0,     0.96961,           0,           0,      0.9385,           0,           0,           0,           0,           0,           0,           0,           0,           0,     0.93292,     0.95305,
           0.95588,           0,           

In [4]:
from utils.general import non_max_suppression

In [5]:
non_max_suppression(result)

AttributeError: 'numpy.ndarray' object has no attribute 'device'