# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from collections.abc import Sequence
from copy import deepcopy

import numpy as np
import torch

from monai.config.type_definitions import DtypeLike, NdarrayOrTensor, NdarrayTensor
from monai.data.box_utils import COMPUTE_DTYPE, TO_REMOVE, get_spatial_dims
from monai.transforms import Resize
from monai.transforms.utils import create_scale
from monai.utils import look_up_option
from monai.utils.misc import ensure_tuple, ensure_tuple_rep
from monai.utils.type_conversion import convert_data_type, convert_to_dst_type


def _apply_affine_to_points(points: torch.Tensor, affine: torch.Tensor, include_shift: bool = True) -> torch.Tensor:
"""
This internal function applies an affine matrix to the point coordinates
Args:
points: point coordinates, Nx2 or Nx3 torch tensor or ndarray, representing [x, y] or [x, y, z]
affine: affine matrix to be applied to the point coordinates, sized (spatial_dims+1,spatial_dims+1)
include_shift: default True, whether to apply the translation (shift) in the affine transform
Returns:
transformed point coordinates, with same data type as ``points``, does not share memory with ``points``
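Example:
.. code-block:: python

# a minimal illustrative sketch (hypothetical values): translate 2D points by (1, 2)
points = torch.tensor([[0.0, 0.0], [1.0, 1.0]])
affine = torch.tensor([[1.0, 0.0, 1.0], [0.0, 1.0, 2.0], [0.0, 0.0, 1.0]])
_apply_affine_to_points(points, affine)  # expected: tensor([[1., 2.], [2., 3.]])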
"""
spatial_dims = get_spatial_dims(points=points)
# compute new points
if include_shift:
# append 1 to form Nx(spatial_dims+1) vector, then transpose
points_affine = torch.cat(
[points, torch.ones(points.shape[0], 1, device=points.device, dtype=points.dtype)], dim=1
).transpose(0, 1)
# apply affine
points_affine = torch.matmul(affine, points_affine)
# remove appended 1 and transpose back
points_affine = points_affine[:spatial_dims, :].transpose(0, 1)
else:
points_affine = points.transpose(0, 1)
points_affine = torch.matmul(affine[:spatial_dims, :spatial_dims], points_affine)
points_affine = points_affine.transpose(0, 1)
return points_affine


def apply_affine_to_boxes(boxes: NdarrayTensor, affine: NdarrayOrTensor) -> NdarrayTensor:
"""
This function applies an affine matrix to the boxes
Args:
boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be StandardMode
affine: affine matrix to be applied to the box coordinates, sized (spatial_dims+1,spatial_dims+1)
Returns:
the affine-transformed boxes, with same data type as ``boxes``; does not share memory with ``boxes``
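Example:
.. code-block:: python

# a minimal illustrative sketch (hypothetical values): scale the x axis by 2
boxes = torch.tensor([[1.0, 1.0, 2.0, 2.0]])
affine = torch.tensor([[2.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
apply_affine_to_boxes(boxes, affine)  # expected: tensor([[2., 1., 4., 2.]])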
"""
# convert numpy to tensor if needed
boxes_t, *_ = convert_data_type(boxes, torch.Tensor)
# some operations do not support torch.float16,
# so convert to float32
boxes_t = boxes_t.to(dtype=COMPUTE_DTYPE)
affine_t, *_ = convert_to_dst_type(src=affine, dst=boxes_t)
spatial_dims = get_spatial_dims(boxes=boxes_t)
# affine transform the left-top and right-bottom points
# they might be flipped, so lt may no longer be the left-top corner
lt: torch.Tensor = _apply_affine_to_points(boxes_t[:, :spatial_dims], affine_t, include_shift=True)
rb: torch.Tensor = _apply_affine_to_points(boxes_t[:, spatial_dims:], affine_t, include_shift=True)
# make sure lt_new is the left-top corner and rb_new is the right-bottom corner
lt_new, _ = torch.min(torch.stack([lt, rb], dim=2), dim=2)
rb_new, _ = torch.max(torch.stack([lt, rb], dim=2), dim=2)
boxes_t_affine = torch.cat([lt_new, rb_new], dim=1)
# convert tensor back to numpy if needed
boxes_affine: NdarrayOrTensor
boxes_affine, *_ = convert_to_dst_type(src=boxes_t_affine, dst=boxes)
return boxes_affine # type: ignore[return-value]


def zoom_boxes(boxes: NdarrayTensor, zoom: Sequence[float] | float) -> NdarrayTensor:
"""
Zoom boxes
Args:
boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be StandardMode
zoom: The zoom factor along the spatial axes.
If a float, zoom is the same for each spatial axis.
If a sequence, zoom should contain one value for each spatial axis.
Returns:
zoomed boxes, with same data type as ``boxes``, does not share memory with ``boxes``
Example:
.. code-block:: python
boxes = torch.ones(1,4)
zoom_boxes(boxes, zoom=[0.5,2.2]) # will return tensor([[0.5, 2.2, 0.5, 2.2]])
"""
spatial_dims = get_spatial_dims(boxes=boxes)
# generate affine transform corresponding to ``zoom``
affine = create_scale(spatial_dims=spatial_dims, scaling_factor=zoom)
return apply_affine_to_boxes(boxes=boxes, affine=affine)


def resize_boxes(
boxes: NdarrayOrTensor, src_spatial_size: Sequence[int] | int, dst_spatial_size: Sequence[int] | int
) -> NdarrayOrTensor:
"""
Resize boxes when the corresponding image is resized
Args:
boxes: source bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``
src_spatial_size: source image spatial size.
dst_spatial_size: target image spatial size.
Returns:
resized boxes, with same data type as ``boxes``, does not share memory with ``boxes``
Example:
.. code-block:: python
boxes = torch.ones(1,4)
src_spatial_size = [100, 100]
dst_spatial_size = [128, 256]
resize_boxes(boxes, src_spatial_size, dst_spatial_size) # will return tensor([[1.28, 2.56, 1.28, 2.56]])
"""
spatial_dims: int = get_spatial_dims(boxes=boxes)
src_spatial_size = ensure_tuple_rep(src_spatial_size, spatial_dims)
dst_spatial_size = ensure_tuple_rep(dst_spatial_size, spatial_dims)
zoom = [dst_spatial_size[axis] / float(src_spatial_size[axis]) for axis in range(spatial_dims)]
return zoom_boxes(boxes=boxes, zoom=zoom)


def flip_boxes(
boxes: NdarrayTensor, spatial_size: Sequence[int] | int, flip_axes: Sequence[int] | int | None = None
) -> NdarrayTensor:
"""
Flip boxes when the corresponding image is flipped
Args:
boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``
spatial_size: image spatial size.
flip_axes: spatial axes along which to flip over. Default is None.
The default ``flip_axes=None`` will flip over all of the spatial axes.
If ``flip_axes`` is negative it counts from the last to the first axis.
If ``flip_axes`` is a tuple of ints, flipping is performed on all of the axes
specified in the tuple.
Returns:
flipped boxes, with same data type as ``boxes``, does not share memory with ``boxes``
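Example:
.. code-block:: python

# a minimal illustrative sketch (hypothetical values, assumes TO_REMOVE == 0.0)
boxes = torch.tensor([[0.0, 0.0, 2.0, 3.0]])
flip_boxes(boxes, spatial_size=[10, 10], flip_axes=0)  # expected: tensor([[8., 0., 10., 3.]])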
"""
spatial_dims: int = get_spatial_dims(boxes=boxes)
spatial_size = ensure_tuple_rep(spatial_size, spatial_dims)
if flip_axes is None:
flip_axes = tuple(range(0, spatial_dims))
flip_axes = ensure_tuple(flip_axes)
# flip box
_flip_boxes: NdarrayTensor = boxes.clone() if isinstance(boxes, torch.Tensor) else deepcopy(boxes) # type: ignore[assignment]
for axis in flip_axes:
_flip_boxes[:, axis + spatial_dims] = spatial_size[axis] - boxes[:, axis] - TO_REMOVE
_flip_boxes[:, axis] = spatial_size[axis] - boxes[:, axis + spatial_dims] - TO_REMOVE
return _flip_boxes


def convert_box_to_mask(
boxes: NdarrayOrTensor,
labels: NdarrayOrTensor,
spatial_size: Sequence[int] | int,
bg_label: int = -1,
ellipse_mask: bool = False,
) -> NdarrayOrTensor:
"""
Convert boxes to an int16 mask image, which has the same size as the input image.
Args:
boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
labels: classification foreground(fg) labels corresponding to `boxes`, dtype should be int, sized (N,).
spatial_size: image spatial size.
bg_label: background label for the output mask image; make sure it is smaller than any fg labels.
ellipse_mask: bool.
- If True, it assumes the object shape is close to an ellipse or ellipsoid.
- If False, it assumes the object shape is close to a rectangle or cube and occupies the bounding box well.
- If users are going to apply random rotation as data augmentation, we suggest setting ``ellipse_mask=True``.
See also Kalra et al. "Towards Rotation Invariance in Object Detection", ICCV 2021.
Returns:
- int16 array, sized (num_box, H, W). Each channel represents a box.
The foreground region in channel c has intensity of labels[c].
The background intensity is bg_label.
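Example:
.. code-block:: python

# a minimal illustrative sketch (hypothetical values): one 2D box with label 1
boxes = torch.tensor([[1.0, 1.0, 3.0, 3.0]])
labels = torch.tensor([1])
mask = convert_box_to_mask(boxes, labels, spatial_size=[4, 4])
# expected: mask.shape == (1, 4, 4), mask[0, 1:3, 1:3] == 1, and -1 (bg_label) elsewhere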
"""
spatial_dims: int = get_spatial_dims(boxes=boxes)
spatial_size = ensure_tuple_rep(spatial_size, spatial_dims)
# if no box, return empty mask
if labels.shape[0] == 0:
boxes_mask_np = np.ones((1,) + spatial_size, dtype=np.int16) * np.int16(bg_label)
boxes_mask, *_ = convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)
return boxes_mask
# bg_label should be smaller than labels
if bg_label >= min(labels):
raise ValueError(
f"bg_label should be smaller than any foreground box labels.\n"
f"min(labels)={min(labels)}, while bg_label={bg_label}"
)
if labels.shape[0] != boxes.shape[0]:
raise ValueError("Number of labels should equal the number of boxes.")
# allocate memory for boxes_mask_np
boxes_mask_np = np.ones((labels.shape[0],) + spatial_size, dtype=np.int16) * np.int16(bg_label)
boxes_np: np.ndarray = convert_data_type(boxes, np.ndarray, dtype=np.int32)[0]
if np.any(boxes_np[:, spatial_dims:] > np.array(spatial_size)):
raise ValueError("Some boxes are larger than the image.")
labels_np, *_ = convert_to_dst_type(src=labels, dst=boxes_np)
for b in range(boxes_np.shape[0]):
# generate a foreground mask
box_size = [boxes_np[b, axis + spatial_dims] - boxes_np[b, axis] for axis in range(spatial_dims)]
if ellipse_mask:
# initialize a square/cube mask
max_box_size = max(box_size) # max of box w/h/d
radius = max_box_size / 2.0
center = (max_box_size - 1) / 2.0
boxes_only_mask = np.ones([max_box_size] * spatial_dims, dtype=np.int16) * np.int16(bg_label)
# apply label intensity to generate circle/ball foreground
ranges = tuple(slice(0, max_box_size) for _ in range(spatial_dims))
dist_from_center = sum((grid - center) ** 2 for grid in np.ogrid[ranges])
boxes_only_mask[dist_from_center <= radius**2] = np.int16(labels_np[b])
# squeeze it to an ellipse/ellipsoid mask
resizer = Resize(spatial_size=box_size, mode="nearest", anti_aliasing=False)
boxes_only_mask = resizer(boxes_only_mask[None])[0] # type: ignore
else:
# generate a rect mask
boxes_only_mask = np.ones(box_size, dtype=np.int16) * np.int16(labels_np[b])
# apply to global mask
slicing = [b]
slicing.extend(slice(boxes_np[b, d], boxes_np[b, d + spatial_dims]) for d in range(spatial_dims)) # type:ignore
boxes_mask_np[tuple(slicing)] = boxes_only_mask
return convert_to_dst_type(src=boxes_mask_np, dst=boxes, dtype=torch.int16)[0]


def convert_mask_to_box(
boxes_mask: NdarrayOrTensor,
bg_label: int = -1,
box_dtype: DtypeLike | torch.dtype = torch.float32,
label_dtype: DtypeLike | torch.dtype = torch.long,
) -> tuple[NdarrayOrTensor, NdarrayOrTensor]:
"""
Convert an int16 mask image to boxes; the mask has the same size as the input image
Args:
boxes_mask: int16 array, sized (num_box, H, W). Each channel represents a box.
The foreground region in channel c has intensity of labels[c].
The background intensity is bg_label.
bg_label: background label for the boxes_mask
box_dtype: output dtype for boxes
label_dtype: output dtype for labels
Returns:
- bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``.
- classification foreground(fg) labels, dtype should be int, sized (N,).
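Example:
.. code-block:: python

# a minimal illustrative sketch (hypothetical values, assumes TO_REMOVE == 0.0)
boxes_mask = -torch.ones((1, 4, 4), dtype=torch.int16)
boxes_mask[0, 1:3, 1:3] = 1
convert_mask_to_box(boxes_mask)  # expected: (tensor([[1., 1., 3., 3.]]), tensor([1]))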
"""
look_up_option(len(boxes_mask.shape), [3, 4])
spatial_size = list(boxes_mask.shape[1:])
spatial_dims = get_spatial_dims(spatial_size=spatial_size)
boxes_mask_np, *_ = convert_data_type(boxes_mask, np.ndarray)
boxes_list = []
labels_list = []
for b in range(boxes_mask_np.shape[0]):
fg_indices = np.nonzero(boxes_mask_np[b, ...] - bg_label)
if fg_indices[0].shape[0] == 0:
continue
boxes_b = []
for fd_i in fg_indices:
boxes_b.append(min(fd_i)) # top left corner
for fd_i in fg_indices:
boxes_b.append(max(fd_i) + 1 - TO_REMOVE) # bottom right corner
boxes_list.append(boxes_b)
if spatial_dims == 2:
labels_list.append(boxes_mask_np[b, fg_indices[0][0], fg_indices[1][0]])
if spatial_dims == 3:
labels_list.append(boxes_mask_np[b, fg_indices[0][0], fg_indices[1][0], fg_indices[2][0]])
if len(boxes_list) == 0:
boxes_np, labels_np = np.zeros([0, 2 * spatial_dims]), np.zeros([0])
else:
boxes_np, labels_np = np.asarray(boxes_list), np.asarray(labels_list)
boxes, *_ = convert_to_dst_type(src=boxes_np, dst=boxes_mask, dtype=box_dtype)
labels, *_ = convert_to_dst_type(src=labels_np, dst=boxes_mask, dtype=label_dtype)
return boxes, labels


def select_labels(
labels: Sequence[NdarrayOrTensor] | NdarrayOrTensor, keep: NdarrayOrTensor
) -> tuple | NdarrayOrTensor:
"""
For each element in ``labels``, select the indices specified by ``keep``.
Args:
labels: Sequence of arrays. Each element represents classification labels or scores
corresponding to ``boxes``, sized (N,).
keep: the indices to keep, same length as each element in ``labels``.
Returns:
selected labels; does not share memory with the original labels.
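Example:
.. code-block:: python

# a minimal illustrative sketch (hypothetical values): keep entries 0 and 2
labels = torch.tensor([3, 5, 7])
select_labels(labels, keep=torch.tensor([0, 2]))  # expected: tensor([3, 7])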
"""
labels_tuple = ensure_tuple(labels, True)
labels_select_list = []
keep_t: torch.Tensor = convert_data_type(keep, torch.Tensor)[0]
for item in labels_tuple:
labels_t: torch.Tensor = convert_data_type(item, torch.Tensor)[0]
labels_t = labels_t[keep_t, ...]
labels_select_list.append(convert_to_dst_type(src=labels_t, dst=item)[0])
if isinstance(labels, (torch.Tensor, np.ndarray)):
return labels_select_list[0] # type: ignore
return tuple(labels_select_list)


def swapaxes_boxes(boxes: NdarrayTensor, axis1: int, axis2: int) -> NdarrayTensor:
"""
Interchange two axes of boxes.
Args:
boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``
axis1: First axis.
axis2: Second axis.
Returns:
boxes with two axes interchanged.
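Example:
.. code-block:: python

# a minimal illustrative sketch (hypothetical values): swap the two spatial axes of a 2D box
boxes = torch.tensor([[0.0, 1.0, 2.0, 3.0]])
swapaxes_boxes(boxes, 0, 1)  # expected: tensor([[1., 0., 3., 2.]])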
"""
spatial_dims: int = get_spatial_dims(boxes=boxes)
if isinstance(boxes, torch.Tensor):
boxes_swap = boxes.clone()
else:
boxes_swap = deepcopy(boxes) # type: ignore
boxes_swap[:, [axis1, axis2]] = boxes_swap[:, [axis2, axis1]]
boxes_swap[:, [spatial_dims + axis1, spatial_dims + axis2]] = boxes_swap[
:, [spatial_dims + axis2, spatial_dims + axis1]
]
return boxes_swap # type: ignore[return-value]


def rot90_boxes(
boxes: NdarrayTensor, spatial_size: Sequence[int] | int, k: int = 1, axes: tuple[int, int] = (0, 1)
) -> NdarrayTensor:
"""
Rotate boxes by 90 degrees in the plane specified by axes.
Rotation direction is from the first towards the second axis.
Args:
boxes: bounding boxes, Nx4 or Nx6 torch tensor or ndarray. The box mode is assumed to be ``StandardMode``
spatial_size: image spatial size.
k: number of times the boxes are rotated by 90 degrees.
axes: (2,) array_like
The boxes are rotated in the plane defined by the axes. Axes must be different.
Returns:
the rotated boxes.
Notes:
``rot90_boxes(boxes, spatial_size, k=1, axes=(1,0))`` is the reverse of
``rot90_boxes(boxes, spatial_size, k=1, axes=(0,1))``
``rot90_boxes(boxes, spatial_size, k=1, axes=(1,0))`` is equivalent to
``rot90_boxes(boxes, spatial_size, k=-1, axes=(0,1))``
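Example:
.. code-block:: python

# a minimal illustrative sketch (hypothetical values, assumes TO_REMOVE == 0.0)
boxes = torch.tensor([[0.0, 0.0, 2.0, 3.0]])
rot90_boxes(boxes, spatial_size=[10, 10], k=1)  # expected: tensor([[7., 0., 10., 2.]])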
"""
spatial_dims: int = get_spatial_dims(boxes=boxes)
spatial_size_ = list(ensure_tuple_rep(spatial_size, spatial_dims))
axes = ensure_tuple(axes)
if len(axes) != 2:
raise ValueError("len(axes) must be 2.")
if axes[0] == axes[1] or abs(axes[0] - axes[1]) == spatial_dims:
raise ValueError("Axes must be different.")
if axes[0] >= spatial_dims or axes[0] < -spatial_dims or axes[1] >= spatial_dims or axes[1] < -spatial_dims:
raise ValueError(f"Axes={axes} out of range for array of ndim={spatial_dims}.")
k %= 4
if k == 0:
return boxes
if k == 2:
return flip_boxes(flip_boxes(boxes, spatial_size_, axes[0]), spatial_size_, axes[1])
if k == 1:
boxes_ = flip_boxes(boxes, spatial_size_, axes[1])
return swapaxes_boxes(boxes_, axes[0], axes[1])
else:
# k == 3
boxes_ = swapaxes_boxes(boxes, axes[0], axes[1])
spatial_size_[axes[0]], spatial_size_[axes[1]] = spatial_size_[axes[1]], spatial_size_[axes[0]]
return flip_boxes(boxes_, spatial_size_, axes[1])