Adds a transform to generate heatmaps from landmarks

eclipse0922 · eclipse0922 · commit 8ef905b36b26 · 2025-09-21T14:31:28.000+09:00
Adds a `GenerateHeatmap` transform to create gaussian response maps from landmark coordinates.
This transform is implemented for both array and dictionary-based workflows.
It enables the generation of heatmaps from landmark data, facilitating tasks
like landmark localization and visualization.
The transform supports 2D and 3D coordinates and offers options for controlling
the gaussian standard deviation, spatial shape, truncation, normalization, and data type.
diff --git a/monai/transforms/post/array.py b/monai/transforms/post/array.py
@@ -38,7 +38,14 @@
     remove_small_objects,
 )
 from monai.transforms.utils_pytorch_numpy_unification import unravel_index
-from monai.utils import TransformBackends, convert_data_type, convert_to_tensor, ensure_tuple, look_up_option
+from monai.utils import (
+    TransformBackends,
+    convert_data_type,
+    convert_to_tensor,
+    ensure_tuple,
+    get_equivalent_dtype,
+    look_up_option,
+)
 from monai.utils.type_conversion import convert_to_dst_type
 
 __all__ = [
@@ -54,6 +61,7 @@
     "SobelGradients",
     "VoteEnsemble",
     "Invert",
+    "GenerateHeatmap",
     "DistanceTransformEDT",
 ]
 
@@ -742,6 +750,180 @@ def __call__(self, img: Sequence[NdarrayOrTensor] | NdarrayOrTensor) -> NdarrayO
         return self.post_convert(out_pt, img)
 
 
+class GenerateHeatmap(Transform):
+    """
+    Generate per-landmark gaussian response maps for 2D or 3D coordinates.
+
+    The input is an array or tensor of shape ``(num_points, spatial_dims)`` and the output has shape
+    ``(num_points, *spatial_shape)``, one channel per landmark. Landmarks with non-finite coordinates or
+    located outside ``spatial_shape`` are skipped and leave an all-zero channel.
+
+    Args:
+        sigma: gaussian standard deviation. A single value is broadcast across all spatial dimensions.
+        spatial_shape: optional fallback spatial shape. If ``None`` it must be provided when calling the transform.
+        truncate: extent, in multiples of ``sigma``, used to crop the gaussian support window.
+        normalize: normalize every heatmap channel to ``[0, 1]`` when ``True``.
+        dtype: target dtype for the generated heatmaps (accepts numpy or torch dtypes). It must be a
+            floating point type because gaussian responses are fractional.
+
+    Raises:
+        ValueError: when ``sigma`` or ``truncate`` is non-positive, ``dtype`` is not a floating point type
+            or ``spatial_shape`` cannot be resolved.
+
+    """
+
+    backend = [TransformBackends.NUMPY, TransformBackends.TORCH]
+
+    def __init__(
+        self,
+        sigma: Sequence[float] | float = 5.0,
+        spatial_shape: Sequence[int] | None = None,
+        truncate: float = 3.0,
+        normalize: bool = True,
+        dtype: np.dtype | torch.dtype | type = np.float32,
+    ) -> None:
+        if isinstance(sigma, Sequence) and not isinstance(sigma, (str, bytes)):
+            if any(s <= 0 for s in sigma):
+                raise ValueError("sigma values must be positive.")
+            self._sigma = tuple(float(s) for s in sigma)
+        else:
+            if float(sigma) <= 0:
+                raise ValueError("sigma must be positive.")
+            self._sigma = float(sigma)
+        if truncate <= 0:
+            raise ValueError("truncate must be positive.")
+        self.truncate = float(truncate)
+        self.normalize = normalize
+        self.torch_dtype = get_equivalent_dtype(dtype, torch.Tensor)
+        self.numpy_dtype = get_equivalent_dtype(dtype, np.ndarray)
+        # Integer dtypes cannot hold fractional responses and previously failed only inside ``__call__``
+        # with an opaque in-place type promotion error, so reject them up front.
+        if self.torch_dtype is not None and not self.torch_dtype.is_floating_point:
+            raise ValueError(f"dtype must be a floating point type, got {dtype}.")
+        self.spatial_shape = None if spatial_shape is None else tuple(int(s) for s in spatial_shape)
+
+    def __call__(
+        self,
+        points: NdarrayOrTensor,
+        spatial_shape: Sequence[int] | None = None,
+    ) -> NdarrayOrTensor:
+        """
+        Args:
+            points: landmark coordinates with shape ``(num_points, spatial_dims)``.
+            spatial_shape: shape of the generated heatmaps; falls back to the constructor value when ``None``.
+
+        Returns:
+            Heatmaps of shape ``(num_points, *spatial_shape)`` converted to the container type of ``points``.
+
+        Raises:
+            ValueError: when ``points`` is not a 2D array of 2D/3D landmarks or no spatial shape is available.
+        """
+        points_t = convert_to_tensor(points, dtype=torch.float32, track_meta=False)
+        if points_t.ndim != 2:
+            raise ValueError("points must be a 2D array with shape (num_points, spatial_dims).")
+        device = points_t.device
+        num_points, spatial_dims = points_t.shape
+        if spatial_dims not in (2, 3):
+            raise ValueError("GenerateHeatmap only supports 2D or 3D landmarks.")
+
+        target_shape = self._resolve_spatial_shape(spatial_shape, spatial_dims)
+        sigma = self._resolve_sigma(spatial_dims)
+        radius = tuple(int(np.ceil(self.truncate * s)) for s in sigma)
+
+        heatmap = torch.zeros((num_points, *target_shape), dtype=self.torch_dtype, device=device)
+        for idx, center in enumerate(points_t):
+            center_vals = center.tolist()
+            if not np.all(np.isfinite(center_vals)):
+                continue
+            if not self._is_inside(center_vals, target_shape):
+                continue
+            window_slices, coord_shifts = self._make_window(center_vals, radius, target_shape, device)
+            if window_slices is None:
+                continue
+            # Basic indexing returns a view, so in-place updates of ``region`` write into ``heatmap``.
+            region = heatmap[(idx, *window_slices)]
+            # Each channel starts at zero and is written once, so the window can be filled directly.
+            region.copy_(self._evaluate_gaussian(coord_shifts, sigma))
+            if self.normalize:
+                # The channel is zero outside the window, hence its maximum is the window maximum;
+                # restricting the work to the window avoids a full-volume pass per landmark.
+                max_val = region.max()
+                if max_val.item() > 0:
+                    region /= max_val
+
+        target_dtype = self.torch_dtype if isinstance(points, (torch.Tensor, MetaTensor)) else self.numpy_dtype
+        converted, _, _ = convert_to_dst_type(heatmap, points, dtype=target_dtype)
+        return converted
+
+    def _resolve_spatial_shape(self, call_shape: Sequence[int] | None, spatial_dims: int) -> tuple[int, ...]:
+        """Return the call-time shape (or the constructor fallback) as one integer per spatial dimension."""
+        shape = call_shape if call_shape is not None else self.spatial_shape
+        if shape is None:
+            raise ValueError("spatial_shape must be provided either at construction time or call time.")
+        shape_tuple = ensure_tuple(shape)
+        if len(shape_tuple) != spatial_dims:
+            if len(shape_tuple) == 1:
+                shape_tuple = shape_tuple * spatial_dims  # type: ignore
+            else:
+                raise ValueError("spatial_shape length must match spatial dimension of the landmarks.")
+        return tuple(int(s) for s in shape_tuple)
+
+    def _resolve_sigma(self, spatial_dims: int) -> tuple[float, ...]:
+        """Return one standard deviation per spatial dimension, broadcasting a scalar or 1-element tuple."""
+        if isinstance(self._sigma, tuple):
+            if len(self._sigma) == spatial_dims:
+                return self._sigma
+            if len(self._sigma) == 1:
+                return self._sigma * spatial_dims
+            raise ValueError("sigma sequence length must equal the number of spatial dimensions.")
+        return (self._sigma,) * spatial_dims
+
+    @staticmethod
+    def _is_inside(center: Sequence[float], bounds: tuple[int, ...]) -> bool:
+        """Return ``True`` when every coordinate lies in ``[0, size)`` of its axis."""
+        return all(0 <= c < size for c, size in zip(center, bounds))
+
+    def _make_window(
+        self,
+        center: Sequence[float],
+        radius: tuple[int, ...],
+        bounds: tuple[int, ...],
+        device: torch.device,
+    ) -> tuple[tuple[slice, ...] | None, tuple[torch.Tensor, ...]]:
+        """
+        Build the crop around ``center`` that is clipped to the image bounds.
+
+        Returns:
+            The per-axis slices of the window and, for each axis, the voxel offsets from ``center``;
+            ``(None, ())`` when the window is empty along any axis.
+        """
+        slices: list[slice] = []
+        coord_shifts: list[torch.Tensor] = []
+        for c, r, size in zip(center, radius, bounds):
+            start = max(int(np.floor(c - r)), 0)
+            stop = min(int(np.ceil(c + r)) + 1, size)
+            if start >= stop:
+                return None, ()
+            slices.append(slice(start, stop))
+            coord_shifts.append(torch.arange(start, stop, device=device, dtype=self.torch_dtype) - float(c))
+        return tuple(slices), tuple(coord_shifts)
+
+    def _evaluate_gaussian(self, coord_shifts: tuple[torch.Tensor, ...], sigma: tuple[float, ...]) -> torch.Tensor:
+        """Evaluate the unnormalized separable gaussian ``exp(-0.5 * sum((shift / sigma) ** 2))`` on the window."""
+        device = coord_shifts[0].device
+        shape = tuple(len(axis) for axis in coord_shifts)
+        if 0 in shape:
+            return torch.zeros(shape, dtype=self.torch_dtype, device=device)
+        exponent = torch.zeros(shape, dtype=self.torch_dtype, device=device)
+        for dim, (shift, sig) in enumerate(zip(coord_shifts, sigma)):
+            scaled = (shift / float(sig)) ** 2
+            # Reshape the 1D term so it broadcasts along its own axis of the window.
+            reshape_shape = [1] * len(coord_shifts)
+            reshape_shape[dim] = shift.numel()
+            exponent += scaled.reshape(reshape_shape)
+        return torch.exp(-0.5 * exponent)
+
+
 class ProbNMS(Transform):
     """
     Performs probability based non-maximum suppression (NMS) on the probabilities map via
diff --git a/monai/transforms/post/dictionary.py b/monai/transforms/post/dictionary.py
@@ -35,6 +35,7 @@
     AsDiscrete,
     DistanceTransformEDT,
     FillHoles,
+    GenerateHeatmap,
     KeepLargestConnectedComponent,
     LabelFilter,
     LabelToContour,
@@ -48,6 +49,7 @@
 from monai.transforms.utility.array import ToTensor
 from monai.transforms.utils import allow_missing_keys_mode, convert_applied_interp_mode
 from monai.utils import PostFix, convert_to_tensor, ensure_tuple, ensure_tuple_rep
+from monai.utils.type_conversion import convert_to_dst_type
 
 __all__ = [
     "ActivationsD",
@@ -95,6 +97,9 @@
     "DistanceTransformEDTd",
     "DistanceTransformEDTD",
     "DistanceTransformEDTDict",
+    "GenerateHeatmapd",
+    "GenerateHeatmapD",
+    "GenerateHeatmapDict",
 ]
 
 DEFAULT_POST_FIX = PostFix.meta()
@@ -508,6 +513,166 @@ def __init__(self, keys: KeysCollection, output_key: str | None = None, num_clas
         super().__init__(keys, ensemble, output_key)
 
 
+class GenerateHeatmapd(MapTransform):
+    """
+    Dictionary-based wrapper of :py:class:`monai.transforms.GenerateHeatmap`.
+    Converts landmark coordinates into gaussian heatmaps and optionally copies metadata from a reference image.
+
+    Args:
+        keys: keys of the landmark arrays, each with shape ``(num_points, spatial_dims)``.
+        sigma: gaussian standard deviation, see :py:class:`monai.transforms.GenerateHeatmap`.
+        heatmap_keys: keys under which the heatmaps are stored; defaults to ``"{key}_heatmap"`` for every key.
+        ref_image_keys: optional keys of reference images used to infer the spatial shape and, for tensors,
+            the container type, device and metadata of the output.
+        spatial_shape: a single shape shared by all keys or one shape per key; takes precedence over the
+            reference image.
+        truncate: extent, in multiples of ``sigma``, used to crop the gaussian support window.
+        normalize: normalize every heatmap channel to ``[0, 1]`` when ``True``.
+        dtype: dtype of the generated heatmaps; it is kept even when the reference image uses another dtype.
+        allow_missing_keys: forwarded to ``MapTransform``; don't raise exception if key is missing.
+
+    Raises:
+        ValueError: when the lengths of ``heatmap_keys``, ``ref_image_keys`` or per-key ``spatial_shape``
+            do not match ``keys``, or when no spatial shape can be determined for a key.
+
+    """
+
+    backend = GenerateHeatmap.backend
+
+    def __init__(
+        self,
+        keys: KeysCollection,
+        sigma: Sequence[float] | float = 5.0,
+        heatmap_keys: KeysCollection | None = None,
+        ref_image_keys: KeysCollection | None = None,
+        spatial_shape: Sequence[int] | Sequence[Sequence[int]] | None = None,
+        truncate: float = 3.0,
+        normalize: bool = True,
+        dtype: np.dtype | torch.dtype | type = np.float32,
+        allow_missing_keys: bool = False,
+    ) -> None:
+        super().__init__(keys, allow_missing_keys)
+        self.heatmap_keys = self._prepare_heatmap_keys(heatmap_keys)
+        self.ref_image_keys = self._prepare_optional_keys(ref_image_keys)
+        self.static_shapes = self._prepare_shapes(spatial_shape)
+        self.generator = GenerateHeatmap(
+            sigma=sigma,
+            spatial_shape=None,
+            truncate=truncate,
+            normalize=normalize,
+            dtype=dtype,
+        )
+
+    def __call__(self, data: Mapping[Hashable, Any]) -> dict[Hashable, Any]:
+        d = dict(data)
+        for key, out_key, ref_key, static_shape in self.key_iterator(
+            d, self.heatmap_keys, self.ref_image_keys, self.static_shapes
+        ):
+            points = d[key]
+            shape = self._determine_shape(points, static_shape, d, ref_key)
+            heatmap = self.generator(points, spatial_shape=shape)
+            reference = d.get(ref_key) if ref_key is not None else None
+            d[out_key] = self._prepare_output(heatmap, reference)
+        return d
+
+    def _prepare_heatmap_keys(self, heatmap_keys: KeysCollection | None) -> tuple[Hashable, ...]:
+        """Return one output key per input key, defaulting to ``"{key}_heatmap"``."""
+        if heatmap_keys is None:
+            return tuple(f"{key}_heatmap" for key in self.keys)
+        keys_tuple = ensure_tuple(heatmap_keys)
+        # NOTE(review): broadcasting one output key over several input keys makes every heatmap
+        # overwrite the previous one in ``__call__``; kept for backward compatibility.
+        if len(keys_tuple) == 1 and len(self.keys) > 1:
+            keys_tuple = keys_tuple * len(self.keys)
+        if len(keys_tuple) != len(self.keys):
+            raise ValueError("heatmap_keys length must match keys length.")
+        return keys_tuple
+
+    def _prepare_optional_keys(self, maybe_keys: KeysCollection | None) -> tuple[Hashable | None, ...]:
+        """Return one reference key (or ``None``) per input key, broadcasting a single key."""
+        if maybe_keys is None:
+            return (None,) * len(self.keys)
+        keys_tuple = ensure_tuple(maybe_keys)
+        if len(keys_tuple) == 1 and len(self.keys) > 1:
+            keys_tuple = keys_tuple * len(self.keys)
+        if len(keys_tuple) != len(self.keys):
+            raise ValueError("ref_image_keys length must match keys length when provided.")
+        return tuple(keys_tuple)
+
+    def _prepare_shapes(
+        self, spatial_shape: Sequence[int] | Sequence[Sequence[int]] | None
+    ) -> tuple[tuple[int, ...] | None, ...]:
+        """Return one static shape (or ``None``) per input key from a shared shape or per-key shapes."""
+        if spatial_shape is None:
+            return (None,) * len(self.keys)
+        shape_tuple = ensure_tuple(spatial_shape)
+        # A flat sequence of integers is a single shape shared by every key.
+        if shape_tuple and all(isinstance(v, (int, np.integer)) for v in shape_tuple):
+            shape = tuple(int(v) for v in shape_tuple)
+            return (shape,) * len(self.keys)
+        if len(shape_tuple) == 1 and len(self.keys) > 1:
+            shape_tuple = shape_tuple * len(self.keys)
+        if len(shape_tuple) != len(self.keys):
+            raise ValueError("spatial_shape length must match keys length when providing per-key shapes.")
+        return tuple(
+            None if item is None else tuple(int(v) for v in ensure_tuple(item)) for item in shape_tuple
+        )
+
+    def _determine_shape(
+        self,
+        points: Any,
+        static_shape: tuple[int, ...] | None,
+        data: Mapping[Hashable, Any],
+        ref_key: Hashable | None,
+    ) -> tuple[int, ...]:
+        """Return the static shape when configured, otherwise the spatial shape of the reference image."""
+        if static_shape is not None:
+            return static_shape
+        points_t = convert_to_tensor(points, dtype=torch.float32, track_meta=False)
+        if points_t.ndim != 2:
+            raise ValueError("landmark arrays must be 2D with shape (num_points, spatial_dims).")
+        spatial_dims = int(points_t.shape[1])
+        if ref_key is not None and ref_key in data:
+            return self._shape_from_reference(data[ref_key], spatial_dims)
+        raise ValueError(
+            "Unable to determine spatial shape for GenerateHeatmapd. Provide spatial_shape or ref_image_keys."
+        )
+
+    def _shape_from_reference(self, reference: Any, spatial_dims: int) -> tuple[int, ...]:
+        """Return the last ``spatial_dims`` dimensions of ``reference`` (or its recorded ``spatial_shape``)."""
+        if isinstance(reference, MetaTensor):
+            # NOTE(review): ``meta["spatial_shape"]`` is preferred over the tensor shape; confirm it is kept
+            # in sync with the data, otherwise the heatmap will not match the reference image.
+            meta_shape = reference.meta.get("spatial_shape")
+            if meta_shape is not None:
+                dims = ensure_tuple(meta_shape)
+                if len(dims) == spatial_dims:
+                    return tuple(int(v) for v in dims)
+            return tuple(int(v) for v in reference.shape[-spatial_dims:])
+        if hasattr(reference, "shape"):
+            return tuple(int(v) for v in reference.shape[-spatial_dims:])
+        raise ValueError("Reference data must define a shape attribute.")
+
+    def _prepare_output(self, heatmap: NdarrayOrTensor, reference: Any) -> Any:
+        """
+        Move the heatmap onto the container type and device of a tensor reference, keeping the heatmap dtype.
+
+        Casting to ``reference.dtype`` would truncate the fractional responses to zero for integer images
+        and silently discard the ``dtype`` requested by the user.
+        """
+        if not isinstance(reference, torch.Tensor):
+            return heatmap
+        converted, _, _ = convert_to_dst_type(
+            heatmap, reference, dtype=self.generator.torch_dtype, device=reference.device
+        )
+        if isinstance(reference, MetaTensor):
+            converted.meta["spatial_shape"] = tuple(int(v) for v in heatmap.shape[1:])
+        return converted
+
+
+GenerateHeatmapD = GenerateHeatmapDict = GenerateHeatmapd
+
+
 class ProbNMSd(MapTransform):
     """
     Performs probability based non-maximum suppression (NMS) on the probabilities map via
diff --git a/tests/test_generate_heatmap.py b/tests/test_generate_heatmap.py