From 8f110a0fcd52e4e8173b62b10d81065250ed9a7f Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Fri, 22 Jan 2021 11:45:43 +0800 Subject: [PATCH 01/39] [DLMED] add IterationHandler refer to the EpochHandler in ignite Signed-off-by: Nic Ma --- docs/source/handlers.rst | 6 ++ monai/handlers/__init__.py | 1 + monai/handlers/confusion_matrix.py | 82 +++----------------- monai/handlers/hausdorff_distance.py | 55 ++----------- monai/handlers/iteration_metric.py | 98 ++++++++++++++++++++++++ monai/handlers/mean_dice.py | 56 ++------------ monai/handlers/surface_distance.py | 56 ++------------ monai/metrics/utils.py | 6 +- tests/test_handler_confusion_matrix.py | 13 ++-- tests/test_handler_hausdorff_distance.py | 3 +- tests/test_handler_surface_distance.py | 3 +- 11 files changed, 146 insertions(+), 233 deletions(-) create mode 100644 monai/handlers/iteration_metric.py diff --git a/docs/source/handlers.rst b/docs/source/handlers.rst index 2962f725d8..d1ce257cb7 100644 --- a/docs/source/handlers.rst +++ b/docs/source/handlers.rst @@ -22,6 +22,12 @@ CSV saver :members: +Iteration Metric +---------------- +.. autoclass:: IterationMetric + :members: + + Mean Dice metrics handler ------------------------- .. autoclass:: MeanDice diff --git a/monai/handlers/__init__.py b/monai/handlers/__init__.py index 1df516eaf0..a873cd8b15 100644 --- a/monai/handlers/__init__.py +++ b/monai/handlers/__init__.py @@ -14,6 +14,7 @@ from .classification_saver import ClassificationSaver from .confusion_matrix import ConfusionMatrix from .hausdorff_distance import HausdorffDistance +from .iteration_metric import IterationMetric from .lr_schedule_handler import LrScheduleHandler from .mean_dice import MeanDice from .metric_logger import MetricLogger diff --git a/monai/handlers/confusion_matrix.py b/monai/handlers/confusion_matrix.py index fe60b964a7..7ca10fa91a 100644 --- a/monai/handlers/confusion_matrix.py +++ b/monai/handlers/confusion_matrix.py @@ -9,21 +9,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, Optional, Sequence +from typing import Callable, Optional import torch from monai.metrics import ConfusionMatrixMetric, compute_confusion_matrix_metric from monai.metrics.utils import MetricReduction, do_metric_reduction -from monai.utils import exact_version, optional_import +from monai.handlers.iteration_metric import IterationMetric -NotComputableError, _ = optional_import("ignite.exceptions", "0.4.2", exact_version, "NotComputableError") -Metric, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "Metric") -reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "reinit__is_reduced") -sync_all_reduce, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "sync_all_reduce") - -class ConfusionMatrix(Metric): # type: ignore[valid-type, misc] # due to optional_import +class ConfusionMatrix(IterationMetric): """ Compute confusion matrix related metrics from full size Tensor and collects average over batch, class-channels, iterations. """ @@ -32,7 +27,6 @@ def __init__( self, include_background: bool = True, metric_name: str = "hit_rate", - compute_sample: bool = False, output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, ) -> None: @@ -48,79 +42,21 @@ def __init__( ``"informedness"``, ``"markedness"``] Some of the metrics have multiple aliases (as shown in the wikipedia page aforementioned), and you can also input those names instead. 
- compute_sample: if ``True``, each sample's metric will be computed first. - If ``False``, the confusion matrix for all samples will be accumulated first. Defaults to ``False``. output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. See also: :py:meth:`monai.metrics.confusion_matrix` """ - super().__init__(output_transform, device=device) - self.confusion_matrix = ConfusionMatrixMetric( + metric_fn = ConfusionMatrixMetric( include_background=include_background, metric_name=metric_name, - compute_sample=compute_sample, - reduction=MetricReduction.MEAN, + compute_sample=False, + reduction=MetricReduction.NONE, ) - self._sum = 0.0 - self._num_examples = 0 - self.compute_sample = compute_sample self.metric_name = metric_name - self._total_tp = 0.0 - self._total_fp = 0.0 - self._total_tn = 0.0 - self._total_fn = 0.0 - - @reinit__is_reduced - def reset(self) -> None: - self._sum = 0.0 - self._num_examples = 0 - self._total_tp = 0.0 - self._total_fp = 0.0 - self._total_tn = 0.0 - self._total_fn = 0.0 - - @reinit__is_reduced - def update(self, output: Sequence[torch.Tensor]) -> None: - """ - Args: - output: sequence with contents [y_pred, y]. - - Raises: - ValueError: When ``output`` length is not 2. This metric can only support y_pred and y. + super().__init__(metric_fn=metric_fn, output_transform=output_transform, device=device) - """ - if len(output) != 2: - raise ValueError(f"output must have length 2, got {len(output)}.") - y_pred, y = output - if self.compute_sample is True: - score, not_nans = self.confusion_matrix(y_pred, y) - not_nans = int(not_nans.item()) - - # add all items in current batch - self._sum += score.item() * not_nans - self._num_examples += not_nans - else: - confusion_matrix = self.confusion_matrix(y_pred, y) - confusion_matrix, _ = do_metric_reduction(confusion_matrix, MetricReduction.SUM) - self._total_tp += confusion_matrix[0].item() - self._total_fp += confusion_matrix[1].item() - self._total_tn += confusion_matrix[2].item() - self._total_fn += confusion_matrix[3].item() - - @sync_all_reduce("_sum", "_num_examples", "_total_tp", "_total_fp", "_total_tn", "_total_fn") - def compute(self): - """ - Raises: - NotComputableError: When ``compute`` is called before an ``update`` occurs. - - """ - if self.compute_sample is True: - if self._num_examples == 0: - raise NotComputableError( - "ConfusionMatrix metric must have at least one example before it can be computed." - ) - return self._sum / self._num_examples - confusion_matrix = torch.tensor([self._total_tp, self._total_fp, self._total_tn, self._total_fn]) + def _reduce(self, scores) -> torch.Tensor: + confusion_matrix, _ = do_metric_reduction(scores, MetricReduction.MEAN) return compute_confusion_matrix_metric(self.metric_name, confusion_matrix) diff --git a/monai/handlers/hausdorff_distance.py b/monai/handlers/hausdorff_distance.py index 581550a703..f87ba8f3ea 100644 --- a/monai/handlers/hausdorff_distance.py +++ b/monai/handlers/hausdorff_distance.py @@ -9,20 +9,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Callable, Optional, Sequence +from typing import Callable, Optional import torch from monai.metrics import HausdorffDistanceMetric -from monai.utils import MetricReduction, exact_version, optional_import +from monai.utils import MetricReduction +from monai.handlers.iteration_metric import IterationMetric -NotComputableError, _ = optional_import("ignite.exceptions", "0.4.2", exact_version, "NotComputableError") -Metric, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "Metric") -reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "reinit__is_reduced") -sync_all_reduce, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "sync_all_reduce") - -class HausdorffDistance(Metric): # type: ignore[valid-type, misc] # due to optional_import +class HausdorffDistance(IterationMetric): """ Computes Hausdorff distance from full size Tensor and collects average over batch, class-channels, iterations. """ @@ -52,48 +48,11 @@ def __init__( """ super().__init__(output_transform, device=device) - self.hd = HausdorffDistanceMetric( + metric_fn = HausdorffDistanceMetric( include_background=include_background, distance_metric=distance_metric, percentile=percentile, directed=directed, - reduction=MetricReduction.MEAN, + reduction=MetricReduction.NONE, ) - self._sum = 0.0 - self._num_examples = 0 - - @reinit__is_reduced - def reset(self) -> None: - self._sum = 0.0 - self._num_examples = 0 - - @reinit__is_reduced - def update(self, output: Sequence[torch.Tensor]) -> None: - """ - Args: - output: sequence with contents [y_pred, y]. - - Raises: - ValueError: When ``output`` length is not 2. The metric can only support y_pred and y. - - """ - if len(output) != 2: - raise ValueError(f"output must have length 2, got {len(output)}.") - y_pred, y = output - score, not_nans = self.hd(y_pred, y) - not_nans = int(not_nans.item()) - - # add all items in current batch - self._sum += score.item() * not_nans - self._num_examples += not_nans - - @sync_all_reduce("_sum", "_num_examples") - def compute(self) -> float: - """ - Raises: - NotComputableError: When ``compute`` is called before an ``update`` occurs. - - """ - if self._num_examples == 0: - raise NotComputableError("HausdorffDistance must have at least one example before it can be computed.") - return self._sum / self._num_examples + super().__init__(metric_fn=metric_fn, output_transform=output_transform, device=device) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py new file mode 100644 index 0000000000..d44375d04e --- /dev/null +++ b/monai/handlers/iteration_metric.py @@ -0,0 +1,98 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Callable, Optional, Sequence + +import torch + +from monai.utils import MetricReduction, exact_version, optional_import +from monai.metrics import do_metric_reduction + +NotComputableError, _ = optional_import("ignite.exceptions", "0.4.2", exact_version, "NotComputableError") +idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") +Metric, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "Metric") +reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "reinit__is_reduced") + + +class IterationMetric(Metric): # type: ignore[valid-type, misc] # due to optional_import + """ + Class for metrics that should be computed on every iteration and compute final results when epoch completed. + Similar to the `EpochMetric` in ignite: + https://github.com/pytorch/ignite/blob/v0.4.2/ignite/metrics/epoch_metric.py#L13. + + Args: + metric_fn: callable function or class to compute raw metric results after every iteration. + expect to return a Tensor with shape (batch, channel, ...) or tuple (Tensor, not_nans). + output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. + device: device specification in case of distributed computation usage. + + """ + + def __init__( + self, + metric_fn: Callable, + output_transform: Callable = lambda x: x, + device: Optional[torch.device] = None, + ) -> None: + super().__init__(output_transform, device=device) + self.metric_fn = metric_fn + self._scores = [] + + @reinit__is_reduced + def reset(self) -> None: + self._scores = [] + + @reinit__is_reduced + def update(self, output: Sequence[torch.Tensor]) -> None: + """ + Args: + output: sequence with contents [y_pred, y]. + + Raises: + ValueError: When ``output`` length is not 2. metric_fn can only support y_pred and y. + + """ + if len(output) != 2: + raise ValueError(f"output must have length 2, got {len(output)}.") + y_pred, y = output + score = self.metric_fn(y_pred, y) + if isinstance(score, (tuple, list)): + score = score[0] + self._scores.append(score) + + def compute(self) -> None: + """ + Raises: + NotComputableError: When ``compute`` is called before an ``update`` occurs. + + """ + _scores = torch.cat(self._scores, dim=0) + + ws = idist.get_world_size() + + if ws > 1 and not self._is_reduced: + # all gather across all processes + _scores = idist.all_gather(_scores) + self._is_reduced = True + + result = 0.0 + if idist.get_rank() == 0: + # run compute_fn on zero rank only + result = self._reduce(_scores) + + if ws > 1: + # broadcast result to all processes + result = idist.broadcast(result, src=0) + + return result + + def _reduce(self, scores) -> torch.Tensor: + return do_metric_reduction(scores, MetricReduction.MEAN)[0].item() diff --git a/monai/handlers/mean_dice.py b/monai/handlers/mean_dice.py index 3c34948604..df22d62f19 100644 --- a/monai/handlers/mean_dice.py +++ b/monai/handlers/mean_dice.py @@ -9,20 +9,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Callable, Optional, Sequence +from typing import Callable, Optional import torch from monai.metrics import DiceMetric -from monai.utils import MetricReduction, exact_version, optional_import +from monai.utils import MetricReduction +from monai.handlers.iteration_metric import IterationMetric -NotComputableError, _ = optional_import("ignite.exceptions", "0.4.2", exact_version, "NotComputableError") -Metric, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "Metric") -reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "reinit__is_reduced") -sync_all_reduce, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "sync_all_reduce") - -class MeanDice(Metric): # type: ignore[valid-type, misc] # due to optional_import +class MeanDice(IterationMetric): """ Computes Dice score metric from full size Tensor and collects average over batch, class-channels, iterations. """ @@ -44,46 +40,8 @@ def __init__( See also: :py:meth:`monai.metrics.meandice.compute_meandice` """ - super().__init__(output_transform, device=device) - self.dice = DiceMetric( + metric_fn = DiceMetric( include_background=include_background, - reduction=MetricReduction.MEAN, + reduction=MetricReduction.NONE, ) - self._sum = 0.0 - self._num_examples = 0 - - @reinit__is_reduced - def reset(self) -> None: - self._sum = 0.0 - self._num_examples = 0 - - @reinit__is_reduced - def update(self, output: Sequence[torch.Tensor]) -> None: - """ - Args: - output: sequence with contents [y_pred, y]. - - Raises: - ValueError: When ``output`` length is not 2. MeanDice metric can only support y_pred and y. - - """ - if len(output) != 2: - raise ValueError(f"output must have length 2, got {len(output)}.") - y_pred, y = output - score, not_nans = self.dice(y_pred, y) - not_nans = int(not_nans.item()) - - # add all items in current batch - self._sum += score.item() * not_nans - self._num_examples += not_nans - - @sync_all_reduce("_sum", "_num_examples") - def compute(self) -> float: - """ - Raises: - NotComputableError: When ``compute`` is called before an ``update`` occurs. - - """ - if self._num_examples == 0: - raise NotComputableError("MeanDice must have at least one example before it can be computed.") - return self._sum / self._num_examples + super().__init__(metric_fn=metric_fn, output_transform=output_transform, device=device) diff --git a/monai/handlers/surface_distance.py b/monai/handlers/surface_distance.py index 514cf3e6c7..4e2366c666 100644 --- a/monai/handlers/surface_distance.py +++ b/monai/handlers/surface_distance.py @@ -9,20 +9,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Callable, Optional, Sequence +from typing import Callable, Optional import torch from monai.metrics import SurfaceDistanceMetric -from monai.utils import MetricReduction, exact_version, optional_import +from monai.utils import MetricReduction +from monai.handlers.iteration_metric import IterationMetric -NotComputableError, _ = optional_import("ignite.exceptions", "0.4.2", exact_version, "NotComputableError") -Metric, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "Metric") -reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "reinit__is_reduced") -sync_all_reduce, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "sync_all_reduce") - -class SurfaceDistance(Metric): # type: ignore[valid-type, misc] # due to optional_import +class SurfaceDistance(IterationMetric): """ Computes surface distance from full size Tensor and collects average over batch, class-channels, iterations. """ @@ -48,48 +44,10 @@ def __init__( device: device specification in case of distributed computation usage. """ - super().__init__(output_transform, device=device) - self.hd = SurfaceDistanceMetric( + metric_fn = SurfaceDistanceMetric( include_background=include_background, symmetric=symmetric, distance_metric=distance_metric, - reduction=MetricReduction.MEAN, + reduction=MetricReduction.NONE, ) - self._sum = 0.0 - self._num_examples = 0 - - @reinit__is_reduced - def reset(self) -> None: - self._sum = 0.0 - self._num_examples = 0 - - @reinit__is_reduced - def update(self, output: Sequence[torch.Tensor]) -> None: - """ - Args: - output: sequence with contents [y_pred, y]. - - Raises: - ValueError: When ``output`` length is not 2. The metric can only support y_pred and y. - - """ - if len(output) != 2: - raise ValueError(f"output must have length 2, got {len(output)}.") - y_pred, y = output - score, not_nans = self.hd(y_pred, y) - not_nans = int(not_nans.item()) - - # add all items in current batch - self._sum += score.item() * not_nans - self._num_examples += not_nans - - @sync_all_reduce("_sum", "_num_examples") - def compute(self) -> float: - """ - Raises: - NotComputableError: When ``compute`` is called before an ``update`` occurs. - - """ - if self._num_examples == 0: - raise NotComputableError("SurfaceDistance must have at least one example before it can be computed.") - return self._sum / self._num_examples + super().__init__(metric_fn=metric_fn, output_transform=output_transform, device=device) diff --git a/monai/metrics/utils.py b/monai/metrics/utils.py index 68f21f1613..cc7049ff81 100644 --- a/monai/metrics/utils.py +++ b/monai/metrics/utils.py @@ -53,7 +53,7 @@ def do_metric_reduction( f: a tensor that contains the calculated metric scores per batch and per class. The first two dims should be batch and class. reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``, - ``"mean_channel"``, ``"sum_channel"``} + ``"mean_channel"``, ``"sum_channel"``}, if "none", return the input f tensor and not_nans. Define the mode to reduce computation result of 1 batch data. Defaults to ``"mean"``. 
Raises: @@ -65,11 +65,13 @@ def do_metric_reduction( # we need to account for it nans = torch.isnan(f) not_nans = (~nans).float() - f[nans] = 0 t_zero = torch.zeros(1, device=f.device, dtype=f.dtype) reduction = MetricReduction(reduction) + if reduction == MetricReduction.NONE: + return f, not_nans + f[nans] = 0 if reduction == MetricReduction.MEAN: # 2 steps, first, mean by channel (accounting for nans), then by batch not_nans = not_nans.sum(dim=1) diff --git a/tests/test_handler_confusion_matrix.py b/tests/test_handler_confusion_matrix.py index ac5edb72e2..e533245536 100644 --- a/tests/test_handler_confusion_matrix.py +++ b/tests/test_handler_confusion_matrix.py @@ -17,11 +17,10 @@ from monai.handlers import ConfusionMatrix -TEST_CASE_1 = [{"include_background": True, "metric_name": "f1", "compute_sample": False}, 0.75] -TEST_CASE_2 = [{"include_background": False, "metric_name": "ppv", "compute_sample": False}, 1.0] +TEST_CASE_1 = [{"include_background": True, "metric_name": "f1"}, 0.75] +TEST_CASE_2 = [{"include_background": False, "metric_name": "ppv"}, 1.0] -TEST_CASE_SEG_1 = [{"include_background": True, "metric_name": "tpr", "compute_sample": True}, 0.8333] -TEST_CASE_SEG_2 = [{"include_background": True, "metric_name": "tpr", "compute_sample": False}, 0.7] +TEST_CASE_SEG_1 = [{"include_background": True, "metric_name": "tpr"}, 0.7] data_1: Dict[Any, Any] = { "y_pred": torch.tensor( @@ -70,7 +69,7 @@ def test_compute(self, input_params, expected_avg): avg_metric = metric.compute() self.assertAlmostEqual(avg_metric, expected_avg, places=4) - @parameterized.expand([TEST_CASE_SEG_1, TEST_CASE_SEG_2]) + @parameterized.expand([TEST_CASE_SEG_1]) def test_compute_seg(self, input_params, expected_avg): metric = ConfusionMatrix(**input_params) @@ -82,9 +81,7 @@ def test_compute_seg(self, input_params, expected_avg): y = data_2["y"] metric.update([y_pred, y]) - avg_metric = metric.compute() - if input_params["compute_sample"] is False: - avg_metric = avg_metric.item() + avg_metric = metric.compute().item() self.assertAlmostEqual(avg_metric, expected_avg, places=4) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) diff --git a/tests/test_handler_hausdorff_distance.py b/tests/test_handler_hausdorff_distance.py index ee30040cc8..edf59320ea 100644 --- a/tests/test_handler_hausdorff_distance.py +++ b/tests/test_handler_hausdorff_distance.py @@ -71,10 +71,9 @@ def test_compute(self): y_pred, y = TEST_SAMPLE_3 hd_metric.update([y_pred, y]) self.assertEqual(hd_metric.compute(), float("inf")) - self.assertEqual(hd_metric._num_examples, 3) y_pred, y = TEST_SAMPLE_4 hd_metric.update([y_pred, y]) - self.assertEqual(hd_metric._num_examples, 3) + self.assertEqual(hd_metric.compute(), float("inf")) def test_shape_mismatch(self): hd_metric = HausdorffDistance(include_background=True) diff --git a/tests/test_handler_surface_distance.py b/tests/test_handler_surface_distance.py index b4d9584289..656b0d64b2 100644 --- a/tests/test_handler_surface_distance.py +++ b/tests/test_handler_surface_distance.py @@ -71,10 +71,9 @@ def test_compute(self): y_pred, y = TEST_SAMPLE_3 sur_metric.update([y_pred, y]) self.assertAlmostEqual(sur_metric.compute(), float("inf")) - self.assertAlmostEqual(sur_metric._num_examples, 3) y_pred, y = TEST_SAMPLE_4 sur_metric.update([y_pred, y]) - self.assertAlmostEqual(sur_metric._num_examples, 3) + self.assertAlmostEqual(sur_metric.compute(), float("inf")) def test_shape_mismatch(self): sur_metric = SurfaceDistance(include_background=True) From 560802dfd4c8059794961a453fd0467d13ee794f 
Mon Sep 17 00:00:00 2001 From: monai-bot Date: Fri, 22 Jan 2021 10:25:50 +0000 Subject: [PATCH 02/39] [MONAI] python code formatting Signed-off-by: monai-bot --- monai/handlers/confusion_matrix.py | 2 +- monai/handlers/hausdorff_distance.py | 2 +- monai/handlers/iteration_metric.py | 2 +- monai/handlers/mean_dice.py | 2 +- monai/handlers/surface_distance.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/monai/handlers/confusion_matrix.py b/monai/handlers/confusion_matrix.py index 7ca10fa91a..eba75ef957 100644 --- a/monai/handlers/confusion_matrix.py +++ b/monai/handlers/confusion_matrix.py @@ -13,9 +13,9 @@ import torch +from monai.handlers.iteration_metric import IterationMetric from monai.metrics import ConfusionMatrixMetric, compute_confusion_matrix_metric from monai.metrics.utils import MetricReduction, do_metric_reduction -from monai.handlers.iteration_metric import IterationMetric class ConfusionMatrix(IterationMetric): diff --git a/monai/handlers/hausdorff_distance.py b/monai/handlers/hausdorff_distance.py index f87ba8f3ea..3e4a3d70ba 100644 --- a/monai/handlers/hausdorff_distance.py +++ b/monai/handlers/hausdorff_distance.py @@ -13,9 +13,9 @@ import torch +from monai.handlers.iteration_metric import IterationMetric from monai.metrics import HausdorffDistanceMetric from monai.utils import MetricReduction -from monai.handlers.iteration_metric import IterationMetric class HausdorffDistance(IterationMetric): diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index d44375d04e..76586bdb00 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -13,8 +13,8 @@ import torch -from monai.utils import MetricReduction, exact_version, optional_import from monai.metrics import do_metric_reduction +from monai.utils import MetricReduction, exact_version, optional_import NotComputableError, _ = optional_import("ignite.exceptions", "0.4.2", exact_version, "NotComputableError") idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") diff --git a/monai/handlers/mean_dice.py b/monai/handlers/mean_dice.py index df22d62f19..057acbee97 100644 --- a/monai/handlers/mean_dice.py +++ b/monai/handlers/mean_dice.py @@ -13,9 +13,9 @@ import torch +from monai.handlers.iteration_metric import IterationMetric from monai.metrics import DiceMetric from monai.utils import MetricReduction -from monai.handlers.iteration_metric import IterationMetric class MeanDice(IterationMetric): diff --git a/monai/handlers/surface_distance.py b/monai/handlers/surface_distance.py index 4e2366c666..17b667ab46 100644 --- a/monai/handlers/surface_distance.py +++ b/monai/handlers/surface_distance.py @@ -13,9 +13,9 @@ import torch +from monai.handlers.iteration_metric import IterationMetric from monai.metrics import SurfaceDistanceMetric from monai.utils import MetricReduction -from monai.handlers.iteration_metric import IterationMetric class SurfaceDistance(IterationMetric): From b2c317e95f44bc474fc9db6bd7007fb515632262 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Fri, 22 Jan 2021 18:44:44 +0800 Subject: [PATCH 03/39] [DLMED] fix flake8 issue Signed-off-by: Nic Ma --- monai/handlers/confusion_matrix.py | 4 ++-- monai/handlers/iteration_metric.py | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/monai/handlers/confusion_matrix.py b/monai/handlers/confusion_matrix.py index eba75ef957..46226f530b 100644 --- a/monai/handlers/confusion_matrix.py +++ b/monai/handlers/confusion_matrix.py @@ -9,7 +9,7 @@ # 
See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, Optional +from typing import Any, Callable, Optional import torch @@ -57,6 +57,6 @@ def __init__( self.metric_name = metric_name super().__init__(metric_fn=metric_fn, output_transform=output_transform, device=device) - def _reduce(self, scores) -> torch.Tensor: + def _reduce(self, scores) -> Any: confusion_matrix, _ = do_metric_reduction(scores, MetricReduction.MEAN) return compute_confusion_matrix_metric(self.metric_name, confusion_matrix) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 76586bdb00..c3dd0f9cb5 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, Optional, Sequence +from typing import Any, Callable, List, Optional, Sequence import torch @@ -42,9 +42,10 @@ def __init__( output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, ) -> None: - super().__init__(output_transform, device=device) + self._is_reduced: bool = False self.metric_fn = metric_fn - self._scores = [] + self._scores: List = [] + super().__init__(output_transform, device=device) @reinit__is_reduced def reset(self) -> None: @@ -68,7 +69,7 @@ def update(self, output: Sequence[torch.Tensor]) -> None: score = score[0] self._scores.append(score) - def compute(self) -> None: + def compute(self) -> float: """ Raises: NotComputableError: When ``compute`` is called before an ``update`` occurs. @@ -83,7 +84,7 @@ def compute(self) -> None: _scores = idist.all_gather(_scores) self._is_reduced = True - result = 0.0 + result: float = 0.0 if idist.get_rank() == 0: # run compute_fn on zero rank only result = self._reduce(_scores) @@ -94,5 +95,5 @@ def compute(self) -> None: return result - def _reduce(self, scores) -> torch.Tensor: + def _reduce(self, scores) -> Any: return do_metric_reduction(scores, MetricReduction.MEAN)[0].item() From 6f2371e4a498865a84116e59a88f405b32e83558 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Sat, 23 Jan 2021 01:22:01 +0800 Subject: [PATCH 04/39] [DLMED] fix the multi-gpu issue Signed-off-by: Nic Ma --- monai/handlers/iteration_metric.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index c3dd0f9cb5..0715514702 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -75,9 +75,14 @@ def compute(self) -> float: NotComputableError: When ``compute`` is called before an ``update`` occurs. 
""" - _scores = torch.cat(self._scores, dim=0) - ws = idist.get_world_size() + if ws > 1 and not self._is_reduced: + # make sure the _scores is evenly-divisible on multi-GPUs + length = len(self._scores) + for _ in range(length, max(idist.all_gather(length)).item()): + self._scores.append(self._scores[0].new_full(self._scores[0].shape, float("NaN"))) + + _scores = torch.cat(self._scores, dim=0) if ws > 1 and not self._is_reduced: # all gather across all processes From 4b13a4e4b160b8bdc3855261fc7e114daab4dccc Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Sat, 23 Jan 2021 02:04:07 +0800 Subject: [PATCH 05/39] [DLMED] fix typo Signed-off-by: Nic Ma --- monai/handlers/iteration_metric.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 0715514702..81bdfa87ca 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -75,16 +75,17 @@ def compute(self) -> float: NotComputableError: When ``compute`` is called before an ``update`` occurs. """ + _scores = torch.cat(self._scores, dim=0) + ws = idist.get_world_size() if ws > 1 and not self._is_reduced: # make sure the _scores is evenly-divisible on multi-GPUs - length = len(self._scores) - for _ in range(length, max(idist.all_gather(length)).item()): - self._scores.append(self._scores[0].new_full(self._scores[0].shape, float("NaN"))) + length = _scores.shape[0] + max_len = max(idist.all_gather(length)).item() + if length < max_len: + size = [max_len - length] + list(_scores.shape[1:]) + _scores = torch.cat([_scores, _scores.new_full(size, float("NaN"))], dim=0) - _scores = torch.cat(self._scores, dim=0) - - if ws > 1 and not self._is_reduced: # all gather across all processes _scores = idist.all_gather(_scores) self._is_reduced = True From af9fdc61bf7bc37c32ec3727fb65d98b797a40bc Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Sat, 23 Jan 2021 08:28:09 +0800 Subject: [PATCH 06/39] [DLMED] fix distributed tests Signed-off-by: Nic Ma --- monai/handlers/iteration_metric.py | 4 ++-- tests/test_handler_confusion_matrix.py | 2 +- tests/test_handler_confusion_matrix_dist.py | 16 ++++------------ 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 81bdfa87ca..08ef4362ea 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -99,7 +99,7 @@ def compute(self) -> float: # broadcast result to all processes result = idist.broadcast(result, src=0) - return result + return result.item() if torch.is_tensor(result) else result def _reduce(self, scores) -> Any: - return do_metric_reduction(scores, MetricReduction.MEAN)[0].item() + return do_metric_reduction(scores, MetricReduction.MEAN)[0] diff --git a/tests/test_handler_confusion_matrix.py b/tests/test_handler_confusion_matrix.py index e533245536..cc231b82db 100644 --- a/tests/test_handler_confusion_matrix.py +++ b/tests/test_handler_confusion_matrix.py @@ -81,7 +81,7 @@ def test_compute_seg(self, input_params, expected_avg): y = data_2["y"] metric.update([y_pred, y]) - avg_metric = metric.compute().item() + avg_metric = metric.compute() self.assertAlmostEqual(avg_metric, expected_avg, places=4) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) diff --git a/tests/test_handler_confusion_matrix_dist.py b/tests/test_handler_confusion_matrix_dist.py index 583ba716aa..ebe0eb9ca7 100644 --- a/tests/test_handler_confusion_matrix_dist.py +++ 
b/tests/test_handler_confusion_matrix_dist.py @@ -21,17 +21,13 @@ class DistributedConfusionMatrix(DistTestCase): - @DistCall(nnodes=1, nproc_per_node=2) - def test_compute_sample(self): - self._compute(True) - @DistCall(nnodes=1, nproc_per_node=2) def test_compute(self): - self._compute(False) + self._compute() - def _compute(self, compute_sample=True): + def _compute(self): device = f"cuda:{dist.get_rank()}" if torch.cuda.is_available() else "cpu" - metric = ConfusionMatrix(include_background=True, metric_name="tpr", compute_sample=compute_sample) + metric = ConfusionMatrix(include_background=True, metric_name="tpr") if dist.get_rank() == 0: y_pred = torch.tensor( @@ -62,11 +58,7 @@ def _compute(self, compute_sample=True): metric.update([y_pred, y]) avg_metric = metric.compute() - if compute_sample is False: - avg_metric = avg_metric.item() - np.testing.assert_allclose(avg_metric, 0.7, rtol=1e-04, atol=1e-04) - else: - np.testing.assert_allclose(avg_metric, 0.8333, rtol=1e-04, atol=1e-04) + np.testing.assert_allclose(avg_metric, 0.7, rtol=1e-04, atol=1e-04) if __name__ == "__main__": From f586b0888e15c4feca45976c27d20533a11373c3 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Sat, 23 Jan 2021 08:43:26 +0800 Subject: [PATCH 07/39] [DLMED] fix flake8 issue Signed-off-by: Nic Ma --- monai/handlers/iteration_metric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 08ef4362ea..4d555b9dcb 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -69,7 +69,7 @@ def update(self, output: Sequence[torch.Tensor]) -> None: score = score[0] self._scores.append(score) - def compute(self) -> float: + def compute(self) -> Any: """ Raises: NotComputableError: When ``compute`` is called before an ``update`` occurs. @@ -90,7 +90,7 @@ def compute(self) -> float: _scores = idist.all_gather(_scores) self._is_reduced = True - result: float = 0.0 + result: torch.Tensor = torch.zeros(1) if idist.get_rank() == 0: # run compute_fn on zero rank only result = self._reduce(_scores) From 85048398529023684891eb74d16fbfce6989d3e5 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 25 Jan 2021 10:46:52 +0800 Subject: [PATCH 08/39] [DLMED] add engine to metrics Signed-off-by: Nic Ma --- monai/handlers/iteration_metric.py | 26 ++++++++++++-- monai/handlers/metrics_saver.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 monai/handlers/metrics_saver.py diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 4d555b9dcb..5eecad3c1b 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Callable, List, Optional, Sequence +from typing import TYPE_CHECKING, Any, Callable, List, Optional, Sequence, Union import torch @@ -20,6 +20,14 @@ idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") Metric, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "Metric") reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "reinit__is_reduced") +if TYPE_CHECKING: + from ignite.engine import Engine + from ignite.metrics import MetricUsage + from ignite.metrics import EpochWise +else: + Engine, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Engine") + MetricUsage, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "MetricUsage") + EpochWise, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "EpochWise") class IterationMetric(Metric): # type: ignore[valid-type, misc] # due to optional_import @@ -35,7 +43,6 @@ class IterationMetric(Metric): # type: ignore[valid-type, misc] # due to option device: device specification in case of distributed computation usage. """ - def __init__( self, metric_fn: Callable, @@ -103,3 +110,18 @@ def compute(self) -> Any: def _reduce(self, scores) -> Any: return do_metric_reduction(scores, MetricReduction.MEAN)[0] + + def attach(self, engine: Engine, name: str, usage: Union[str, MetricUsage] = EpochWise()) -> None: + """ + Attaches current metric to provided engine. On the end of engine's run, + `engine.state.metrics` dictionary will contain computed metric's value under provided name. + + Args: + engine: the engine to which the metric must be attached. + name: the name of the metric to attach + usage: the usage of the metric. + + """ + # FIXME: record engine for communication, ignite will support it in the future version soon + self.engine = engine + super().attach(engine=engine, name=name, usage=usage) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py new file mode 100644 index 0000000000..b110999887 --- /dev/null +++ b/monai/handlers/metrics_saver.py @@ -0,0 +1,55 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Sequence, Optional, Callable + +import torch + +from monai.utils import exact_version, optional_import + +Events, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Events") +if TYPE_CHECKING: + from ignite.engine import Engine +else: + Engine, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Engine") + + +class MetricsSaver: + """ + ignite handler to save metrics values and details into expected files. 
+ + """ + + def __init__( + self, + save_dir: str, + metrics: Union[Optional, str, Sequence[str]] = None, + metrics_details: Union[Optional, str, Sequence[str]] = None, + batch_transform: Callable = lambda x: x, + ) -> None: + self.save_dir = save_dir + self.metrics = metrics + self.metrics_details = metrics_details + self.batch_transform = batch_transform + + def attach(self, engine: Engine) -> None: + """ + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. + """ + engine.add_event_handler(Events.EPOCH_COMPLETED, self) + + def __call__(self, engine: Engine) -> None: + """ + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. + """ + pass From 16b12106294e7060dbd83a6682c288226ba8a3c3 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 25 Jan 2021 13:20:06 +0800 Subject: [PATCH 09/39] [DLMED] share metric details in engine Signed-off-by: Nic Ma --- monai/engines/workflow.py | 1 + monai/handlers/confusion_matrix.py | 10 +++++++++- monai/handlers/hausdorff_distance.py | 10 +++++++++- monai/handlers/iteration_metric.py | 13 ++++++++++++- monai/handlers/mean_dice.py | 10 +++++++++- monai/handlers/metrics_saver.py | 4 ++-- monai/handlers/surface_distance.py | 10 +++++++++- tests/test_handler_mean_dice.py | 11 +++++++++-- 8 files changed, 60 insertions(+), 9 deletions(-) diff --git a/monai/engines/workflow.py b/monai/engines/workflow.py index 1d8c74c4bb..d2843c722a 100644 --- a/monai/engines/workflow.py +++ b/monai/engines/workflow.py @@ -110,6 +110,7 @@ def set_sampler_epoch(engine: Engine): output=None, batch=None, metrics={}, + metric_details={}, dataloader=None, device=device, key_metric_name=None, # we can set many metrics, only use key_metric to compare and save the best model diff --git a/monai/handlers/confusion_matrix.py b/monai/handlers/confusion_matrix.py index 46226f530b..3f7bfce947 100644 --- a/monai/handlers/confusion_matrix.py +++ b/monai/handlers/confusion_matrix.py @@ -29,6 +29,7 @@ def __init__( metric_name: str = "hit_rate", output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, + save_details: bool = False, ) -> None: """ @@ -44,6 +45,8 @@ def __init__( and you can also input those names instead. output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. + save_details: whether to save metric computation details per image, for example: TP/TN/FP/FN of every image. + if True, will save to `engine.state.metric_details` dict with the metric name as key. See also: :py:meth:`monai.metrics.confusion_matrix` @@ -55,7 +58,12 @@ def __init__( reduction=MetricReduction.NONE, ) self.metric_name = metric_name - super().__init__(metric_fn=metric_fn, output_transform=output_transform, device=device) + super().__init__( + metric_fn=metric_fn, + output_transform=output_transform, + device=device, + save_details=save_details, + ) def _reduce(self, scores) -> Any: confusion_matrix, _ = do_metric_reduction(scores, MetricReduction.MEAN) diff --git a/monai/handlers/hausdorff_distance.py b/monai/handlers/hausdorff_distance.py index 3e4a3d70ba..8dec373bdd 100644 --- a/monai/handlers/hausdorff_distance.py +++ b/monai/handlers/hausdorff_distance.py @@ -31,6 +31,7 @@ def __init__( directed: bool = False, output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, + save_details: bool = False, ) -> None: """ @@ -45,6 +46,8 @@ def __init__( directed: whether to calculate directed Hausdorff distance. 
Defaults to ``False``. output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. + save_details: whether to save metric computation details per image, for example: hausdorff distance + of every image. if True, will save to `engine.state.metric_details` dict with the metric name as key. """ super().__init__(output_transform, device=device) @@ -55,4 +58,9 @@ def __init__( directed=directed, reduction=MetricReduction.NONE, ) - super().__init__(metric_fn=metric_fn, output_transform=output_transform, device=device) + super().__init__( + metric_fn=metric_fn, + output_transform=output_transform, + device=device, + save_details=save_details, + ) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 5eecad3c1b..62660920d9 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -41,6 +41,8 @@ class IterationMetric(Metric): # type: ignore[valid-type, misc] # due to option expect to return a Tensor with shape (batch, channel, ...) or tuple (Tensor, not_nans). output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. + save_details: whether to save metric computation details per image, for example: mean_dice of every image. + if True, will save to `engine.state.metric_details` dict with the metric name as key. """ def __init__( @@ -48,9 +50,11 @@ def __init__( metric_fn: Callable, output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, + save_details: bool = False, ) -> None: self._is_reduced: bool = False self.metric_fn = metric_fn + self.save_details = save_details self._scores: List = [] super().__init__(output_transform, device=device) @@ -97,6 +101,10 @@ def compute(self) -> Any: _scores = idist.all_gather(_scores) self._is_reduced = True + # save score of every image into engine.state for other components + if self.save_details: + self.engine.state.metric_details[self.name] = _scores + result: torch.Tensor = torch.zeros(1) if idist.get_rank() == 0: # run compute_fn on zero rank only @@ -122,6 +130,9 @@ def attach(self, engine: Engine, name: str, usage: Union[str, MetricUsage] = Epo usage: the usage of the metric. """ + super().attach(engine=engine, name=name, usage=usage) # FIXME: record engine for communication, ignite will support it in the future version soon self.engine = engine - super().attach(engine=engine, name=name, usage=usage) + self.name = name + if self.save_details and getattr(engine.state, "metric_details", None) is None: + setattr(engine.state, "metric_details", dict()) diff --git a/monai/handlers/mean_dice.py b/monai/handlers/mean_dice.py index 057acbee97..223bc909dc 100644 --- a/monai/handlers/mean_dice.py +++ b/monai/handlers/mean_dice.py @@ -28,6 +28,7 @@ def __init__( include_background: bool = True, output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, + save_details: bool = False, ) -> None: """ @@ -36,6 +37,8 @@ def __init__( Defaults to True. output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. + save_details: whether to save metric computation details per image, for example: mean dice of every image. + if True, will save to `engine.state.metric_details` dict with the metric name as key. 
See also: :py:meth:`monai.metrics.meandice.compute_meandice` @@ -44,4 +47,9 @@ def __init__( include_background=include_background, reduction=MetricReduction.NONE, ) - super().__init__(metric_fn=metric_fn, output_transform=output_transform, device=device) + super().__init__( + metric_fn=metric_fn, + output_transform=output_transform, + device=device, + save_details=save_details, + ) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index b110999887..a30b58a814 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -32,12 +32,12 @@ def __init__( self, save_dir: str, metrics: Union[Optional, str, Sequence[str]] = None, - metrics_details: Union[Optional, str, Sequence[str]] = None, + metric_details: Union[Optional, str, Sequence[str]] = None, batch_transform: Callable = lambda x: x, ) -> None: self.save_dir = save_dir self.metrics = metrics - self.metrics_details = metrics_details + self.metric_details = metric_details self.batch_transform = batch_transform def attach(self, engine: Engine) -> None: diff --git a/monai/handlers/surface_distance.py b/monai/handlers/surface_distance.py index 17b667ab46..2f8d65d83a 100644 --- a/monai/handlers/surface_distance.py +++ b/monai/handlers/surface_distance.py @@ -30,6 +30,7 @@ def __init__( distance_metric: str = "euclidean", output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, + save_details: bool = False, ) -> None: """ @@ -42,6 +43,8 @@ def __init__( the metric used to compute surface distance. Defaults to ``"euclidean"``. output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. + save_details: whether to save metric computation details per image, for example: surface dice + of every image. if True, will save to `engine.state.metric_details` dict with the metric name as key. 
""" metric_fn = SurfaceDistanceMetric( @@ -50,4 +53,9 @@ def __init__( distance_metric=distance_metric, reduction=MetricReduction.NONE, ) - super().__init__(metric_fn=metric_fn, output_transform=output_transform, device=device) + super().__init__( + metric_fn=metric_fn, + output_transform=output_transform, + device=device, + save_details=save_details, + ) diff --git a/tests/test_handler_mean_dice.py b/tests/test_handler_mean_dice.py index 9983918f2d..5dfd1b29e9 100644 --- a/tests/test_handler_mean_dice.py +++ b/tests/test_handler_mean_dice.py @@ -13,10 +13,10 @@ import torch from parameterized import parameterized - +from ignite.engine import Engine, Events from monai.handlers import MeanDice -TEST_CASE_1 = [{"include_background": True}, 0.75] +TEST_CASE_1 = [{"include_background": True, "save_details": True}, 0.75] TEST_CASE_2 = [{"include_background": False}, 0.66666] @@ -26,7 +26,12 @@ class TestHandlerMeanDice(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_compute(self, input_params, expected_avg): dice_metric = MeanDice(**input_params) + # set up engine + def _val_func(engine, batch): + pass + engine = Engine(_val_func) + dice_metric.attach(engine, "mean_dice") y_pred = torch.Tensor([[[0], [1]], [[1], [0]]]) y = torch.Tensor([[[0], [1]], [[0], [1]]]) dice_metric.update([y_pred, y]) @@ -37,6 +42,8 @@ def test_compute(self, input_params, expected_avg): avg_dice = dice_metric.compute() self.assertAlmostEqual(avg_dice, expected_avg, places=4) + if getattr(engine.state, "metric_details", None) is not None: + self.assertTupleEqual(tuple(engine.state.metric_details["mean_dice"].shape), (4, 2)) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_shape_mismatch(self, input_params, _expected): From 8036dc08ec062be632304c2ade7ff86ff12b080f Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 25 Jan 2021 19:55:17 +0800 Subject: [PATCH 10/39] [DLMED] add metrics report Signed-off-by: Nic Ma --- monai/handlers/__init__.py | 2 +- monai/handlers/iteration_metric.py | 12 ++--- monai/handlers/metrics_saver.py | 86 ++++++++++++++++++++++++++---- monai/handlers/utils.py | 32 +++++++---- 4 files changed, 103 insertions(+), 29 deletions(-) diff --git a/monai/handlers/__init__.py b/monai/handlers/__init__.py index a873cd8b15..260b236d7b 100644 --- a/monai/handlers/__init__.py +++ b/monai/handlers/__init__.py @@ -24,5 +24,5 @@ from .stats_handler import StatsHandler from .surface_distance import SurfaceDistance from .tensorboard_handlers import TensorBoardImageHandler, TensorBoardStatsHandler -from .utils import all_gather, stopping_fn_from_loss, stopping_fn_from_metric +from .utils import evenly_divisible_all_gather, stopping_fn_from_loss, stopping_fn_from_metric from .validation_handler import ValidationHandler diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 62660920d9..5939fa69b7 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -15,6 +15,7 @@ from monai.metrics import do_metric_reduction from monai.utils import MetricReduction, exact_version, optional_import +from monai.handlers.utils import evenly_divisible_all_gather NotComputableError, _ = optional_import("ignite.exceptions", "0.4.2", exact_version, "NotComputableError") idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") @@ -90,15 +91,8 @@ def compute(self) -> Any: ws = idist.get_world_size() if ws > 1 and not self._is_reduced: - # make sure the _scores is evenly-divisible on multi-GPUs - length = 
_scores.shape[0] - max_len = max(idist.all_gather(length)).item() - if length < max_len: - size = [max_len - length] + list(_scores.shape[1:]) - _scores = torch.cat([_scores, _scores.new_full(size, float("NaN"))], dim=0) - # all gather across all processes - _scores = idist.all_gather(_scores) + _scores = evenly_divisible_all_gather(data=_scores, pad_dim=0) self._is_reduced = True # save score of every image into engine.state for other components @@ -134,5 +128,5 @@ def attach(self, engine: Engine, name: str, usage: Union[str, MetricUsage] = Epo # FIXME: record engine for communication, ignite will support it in the future version soon self.engine = engine self.name = name - if self.save_details and getattr(engine.state, "metric_details", None) is None: + if self.save_details and not hasattr(engine.state, "metric_details"): setattr(engine.state, "metric_details", dict()) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index a30b58a814..4d21a9b07e 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -9,13 +9,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Sequence, Optional, Callable - +from typing import TYPE_CHECKING, Sequence, Optional, Callable, Union +import os import torch -from monai.utils import exact_version, optional_import - +from monai.utils import exact_version, optional_import, ensure_tuple +from monai.utils.module import get_torch_version_tuple Events, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Events") +idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") if TYPE_CHECKING: from ignite.engine import Engine else: @@ -26,30 +27,97 @@ class MetricsSaver: """ ignite handler to save metrics values and details into expected files. + Args: + save_dir: directory to save the metrics and metric details. + metrics: expected final metrics to save into files, can be: None, "*" or list of strings. + None - don't save any metrics into files. + "*" - save all the existing metrics in `engine.state.metrics` dict into separate files. + list of strings - specify the expected metrics to save. + metric_details: expected metric details to save into files, for example: mean dice + of every channel of every image in the validation dataset. + the data in `engine.state.details` must contain 2 dims: (batch, channel). + it can be: None, "*" or list of strings. + None - don't save any metrics into files. + "*" - save all the existing metrics in `engine.state.metric_details` dict into separate files. + list of strings - specify the expected metrics to save. + batch_transform: callable function to extract the meta_dict from input batch data if saving metric details. + used to extract filenames from input dict data. + compute_summary: whether to compute a summary report against all the images. + save_rank: only the handler on specified rank will save to files in multi-gpus validation, default to 0. 
+
+    """
+
+    def __init__(
+        self,
+        save_dir: str,
+        metrics: Optional[Union[str, Sequence[str]]] = None,
+        metric_details: Optional[Union[str, Sequence[str]]] = None,
+        batch_transform: Callable = lambda x: x,
+        compute_summary: bool = False,
+        save_rank: int = 0,
+    ) -> None:
+        self.save_dir = save_dir
+        self.metrics = ensure_tuple(metrics) if metrics is not None else None
+        self.metric_details = ensure_tuple(metric_details) if metric_details is not None else None
+        self.batch_transform = batch_transform
+        self.compute_summary = compute_summary
+        self.save_rank = save_rank
+        self._filenames = None
 
     def attach(self, engine: Engine) -> None:
         """
         Args:
             engine: Ignite Engine, it can be a trainer, validator or evaluator.
         """
+        engine.add_event_handler(Events.STARTED, self._started)
+        engine.add_event_handler(Events.ITERATION_COMPLETED, self._get_filenames)
         engine.add_event_handler(Events.EPOCH_COMPLETED, self)
 
+    def _started(self, engine: Engine) -> None:
+        self._filenames = list()
+
+    def _get_filenames(self, engine: Engine) -> None:
+        if self.metric_details is not None:
+            self._filenames += self.batch_transform(engine.state.batch)["filename_or_obj"]
+
     def __call__(self, engine: Engine) -> None:
         """
         Args:
             engine: Ignite Engine, it can be a trainer, validator or evaluator.
         """
-        pass
+        if not os.path.exists(self.save_dir):
+            os.makedirs(self.save_dir)
+
+        ws = idist.get_world_size()
+        if self.save_rank >= ws:
+            raise ValueError("target rank is greater than the distributed group size.")
+
+        if self.metrics is not None and len(engine.state.metrics) > 0:
+            if idist.get_rank() == self.save_rank:
+                with open(os.path.join(self.save_dir, "metrics.csv"), "w") as f:
+                    for k, v in engine.state.metrics.items():
+                        if k in self.metrics or "*" in self.metrics:
+                            f.write(k + "\t" + str(v) + "\n")
+
+        if self.metric_details is not None and hasattr(engine.state, "metric_details") \
+            and len(engine.state.metric_details) > 0:
+            _filenames = "\t".join(self._filenames)
+
+            if ws > 1:
+                if get_torch_version_tuple() > (1, 6, 0):
+                    # all gather across all processes
+                    _filenames = "\t".join(idist.all_gather(_filenames))
+                else:
+                    raise RuntimeError(
+                        "MetricsSaver cannot save metric details in distributed mode with PyTorch < 1.7.0."
+                    )
+            if idist.get_rank() == self.save_rank:
+                _filenames = _filenames.split("\t")
+                for k, v in engine.state.metric_details.items():
+                    if k in self.metric_details or "*" in self.metric_details:
+                        with open(os.path.join(self.save_dir, k + ".csv"), "w") as f:
+                            channels = "\t".join(["channel" + str(i) for i in range(v.shape[1])])
+                            f.write("filename" + "\t" + channels + "\n")
+                            for i, image in enumerate(v):
+                                channels = "\t".join([str(i.item()) for i in image])
+                                f.write(_filenames[i] + "\t" + channels + "\n")

From f87c46cb7a8273fd2a4fefd7884a1dc8204424e9 Mon Sep 17 00:00:00 2001
From: Nic Ma
Date: Mon, 25 Jan 2021 22:24:53 +0800
Subject: [PATCH 11/39] [DLMED] add average value to report

Signed-off-by: Nic Ma
---
 monai/handlers/metrics_saver.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py
index 4d21a9b07e..7cdad972b3 100644
--- a/monai/handlers/metrics_saver.py
+++ b/monai/handlers/metrics_saver.py
@@ -35,7 +35,7 @@ class MetricsSaver:
         list of strings - specify the expected metrics to save.
         metric_details: expected metric details to save into files, for example: mean dice
             of every channel of every image in the validation dataset.
-            the data in `engine.state.details` must contain 2 dims: (batch, channel).
+ the data in `engine.state.details` must contain 2 dims: (batch, classes). it can be: None, "*" or list of strings. None - don't save any metrics into files. "*" - save all the existing metrics in `engine.state.metric_details` dict into separate files. @@ -94,6 +94,7 @@ def __call__(self, engine: Engine) -> None: if self.metrics is not None and len(engine.state.metrics) > 0: if idist.get_rank() == self.save_rank: + # only save metrics to file in specified rank with open(os.path.join(self.save_dir, "metrics.csv"), "w") as f: for k, v in engine.state.metrics.items(): if k in self.metrics or "*" in self.metrics: @@ -115,9 +116,14 @@ def __call__(self, engine: Engine) -> None: _filenames = _filenames.split("\t") for k, v in engine.state.metric_details.items(): if k in self.metric_details or "*" in self.metric_details: - with open(os.path.join(self.save_dir, k + ".csv"), "w") as f: - channels = "\t".join(["channel" + str(i) for i in range(v.shape[1])]) - f.write("filename" + "\t" + channels + "\n") + nans = torch.isnan(v) + not_nans = (~nans).float() + average = list() + with open(os.path.join(self.save_dir, k + "_raw.csv"), "w") as f: + classes = "\t".join(["class" + str(i) for i in range(v.shape[1])]) + f.write("filename\t" + classes + "\taverage\n") for i, image in enumerate(v): - channels = "\t".join([str(i.item()) for i in image]) - f.write(_filenames[i] + "\t" + channels + "\n") + ave = image.nansum() / not_nans[i].sum() + average.append(ave) + classes = "\t".join([str(i.item()) for i in image]) + f.write(_filenames[i] + "\t" + classes + "\t" + str(ave.item()) + "\n") From f247d6946dd8c603892ae3d712d71d570a2dd3f6 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 25 Jan 2021 23:46:07 +0800 Subject: [PATCH 12/39] [DLMED] add summary report Signed-off-by: Nic Ma --- monai/handlers/metrics_saver.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 7cdad972b3..0a274f20da 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -12,6 +12,7 @@ from typing import TYPE_CHECKING, Sequence, Optional, Callable, Union import os import torch +import numpy as np from monai.utils import exact_version, optional_import, ensure_tuple from monai.utils.module import get_torch_version_tuple @@ -98,7 +99,7 @@ def __call__(self, engine: Engine) -> None: with open(os.path.join(self.save_dir, "metrics.csv"), "w") as f: for k, v in engine.state.metrics.items(): if k in self.metrics or "*" in self.metrics: - f.write(k + "\t" + str(v) + "\n") + f.write(f"f{k}\t{str(v)}\n") if self.metric_details is not None and hasattr(engine.state, "metric_details") \ and len(engine.state.metric_details) > 0: @@ -116,14 +117,18 @@ def __call__(self, engine: Engine) -> None: _filenames = _filenames.split("\t") for k, v in engine.state.metric_details.items(): if k in self.metric_details or "*" in self.metric_details: - nans = torch.isnan(v) - not_nans = (~nans).float() - average = list() + v = v.cpu().numpy() + v = np.concatenate([v, np.nanmean(v, axis=1, keepdims=True)], axis=1) with open(os.path.join(self.save_dir, k + "_raw.csv"), "w") as f: - classes = "\t".join(["class" + str(i) for i in range(v.shape[1])]) - f.write("filename\t" + classes + "\taverage\n") + labels = "\t".join(["class" + str(i) for i in range(v.shape[1] - 1)]) + "\taverage" + f.write(f"filename\t{labels}\n") for i, image in enumerate(v): - ave = image.nansum() / not_nans[i].sum() - average.append(ave) classes = 
"\t".join([str(i.item()) for i in image]) - f.write(_filenames[i] + "\t" + classes + "\t" + str(ave.item()) + "\n") + f.write(f"{_filenames[i]}\t{classes}\n") + + if self.compute_summary: + labels = labels.split("\t") + with open(os.path.join(self.save_dir, k + "_summary.csv"), "w") as f: + f.write("class\tmean\tmedian\tmax\tmin\t90percent\tstd\n") + for i, d in enumerate(v.transpose()): + f.write(f"{labels[i]}\t{np.nanmean(d):.4f}\t{np.nanmedian(d):.4f}\t{np.nanmax(d):.4f}\t{np.nanmin(d):.4f}\t{np.nanpercentile(d, 10):.4f}\t{np.nanstd(d):.4f}\n") From 423f72477516976c8a77472f81e5004d0f20a9f5 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Mon, 25 Jan 2021 23:53:49 +0800 Subject: [PATCH 13/39] [DLMED] add docs Signed-off-by: Nic Ma --- docs/source/handlers.rst | 7 +++++++ monai/handlers/__init__.py | 1 + 2 files changed, 8 insertions(+) diff --git a/docs/source/handlers.rst b/docs/source/handlers.rst index d1ce257cb7..81d28fb4ac 100644 --- a/docs/source/handlers.rst +++ b/docs/source/handlers.rst @@ -16,6 +16,13 @@ Model checkpoint saver .. autoclass:: CheckpointSaver :members: + +Metrics saver +------------- +.. autoclass:: MetricsSaver + :members: + + CSV saver --------- .. autoclass:: ClassificationSaver diff --git a/monai/handlers/__init__.py b/monai/handlers/__init__.py index 260b236d7b..36ed534d7a 100644 --- a/monai/handlers/__init__.py +++ b/monai/handlers/__init__.py @@ -18,6 +18,7 @@ from .lr_schedule_handler import LrScheduleHandler from .mean_dice import MeanDice from .metric_logger import MetricLogger +from .metrics_saver import MetricsSaver from .roc_auc import ROCAUC from .segmentation_saver import SegmentationSaver from .smartcache_handler import SmartCacheHandler From ae89aa3d4f28ad11369eb76ce5ca2a0bda436289 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Mon, 25 Jan 2021 15:57:53 +0000 Subject: [PATCH 14/39] [MONAI] python code formatting Signed-off-by: monai-bot --- monai/handlers/iteration_metric.py | 6 +++--- monai/handlers/metrics_saver.py | 19 +++++++++++++------ monai/handlers/utils.py | 3 ++- tests/test_handler_mean_dice.py | 4 +++- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 5939fa69b7..5280fd8866 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -13,9 +13,9 @@ import torch +from monai.handlers.utils import evenly_divisible_all_gather from monai.metrics import do_metric_reduction from monai.utils import MetricReduction, exact_version, optional_import -from monai.handlers.utils import evenly_divisible_all_gather NotComputableError, _ = optional_import("ignite.exceptions", "0.4.2", exact_version, "NotComputableError") idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") @@ -23,8 +23,7 @@ reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "reinit__is_reduced") if TYPE_CHECKING: from ignite.engine import Engine - from ignite.metrics import MetricUsage - from ignite.metrics import EpochWise + from ignite.metrics import EpochWise, MetricUsage else: Engine, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Engine") MetricUsage, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "MetricUsage") @@ -46,6 +45,7 @@ class IterationMetric(Metric): # type: ignore[valid-type, misc] # due to option if True, will save to `engine.state.metric_details` dict with the metric name as key. 
""" + def __init__( self, metric_fn: Callable, diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 0a274f20da..605064c663 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -9,13 +9,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Sequence, Optional, Callable, Union import os -import torch +from typing import TYPE_CHECKING, Callable, Optional, Sequence, Union + import numpy as np +import torch -from monai.utils import exact_version, optional_import, ensure_tuple +from monai.utils import ensure_tuple, exact_version, optional_import from monai.utils.module import get_torch_version_tuple + Events, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Events") idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") if TYPE_CHECKING: @@ -101,8 +103,11 @@ def __call__(self, engine: Engine) -> None: if k in self.metrics or "*" in self.metrics: f.write(f"f{k}\t{str(v)}\n") - if self.metric_details is not None and hasattr(engine.state, "metric_details") \ - and len(engine.state.metric_details) > 0: + if ( + self.metric_details is not None + and hasattr(engine.state, "metric_details") + and len(engine.state.metric_details) > 0 + ): _filenames = "\t".join(self._filenames) if ws > 1: @@ -131,4 +136,6 @@ def __call__(self, engine: Engine) -> None: with open(os.path.join(self.save_dir, k + "_summary.csv"), "w") as f: f.write("class\tmean\tmedian\tmax\tmin\t90percent\tstd\n") for i, d in enumerate(v.transpose()): - f.write(f"{labels[i]}\t{np.nanmean(d):.4f}\t{np.nanmedian(d):.4f}\t{np.nanmax(d):.4f}\t{np.nanmin(d):.4f}\t{np.nanpercentile(d, 10):.4f}\t{np.nanstd(d):.4f}\n") + f.write( + f"{labels[i]}\t{np.nanmean(d):.4f}\t{np.nanmedian(d):.4f}\t{np.nanmax(d):.4f}\t{np.nanmin(d):.4f}\t{np.nanpercentile(d, 10):.4f}\t{np.nanstd(d):.4f}\n" + ) diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 390dfaa519..013d9b86a3 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -15,6 +15,7 @@ import torch.distributed as dist from monai.utils import exact_version, optional_import + idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") if TYPE_CHECKING: from ignite.engine import Engine @@ -67,4 +68,4 @@ def evenly_divisible_all_gather(data: torch.Tensor, pad_dim: int = 0): # all gather across all processes data = idist.all_gather(data) # delete the padding NaN items - return torch.cat([data[i * max_len: i * max_len + l, ...] for i, l in enumerate(all_lens)], dim=0) + return torch.cat([data[i * max_len : i * max_len + l, ...] 
for i, l in enumerate(all_lens)], dim=0) diff --git a/tests/test_handler_mean_dice.py b/tests/test_handler_mean_dice.py index 5dfd1b29e9..15379f68c4 100644 --- a/tests/test_handler_mean_dice.py +++ b/tests/test_handler_mean_dice.py @@ -12,8 +12,9 @@ import unittest import torch -from parameterized import parameterized from ignite.engine import Engine, Events +from parameterized import parameterized + from monai.handlers import MeanDice TEST_CASE_1 = [{"include_background": True, "save_details": True}, 0.75] @@ -27,6 +28,7 @@ class TestHandlerMeanDice(unittest.TestCase): def test_compute(self, input_params, expected_avg): dice_metric = MeanDice(**input_params) # set up engine + def _val_func(engine, batch): pass From 35bf113aff5ad19203f4e8641e792134978d495d Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Tue, 26 Jan 2021 00:36:29 +0800 Subject: [PATCH 15/39] [DLMED] fix flake8 issue Signed-off-by: Nic Ma --- monai/handlers/iteration_metric.py | 15 +++++---------- monai/handlers/metrics_saver.py | 21 +++++++++++---------- monai/handlers/utils.py | 1 - tests/test_handler_mean_dice.py | 2 +- 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 5280fd8866..e4bc210230 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Any, Callable, List, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, List, Optional, Sequence import torch @@ -17,17 +17,13 @@ from monai.metrics import do_metric_reduction from monai.utils import MetricReduction, exact_version, optional_import -NotComputableError, _ = optional_import("ignite.exceptions", "0.4.2", exact_version, "NotComputableError") idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") Metric, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "Metric") reinit__is_reduced, _ = optional_import("ignite.metrics.metric", "0.4.2", exact_version, "reinit__is_reduced") if TYPE_CHECKING: from ignite.engine import Engine - from ignite.metrics import EpochWise, MetricUsage else: Engine, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Engine") - MetricUsage, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "MetricUsage") - EpochWise, _ = optional_import("ignite.metrics", "0.4.2", exact_version, "EpochWise") class IterationMetric(Metric): # type: ignore[valid-type, misc] # due to optional_import @@ -113,20 +109,19 @@ def compute(self) -> Any: def _reduce(self, scores) -> Any: return do_metric_reduction(scores, MetricReduction.MEAN)[0] - def attach(self, engine: Engine, name: str, usage: Union[str, MetricUsage] = EpochWise()) -> None: + def attach(self, engine: Engine, name: str) -> None: """ Attaches current metric to provided engine. On the end of engine's run, `engine.state.metrics` dictionary will contain computed metric's value under provided name. Args: engine: the engine to which the metric must be attached. - name: the name of the metric to attach - usage: the usage of the metric. + name: the name of the metric to attach. 
""" - super().attach(engine=engine, name=name, usage=usage) + super().attach(engine=engine, name=name) # FIXME: record engine for communication, ignite will support it in the future version soon self.engine = engine self.name = name if self.save_details and not hasattr(engine.state, "metric_details"): - setattr(engine.state, "metric_details", dict()) + engine.state.metric_details = dict() diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 605064c663..76338865df 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -10,10 +10,9 @@ # limitations under the License. import os -from typing import TYPE_CHECKING, Callable, Optional, Sequence, Union +from typing import TYPE_CHECKING, Callable, Optional, Sequence, List, Union import numpy as np -import torch from monai.utils import ensure_tuple, exact_version, optional_import from monai.utils.module import get_torch_version_tuple @@ -65,7 +64,7 @@ def __init__( self.batch_transform = batch_transform self.compute_summary = compute_summary self.save_rank = save_rank - self._filenames = None + self._filenames: List[str] = list() def attach(self, engine: Engine) -> None: """ @@ -101,7 +100,7 @@ def __call__(self, engine: Engine) -> None: with open(os.path.join(self.save_dir, "metrics.csv"), "w") as f: for k, v in engine.state.metrics.items(): if k in self.metrics or "*" in self.metrics: - f.write(f"f{k}\t{str(v)}\n") + f.write(f"{k}\t{str(v)}\n") if ( self.metric_details is not None @@ -119,23 +118,25 @@ def __call__(self, engine: Engine) -> None: "MetricsSaver can not save metric details in distributed mode with PyTorch < 1.7.0." ) if idist.get_rank() == self.save_rank: - _filenames = _filenames.split("\t") + _files = _filenames.split("\t") for k, v in engine.state.metric_details.items(): if k in self.metric_details or "*" in self.metric_details: v = v.cpu().numpy() v = np.concatenate([v, np.nanmean(v, axis=1, keepdims=True)], axis=1) with open(os.path.join(self.save_dir, k + "_raw.csv"), "w") as f: - labels = "\t".join(["class" + str(i) for i in range(v.shape[1] - 1)]) + "\taverage" - f.write(f"filename\t{labels}\n") + row_labels = "\t".join(["class" + str(i) for i in range(v.shape[1] - 1)]) + "\taverage" + f.write(f"filename\t{row_labels}\n") for i, image in enumerate(v): classes = "\t".join([str(i.item()) for i in image]) - f.write(f"{_filenames[i]}\t{classes}\n") + f.write(f"{_files[i]}\t{classes}\n") if self.compute_summary: - labels = labels.split("\t") + col_labels = row_labels.split("\t") with open(os.path.join(self.save_dir, k + "_summary.csv"), "w") as f: f.write("class\tmean\tmedian\tmax\tmin\t90percent\tstd\n") for i, d in enumerate(v.transpose()): f.write( - f"{labels[i]}\t{np.nanmean(d):.4f}\t{np.nanmedian(d):.4f}\t{np.nanmax(d):.4f}\t{np.nanmin(d):.4f}\t{np.nanpercentile(d, 10):.4f}\t{np.nanstd(d):.4f}\n" + f"{col_labels[i]}\t{np.nanmean(d):.4f}\t{np.nanmedian(d):.4f}\t" + f"{np.nanmax(d):.4f}\t{np.nanmin(d):.4f}\t" + f"{np.nanpercentile(d, 10):.4f}\t{np.nanstd(d):.4f}\n" ) diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 013d9b86a3..fc6aa4d74a 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -12,7 +12,6 @@ from typing import TYPE_CHECKING, Any, Callable import torch -import torch.distributed as dist from monai.utils import exact_version, optional_import diff --git a/tests/test_handler_mean_dice.py b/tests/test_handler_mean_dice.py index 15379f68c4..3014d662c6 100644 --- a/tests/test_handler_mean_dice.py +++ 
b/tests/test_handler_mean_dice.py @@ -12,7 +12,7 @@ import unittest import torch -from ignite.engine import Engine, Events +from ignite.engine import Engine from parameterized import parameterized from monai.handlers import MeanDice From 432177e0913ea169286f3754c5e8c6f0ed94039e Mon Sep 17 00:00:00 2001 From: monai-bot Date: Mon, 25 Jan 2021 17:15:55 +0000 Subject: [PATCH 16/39] [MONAI] python code formatting Signed-off-by: monai-bot --- monai/handlers/metrics_saver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 76338865df..b21821da04 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -10,7 +10,7 @@ # limitations under the License. import os -from typing import TYPE_CHECKING, Callable, Optional, Sequence, List, Union +from typing import TYPE_CHECKING, Callable, List, Optional, Sequence, Union import numpy as np From be5972adeb41eae3a06cda87982056093a784d70 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Tue, 26 Jan 2021 16:40:57 +0800 Subject: [PATCH 17/39] [DLMED] add unit tests and distributed tests Signed-off-by: Nic Ma --- monai/handlers/utils.py | 7 +- tests/test_handler_metrics_saver.py | 83 ++++++++++++++++++ tests/test_handler_metrics_saver_dist.py | 104 +++++++++++++++++++++++ 3 files changed, 190 insertions(+), 4 deletions(-) create mode 100644 tests/test_handler_metrics_saver.py create mode 100644 tests/test_handler_metrics_saver_dist.py diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index fc6aa4d74a..aaae2ee6ef 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -46,19 +46,18 @@ def stopping_fn(engine: Engine): return stopping_fn -def evenly_divisible_all_gather(data: torch.Tensor, pad_dim: int = 0): +def evenly_divisible_all_gather(data: torch.Tensor): """ - Utility function for distributed data parallel to pad tensor to make it evenly divisible for all_gather. + Utility function for distributed data parallel to pad at first dim to make it evenly divisible and all_gather. Args: data: source tensor to pad and execute all_gather in distributed data parallel. - pad_dim: which dimension to pad NaN data to make it evenly divisible, default to dim 0. """ if idist.get_world_size() <= 1: return data # make sure the data is evenly-divisible on multi-GPUs - length = data.shape[pad_dim] + length = data.shape[0] all_lens = idist.all_gather(length) max_len = max(all_lens).item() if length < max_len: diff --git a/tests/test_handler_metrics_saver.py b/tests/test_handler_metrics_saver.py new file mode 100644 index 0000000000..d07a92d772 --- /dev/null +++ b/tests/test_handler_metrics_saver.py @@ -0,0 +1,83 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import os +import csv +import tempfile +import torch +from ignite.engine import Engine, Events + +from monai.handlers import MetricsSaver + + +class TestHandlerMetricsSaver(unittest.TestCase): + def test_content(self): + with tempfile.TemporaryDirectory() as tempdir: + metrics_saver = MetricsSaver( + save_dir=tempdir, + metrics=["metric1", "metric2"], + metric_details=["metric3", "metric4"], + batch_transform=lambda x: x["image_meta_dict"], + compute_summary=True, + ) + # set up engine + data = [ + {"image_meta_dict": {"filename_or_obj": ["filepath1"]}}, + {"image_meta_dict": {"filename_or_obj": ["filepath2"]}}, + ] + + def _val_func(engine, batch): + pass + + engine = Engine(_val_func) + + @engine.on(Events.EPOCH_COMPLETED) + def _save_metrics(engine): + engine.state.metrics = {"metric1": 1, "metric2": 2} + engine.state.metric_details = { + "metric3": torch.tensor([[1, 2], [2, 3]]), + "metric4": torch.tensor([[5, 6], [7, 8]]), + } + + metrics_saver.attach(engine) + engine.run(data, max_epochs=1) + + # check the metrics.csv and content + self.assertTrue(os.path.exists(os.path.join(tempdir, "metrics.csv"))) + with open(os.path.join(tempdir, "metrics.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + self.assertEqual(row, [f"metric{i + 1}\t{i + 1}"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_raw.csv"))) + # check the metric_raw.csv and content + with open(os.path.join(tempdir, "metric3_raw.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + if i > 0: + self.assertEqual(row, [f"filepath{i}\t{float(i)}\t{float(i + 1)}\t{i + 0.5}"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_summary.csv"))) + # check the metric_summary.csv and content + with open(os.path.join(tempdir, "metric3_summary.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + if i == 1: + self.assertEqual(row, ["class0\t1.5000\t1.5000\t2.0000\t1.0000\t1.1000\t0.5000"]) + elif i == 2: + self.assertEqual(row, ["class1\t2.5000\t2.5000\t3.0000\t2.0000\t2.1000\t0.5000"]) + elif i == 3: + self.assertEqual(row, ["average\t2.0000\t2.0000\t2.5000\t1.5000\t1.6000\t0.5000"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_raw.csv"))) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_summary.csv"))) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_handler_metrics_saver_dist.py b/tests/test_handler_metrics_saver_dist.py new file mode 100644 index 0000000000..a499fce8b3 --- /dev/null +++ b/tests/test_handler_metrics_saver_dist.py @@ -0,0 +1,104 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import unittest +import os +import csv +import tempfile +import torch +import torch.distributed as dist +from ignite.engine import Engine, Events + +from monai.handlers import MetricsSaver +from tests.utils import DistCall, DistTestCase + + +class DistributedMetricsSaver(DistTestCase): + @DistCall(nnodes=1, nproc_per_node=2) + def test_content(self): + self._run() + + def _run(self): + device = f"cuda:{dist.get_rank()}" if torch.cuda.is_available() else "cpu" + with tempfile.TemporaryDirectory() as tempdir: + metrics_saver = MetricsSaver( + save_dir=tempdir, + metrics=["metric1", "metric2"], + metric_details=["metric3", "metric4"], + batch_transform=lambda x: x["image_meta_dict"], + compute_summary=True, + ) + + def _val_func(engine, batch): + pass + + engine = Engine(_val_func) + + if dist.get_rank() == 0: + data = [{"image_meta_dict": {"filename_or_obj": ["filepath1"]}}] + + @engine.on(Events.EPOCH_COMPLETED) + def _save_metrics(engine): + engine.state.metrics = {"metric1": 1, "metric2": 2} + engine.state.metric_details = { + "metric3": torch.tensor([[1, 2]]), "metric4": torch.tensor([[5, 6]]), + } + + if dist.get_rank() == 1: + # different ranks have different data length + data = [ + {"image_meta_dict": {"filename_or_obj": ["filepath2"]}}, + {"image_meta_dict": {"filename_or_obj": ["filepath3"]}}, + ] + + @engine.on(Events.EPOCH_COMPLETED) + def _save_metrics(engine): + engine.state.metrics = {"metric1": 1, "metric2": 2} + engine.state.metric_details = { + "metric3": torch.tensor([[2, 3], [3, 4]]), + "metric4": torch.tensor([[6, 7], [7, 8]]), + } + + metrics_saver.attach(engine) + engine.run(data, max_epochs=1) + + if dist.get_rank() == 0: + # check the metrics.csv and content + self.assertTrue(os.path.exists(os.path.join(tempdir, "metrics.csv"))) + with open(os.path.join(tempdir, "metrics.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + self.assertEqual(row, [f"metric{i + 1}\t{i + 1}"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_raw.csv"))) + # check the metric_raw.csv and content + with open(os.path.join(tempdir, "metric3_raw.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + if i > 0: + self.assertEqual(row, [f"filepath{i}\t{float(i)}\t{float(i + 1)}\t{i + 0.5}"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_summary.csv"))) + # check the metric_summary.csv and content + with open(os.path.join(tempdir, "metric3_summary.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + if i == 1: + self.assertEqual(row, ["class0\t1.0000\t1.0000\t1.0000\t1.0000\t1.0000\t0.0000"]) + elif i == 2: + self.assertEqual(row, ["class1\t2.0000\t2.0000\t2.0000\t2.0000\t2.0000\t0.0000"]) + elif i == 3: + self.assertEqual(row, ["average\t1.5000\t1.5000\t1.5000\t1.5000\t1.5000\t0.0000"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_raw.csv"))) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_summary.csv"))) + + +if __name__ == "__main__": + unittest.main() From 982aa5a010a9a5b28aee7728380c231b264cd273 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Tue, 26 Jan 2021 08:45:21 +0000 Subject: [PATCH 18/39] [MONAI] python code formatting Signed-off-by: monai-bot --- tests/test_handler_metrics_saver.py | 5 +++-- tests/test_handler_metrics_saver_dist.py | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/test_handler_metrics_saver.py b/tests/test_handler_metrics_saver.py index d07a92d772..8bfbe21aec 100644 --- a/tests/test_handler_metrics_saver.py 
+++ b/tests/test_handler_metrics_saver.py @@ -9,10 +9,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest -import os import csv +import os import tempfile +import unittest + import torch from ignite.engine import Engine, Events diff --git a/tests/test_handler_metrics_saver_dist.py b/tests/test_handler_metrics_saver_dist.py index a499fce8b3..c1f292dfb3 100644 --- a/tests/test_handler_metrics_saver_dist.py +++ b/tests/test_handler_metrics_saver_dist.py @@ -10,10 +10,11 @@ # limitations under the License. -import unittest -import os import csv +import os import tempfile +import unittest + import torch import torch.distributed as dist from ignite.engine import Engine, Events @@ -50,7 +51,8 @@ def _val_func(engine, batch): def _save_metrics(engine): engine.state.metrics = {"metric1": 1, "metric2": 2} engine.state.metric_details = { - "metric3": torch.tensor([[1, 2]]), "metric4": torch.tensor([[5, 6]]), + "metric3": torch.tensor([[1, 2]]), + "metric4": torch.tensor([[5, 6]]), } if dist.get_rank() == 1: From 8cbbbc46b15bbe912a36f98f4ec0d9f31629d568 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Tue, 26 Jan 2021 16:49:39 +0800 Subject: [PATCH 19/39] [DLMED] fix flake8 issue Signed-off-by: Nic Ma --- tests/test_handler_metrics_saver_dist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_handler_metrics_saver_dist.py b/tests/test_handler_metrics_saver_dist.py index c1f292dfb3..eb4eee7717 100644 --- a/tests/test_handler_metrics_saver_dist.py +++ b/tests/test_handler_metrics_saver_dist.py @@ -48,7 +48,7 @@ def _val_func(engine, batch): data = [{"image_meta_dict": {"filename_or_obj": ["filepath1"]}}] @engine.on(Events.EPOCH_COMPLETED) - def _save_metrics(engine): + def _save_metrics0(engine): engine.state.metrics = {"metric1": 1, "metric2": 2} engine.state.metric_details = { "metric3": torch.tensor([[1, 2]]), @@ -63,7 +63,7 @@ def _save_metrics(engine): ] @engine.on(Events.EPOCH_COMPLETED) - def _save_metrics(engine): + def _save_metrics1(engine): engine.state.metrics = {"metric1": 1, "metric2": 2} engine.state.metric_details = { "metric3": torch.tensor([[2, 3], [3, 4]]), From 46a0abfd139610ef9aac8933af44bec56b748586 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Tue, 26 Jan 2021 17:38:44 +0800 Subject: [PATCH 20/39] [DLMED] fix typo Signed-off-by: Nic Ma --- monai/handlers/iteration_metric.py | 2 +- monai/handlers/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index e4bc210230..26062893e4 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -88,7 +88,7 @@ def compute(self) -> Any: ws = idist.get_world_size() if ws > 1 and not self._is_reduced: # all gather across all processes - _scores = evenly_divisible_all_gather(data=_scores, pad_dim=0) + _scores = evenly_divisible_all_gather(data=_scores) self._is_reduced = True # save score of every image into engine.state for other components diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index aaae2ee6ef..013e21a68c 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -46,7 +46,7 @@ def stopping_fn(engine: Engine): return stopping_fn -def evenly_divisible_all_gather(data: torch.Tensor): +def evenly_divisible_all_gather(data: torch.Tensor) -> torch.Tensor: """ Utility function for distributed data parallel to pad at first dim to make it evenly divisible and all_gather. 
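
For orientation, the pad-then-slice behaviour of `evenly_divisible_all_gather` can be sketched without a process group. The snippet below is illustrative only and is not part of any patch in this series; the two tensors simulate two ranks of unequal batch length, mirroring the distributed test added later in the series:

    import torch

    # stand-ins for the per-rank tensors (no torch.distributed group needed here)
    rank_data = [torch.tensor([[1.0, 2.0], [3.0, 4.0]]), torch.tensor([[5.0, 6.0]])]
    lengths = [d.shape[0] for d in rank_data]
    max_len = max(lengths)

    # pad each tensor at dim 0 so that all_gather would see equal shapes
    padded = [
        torch.cat([d, d.new_full([max_len - d.shape[0]] + list(d.shape[1:]), float("nan"))], dim=0)
        for d in rank_data
    ]

    # stand-in for the concatenated result of idist.all_gather across ranks
    gathered = torch.cat(padded, dim=0)

    # slice the padding back out, keeping each rank's rows in rank order
    result = torch.cat([gathered[i * max_len : i * max_len + l] for i, l in enumerate(lengths)], dim=0)
    print(result)  # rows from both ranks without padding: [[1., 2.], [3., 4.], [5., 6.]]

The final slicing step is what restores each rank's original row count, which is exactly what the distributed test added later in this series asserts.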
From 645fcc13f727c3185e6b8c2cae26850290d5f9e1 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Tue, 26 Jan 2021 18:31:22 +0800 Subject: [PATCH 21/39] [DLMED] remove from min_tests Signed-off-by: Nic Ma --- tests/min_tests.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/min_tests.py b/tests/min_tests.py index 9a2dc0f05f..f9f63f4ce0 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -100,6 +100,8 @@ def run_testsuit(): "test_occlusion_sensitivity", "test_torchvision", "test_torchvisiond", + "test_handler_metrics_saver", + "test_handler_metrics_saver_dist", ] assert sorted(exclude_cases) == sorted(set(exclude_cases)), f"Duplicated items in {exclude_cases}" From 3ab555fc245d20e5b13068e4b2a4543dc805a46f Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Tue, 26 Jan 2021 19:40:19 +0800 Subject: [PATCH 22/39] [DLMED] remove useless var Signed-off-by: Nic Ma --- tests/test_handler_metrics_saver_dist.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_handler_metrics_saver_dist.py b/tests/test_handler_metrics_saver_dist.py index eb4eee7717..9914e3c425 100644 --- a/tests/test_handler_metrics_saver_dist.py +++ b/tests/test_handler_metrics_saver_dist.py @@ -29,7 +29,6 @@ def test_content(self): self._run() def _run(self): - device = f"cuda:{dist.get_rank()}" if torch.cuda.is_available() else "cpu" with tempfile.TemporaryDirectory() as tempdir: metrics_saver = MetricsSaver( save_dir=tempdir, From 3ba905d361fa5fe1af45ce7124942d204cd9f4f1 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Tue, 26 Jan 2021 20:42:34 +0800 Subject: [PATCH 23/39] [DLMED] add skip flag Signed-off-by: Nic Ma --- tests/test_handler_metrics_saver_dist.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_handler_metrics_saver_dist.py b/tests/test_handler_metrics_saver_dist.py index 9914e3c425..58fa5bd3d7 100644 --- a/tests/test_handler_metrics_saver_dist.py +++ b/tests/test_handler_metrics_saver_dist.py @@ -20,9 +20,10 @@ from ignite.engine import Engine, Events from monai.handlers import MetricsSaver -from tests.utils import DistCall, DistTestCase +from tests.utils import DistCall, DistTestCase, SkipIfBeforePyTorchVersion +@SkipIfBeforePyTorchVersion((1, 7)) class DistributedMetricsSaver(DistTestCase): @DistCall(nnodes=1, nproc_per_node=2) def test_content(self): From 17df30edba26a66379f0fae69e19ae455dcfc4ed Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Wed, 27 Jan 2021 12:55:34 +0800 Subject: [PATCH 24/39] [DLMED] update according to comments Signed-off-by: Nic Ma --- monai/handlers/confusion_matrix.py | 4 +- monai/handlers/hausdorff_distance.py | 4 +- monai/handlers/iteration_metric.py | 16 ++- monai/handlers/mean_dice.py | 4 +- monai/handlers/metrics_saver.py | 87 ++++++++++----- monai/handlers/surface_distance.py | 4 +- .../test_evenly_divisible_all_gather_dist.py | 105 ++++++++++++++++++ tests/test_handler_mean_dice.py | 5 +- tests/test_handler_metrics_saver.py | 8 +- tests/test_handler_metrics_saver_dist.py | 2 +- 10 files changed, 189 insertions(+), 50 deletions(-) create mode 100644 tests/test_evenly_divisible_all_gather_dist.py diff --git a/monai/handlers/confusion_matrix.py b/monai/handlers/confusion_matrix.py index 3f7bfce947..1741aa305a 100644 --- a/monai/handlers/confusion_matrix.py +++ b/monai/handlers/confusion_matrix.py @@ -29,7 +29,7 @@ def __init__( metric_name: str = "hit_rate", output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, - save_details: bool = False, + save_details: bool = True, ) -> None: """ @@ -46,7 +46,7 @@ def __init__( 
output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. save_details: whether to save metric computation details per image, for example: TP/TN/FP/FN of every image. - if True, will save to `engine.state.metric_details` dict with the metric name as key. + default to True, will save to `engine.state.metric_details` dict with the metric name as key. See also: :py:meth:`monai.metrics.confusion_matrix` diff --git a/monai/handlers/hausdorff_distance.py b/monai/handlers/hausdorff_distance.py index 8dec373bdd..7ac52d642a 100644 --- a/monai/handlers/hausdorff_distance.py +++ b/monai/handlers/hausdorff_distance.py @@ -31,7 +31,7 @@ def __init__( directed: bool = False, output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, - save_details: bool = False, + save_details: bool = True, ) -> None: """ @@ -47,7 +47,7 @@ def __init__( output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. save_details: whether to save metric computation details per image, for example: hausdorff distance - of every image. if True, will save to `engine.state.metric_details` dict with the metric name as key. + of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key. """ super().__init__(output_transform, device=device) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 26062893e4..c6157668f8 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -38,7 +38,7 @@ class IterationMetric(Metric): # type: ignore[valid-type, misc] # due to option output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. save_details: whether to save metric computation details per image, for example: mean_dice of every image. - if True, will save to `engine.state.metric_details` dict with the metric name as key. + default to True, will save to `engine.state.metric_details` dict with the metric name as key. 
""" @@ -47,12 +47,14 @@ def __init__( metric_fn: Callable, output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, - save_details: bool = False, + save_details: bool = True, ) -> None: self._is_reduced: bool = False self.metric_fn = metric_fn self.save_details = save_details self._scores: List = [] + self._engine = None + self._name = None super().__init__(output_transform, device=device) @reinit__is_reduced @@ -93,7 +95,9 @@ def compute(self) -> Any: # save score of every image into engine.state for other components if self.save_details: - self.engine.state.metric_details[self.name] = _scores + if self._engine is None or self._name is None: + raise RuntimeError("plesae call the attach() function to connect expected engine first.") + self._engine.state.metric_details[self.name] = _scores result: torch.Tensor = torch.zeros(1) if idist.get_rank() == 0: @@ -121,7 +125,7 @@ def attach(self, engine: Engine, name: str) -> None: """ super().attach(engine=engine, name=name) # FIXME: record engine for communication, ignite will support it in the future version soon - self.engine = engine - self.name = name + self._engine = engine + self._name = name if self.save_details and not hasattr(engine.state, "metric_details"): - engine.state.metric_details = dict() + engine.state.metric_details = {} diff --git a/monai/handlers/mean_dice.py b/monai/handlers/mean_dice.py index 223bc909dc..7decc3ab9b 100644 --- a/monai/handlers/mean_dice.py +++ b/monai/handlers/mean_dice.py @@ -28,7 +28,7 @@ def __init__( include_background: bool = True, output_transform: Callable = lambda x: x, device: Optional[torch.device] = None, - save_details: bool = False, + save_details: bool = True, ) -> None: """ @@ -38,7 +38,7 @@ def __init__( output_transform: transform the ignite.engine.state.output into [y_pred, y] pair. device: device specification in case of distributed computation usage. save_details: whether to save metric computation details per image, for example: mean dice of every image. - if True, will save to `engine.state.metric_details` dict with the metric name as key. + default to True, will save to `engine.state.metric_details` dict with the metric name as key. See also: :py:meth:`monai.metrics.meandice.compute_meandice` diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index b21821da04..7f8c6815d2 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -11,8 +11,9 @@ import os from typing import TYPE_CHECKING, Callable, List, Optional, Sequence, Union - +from collections import OrderedDict import numpy as np +import torch from monai.utils import ensure_tuple, exact_version, optional_import from monai.utils.module import get_torch_version_tuple @@ -35,36 +36,48 @@ class MetricsSaver: None - don't save any metrics into files. "*" - save all the existing metrics in `engine.state.metrics` dict into separate files. list of strings - specify the expected metrics to save. + default to "*" to save all the metrics into `metrics.csv`. metric_details: expected metric details to save into files, for example: mean dice of every channel of every image in the validation dataset. - the data in `engine.state.details` must contain 2 dims: (batch, classes). - it can be: None, "*" or list of strings. + the data in `engine.state.metric_details` must contain at least 2 dims: (batch, classes, ...), + if not, will unsequeeze to 2 dims. + this arg can be: None, "*" or list of strings. None - don't save any metrics into files. 
"*" - save all the existing metrics in `engine.state.metric_details` dict into separate files. list of strings - specify the expected metrics to save. + if not None, every metric will save a separate `{metric name}_raw.csv` file. batch_transform: callable function to extract the meta_dict from input batch data if saving metric details. used to extract filenames from input dict data. - compute_summary: whether to compute a summary report against all the images. + summary_ops: expected computation operations to generate the summary report. + it can be: None, "*" or list of strings. + None - don't generate summary report for every expected metric_details + "*" - generate summary report for every metric_details with all the supported operations. + list of strings - generate summary report for every metric_details with specified operations, they + should be within this list: [`mean`, `median`, `max`, `min`, `90percent`, `std`]. + default to None. save_rank: only the handler on specified rank will save to files in multi-gpus validation, default to 0. + delimiter: the delimiter charactor in CSV file, default to "\t". """ def __init__( self, save_dir: str, - metrics: Optional[Union[str, Sequence[str]]] = None, + metrics: Optional[Union[str, Sequence[str]]] = "*", metric_details: Optional[Union[str, Sequence[str]]] = None, batch_transform: Callable = lambda x: x, - compute_summary: bool = False, + summary_ops: Optional[Union[str, Sequence[str]]] = None, save_rank: int = 0, + delimiter: str = "\t", ) -> None: self.save_dir = save_dir self.metrics = ensure_tuple(metrics) if metrics is not None else None self.metric_details = ensure_tuple(metric_details) if metric_details is not None else None self.batch_transform = batch_transform - self.compute_summary = compute_summary + self.summary_ops = ensure_tuple(summary_ops) if summary_ops is not None else None self.save_rank = save_rank - self._filenames: List[str] = list() + self.deli = delimiter + self._filenames: List[str] = [] def attach(self, engine: Engine) -> None: """ @@ -76,11 +89,12 @@ def attach(self, engine: Engine) -> None: engine.add_event_handler(Events.EPOCH_COMPLETED, self) def _started(self, engine: Engine) -> None: - self._filenames = list() + self._filenames = [] def _get_filenames(self, engine: Engine) -> None: if self.metric_details is not None: - self._filenames += self.batch_transform(engine.state.batch)["filename_or_obj"] + _filenames = list(ensure_tuple(self.batch_transform(engine.state.batch)["filename_or_obj"])) + self._filenames += _filenames def __call__(self, engine: Engine) -> None: """ @@ -100,43 +114,60 @@ def __call__(self, engine: Engine) -> None: with open(os.path.join(self.save_dir, "metrics.csv"), "w") as f: for k, v in engine.state.metrics.items(): if k in self.metrics or "*" in self.metrics: - f.write(f"{k}\t{str(v)}\n") + f.write(f"{k}{self.deli}{str(v)}\n") if ( self.metric_details is not None and hasattr(engine.state, "metric_details") and len(engine.state.metric_details) > 0 ): - _filenames = "\t".join(self._filenames) + _filenames = self.deli.join(self._filenames) if ws > 1: if get_torch_version_tuple() > (1, 6, 0): # all gather across all processes - _filenames = "\t".join(idist.all_gather(_filenames)) + _filenames = self.deli.join(idist.all_gather(_filenames)) else: raise RuntimeError( "MetricsSaver can not save metric details in distributed mode with PyTorch < 1.7.0." 
diff --git a/monai/handlers/surface_distance.py b/monai/handlers/surface_distance.py
index 2f8d65d83a..d3fa69bfce 100644
--- a/monai/handlers/surface_distance.py
+++ b/monai/handlers/surface_distance.py
@@ -30,7 +30,7 @@ def __init__(
        distance_metric: str = "euclidean",
        output_transform: Callable = lambda x: x,
        device: Optional[torch.device] = None,
-        save_details: bool = False,
+        save_details: bool = True,
    ) -> None:
        """
@@ -44,7 +44,7 @@
        output_transform: transform the ignite.engine.state.output into [y_pred, y] pair.
        device: device specification in case of distributed computation usage.
        save_details: whether to save metric computation details per image, for example: surface dice
-            of every image. if True, will save to `engine.state.metric_details` dict with the metric name as key.
+            of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key.

        """
        metric_fn = SurfaceDistanceMetric(
diff --git a/tests/test_evenly_divisible_all_gather_dist.py b/tests/test_evenly_divisible_all_gather_dist.py
new file mode 100644
index 0000000000..a2363f964e
--- /dev/null
+++ b/tests/test_evenly_divisible_all_gather_dist.py
@@ -0,0 +1,105 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import csv +import os +import tempfile +import unittest + +import torch +import torch.distributed as dist +from ignite.engine import Engine, Events + +from monai.handlers import evenly_divisible_all_gather +from tests.utils import DistCall, DistTestCase, SkipIfBeforePyTorchVersion + + +class DistributedEvenlyDivisibleAllGather(DistTestCase): + @DistCall(nnodes=1, nproc_per_node=2) + def test_data(self): + self._run() + + def _run(self): + with tempfile.TemporaryDirectory() as tempdir: + metrics_saver = MetricsSaver( + save_dir=tempdir, + metrics=["metric1", "metric2"], + metric_details=["metric3", "metric4"], + batch_transform=lambda x: x["image_meta_dict"], + summary_ops="*", + ) + + def _val_func(engine, batch): + pass + + engine = Engine(_val_func) + + if dist.get_rank() == 0: + data = [{"image_meta_dict": {"filename_or_obj": ["filepath1"]}}] + + @engine.on(Events.EPOCH_COMPLETED) + def _save_metrics0(engine): + engine.state.metrics = {"metric1": 1, "metric2": 2} + engine.state.metric_details = { + "metric3": torch.tensor([[1, 2]]), + "metric4": torch.tensor([[5, 6]]), + } + + if dist.get_rank() == 1: + # different ranks have different data length + data = [ + {"image_meta_dict": {"filename_or_obj": ["filepath2"]}}, + {"image_meta_dict": {"filename_or_obj": ["filepath3"]}}, + ] + + @engine.on(Events.EPOCH_COMPLETED) + def _save_metrics1(engine): + engine.state.metrics = {"metric1": 1, "metric2": 2} + engine.state.metric_details = { + "metric3": torch.tensor([[2, 3], [3, 4]]), + "metric4": torch.tensor([[6, 7], [7, 8]]), + } + + metrics_saver.attach(engine) + engine.run(data, max_epochs=1) + + if dist.get_rank() == 0: + # check the metrics.csv and content + self.assertTrue(os.path.exists(os.path.join(tempdir, "metrics.csv"))) + with open(os.path.join(tempdir, "metrics.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + self.assertEqual(row, [f"metric{i + 1}\t{i + 1}"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_raw.csv"))) + # check the metric_raw.csv and content + with open(os.path.join(tempdir, "metric3_raw.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + if i > 0: + self.assertEqual(row, [f"filepath{i}\t{float(i)}\t{float(i + 1)}\t{i + 0.5}"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_summary.csv"))) + # check the metric_summary.csv and content + with open(os.path.join(tempdir, "metric3_summary.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + if i == 1: + self.assertEqual(row, ["class0\t1.0000\t1.0000\t1.0000\t1.0000\t1.0000\t0.0000"]) + elif i == 2: + self.assertEqual(row, ["class1\t2.0000\t2.0000\t2.0000\t2.0000\t2.0000\t0.0000"]) + elif i == 3: + self.assertEqual(row, ["average\t1.5000\t1.5000\t1.5000\t1.5000\t1.5000\t0.0000"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_raw.csv"))) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_summary.csv"))) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_handler_mean_dice.py b/tests/test_handler_mean_dice.py index 3014d662c6..c573e418c8 100644 --- 
a/tests/test_handler_mean_dice.py +++ b/tests/test_handler_mean_dice.py @@ -17,7 +17,7 @@ from monai.handlers import MeanDice -TEST_CASE_1 = [{"include_background": True, "save_details": True}, 0.75] +TEST_CASE_1 = [{"include_background": True}, 0.75] TEST_CASE_2 = [{"include_background": False}, 0.66666] @@ -44,8 +44,7 @@ def _val_func(engine, batch): avg_dice = dice_metric.compute() self.assertAlmostEqual(avg_dice, expected_avg, places=4) - if getattr(engine.state, "metric_details", None) is not None: - self.assertTupleEqual(tuple(engine.state.metric_details["mean_dice"].shape), (4, 2)) + self.assertTupleEqual(tuple(engine.state.metric_details["mean_dice"].shape), (4, 2)) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_shape_mismatch(self, input_params, _expected): diff --git a/tests/test_handler_metrics_saver.py b/tests/test_handler_metrics_saver.py index 8bfbe21aec..881528e56c 100644 --- a/tests/test_handler_metrics_saver.py +++ b/tests/test_handler_metrics_saver.py @@ -28,7 +28,7 @@ def test_content(self): metrics=["metric1", "metric2"], metric_details=["metric3", "metric4"], batch_transform=lambda x: x["image_meta_dict"], - compute_summary=True, + summary_ops=["mean", "median", "max", "90percent"], ) # set up engine data = [ @@ -71,11 +71,11 @@ def _save_metrics(engine): f_csv = csv.reader(f) for i, row in enumerate(f_csv): if i == 1: - self.assertEqual(row, ["class0\t1.5000\t1.5000\t2.0000\t1.0000\t1.1000\t0.5000"]) + self.assertEqual(row, ["class0\t1.5000\t1.5000\t2.0000\t1.1000"]) elif i == 2: - self.assertEqual(row, ["class1\t2.5000\t2.5000\t3.0000\t2.0000\t2.1000\t0.5000"]) + self.assertEqual(row, ["class1\t2.5000\t2.5000\t3.0000\t2.1000"]) elif i == 3: - self.assertEqual(row, ["average\t2.0000\t2.0000\t2.5000\t1.5000\t1.6000\t0.5000"]) + self.assertEqual(row, ["average\t2.0000\t2.0000\t2.5000\t1.6000"]) self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_raw.csv"))) self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_summary.csv"))) diff --git a/tests/test_handler_metrics_saver_dist.py b/tests/test_handler_metrics_saver_dist.py index 58fa5bd3d7..28ca45d9aa 100644 --- a/tests/test_handler_metrics_saver_dist.py +++ b/tests/test_handler_metrics_saver_dist.py @@ -36,7 +36,7 @@ def _run(self): metrics=["metric1", "metric2"], metric_details=["metric3", "metric4"], batch_transform=lambda x: x["image_meta_dict"], - compute_summary=True, + summary_ops="*", ) def _val_func(engine, batch): From cd5b0eda21c6fef3cb779f2972cf7c1bce00d8f5 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Wed, 27 Jan 2021 15:02:44 +0800 Subject: [PATCH 25/39] [DLMED] add dist tests Signed-off-by: Nic Ma --- monai/handlers/utils.py | 6 +- .../test_evenly_divisible_all_gather_dist.py | 89 +++---------------- 2 files changed, 18 insertions(+), 77 deletions(-) diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 013e21a68c..abc40e74d8 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -54,15 +54,19 @@ def evenly_divisible_all_gather(data: torch.Tensor) -> torch.Tensor: data: source tensor to pad and execute all_gather in distributed data parallel. 
""" + if not torch.is_tensor(data): + raise ValueError("input data must be PyTorch Tensor.") + if idist.get_world_size() <= 1: return data + # make sure the data is evenly-divisible on multi-GPUs length = data.shape[0] all_lens = idist.all_gather(length) max_len = max(all_lens).item() if length < max_len: size = [max_len - length] + list(data.shape[1:]) - data = torch.cat([data, data.new_full(size, float("NaN"))], dim=0) + data = torch.cat([data, data.new_full(size, 0)], dim=0) # all gather across all processes data = idist.all_gather(data) # delete the padding NaN items diff --git a/tests/test_evenly_divisible_all_gather_dist.py b/tests/test_evenly_divisible_all_gather_dist.py index a2363f964e..d09711345b 100644 --- a/tests/test_evenly_divisible_all_gather_dist.py +++ b/tests/test_evenly_divisible_all_gather_dist.py @@ -9,18 +9,13 @@ # See the License for the specific language governing permissions and # limitations under the License. - -import csv -import os -import tempfile import unittest import torch import torch.distributed as dist -from ignite.engine import Engine, Events from monai.handlers import evenly_divisible_all_gather -from tests.utils import DistCall, DistTestCase, SkipIfBeforePyTorchVersion +from tests.utils import DistCall, DistTestCase class DistributedEvenlyDivisibleAllGather(DistTestCase): @@ -29,76 +24,18 @@ def test_data(self): self._run() def _run(self): - with tempfile.TemporaryDirectory() as tempdir: - metrics_saver = MetricsSaver( - save_dir=tempdir, - metrics=["metric1", "metric2"], - metric_details=["metric3", "metric4"], - batch_transform=lambda x: x["image_meta_dict"], - summary_ops="*", - ) - - def _val_func(engine, batch): - pass - - engine = Engine(_val_func) - - if dist.get_rank() == 0: - data = [{"image_meta_dict": {"filename_or_obj": ["filepath1"]}}] - - @engine.on(Events.EPOCH_COMPLETED) - def _save_metrics0(engine): - engine.state.metrics = {"metric1": 1, "metric2": 2} - engine.state.metric_details = { - "metric3": torch.tensor([[1, 2]]), - "metric4": torch.tensor([[5, 6]]), - } - - if dist.get_rank() == 1: - # different ranks have different data length - data = [ - {"image_meta_dict": {"filename_or_obj": ["filepath2"]}}, - {"image_meta_dict": {"filename_or_obj": ["filepath3"]}}, - ] - - @engine.on(Events.EPOCH_COMPLETED) - def _save_metrics1(engine): - engine.state.metrics = {"metric1": 1, "metric2": 2} - engine.state.metric_details = { - "metric3": torch.tensor([[2, 3], [3, 4]]), - "metric4": torch.tensor([[6, 7], [7, 8]]), - } - - metrics_saver.attach(engine) - engine.run(data, max_epochs=1) - - if dist.get_rank() == 0: - # check the metrics.csv and content - self.assertTrue(os.path.exists(os.path.join(tempdir, "metrics.csv"))) - with open(os.path.join(tempdir, "metrics.csv")) as f: - f_csv = csv.reader(f) - for i, row in enumerate(f_csv): - self.assertEqual(row, [f"metric{i + 1}\t{i + 1}"]) - self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_raw.csv"))) - # check the metric_raw.csv and content - with open(os.path.join(tempdir, "metric3_raw.csv")) as f: - f_csv = csv.reader(f) - for i, row in enumerate(f_csv): - if i > 0: - self.assertEqual(row, [f"filepath{i}\t{float(i)}\t{float(i + 1)}\t{i + 0.5}"]) - self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_summary.csv"))) - # check the metric_summary.csv and content - with open(os.path.join(tempdir, "metric3_summary.csv")) as f: - f_csv = csv.reader(f) - for i, row in enumerate(f_csv): - if i == 1: - self.assertEqual(row, 
["class0\t1.0000\t1.0000\t1.0000\t1.0000\t1.0000\t0.0000"]) - elif i == 2: - self.assertEqual(row, ["class1\t2.0000\t2.0000\t2.0000\t2.0000\t2.0000\t0.0000"]) - elif i == 3: - self.assertEqual(row, ["average\t1.5000\t1.5000\t1.5000\t1.5000\t1.5000\t0.0000"]) - self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_raw.csv"))) - self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_summary.csv"))) + if dist.get_rank() == 0: + data1 = torch.tensor([[1, 2], [3, 4]]) + data2 = torch.tensor([[1.0, 2.0]]) + + if dist.get_rank() == 1: + data1 = torch.tensor([[5, 6]]) + data2 = torch.tensor([[3.0, 4.0], [5.0, 6.0]]) + + result1 = evenly_divisible_all_gather(data=data1) + torch.testing.assert_allclose(result1, torch.tensor([[1, 2], [3, 4], [5, 6]])) + result2 = evenly_divisible_all_gather(data=data2) + torch.testing.assert_allclose(result2, torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])) if __name__ == "__main__": From 25b05ddcee4c8f3fc0a70bea8a0824c5e0002b12 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Wed, 27 Jan 2021 07:06:22 +0000 Subject: [PATCH 26/39] [MONAI] python code formatting Signed-off-by: monai-bot --- monai/handlers/metrics_saver.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 7f8c6815d2..3a561a7c7f 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -10,8 +10,9 @@ # limitations under the License. import os -from typing import TYPE_CHECKING, Callable, List, Optional, Sequence, Union from collections import OrderedDict +from typing import TYPE_CHECKING, Callable, List, Optional, Sequence, Union + import numpy as np import torch @@ -155,14 +156,16 @@ def __call__(self, engine: Engine) -> None: f.write(f"{_files[i]}{self.deli}{classes}\n") if self.summary_ops is not None: - supported_ops = OrderedDict({ - "mean": np.nanmean, - "median": np.nanmedian, - "max": np.nanmax, - "min": np.nanmin, - "90percent": lambda x: np.nanpercentile(x, 10), - "std": np.nanstd, - }) + supported_ops = OrderedDict( + { + "mean": np.nanmean, + "median": np.nanmedian, + "max": np.nanmax, + "min": np.nanmin, + "90percent": lambda x: np.nanpercentile(x, 10), + "std": np.nanstd, + } + ) ops = supported_ops.keys() if "*" in self.summary_ops else self.summary_ops col_labels = row_labels.split(self.deli) From cc5936ce53124b13017d223080a402fc943b93a7 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Wed, 27 Jan 2021 15:25:37 +0800 Subject: [PATCH 27/39] [DLMED] fix flake8 issue Signed-off-by: Nic Ma --- monai/handlers/iteration_metric.py | 4 ++-- tests/test_evenly_divisible_all_gather_dist.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index c6157668f8..21d2f90429 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -53,8 +53,8 @@ def __init__( self.metric_fn = metric_fn self.save_details = save_details self._scores: List = [] - self._engine = None - self._name = None + self._engine: Optional[Engine] = None + self._name: Optional[str] = None super().__init__(output_transform, device=device) @reinit__is_reduced diff --git a/tests/test_evenly_divisible_all_gather_dist.py b/tests/test_evenly_divisible_all_gather_dist.py index d09711345b..70dcd7ca6a 100644 --- a/tests/test_evenly_divisible_all_gather_dist.py +++ b/tests/test_evenly_divisible_all_gather_dist.py @@ -14,7 +14,7 @@ import torch import torch.distributed as dist 
-from monai.handlers import evenly_divisible_all_gather +from monai.handlers.utils import evenly_divisible_all_gather from tests.utils import DistCall, DistTestCase From 6b32ce84bdac35725126184be763e2da613c037b Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Wed, 27 Jan 2021 16:04:18 +0800 Subject: [PATCH 28/39] [DLMED] enhance some unit tests Signed-off-by: Nic Ma --- monai/handlers/iteration_metric.py | 2 +- tests/test_handler_confusion_matrix.py | 12 +++++++++--- tests/test_handler_confusion_matrix_dist.py | 6 ++++++ tests/test_handler_hausdorff_distance.py | 9 ++++++++- tests/test_handler_mean_dice.py | 13 +++++++------ tests/test_handler_surface_distance.py | 9 ++++++++- 6 files changed, 39 insertions(+), 12 deletions(-) diff --git a/monai/handlers/iteration_metric.py b/monai/handlers/iteration_metric.py index 21d2f90429..bfc7252b2f 100644 --- a/monai/handlers/iteration_metric.py +++ b/monai/handlers/iteration_metric.py @@ -97,7 +97,7 @@ def compute(self) -> Any: if self.save_details: if self._engine is None or self._name is None: raise RuntimeError("plesae call the attach() function to connect expected engine first.") - self._engine.state.metric_details[self.name] = _scores + self._engine.state.metric_details[self._name] = _scores result: torch.Tensor = torch.zeros(1) if idist.get_rank() == 0: diff --git a/tests/test_handler_confusion_matrix.py b/tests/test_handler_confusion_matrix.py index cc231b82db..bb84a852d8 100644 --- a/tests/test_handler_confusion_matrix.py +++ b/tests/test_handler_confusion_matrix.py @@ -14,11 +14,11 @@ import torch from parameterized import parameterized - +from ignite.engine import Engine from monai.handlers import ConfusionMatrix -TEST_CASE_1 = [{"include_background": True, "metric_name": "f1"}, 0.75] -TEST_CASE_2 = [{"include_background": False, "metric_name": "ppv"}, 1.0] +TEST_CASE_1 = [{"include_background": True, "save_details": False, "metric_name": "f1"}, 0.75] +TEST_CASE_2 = [{"include_background": False, "save_details": False, "metric_name": "ppv"}, 1.0] TEST_CASE_SEG_1 = [{"include_background": True, "metric_name": "tpr"}, 0.7] @@ -73,6 +73,12 @@ def test_compute(self, input_params, expected_avg): def test_compute_seg(self, input_params, expected_avg): metric = ConfusionMatrix(**input_params) + def _val_func(engine, batch): + pass + + engine = Engine(_val_func) + metric.attach(engine, "confusion_matrix") + y_pred = data_1["y_pred"] y = data_1["y"] metric.update([y_pred, y]) diff --git a/tests/test_handler_confusion_matrix_dist.py b/tests/test_handler_confusion_matrix_dist.py index ebe0eb9ca7..40245bce2e 100644 --- a/tests/test_handler_confusion_matrix_dist.py +++ b/tests/test_handler_confusion_matrix_dist.py @@ -15,6 +15,7 @@ import numpy as np import torch import torch.distributed as dist +from ignite.engine import Engine from monai.handlers import ConfusionMatrix from tests.utils import DistCall, DistTestCase @@ -29,6 +30,11 @@ def _compute(self): device = f"cuda:{dist.get_rank()}" if torch.cuda.is_available() else "cpu" metric = ConfusionMatrix(include_background=True, metric_name="tpr") + def _val_func(engine, batch): + pass + + engine = Engine(_val_func) + metric.attach(engine, "confusion_matrix") if dist.get_rank() == 0: y_pred = torch.tensor( [ diff --git a/tests/test_handler_hausdorff_distance.py b/tests/test_handler_hausdorff_distance.py index edf59320ea..55dd6130b8 100644 --- a/tests/test_handler_hausdorff_distance.py +++ b/tests/test_handler_hausdorff_distance.py @@ -14,7 +14,7 @@ import numpy as np import torch - +from ignite.engine 
import Engine from monai.handlers import HausdorffDistance @@ -62,6 +62,13 @@ class TestHandlerHausdorffDistance(unittest.TestCase): def test_compute(self): hd_metric = HausdorffDistance(include_background=True) + + def _val_func(engine, batch): + pass + + engine = Engine(_val_func) + hd_metric.attach(engine, "hausdorff_distance") + y_pred, y = TEST_SAMPLE_1 hd_metric.update([y_pred, y]) self.assertEqual(hd_metric.compute(), 10) diff --git a/tests/test_handler_mean_dice.py b/tests/test_handler_mean_dice.py index c573e418c8..d15b549d86 100644 --- a/tests/test_handler_mean_dice.py +++ b/tests/test_handler_mean_dice.py @@ -17,15 +17,15 @@ from monai.handlers import MeanDice -TEST_CASE_1 = [{"include_background": True}, 0.75] -TEST_CASE_2 = [{"include_background": False}, 0.66666] +TEST_CASE_1 = [{"include_background": True}, 0.75, (4, 2)] +TEST_CASE_2 = [{"include_background": False}, 0.66666, (4, 1)] class TestHandlerMeanDice(unittest.TestCase): # TODO test multi node averaged dice @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) - def test_compute(self, input_params, expected_avg): + def test_compute(self, input_params, expected_avg, details_shape): dice_metric = MeanDice(**input_params) # set up engine @@ -33,7 +33,8 @@ def _val_func(engine, batch): pass engine = Engine(_val_func) - dice_metric.attach(engine, "mean_dice") + dice_metric.attach(engine=engine, name="mean_dice") + y_pred = torch.Tensor([[[0], [1]], [[1], [0]]]) y = torch.Tensor([[[0], [1]], [[0], [1]]]) dice_metric.update([y_pred, y]) @@ -44,10 +45,10 @@ def _val_func(engine, batch): avg_dice = dice_metric.compute() self.assertAlmostEqual(avg_dice, expected_avg, places=4) - self.assertTupleEqual(tuple(engine.state.metric_details["mean_dice"].shape), (4, 2)) + self.assertTupleEqual(tuple(engine.state.metric_details["mean_dice"].shape), details_shape) @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) - def test_shape_mismatch(self, input_params, _expected): + def test_shape_mismatch(self, input_params, _expected_avg, _details_shape): dice_metric = MeanDice(**input_params) with self.assertRaises((AssertionError, ValueError)): y_pred = torch.Tensor([[0, 1], [1, 0]]) diff --git a/tests/test_handler_surface_distance.py b/tests/test_handler_surface_distance.py index 656b0d64b2..a0f43436c2 100644 --- a/tests/test_handler_surface_distance.py +++ b/tests/test_handler_surface_distance.py @@ -14,7 +14,7 @@ import numpy as np import torch - +from ignite.engine import Engine from monai.handlers import SurfaceDistance @@ -62,6 +62,13 @@ class TestHandlerSurfaceDistance(unittest.TestCase): def test_compute(self): sur_metric = SurfaceDistance(include_background=True) + + def _val_func(engine, batch): + pass + + engine = Engine(_val_func) + sur_metric.attach(engine, "surface_distance") + y_pred, y = TEST_SAMPLE_1 sur_metric.update([y_pred, y]) self.assertAlmostEqual(sur_metric.compute(), 4.17133, places=4) From d6f1a7d23556308580b418c63d01a8d0ca7a35d5 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Wed, 27 Jan 2021 08:08:34 +0000 Subject: [PATCH 29/39] [MONAI] python code formatting Signed-off-by: monai-bot --- tests/test_handler_confusion_matrix.py | 3 ++- tests/test_handler_hausdorff_distance.py | 1 + tests/test_handler_surface_distance.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_handler_confusion_matrix.py b/tests/test_handler_confusion_matrix.py index bb84a852d8..0524676763 100644 --- a/tests/test_handler_confusion_matrix.py +++ b/tests/test_handler_confusion_matrix.py @@ -13,8 +13,9 @@ from typing 
import Any, Dict import torch -from parameterized import parameterized from ignite.engine import Engine +from parameterized import parameterized + from monai.handlers import ConfusionMatrix TEST_CASE_1 = [{"include_background": True, "save_details": False, "metric_name": "f1"}, 0.75] diff --git a/tests/test_handler_hausdorff_distance.py b/tests/test_handler_hausdorff_distance.py index 55dd6130b8..c0d2e723ca 100644 --- a/tests/test_handler_hausdorff_distance.py +++ b/tests/test_handler_hausdorff_distance.py @@ -15,6 +15,7 @@ import numpy as np import torch from ignite.engine import Engine + from monai.handlers import HausdorffDistance diff --git a/tests/test_handler_surface_distance.py b/tests/test_handler_surface_distance.py index a0f43436c2..fbd86edb03 100644 --- a/tests/test_handler_surface_distance.py +++ b/tests/test_handler_surface_distance.py @@ -15,6 +15,7 @@ import numpy as np import torch from ignite.engine import Engine + from monai.handlers import SurfaceDistance From 6d3b08beaecc40843f272e49883e9b61b786f988 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Wed, 27 Jan 2021 16:23:08 +0800 Subject: [PATCH 30/39] [DLMED] remove from min_tests Signed-off-by: Nic Ma --- tests/min_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/min_tests.py b/tests/min_tests.py index f9f63f4ce0..665ead6cc6 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -102,6 +102,7 @@ def run_testsuit(): "test_torchvisiond", "test_handler_metrics_saver", "test_handler_metrics_saver_dist", + "test_evenly_divisible_all_gather_dist", ] assert sorted(exclude_cases) == sorted(set(exclude_cases)), f"Duplicated items in {exclude_cases}" From 45e5e0afdca2956822dc57c0bf2f082f2827e64e Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Thu, 28 Jan 2021 00:41:05 +0800 Subject: [PATCH 31/39] [DLMED] change to standlone APIs to write files Signed-off-by: Nic Ma --- monai/handlers/metrics_saver.py | 40 ++++------- monai/handlers/utils.py | 90 ++++++++++++++++++++++-- tests/test_handler_metrics_saver.py | 2 +- tests/test_handler_metrics_saver_dist.py | 2 +- 4 files changed, 103 insertions(+), 31 deletions(-) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 3a561a7c7f..6afc531d78 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -10,7 +10,6 @@ # limitations under the License. 
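
As context for the MetricsSaver refactoring that follows, this is a minimal sketch of how the handler is attached to an evaluation engine; the no-op step function, output directory and metric names are illustrative assumptions, not values taken from this series:

    from ignite.engine import Engine

    from monai.handlers import MetricsSaver

    def _val_func(engine, batch):
        pass  # placeholder evaluation step

    evaluator = Engine(_val_func)
    saver = MetricsSaver(
        save_dir="./eval_reports",     # assumed output directory
        metrics="*",                   # write every metric in engine.state.metrics
        metric_details=["mean_dice"],  # raw per-image values to dump
        summary_ops="*",               # per-class summary with all supported operations
        save_rank=0,                   # only this rank writes in a distributed run
    )
    saver.attach(evaluator)  # reports are written at the end of the run
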
import os -from collections import OrderedDict from typing import TYPE_CHECKING, Callable, List, Optional, Sequence, Union import numpy as np @@ -18,6 +17,7 @@ from monai.utils import ensure_tuple, exact_version, optional_import from monai.utils.module import get_torch_version_tuple +from monai.handlers.utils import write_per_image_metric, write_metric_summary Events, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Events") idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") @@ -146,31 +146,21 @@ def __call__(self, engine: Engine) -> None: v = v.reshape((-1, 1)) # add the average value to v + class_labels = ["class" + str(i) for i in range(v.shape[1])] + ["mean"] v = np.concatenate([v, np.nanmean(v, axis=1, keepdims=True)], axis=1) - with open(os.path.join(self.save_dir, k + "_raw.csv"), "w") as f: - class_labels = self.deli.join(["class" + str(i) for i in range(v.shape[1] - 1)]) - row_labels = class_labels + f"{self.deli}average" - f.write(f"filename{self.deli}{row_labels}\n") - for i, image in enumerate(v): - classes = self.deli.join([str(d) for d in image]) - f.write(f"{_files[i]}{self.deli}{classes}\n") + write_per_image_metric( + class_labels=class_labels, + images=_files, + metric=v, + filepath=os.path.join(self.save_dir, k + "_raw.csv"), + deli=self.deli, + ) if self.summary_ops is not None: - supported_ops = OrderedDict( - { - "mean": np.nanmean, - "median": np.nanmedian, - "max": np.nanmax, - "min": np.nanmin, - "90percent": lambda x: np.nanpercentile(x, 10), - "std": np.nanstd, - } + write_metric_summary( + class_labels=class_labels, + metric=v, + filepath=os.path.join(self.save_dir, k + "_summary.csv"), + summary_ops=self.summary_ops, + deli=self.deli, ) - ops = supported_ops.keys() if "*" in self.summary_ops else self.summary_ops - - col_labels = row_labels.split(self.deli) - with open(os.path.join(self.save_dir, k + "_summary.csv"), "w") as f: - f.write(f"class{self.deli}{self.deli.join(ops)}\n") - for i, d in enumerate(v.transpose()): - ops_labels = self.deli.join([f"{supported_ops[k](d):.4f}" for k in ops]) - f.write(f"{col_labels[i]}{self.deli}{ops_labels}\n") diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index abc40e74d8..5b97a453fe 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -9,11 +9,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Any, Callable - +from typing import TYPE_CHECKING, Any, Callable, Sequence, Union +from collections import OrderedDict +import numpy as np import torch -from monai.utils import exact_version, optional_import +from monai.utils import exact_version, optional_import, ensure_tuple idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") if TYPE_CHECKING: @@ -21,7 +22,13 @@ else: Engine, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Engine") -__all__ = ["stopping_fn_from_metric", "stopping_fn_from_loss", "evenly_divisible_all_gather"] +__all__ = [ + "stopping_fn_from_metric", + "stopping_fn_from_loss", + "evenly_divisible_all_gather", + "write_per_image_metric", + "write_metric_summary", +] def stopping_fn_from_metric(metric_name: str) -> Callable[[Engine], Any]: @@ -71,3 +78,78 @@ def evenly_divisible_all_gather(data: torch.Tensor) -> torch.Tensor: data = idist.all_gather(data) # delete the padding NaN items return torch.cat([data[i * max_len : i * max_len + l, ...] 
for i, l in enumerate(all_lens)], dim=0)
+
+
+def write_per_image_metric(
+    class_labels: Sequence[str],
+    images: Sequence[str],
+    metric: np.ndarray,
+    filepath: str,
+    deli: str = "\t",
+):
+    """
+    Utility function to write the raw metric data of every image into a file, one line per image.
+    The input `metric` data must have at least 2 dims(batch, classes).
+
+    Args:
+        class_labels: label string for every class in the metric data.
+        images: name or path of every image corresponding to the metric data.
+        metric: raw metric data for all images, it must have at least 2 dims(batch, classes).
+        filepath: target file path to save the result, for example: "/workspace/data/mean_dice_raw.csv".
+        deli: the delimiter character in the file, default to "\t".
+
+    """
+
+    if metric.ndim < 2:
+        raise ValueError("metric must have at least 2 dims(batch, classes).")
+
+    with open(filepath, "w") as f:
+        f.write(f"filename{deli}{deli.join(class_labels)}\n")
+        for i, image in enumerate(metric):
+            f.write(f"{images[i]}{deli}{deli.join([str(c) for c in image])}\n")
+
+
+def write_metric_summary(
+    class_labels: Sequence[str],
+    metric: np.ndarray,
+    filepath: str,
+    summary_ops: Union[str, Sequence[str]],
+    deli: str = "\t",
+):
+    """
+    Utility function to compute a summary report of the metric data over all the images, one line per class.
+
+    Args:
+        class_labels: label string for every class in the metric data.
+        metric: raw metric data for all images, it must have at least 2 dims(batch, classes).
+        filepath: target file path to save the result, for example: "/workspace/data/mean_dice_summary.csv".
+        summary_ops: expected computation operations to generate the summary report.
+            it can be: "*" or list of strings.
+            "*" - generate summary report with all the supported operations.
+            list of strings - generate summary report with specified operations, they should be within this list:
+                [`mean`, `median`, `max`, `min`, `90percent`, `std`].
+        deli: the delimiter character in the file, default to "\t". 
+ + """ + + if metric.ndim < 2: + raise ValueError("metric must have at least 2 dims(batch, classes).") + + supported_ops = OrderedDict( + { + "mean": np.nanmean, + "median": np.nanmedian, + "max": np.nanmax, + "min": np.nanmin, + "90percent": lambda x: np.nanpercentile(x, 10), + "std": np.nanstd, + } + ) + ops = ensure_tuple(summary_ops) + if "*" in ops: + ops = tuple(supported_ops.keys()) + + with open(filepath, "w") as f: + f.write(f"class{deli}{deli.join(ops)}\n") + for i, c in enumerate(metric.transpose()): + f.write(f"{class_labels[i]}{deli}{deli.join([f'{supported_ops[k](c):.4f}' for k in ops])}\n") diff --git a/tests/test_handler_metrics_saver.py b/tests/test_handler_metrics_saver.py index 881528e56c..58a6f10d33 100644 --- a/tests/test_handler_metrics_saver.py +++ b/tests/test_handler_metrics_saver.py @@ -75,7 +75,7 @@ def _save_metrics(engine): elif i == 2: self.assertEqual(row, ["class1\t2.5000\t2.5000\t3.0000\t2.1000"]) elif i == 3: - self.assertEqual(row, ["average\t2.0000\t2.0000\t2.5000\t1.6000"]) + self.assertEqual(row, ["mean\t2.0000\t2.0000\t2.5000\t1.6000"]) self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_raw.csv"))) self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_summary.csv"))) diff --git a/tests/test_handler_metrics_saver_dist.py b/tests/test_handler_metrics_saver_dist.py index 28ca45d9aa..1b17d0adb4 100644 --- a/tests/test_handler_metrics_saver_dist.py +++ b/tests/test_handler_metrics_saver_dist.py @@ -97,7 +97,7 @@ def _save_metrics1(engine): elif i == 2: self.assertEqual(row, ["class1\t2.0000\t2.0000\t2.0000\t2.0000\t2.0000\t0.0000"]) elif i == 3: - self.assertEqual(row, ["average\t1.5000\t1.5000\t1.5000\t1.5000\t1.5000\t0.0000"]) + self.assertEqual(row, ["mean\t1.5000\t1.5000\t1.5000\t1.5000\t1.5000\t0.0000"]) self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_raw.csv"))) self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_summary.csv"))) From 1dde629414520097549e97c347d19ea86bc93fa4 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Wed, 27 Jan 2021 16:45:21 +0000 Subject: [PATCH 32/39] [MONAI] python code formatting Signed-off-by: monai-bot --- monai/handlers/metrics_saver.py | 2 +- monai/handlers/utils.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 6afc531d78..a6637bd227 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -15,9 +15,9 @@ import numpy as np import torch +from monai.handlers.utils import write_metric_summary, write_per_image_metric from monai.utils import ensure_tuple, exact_version, optional_import from monai.utils.module import get_torch_version_tuple -from monai.handlers.utils import write_per_image_metric, write_metric_summary Events, _ = optional_import("ignite.engine", "0.4.2", exact_version, "Events") idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 5b97a453fe..d730e1fbb5 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -9,12 +9,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
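
With this patch applied, the two standalone writers can also be called outside the handler. A small sketch with made-up dice scores and filenames follows (a later patch in this series folds both helpers into a single write_metrics_reports):

    import numpy as np

    from monai.handlers.utils import write_metric_summary, write_per_image_metric

    scores = np.array([[0.9, 0.8], [0.7, 0.6]])  # (batch=2, classes=2), made-up values
    labels = ["class0", "class1"]

    write_per_image_metric(
        class_labels=labels,
        images=["img0.nii", "img1.nii"],  # illustrative filenames
        metric=scores,
        filepath="mean_dice_raw.csv",
    )
    write_metric_summary(
        class_labels=labels,
        metric=scores,
        filepath="mean_dice_summary.csv",
        summary_ops=["mean", "max"],
    )
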
-from typing import TYPE_CHECKING, Any, Callable, Sequence, Union from collections import OrderedDict +from typing import TYPE_CHECKING, Any, Callable, Sequence, Union + import numpy as np import torch -from monai.utils import exact_version, optional_import, ensure_tuple +from monai.utils import ensure_tuple, exact_version, optional_import idist, _ = optional_import("ignite", "0.4.2", exact_version, "distributed") if TYPE_CHECKING: From af77ebb3760fde0b13f781843ad7161c49d1168c Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Thu, 28 Jan 2021 16:07:18 +0800 Subject: [PATCH 33/39] [DLMED] add file type check Signed-off-by: Nic Ma --- monai/handlers/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index d730e1fbb5..e770877cb8 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -104,6 +104,9 @@ def write_per_image_metric( if metric.ndim < 2: raise ValueError("metric must have at least 2 dims(batch, classes).") + if not (isinstance(filepath, str) and filepath[-4:] == ".csv"): + raise AssertionError("filepath must be a string with CSV format.") + with open(filepath, "w") as f: f.write(f"filename{deli}{deli.join(class_labels)}\n") for i, image in enumerate(metric): @@ -136,6 +139,9 @@ def write_metric_summary( if metric.ndim < 2: raise ValueError("metric must have at least 2 dims(batch, classes).") + if not (isinstance(filepath, str) and filepath[-4:] == ".csv"): + raise AssertionError("filepath must be a string with CSV format.") + supported_ops = OrderedDict( { "mean": np.nanmean, From f23bb4d74aa943d60966934faaa63e77dca66752 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Thu, 28 Jan 2021 17:40:21 +0800 Subject: [PATCH 34/39] [DLMED] add output_type arg Signed-off-by: Nic Ma --- monai/handlers/metrics_saver.py | 5 +++++ monai/handlers/utils.py | 32 ++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index a6637bd227..6fbf428a98 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -58,6 +58,7 @@ class MetricsSaver: default to None. save_rank: only the handler on specified rank will save to files in multi-gpus validation, default to 0. delimiter: the delimiter charactor in CSV file, default to "\t". + output_type: expected output file type, supported types: ["csv"], default to "csv". 
""" @@ -70,6 +71,7 @@ def __init__( summary_ops: Optional[Union[str, Sequence[str]]] = None, save_rank: int = 0, delimiter: str = "\t", + output_type: str = "csv", ) -> None: self.save_dir = save_dir self.metrics = ensure_tuple(metrics) if metrics is not None else None @@ -78,6 +80,7 @@ def __init__( self.summary_ops = ensure_tuple(summary_ops) if summary_ops is not None else None self.save_rank = save_rank self.deli = delimiter + self.output_type = output_type self._filenames: List[str] = [] def attach(self, engine: Engine) -> None: @@ -154,6 +157,7 @@ def __call__(self, engine: Engine) -> None: metric=v, filepath=os.path.join(self.save_dir, k + "_raw.csv"), deli=self.deli, + output_type=self.output_type, ) if self.summary_ops is not None: @@ -163,4 +167,5 @@ def __call__(self, engine: Engine) -> None: filepath=os.path.join(self.save_dir, k + "_summary.csv"), summary_ops=self.summary_ops, deli=self.deli, + output_type=self.output_type, ) diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index e770877cb8..6b0428fad0 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -87,6 +87,7 @@ def write_per_image_metric( metric: np.ndarray, filepath: str, deli: str = "\t", + output_type: str = "csv", ): """ Utility function to write the raw metric data of every image into a file, every line is for 1 image. @@ -98,19 +99,20 @@ def write_per_image_metric( metric: raw metric data for all images, it must have at least 2 dims(batch, classes). filepath: target file path to save the result, for example: "/workspace/data/mean_dice_raw.csv". deli: the delimiter charactor in the file, default to "\t". + output_type: expected output file type, supported types: ["csv"], default to "csv". """ if metric.ndim < 2: raise ValueError("metric must have at least 2 dims(batch, classes).") - if not (isinstance(filepath, str) and filepath[-4:] == ".csv"): - raise AssertionError("filepath must be a string with CSV format.") - - with open(filepath, "w") as f: - f.write(f"filename{deli}{deli.join(class_labels)}\n") - for i, image in enumerate(metric): - f.write(f"{images[i]}{deli}{deli.join([str(c) for c in image])}\n") + if output_type.lower() == "csv": + with open(filepath, "w") as f: + f.write(f"filename{deli}{deli.join(class_labels)}\n") + for i, image in enumerate(metric): + f.write(f"{images[i]}{deli}{deli.join([str(c) for c in image])}\n") + else: + raise ValueError(f"unsupported output type: {output_type}.") def write_metric_summary( @@ -119,6 +121,7 @@ def write_metric_summary( filepath: str, summary_ops: Union[str, Sequence[str]], deli: str = "\t", + output_type: str = "csv", ): """ Utility function to compute summary report of metric data on all the images, every line is for 1 class. @@ -133,15 +136,13 @@ def write_metric_summary( list of strings - generate summary report with specified operations, they should be within this list: [`mean`, `median`, `max`, `min`, `90percent`, `std`]. deli: the delimiter charactor in the file, default to "\t". + output_type: expected output file type, supported types: ["csv"], default to "csv". 
""" if metric.ndim < 2: raise ValueError("metric must have at least 2 dims(batch, classes).") - if not (isinstance(filepath, str) and filepath[-4:] == ".csv"): - raise AssertionError("filepath must be a string with CSV format.") - supported_ops = OrderedDict( { "mean": np.nanmean, @@ -156,7 +157,10 @@ def write_metric_summary( if "*" in ops: ops = tuple(supported_ops.keys()) - with open(filepath, "w") as f: - f.write(f"class{deli}{deli.join(ops)}\n") - for i, c in enumerate(metric.transpose()): - f.write(f"{class_labels[i]}{deli}{deli.join([f'{supported_ops[k](c):.4f}' for k in ops])}\n") + if output_type.lower() == "csv": + with open(filepath, "w") as f: + f.write(f"class{deli}{deli.join(ops)}\n") + for i, c in enumerate(metric.transpose()): + f.write(f"{class_labels[i]}{deli}{deli.join([f'{supported_ops[k](c):.4f}' for k in ops])}\n") + else: + raise ValueError(f"unsupported output type: {output_type}.") From 13954c9bf0a34c8a4a4cebc0ee2770ae02238b23 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Fri, 29 Jan 2021 10:26:03 +0800 Subject: [PATCH 35/39] [DLMED] develop standlone API Signed-off-by: Nic Ma --- monai/handlers/__init__.py | 2 +- monai/handlers/metrics_saver.py | 98 ++++++------------ monai/handlers/utils.py | 149 ++++++++++++++-------------- tests/test_write_metrics_reports.py | 65 ++++++++++++ 4 files changed, 173 insertions(+), 141 deletions(-) create mode 100644 tests/test_write_metrics_reports.py diff --git a/monai/handlers/__init__.py b/monai/handlers/__init__.py index 36ed534d7a..6b190518fb 100644 --- a/monai/handlers/__init__.py +++ b/monai/handlers/__init__.py @@ -25,5 +25,5 @@ from .stats_handler import StatsHandler from .surface_distance import SurfaceDistance from .tensorboard_handlers import TensorBoardImageHandler, TensorBoardStatsHandler -from .utils import evenly_divisible_all_gather, stopping_fn_from_loss, stopping_fn_from_metric +from .utils import evenly_divisible_all_gather, stopping_fn_from_loss, stopping_fn_from_metric, write_metrics_reports from .validation_handler import ValidationHandler diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 6fbf428a98..fcef049fe5 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -9,13 +9,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os from typing import TYPE_CHECKING, Callable, List, Optional, Sequence, Union -import numpy as np -import torch - -from monai.handlers.utils import write_metric_summary, write_per_image_metric +from monai.handlers.utils import write_metrics_reports from monai.utils import ensure_tuple, exact_version, optional_import from monai.utils.module import get_torch_version_tuple @@ -105,67 +101,39 @@ def __call__(self, engine: Engine) -> None: Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. 
""" - if not os.path.exists(self.save_dir): - os.makedirs(self.save_dir) - ws = idist.get_world_size() if self.save_rank >= ws: - raise ValueError("target rank is greater than the distributed group size.s") - - if self.metrics is not None and len(engine.state.metrics) > 0: - if idist.get_rank() == self.save_rank: - # only save metrics to file in specified rank - with open(os.path.join(self.save_dir, "metrics.csv"), "w") as f: - for k, v in engine.state.metrics.items(): - if k in self.metrics or "*" in self.metrics: - f.write(f"{k}{self.deli}{str(v)}\n") - - if ( - self.metric_details is not None - and hasattr(engine.state, "metric_details") - and len(engine.state.metric_details) > 0 - ): - _filenames = self.deli.join(self._filenames) - - if ws > 1: - if get_torch_version_tuple() > (1, 6, 0): - # all gather across all processes - _filenames = self.deli.join(idist.all_gather(_filenames)) - else: - raise RuntimeError( - "MetricsSaver can not save metric details in distributed mode with PyTorch < 1.7.0." - ) - if idist.get_rank() == self.save_rank: - _files = _filenames.split(self.deli) + raise ValueError("target rank is greater than the distributed group size.") + + _filenames = self._filenames + if ws > 1: + _filenames = self.deli.join(_filenames) + if get_torch_version_tuple() > (1, 6, 0): + # all gather across all processes + _filenames = self.deli.join(idist.all_gather(_filenames)) + else: + raise RuntimeError( + "MetricsSaver can not save metric details in distributed mode with PyTorch < 1.7.0." + ) + _filenames = _filenames.split(self.deli) + + # only save metrics to file in specified rank + if idist.get_rank() == self.save_rank: + _metrics = {} + if self.metrics is not None and len(engine.state.metrics) > 0: + _metrics = {k: v for k, v in engine.state.metrics.items() if k in self.metrics or "*" in self.metrics} + _metric_details = {} + if self.metric_details is not None and len(engine.state.metric_details) > 0: for k, v in engine.state.metric_details.items(): if k in self.metric_details or "*" in self.metric_details: - if torch.is_tensor(v): - v = v.cpu().numpy() - if v.ndim == 0: - # reshape to [1, 1] if no batch and class dims - v = v.reshape((1, 1)) - elif v.ndim == 1: - # reshape to [N, 1] if no class dim - v = v.reshape((-1, 1)) - - # add the average value to v - class_labels = ["class" + str(i) for i in range(v.shape[1])] + ["mean"] - v = np.concatenate([v, np.nanmean(v, axis=1, keepdims=True)], axis=1) - write_per_image_metric( - class_labels=class_labels, - images=_files, - metric=v, - filepath=os.path.join(self.save_dir, k + "_raw.csv"), - deli=self.deli, - output_type=self.output_type, - ) - - if self.summary_ops is not None: - write_metric_summary( - class_labels=class_labels, - metric=v, - filepath=os.path.join(self.save_dir, k + "_summary.csv"), - summary_ops=self.summary_ops, - deli=self.deli, - output_type=self.output_type, - ) + _metric_details[k] = v + + write_metrics_reports( + save_dir=self.save_dir, + images=_filenames, + metrics=_metrics, + metric_details=_metric_details, + summary_ops=self.summary_ops, + deli=self.deli, + output_type=self.output_type, + ) diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 6b0428fad0..678d93d0a5 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -10,8 +10,8 @@ # limitations under the License. 
from collections import OrderedDict -from typing import TYPE_CHECKING, Any, Callable, Sequence, Union - +from typing import TYPE_CHECKING, Any, Callable, Sequence, Union, Optional, Dict +import os import numpy as np import torch @@ -27,8 +27,7 @@ "stopping_fn_from_metric", "stopping_fn_from_loss", "evenly_divisible_all_gather", - "write_per_image_metric", - "write_metric_summary", + "write_metrics_reports", ] @@ -81,86 +80,86 @@ def evenly_divisible_all_gather(data: torch.Tensor) -> torch.Tensor: return torch.cat([data[i * max_len : i * max_len + l, ...] for i, l in enumerate(all_lens)], dim=0) -def write_per_image_metric( - class_labels: Sequence[str], - images: Sequence[str], - metric: np.ndarray, - filepath: str, - deli: str = "\t", - output_type: str = "csv", -): - """ - Utility function to write the raw metric data of every image into a file, every line is for 1 image. - The input metric data metric must have at least 2 dims(batch, classes). - - Args: - class_labels: label string for every class in the metric data. - images: name or path of every image corresponding to the metric data. - metric: raw metric data for all images, it must have at least 2 dims(batch, classes). - filepath: target file path to save the result, for example: "/workspace/data/mean_dice_raw.csv". - deli: the delimiter charactor in the file, default to "\t". - output_type: expected output file type, supported types: ["csv"], default to "csv". - - """ - - if metric.ndim < 2: - raise ValueError("metric must have at least 2 dims(batch, classes).") - - if output_type.lower() == "csv": - with open(filepath, "w") as f: - f.write(f"filename{deli}{deli.join(class_labels)}\n") - for i, image in enumerate(metric): - f.write(f"{images[i]}{deli}{deli.join([str(c) for c in image])}\n") - else: - raise ValueError(f"unsupported output type: {output_type}.") - - -def write_metric_summary( - class_labels: Sequence[str], - metric: np.ndarray, - filepath: str, - summary_ops: Union[str, Sequence[str]], +def write_metrics_reports( + save_dir: str, + images: Optional[Sequence[str]], + metrics: Optional[Dict[str, Union[torch.Tensor, np.ndarray]]], + metric_details: Optional[Dict[str, Union[torch.Tensor, np.ndarray]]], + summary_ops: Optional[Union[str, Sequence[str]]], deli: str = "\t", output_type: str = "csv", ): """ - Utility function to compute summary report of metric data on all the images, every line is for 1 class. + Utility function to write the metrics into files, contains 3 parts: + 1. if `metrics` dict is not None, write overall metrics into file, every line is a metric name and value pair. + 2. if `metric_details` dict is not None, write raw metric data of every image into file, every line for 1 image. + 3. if `summary_ops` is not None, compute summary based on operations on `metric_details` and write to file. Args: - class_labels: label string for every class in the metric data. - metric: raw metric data for all images, it must have at least 2 dims(batch, classes). - filepath: target file path to save the result, for example: "/workspace/data/mean_dice_summary.csv". + save_dir: directory to save all the metrics reports. + images: name or path of every input image corresponding to the metric_details data. + if None, will use index number as the filename of every input image. + metrics: a dictionary of (metric name, metric value) pairs. + metric_details: a dictionary of (metric name, metric raw values) pairs, + for example, the raw value can be the mean_dice of every channel of every input image. 
summary_ops: expected computation operations to generate the summary report.
+            it can be: None, "*" or list of strings.
+            None - don't generate any summary report for the metric_details.
+            "*" - generate summary report for every metric_details with all the supported operations.
+            list of strings - generate summary report for every metric_details with specified operations, they
+                should be within this list: [`mean`, `median`, `max`, `min`, `90percent`, `std`].
+            default to None.
+        deli: the delimiter character in the file, default to "\t".
+        output_type: expected output file type, supported types: ["csv"], default to "csv".
+
+    """
+    if output_type.lower() != "csv":
+        raise ValueError(f"unsupported output type: {output_type}.")
+
+    if not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+
+    if metrics is not None and len(metrics) > 0:
+        with open(os.path.join(save_dir, "metrics.csv"), "w") as f:
+            for k, v in metrics.items():
+                f.write(f"{k}{deli}{str(v)}\n")
+
+    if metric_details is not None and len(metric_details) > 0:
+        for k, v in metric_details.items():
+            if torch.is_tensor(v):
+                v = v.cpu().numpy()
+            if v.ndim == 0:
+                # reshape to [1, 1] if no batch and class dims
+                v = v.reshape((1, 1))
+            elif v.ndim == 1:
+                # reshape to [N, 1] if no class dim
+                v = v.reshape((-1, 1))
+
+            # add the average value of all classes to v
+            class_labels = ["class" + str(i) for i in range(v.shape[1])] + ["mean"]
+            v = np.concatenate([v, np.nanmean(v, axis=1, keepdims=True)], axis=1)
+
+            with open(os.path.join(save_dir, f"{k}_raw.csv"), "w") as f:
+                f.write(f"filename{deli}{deli.join(class_labels)}\n")
+                for i, b in enumerate(v):
+                    f.write(f"{images[i] if images is not None else str(i)}{deli}{deli.join([str(c) for c in b])}\n")
+
+            if summary_ops is not None:
+                supported_ops = OrderedDict(
+                    {
+                        "mean": np.nanmean,
+                        "median": np.nanmedian,
+                        "max": np.nanmax,
+                        "min": np.nanmin,
+                        "90percent": lambda x: np.nanpercentile(x, 10),
+                        "std": np.nanstd,
+                    }
+                )
+                ops = ensure_tuple(summary_ops)
+                if "*" in ops:
+                    ops = tuple(supported_ops.keys())
+
+                with open(os.path.join(save_dir, f"{k}_summary.csv"), "w") as f:
+                    f.write(f"class{deli}{deli.join(ops)}\n")
+                    for i, c in enumerate(v.transpose()):
+                        f.write(f"{class_labels[i]}{deli}{deli.join([f'{supported_ops[k](c):.4f}' for k in ops])}\n")
diff --git a/tests/test_write_metrics_reports.py b/tests/test_write_metrics_reports.py
new file mode 100644
index 0000000000..94acf2eb29
--- /dev/null
+++ b/tests/test_write_metrics_reports.py
@@ -0,0 +1,65 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use 
this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import csv +import os +import tempfile +import unittest + +import torch +from ignite.engine import Engine, Events + +from monai.handlers import write_metrics_reports + + +class TestWriteMetricsReports(unittest.TestCase): + def test_content(self): + with tempfile.TemporaryDirectory() as tempdir: + write_metrics_reports( + save_dir=tempdir, + images=["filepath1", "filepath2"], + metrics={"metric1": 1, "metric2": 2}, + metric_details={"metric3": torch.tensor([[1, 2], [2, 3]]), "metric4": torch.tensor([[5, 6], [7, 8]])}, + summary_ops=["mean", "median", "max", "90percent"], + deli="\t", + output_type="csv", + ) + + # check the metrics.csv and content + self.assertTrue(os.path.exists(os.path.join(tempdir, "metrics.csv"))) + with open(os.path.join(tempdir, "metrics.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + self.assertEqual(row, [f"metric{i + 1}\t{i + 1}"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_raw.csv"))) + # check the metric_raw.csv and content + with open(os.path.join(tempdir, "metric3_raw.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + if i > 0: + self.assertEqual(row, [f"filepath{i}\t{float(i)}\t{float(i + 1)}\t{i + 0.5}"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric3_summary.csv"))) + # check the metric_summary.csv and content + with open(os.path.join(tempdir, "metric3_summary.csv")) as f: + f_csv = csv.reader(f) + for i, row in enumerate(f_csv): + if i == 1: + self.assertEqual(row, ["class0\t1.5000\t1.5000\t2.0000\t1.1000"]) + elif i == 2: + self.assertEqual(row, ["class1\t2.5000\t2.5000\t3.0000\t2.1000"]) + elif i == 3: + self.assertEqual(row, ["mean\t2.0000\t2.0000\t2.5000\t1.6000"]) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_raw.csv"))) + self.assertTrue(os.path.exists(os.path.join(tempdir, "metric4_summary.csv"))) + + +if __name__ == "__main__": + unittest.main() From abcb98a085fc1a5e3d7f748a032d667e1538ee53 Mon Sep 17 00:00:00 2001 From: monai-bot Date: Fri, 29 Jan 2021 02:31:07 +0000 Subject: [PATCH 36/39] [MONAI] python code formatting Signed-off-by: monai-bot --- monai/handlers/metrics_saver.py | 4 +--- monai/handlers/utils.py | 9 +++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index fcef049fe5..6821a5933d 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -112,9 +112,7 @@ def __call__(self, engine: Engine) -> None: # all gather across all processes _filenames = self.deli.join(idist.all_gather(_filenames)) else: - raise RuntimeError( - "MetricsSaver can not save metric details in distributed mode with PyTorch < 1.7.0." 
- ) + raise RuntimeError("MetricsSaver can not save metric details in distributed mode with PyTorch < 1.7.0.") _filenames = _filenames.split(self.deli) # only save metrics to file in specified rank diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 678d93d0a5..ef652efe0a 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -9,9 +9,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections import OrderedDict -from typing import TYPE_CHECKING, Any, Callable, Sequence, Union, Optional, Dict import os +from collections import OrderedDict +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Sequence, Union + import numpy as np import torch @@ -123,7 +124,7 @@ def write_metrics_reports( with open(os.path.join(save_dir, "metrics.csv"), "w") as f: for k, v in metrics.items(): f.write(f"{k}{deli}{str(v)}\n") - + if metric_details is not None and len(metric_details) > 0: for k, v in metric_details.items(): if torch.is_tensor(v): @@ -143,7 +144,7 @@ def write_metrics_reports( f.write(f"filename{deli}{deli.join(class_labels)}\n") for i, b in enumerate(v): f.write(f"{images[i] if images is not None else str(i)}{deli}{deli.join([str(c) for c in b])}\n") - + if summary_ops is not None: supported_ops = OrderedDict( { From fc274ba28166a492a3a0a5af2aaac608f660245b Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Fri, 29 Jan 2021 10:47:38 +0800 Subject: [PATCH 37/39] [DLMED] fix flake8 issue Signed-off-by: Nic Ma --- tests/test_write_metrics_reports.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_write_metrics_reports.py b/tests/test_write_metrics_reports.py index 94acf2eb29..d1a3f7718a 100644 --- a/tests/test_write_metrics_reports.py +++ b/tests/test_write_metrics_reports.py @@ -15,7 +15,6 @@ import unittest import torch -from ignite.engine import Engine, Events from monai.handlers import write_metrics_reports From bb03f1973665fcfe7d7cc6bfdf234ab2cefb5de7 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Fri, 29 Jan 2021 19:00:43 +0800 Subject: [PATCH 38/39] [DLMED] fix flake8 error Signed-off-by: Nic Ma --- monai/handlers/metrics_saver.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 6821a5933d..f9deea35df 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -105,15 +105,15 @@ def __call__(self, engine: Engine) -> None: if self.save_rank >= ws: raise ValueError("target rank is greater than the distributed group size.") - _filenames = self._filenames + _images = self._filenames if ws > 1: - _filenames = self.deli.join(_filenames) + _filenames = self.deli.join(_images) if get_torch_version_tuple() > (1, 6, 0): # all gather across all processes _filenames = self.deli.join(idist.all_gather(_filenames)) else: raise RuntimeError("MetricsSaver can not save metric details in distributed mode with PyTorch < 1.7.0.") - _filenames = _filenames.split(self.deli) + _images = _filenames.split(self.deli) # only save metrics to file in specified rank if idist.get_rank() == self.save_rank: @@ -128,7 +128,7 @@ def __call__(self, engine: Engine) -> None: write_metrics_reports( save_dir=self.save_dir, - images=_filenames, + images=_images, metrics=_metrics, metric_details=_metric_details, summary_ops=self.summary_ops, From 040470d87eb045b6edc1cece5be9d9e0f67f10c7 Mon Sep 17 00:00:00 2001 From: Nic Ma Date: Fri, 29 Jan 2021 19:44:28 +0800 Subject: [PATCH 39/39] [DLMED] fix min 
test Signed-off-by: Nic Ma --- tests/test_write_metrics_reports.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_write_metrics_reports.py b/tests/test_write_metrics_reports.py index d1a3f7718a..72625ddd9a 100644 --- a/tests/test_write_metrics_reports.py +++ b/tests/test_write_metrics_reports.py @@ -16,7 +16,7 @@ import torch -from monai.handlers import write_metrics_reports +from monai.handlers.utils import write_metrics_reports class TestWriteMetricsReports(unittest.TestCase):
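
To close the series, a minimal end-to-end sketch of the final standalone API, mirroring the values of test_write_metrics_reports above; the temporary-directory handling is illustrative:

    import os
    import tempfile

    import torch

    from monai.handlers.utils import write_metrics_reports

    with tempfile.TemporaryDirectory() as tempdir:
        write_metrics_reports(
            save_dir=tempdir,
            images=["filepath1", "filepath2"],
            metrics={"metric1": 1, "metric2": 2},
            metric_details={"metric3": torch.tensor([[1, 2], [2, 3]])},
            summary_ops="*",
            deli="\t",
            output_type="csv",
        )
        # metrics.csv, metric3_raw.csv and metric3_summary.csv are written,
        # with a "mean" column appended across the class channels
        print(sorted(os.listdir(tempdir)))
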