Yejiaojiao/dev bcewithlogitsloss (#5173)

* first commit of op floor * first commit of op acos * split acos from math_ops and add doctest * resolve conflicting files * fix format bug * fix bug in forward * add test_bcewithlogitsloss * add test_bcewithlogitsloss fix bugs * modify doctest * modify doctest * add reduce and size_average arguments * rebuild test case and modify bugs in docstring * solve conficts * auto format by CI * rectify args explanation in docstring * remove redundant check of pos_weight length * resolve conflicts * auto format by CI Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com> Co-authored-by: oneflow-ci-bot <ci-bot@oneflow.org>
Oneflow-Inc · Jun 17, 2021 · 53d9b7e · 53d9b7e
1 parent ce3f008
commit 53d9b7e
Show file tree

Hide file tree

Showing 4 changed files with 277 additions and 3 deletions.
diff --git a/docs/source/experimental.rst b/docs/source/experimental.rst
@@ -100,6 +100,7 @@ Experimental features
 .. autofunction:: oneflow.experimental.nn.KLDivLoss
 .. autofunction:: oneflow.experimental.nn.MSELoss
 .. autofunction:: oneflow.experimental.nn.MarginRankingLoss
+.. autofunction:: oneflow.experimental.nn.BCEWithLogitsLoss
 .. autofunction:: oneflow.experimental.masked_fill
 .. autofunction:: oneflow.experimental.Tensor.masked_fill
 .. autofunction:: oneflow.experimental.sum

diff --git a/oneflow/python/nn/modules/loss.py b/oneflow/python/nn/modules/loss.py
@@ -546,8 +546,8 @@ class MarginRankingLoss(Module):
 
     For example:
 
-    .. code-block:: python 
-        
+    .. code-block:: python
+
         >>> import oneflow.experimental as flow
         >>> flow.enable_eager_execution()
         >>> import numpy as np
@@ -566,7 +566,7 @@ class MarginRankingLoss(Module):
         >>> out = m(x1, x2, target)
         >>> out
         tensor([8.2], dtype=oneflow.float32)
-        
+
         >>> m = flow.nn.MarginRankingLoss(margin = 10, reduction="mean")
         >>> out = m(x1, x2, target)
         >>> out
@@ -774,6 +774,147 @@ def forward(
             return loss
 
 
+@oneflow_export("nn.BCEWithLogitsLoss")
+@experimental_api
+class BCEWithLogitsLoss(Module):
+    r"""This operator combines the `Sigmoid` and `BCELoss` together. For numerical stability,
+    we apply some math tricks instead of using `Sigmoid` layer with `BCELoss`.
+
+    The equation is:
+
+    if :attr:`reduction` = ``"none"``:
+
+    .. math::
+
+        out = -weight*[Pos\_weight*y*log\sigma({x}) + (1-y)*log(1-\sigma(x))]
+
+    if :attr:`reduction` = ``"mean"``:
+
+    .. math::
+
+        out = -\frac{weight}{n}\sum_{i=1}^n[Pos\_weight*y*log\sigma({x}) + (1-y)*log(1-\sigma(x))]
+
+    if :attr:`reduction` = ``"sum"``:
+
+    .. math::
+
+        out = -weight*\sum_{i=1}^n[Pos\_weight*y*log\sigma({x}) + (1-y)*log(1-\sigma(x))]
+
+    Args:
+        weight (Tensor, optional): The manual rescaling weight to the loss. When given, its shape
+            must be the same as the input shape. Default: ``None``
+        size_average (bool, optional): Deprecated (see :attr:`reduction`). The value is stored on
+            the module but is not used when computing the loss. Default: ``True``
+        reduce (bool, optional): Deprecated (see :attr:`reduction`). The value is stored on
+            the module but is not used when computing the loss. Default: ``True``
+        reduction (str, optional): The reduce type, it can be one of ``"none"``, ``"mean"``, ``"sum"``.
+            ``'none'``: no reduction will be applied, ``'mean'``: the sum of the output will be divided
+            by the number of elements in the output, ``'sum'``: the output will be summed.
+            ``None`` is also accepted and behaves like ``"none"``. Default: ``"mean"``
+        pos_weight (Tensor, optional): The manual rescaling weight to the positive examples.
+            It is multiplied element-wise with the target. Default: ``None``
+
+    Shape:
+        - Input: :math:`(N,*)` where `*` means, any number of additional dimensions
+        - Target: :math:`(N,*)`, same shape as the input
+        - Output: scalar. If :attr:`reduction` is ``"none"``, then :math:`(N,*)`, same shape as input.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> input = flow.Tensor([[1.2, 0.2, -0.3], [0.7, 0.6, -2], [0.7, 0.6, -2]], dtype=flow.float32)
+        >>> target = flow.Tensor([[0, 1, 0], [1, 0, 1], [1, 0, 1]], dtype=flow.float32)
+        >>> weight = flow.Tensor([[2, 2, 2], [2, 2, 2], [2, 2, 2]], dtype=flow.float32)
+        >>> pos_weight = flow.Tensor([1.2, 1.3, 1.4], dtype=flow.float32)
+
+        >>> m = flow.nn.BCEWithLogitsLoss(weight=weight, pos_weight=pos_weight, reduction="none")
+        >>> out = m(input, target)
+        >>> out
+        tensor([[2.9266, 1.5552, 1.1087],
+                [0.9676, 2.075 , 5.9554],
+                [0.9676, 2.075 , 5.9554]], dtype=oneflow.float32)
+
+        >>> m = flow.nn.BCEWithLogitsLoss(weight=weight, pos_weight=pos_weight, reduction="mean")
+        >>> out = m(input, target)
+        >>> out
+        tensor([2.6207], dtype=oneflow.float32)
+
+        >>> m = flow.nn.BCEWithLogitsLoss(weight=weight, pos_weight=pos_weight, reduction="sum")
+        >>> out = m(input, target)
+        >>> out
+        tensor([23.5865], dtype=oneflow.float32)
+
+
+    """
+
+    def __init__(
+        self,
+        weight=None,
+        size_average: bool = True,
+        reduce: bool = True,
+        reduction: Optional[str] = "mean",
+        pos_weight=None,
+    ) -> None:
+        super().__init__()
+        assert reduction in [
+            "sum",
+            "none",
+            "mean",
+            None,
+        ], "only 'sum', 'mean', 'none' and None supported by now"
+
+        self.weight = weight
+        # size_average and reduce are kept only as attributes; forward() never reads them.
+        self.size_average = size_average
+        self.reduce = reduce
+        self.reduction = reduction
+        self.pos_weight = pos_weight
+
+    def forward(self, input, target):
+        """Compute the (optionally weighted and reduced) BCE-with-logits loss.
+
+        Raises:
+            ValueError: if ``target`` and ``input`` do not have the same shape.
+            AssertionError: if ``weight`` was given and its shape differs from ``input``.
+        """
+        if not (target.shape == input.shape):
+            raise ValueError(
+                "Target size ({}) must be the same as input size ({})".format(
+                    target.size(), input.size()
+                )
+            )
+
+        # _max_val = max(-input, 0). Subtracting it inside the exponentials and adding
+        # it back outside the log evaluates log(1 + exp(-input)) without overflow.
+        _neg_input = flow.experimental.negative(input)
+        _max_val = flow.experimental.clip(_neg_input, 0)
+        _neg_max_val = flow.experimental.negative(_max_val)
+
+        # Compare against None explicitly: the truth value of a tensor must not decide
+        # whether the positive-class weighting is applied.
+        if self.pos_weight is not None:
+            # Weight is pos_weight where target == 1 and 1 where target == 0.
+            _log_weight = ((self.pos_weight - 1) * target) + 1
+            _loss = (1 - target) * input + _log_weight * (
+                flow.experimental.log(
+                    flow.experimental.exp(_neg_max_val)
+                    + flow.experimental.exp(_neg_input - _max_val)
+                )
+                + _max_val
+            )
+        else:
+            _loss = (1 - target) * input + _max_val
+            _loss += flow.experimental.log(
+                flow.experimental.exp(_neg_max_val)
+                + flow.experimental.exp(_neg_input - _max_val)
+            )
+
+        if self.weight is not None:
+            assert (
+                self.weight.shape == input.shape
+            ), "The weight shape must be the same as Input shape"
+            _weighted_loss = self.weight * _loss
+        else:
+            _weighted_loss = _loss
+
+        if self.reduction == "mean":
+            return flow.experimental.mean(_weighted_loss)
+        elif self.reduction == "sum":
+            return flow.experimental.sum(_weighted_loss)
+        else:
+            # "none" or None: return the element-wise loss unreduced
+            return _weighted_loss
+
+
 if __name__ == "__main__":
     import doctest
 

diff --git a/oneflow/python/ops/nn_ops.py b/oneflow/python/ops/nn_ops.py
@@ -3787,6 +3787,7 @@ def bce_loss_job(input: tp.Numpy.Placeholder(shape=(2, 3)),
 
 
 @oneflow_export("nn.BCEWithLogitsLoss")
+@stable_api
 def bce_with_logits_loss(
     input: oneflow._oneflow_internal.BlobDesc,
     target: oneflow._oneflow_internal.BlobDesc,

diff --git a/oneflow/python/test/modules/test_bcewithlogitsloss.py b/oneflow/python/test/modules/test_bcewithlogitsloss.py
@@ -0,0 +1,131 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import unittest
+from collections import OrderedDict
+
+import numpy as np
+
+import oneflow.experimental as flow
+from test_util import GenArgList
+
+
+def _np_bcewithlogitsloss(
+    np_input, np_target, np_weight=None, np_pos_weight=None, reduction="none"
+):
+    """NumPy reference for BCE-with-logits, used to check the OneFlow module.
+
+    Returns the element-wise loss for any ``reduction`` other than ``"mean"``
+    or ``"sum"``; otherwise returns the mean or the sum of the weighted loss.
+    """
+    neg_x = np.negative(np_input)
+    # shift = max(-x, 0); keeps the exponentials below from overflowing.
+    shift = np.clip(neg_x, 0, None)
+    neg_shift = np.negative(shift)
+
+    if np_pos_weight is None:
+        loss = (1 - np_target) * np_input + shift
+        loss += np.log(np.exp(neg_shift) + np.exp(neg_x - shift))
+    else:
+        # pos_weight where target == 1, and 1 where target == 0.
+        log_weight = ((np_pos_weight - 1) * np_target) + 1
+        loss = (1 - np_target) * np_input + log_weight * (
+            np.log(np.exp(neg_shift) + np.exp(neg_x - shift)) + shift
+        )
+
+    if np_weight is not None:
+        assert (
+            np_weight.shape == np_input.shape
+        ), "The weight shape must be the same as Input shape"
+        loss = np_weight * loss
+
+    if reduction == "mean":
+        return loss.mean()
+    if reduction == "sum":
+        return loss.sum()
+    return loss
+
+
+def _np_bcewithlogitsloss_grad(np_input, np_target, np_weight, np_pos_weight):
+    """Return the NumPy gradient of the loss w.r.t. ``np_input`` per reduction.
+
+    The result is a dict keyed by ``"mean"``, ``"sum"`` and ``"none"``. The
+    ``"none"`` entry equals the ``"sum"`` entry.
+    """
+    num_elements = np_target.size
+
+    exp_neg_input = np.exp(-np_input)
+    # Derivative of log(1 + exp(-x)) with respect to x.
+    dlog_term = -exp_neg_input / (1 + exp_neg_input)
+
+    grad_mean = -(np_weight / num_elements) * (
+        (np_target - 1) + ((1 - np_pos_weight) * np_target - 1) * dlog_term
+    )
+    grad_sum = grad_mean * num_elements
+
+    return dict(mean=grad_mean, sum=grad_sum, none=grad_sum)
+
+
+def _test_bcewithlogitsloss_impl(test_case, device, shape, reduction):
+    """Check forward output and input gradient of BCEWithLogitsLoss against NumPy."""
+    # Random draws stay in the order input, target, weight, pos_weight.
+    np_x = np.random.randn(*shape).astype(np.float32)
+    np_y = np.random.randint(0, 2, [*shape]).astype(np.float32)
+    np_w = np.random.randn(*shape).astype(np.float32)
+    # pos_weight has one entry per element of the last dimension.
+    np_pw = np.random.randn([*shape][-1]).astype(np.float32)
+
+    def _as_tensor(array, **extra):
+        return flow.Tensor(
+            array, dtype=flow.float32, device=flow.device(device), **extra
+        )
+
+    input = _as_tensor(np_x, requires_grad=True)
+    target = _as_tensor(np_y)
+    weight = _as_tensor(np_w)
+    pos_weight = _as_tensor(np_pw)
+
+    loss_module = flow.nn.BCEWithLogitsLoss(
+        weight=weight, pos_weight=pos_weight, reduction=reduction
+    )
+    of_out = loss_module(input, target)
+    expected_out = _np_bcewithlogitsloss(
+        np_x, np_y, np_weight=np_w, np_pos_weight=np_pw, reduction=reduction
+    )
+    test_case.assertTrue(np.allclose(of_out.numpy(), expected_out, 1e-5, 1e-5))
+
+    # Backward test with np: sum first so backward() starts from a scalar.
+    of_out = of_out.sum()
+    of_out.backward()
+    expected_grads = _np_bcewithlogitsloss_grad(
+        np_x, np_y, np_weight=np_w, np_pos_weight=np_pw
+    )
+    expected_grad = expected_grads[reduction]
+    test_case.assertTrue(
+        np.allclose(input.grad.numpy(), expected_grad, 1e-5, 1e-5)
+    )
+
+
+@unittest.skipIf(
+    not flow.unittest.env.eager_execution_enabled(),
+    ".numpy() doesn't work in lazy mode",
+)
+class TestBCEWithLogitsLossModule(flow.unittest.TestCase):
+    """Runs the BCEWithLogitsLoss check over devices, shapes and reductions."""
+
+    def test_bcewithlogitsloss(test_case):
+        # Key order matters: each generated argument list is
+        # [test_fun, device, shape, reduction].
+        arg_dict = OrderedDict(
+            [
+                ("test_fun", [_test_bcewithlogitsloss_impl]),
+                ("device", ["cpu", "cuda"]),
+                (
+                    "shape",
+                    [
+                        (3, 5),
+                        (10, 9, 21),
+                        (14, 22, 9, 21),
+                        (3, 2, 4, 16, 5),
+                        (1,),
+                    ],
+                ),
+                ("reduction", ["none", "sum", "mean"]),
+            ]
+        )
+        for test_fun, *params in GenArgList(arg_dict):
+            test_fun(test_case, *params)
+
+
+# Run the test suite when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()