From 0551566244e034765fa7a2e96ef940fbe290e889 Mon Sep 17 00:00:00 2001
From: Wang Bojun <105858416+wwbitejotunn@users.noreply.github.com>
Date: Fri, 29 Jul 2022 15:17:55 +0800
Subject: [PATCH] Phi softplus migration (#44542)

* add yaml and utests of phi softplus
add yaml of softplus
fix softplus bug in phi
* update utests
* bug fix
* bug fix for test_layers
* layer api match
* match def and doc in ops.py
* doc polish
* fix unwanted modified of thresholded_relu
* style improve
---
 paddle/phi/api/yaml/legacy_api.yaml           | 10 +++
 paddle/phi/api/yaml/legacy_backward.yaml      | 12 ++++
 python/paddle/fluid/layers/ops.py             | 62 +++++++++++++------
 .../tests/unittests/test_activation_op.py     |  7 ++-
 python/paddle/nn/functional/activation.py     |  6 +-
 5 files changed, 77 insertions(+), 20 deletions(-)

diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
index 2b83f79055a34..6cbdf7424b7ca 100644
--- a/paddle/phi/api/yaml/legacy_api.yaml
+++ b/paddle/phi/api/yaml/legacy_api.yaml
@@ -2175,6 +2175,16 @@
   use_gpudnn : true
   backward : softmax_grad
 
+- api : softplus
+  args : (Tensor x, float beta, float threshold)
+  output : Tensor
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : softplus
+  backward : softplus_grad
+
 # softsign
 - api : softsign
   args : (Tensor x)
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 39addc9421d80..e7167b1763731 100644
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -2046,6 +2046,18 @@
     func : softmax_grad
     use_gpudnn : true
 
+# softplus
+- backward_api : softplus_grad
+  forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad, float beta, float threshold)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : softplus_grad
+  inplace : (out_grad -> x_grad)
+
 - backward_api : softsign_grad
   forward : softsign (Tensor x) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py
index b6ae3b0e58a2a..01da331e57b2e 100755
--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
@@ -32,7 +32,6 @@
     'silu',
     'logsigmoid',
     'tanh_shrink',
-    'softplus',
     'softsign',
     'tanh',
 ]
@@ -53,7 +52,15 @@
     'reciprocal_',
 ]
 
-__all__ = []
+__all__ = [
+    'softplus',
+    'softshrink',
+    'hard_shrink',
+    'cumsum',
+    'thresholded_relu',
+    'gelu',
+    'erf',
+]
 
 for _OP in set(__all__):
     globals()[_OP] = generate_layer_fn(_OP)
@@ -462,8 +469,40 @@
 
 """)
 
-add_sample_code(
-    globals()["softplus"], r"""
+_softplus_ = generate_layer_fn('softplus')
+
+
+def softplus(x, beta: float = 1.0, threshold: float = 20.0, name=None):
+    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'softplus')
+    locals_val = locals().copy()
+    kwargs = dict()
+    for name, val in locals_val.items():
+        if val is not None:
+            kwargs[name] = val
+    return _softplus_(**kwargs)
+
+
+softplus.__doc__ = r"""
+    :alias_main: paddle.nn.functional.softplus
+    :alias: paddle.nn.functional.softplus, paddle.nn.functional.activation.softplus
+    :old_api: paddle.fluid.layers.softplus
+
+:strong:`Softplus Activation Operator`
+
+Equation:
+    .. math::
+        out = \\frac{1}{beta} * log(1 + e^{beta * x})
+    For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.
+
+Args:
+    x(Tensor): Input of Softplus op, Tensor, dtype: float32 or float64
+    beta(float, optional): The value of beta for softplus. Default is 1
+    threshold (float, optional): The value of threshold for softplus. Default is 20
+    name(str, optional): Name for the operation (optional, default is None)
+
+Returns:
+    Variable: The output of Softplus op, Tensor, dtype: float32 or float64
+
 Examples:
     .. code-block:: python
 
@@ -474,8 +513,7 @@
         out = F.softplus(x)
         print(out)
         # [0.513015, 0.598139, 0.744397, 0.854355]
-
-""")
+"""
 
 add_sample_code(
     globals()["softsign"], r"""
@@ -492,8 +530,6 @@
 
 """)
 
-__all__ += ['softshrink']
-
 _softshrink_ = generate_layer_fn('softshrink')
 
 
@@ -542,8 +578,6 @@ def softshrink(x, alpha=None):
         result = fluid.layers.softshrink(x=data, alpha=0.3)
 """
 
-__all__ += ['hard_shrink']
-
 _hard_shrink_ = generate_layer_fn('hard_shrink')
 
 
@@ -568,8 +602,6 @@ def hard_shrink(x, threshold=None):
     >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
 """
 
-__all__ += ['cumsum']
-
 _cum_sum_ = generate_layer_fn('cumsum')
 
 
@@ -610,8 +642,6 @@ def cumsum(x, axis=None, exclusive=None, reverse=None):
         result = fluid.layers.cumsum(data, axis=0)
 """
 
-__all__ += ['thresholded_relu']
-
 _thresholded_relu_ = generate_layer_fn('thresholded_relu')
 
 
@@ -700,8 +730,6 @@ def thresholded_relu(x, threshold=None):
            #  [-0.        , -0.        ,  1.0013918 ]], dtype=float32)
 """
 
-__all__ += ['gelu']
-
 _gelu_ = generate_layer_fn('gelu')
 
 
@@ -785,8 +813,6 @@ def gelu(x, approximate=False):
            #  [ 0.08796856,  0.20387867,  0.2080159 ]], dtype=float32)
 """
 
-__all__ += ['erf']
-
 _erf_ = generate_layer_fn('erf')
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index 5edb1185ad644..cdb01b4c99416 100755
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -2676,6 +2676,7 @@ class TestSoftplus(TestActivation):
 
     def setUp(self):
         self.op_type = "softplus"
+        self.python_api = paddle.nn.functional.softplus
         self.init_dtype()
 
         beta = 2
@@ -2688,10 +2689,14 @@ def setUp(self):
         self.attrs = {'beta': beta, "threshold": threshold}
         self.outputs = {'Out': out}
 
+        self.check_eager = True
+
     def test_check_grad(self):
         if self.dtype == np.float16:
             return
-        self.check_grad(['X'], 'Out')
+        if hasattr(self, 'check_eager'):
+            check_eager = self.check_eager
+        self.check_grad(['X'], 'Out', check_eager=check_eager)
 
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index f0f04fb133283..373186096bda0 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -1177,7 +1177,11 @@ def softplus(x, beta=1, threshold=20, name=None):
             x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
             out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
     """
-    if in_dynamic_mode():
+
+    if in_dygraph_mode():
+        return _C_ops.final_state_softplus(x, beta, threshold)
+
+    if _in_legacy_dygraph():
         return _C_ops.softplus(x, 'beta', beta, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
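
Not part of the patch above, but a quick way to see what the migrated operator computes: the sketch below mirrors the formula from the patched docstring (out = 1/beta * log(1 + e^(beta * x)), reverting to the linear function when beta * x > threshold) with a small NumPy reference and compares it against paddle.nn.functional.softplus, which after this change dispatches to the new phi kernel in eager mode. The helper name softplus_ref and the sample values are illustrative assumptions, not code from the PR.

    # Illustrative check only; assumes a Paddle build that includes this migration.
    import numpy as np
    import paddle
    import paddle.nn.functional as F


    def softplus_ref(x, beta=1.0, threshold=20.0):
        # out = 1/beta * log(1 + exp(beta * x)); revert to the linear function x
        # when beta * x > threshold, as stated in the docstring added above.
        bx = beta * x
        safe = np.log1p(np.exp(np.minimum(bx, threshold))) / beta  # clip to avoid overflow
        return np.where(bx > threshold, x, safe)


    x_np = np.array([-0.4, -0.2, 0.1, 0.3, 30.0], dtype=np.float32)
    # Non-default attributes exercise the new (Tensor x, float beta, float threshold) signature.
    out = F.softplus(paddle.to_tensor(x_np), beta=2.0, threshold=15.0)
    np.testing.assert_allclose(out.numpy(),
                               softplus_ref(x_np, beta=2.0, threshold=15.0),
                               rtol=1e-5, atol=1e-6)

Whichever branch of the patched activation.py is taken at runtime (the eager final_state_softplus path or the legacy _C_ops.softplus call), both should agree with this reference to within floating-point tolerance.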