From ea4d4454ee09817864f4d10816934e98009e3f2e Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Thu, 14 Jun 2018 23:43:57 +0800
Subject: [PATCH 1/5] Update some doc about layers' API.

---
 .../gaussian_random_batch_size_like_op.cc |  9 ++-
 python/paddle/fluid/layers/io.py          | 18 ++---
 python/paddle/fluid/layers/nn.py          | 80 ++++++++++---------
 python/paddle/fluid/layers/tensor.py      | 19 ++++-
 4 files changed, 76 insertions(+), 50 deletions(-)

diff --git a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
index 8050f61d4546f..4a974281481c8 100644
--- a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
+++ b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
@@ -36,11 +36,12 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
   void Apply() override {
     AddAttr<float>("mean",
                    "(float, default 0.0) "
-                   "mean of random tensor.")
+                   "The mean (or center) of the gaussian distribution.")
         .SetDefault(.0f);
     AddAttr<float>("std",
                    "(float, default 1.0) "
-                   "std of random tensor.")
+                   "The standard deviation (std, or spread) of the "
+                   "gaussian distribution.")
         .SetDefault(1.0f);
     AddAttr<int>("seed",
                  "(int, default 0) "
@@ -55,9 +56,11 @@
         .SetDefault(framework::proto::VarType::FP32);
     AddComment(R"DOC(
-GaussianRandom Operator.
 
 Used to initialize tensors with a Gaussian random generator.
+The default mean of the distribution is 0.0 and the default standard
+deviation (std) is 1.0. Users can set the mean and std via the input
+arguments.
 )DOC");
   }
 };

diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 9de88e2c3205a..8f5b22f50042d 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -386,16 +386,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
         Variable: A Reader Variable from which we can get random data.
 
     Examples:
-      .. code-block:: python
-
-        reader = fluid.layers.io.random_data_generator(
-            low=0.0,
-            high=1.0,
-            shapes=[(3,224,224), (1)],
-            lod_levels=[0, 0])
+        .. code-block:: python
 
-        # Via the reader, we can use 'read_file' layer to get data:
-        image, label = fluid.layers.io.read_file(reader)
+            import paddle.fluid as fluid
+            reader = fluid.layers.io.random_data_generator(
+                low=0.0,
+                high=1.0,
+                shapes=[(3,224,224), (1)],
+                lod_levels=[0, 0])
+            # Via the reader, we can use 'read_file' layer to get data:
+            image, label = fluid.layers.io.read_file(reader)
     """
     dtypes = [core.VarDesc.VarType.FP32] * len(shapes)
     shape_concat = []

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 2c1f988828218..f8013b915a87d 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -312,8 +312,7 @@ def dynamic_lstm(input,
         cell_activation(str): The activation for cell output.
                               Choices = ["sigmoid", "tanh", "relu", "identity"],
                               default "tanh".
         candidate_activation(str): The activation for candidate hidden state.
-                                   Choices = ["sigmoid", "tanh",
-                                   "relu", "identity"],
+                                   Choices = ["sigmoid", "tanh", "relu", "identity"],
                                    default "tanh".
         dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
@@ -488,35 +487,38 @@ def dynamic_lstmp(input,
         cell_activation(str): The activation for cell output.
                               Choices = ["sigmoid", "tanh", "relu", "identity"],
                               default "tanh".
        candidate_activation(str): The activation for candidate hidden state.
-                                   Choices = ["sigmoid", "tanh",
-                                   "relu", "identity"],
+                                   Choices = ["sigmoid", "tanh", "relu", "identity"],
                                    default "tanh".
         proj_activation(str): The activation for projection output.
-                              Choices = ["sigmoid", "tanh",
-                              "relu", "identity"],
+                              Choices = ["sigmoid", "tanh", "relu", "identity"],
                               default "tanh".
         dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
 
     Returns:
-        tuple: The projection of hidden state, and cell state of LSTMP. The \
-               shape of projection is (T x P), for the cell state which is \
-               (T x D), and both LoD is the same with the `input`.
+        tuple: A tuple of two output variables: the projection of the hidden \
+               state and the cell state of LSTMP. The shape of the projection \
+               is (T x P) and that of the cell state is (T x D); both have \
+               the same LoD as the `input`.
 
     Examples:
         .. code-block:: python
 
-            hidden_dim, proj_dim = 512, 256
-            fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
-                                     act=None, bias_attr=None)
-            proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
-                                                     size=hidden_dim * 4,
-                                                     proj_size=proj_dim,
-                                                     use_peepholes=False,
-                                                     is_reverse=True,
-                                                     cell_activation="tanh",
-                                                     proj_activation="tanh")
+            dict_dim, emb_dim = 128, 64
+            data = fluid.layers.data(name='sequence', shape=(128),
+                                     dtype='int32', lod_level=1)
+            emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+            hidden_dim, proj_dim = 512, 256
+            fc_out = fluid.layers.fc(input=emb, size=hidden_dim * 4,
+                                     act=None, bias_attr=None)
+            proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
+                                                     size=hidden_dim * 4,
+                                                     proj_size=proj_dim,
+                                                     use_peepholes=False,
+                                                     is_reverse=True,
+                                                     cell_activation="tanh",
+                                                     proj_activation="tanh")
     """
 
     helper = LayerHelper('lstmp', **locals())
@@ -574,10 +576,10 @@ def dynamic_gru(input,
                 candidate_activation='tanh',
                 h_0=None):
     """
-    **Dynamic GRU Layer**
+    **Gated Recurrent Unit (GRU) Layer**
 
     Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
-    Sequence Modeling <https://arxiv.org/abs/1412.3555>`_
+    Sequence Modeling <https://arxiv.org/abs/1412.3555>`_ .
 
     The formula is as follows:
 
@@ -624,17 +626,24 @@ def dynamic_gru(input,
             Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
         candidate_activation(str): The activation for candidate hidden state.
             Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".
-        h_0 (Variable): The hidden output of the first time step.
+        h_0 (Variable): The initial hidden state. If not set, it defaults to
+            zero. It is a tensor with shape (N x D), where N is the batch
+            size and D is the hidden size.
 
     Returns:
         Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \
-            and lod is the same with the input.
+            and the sequence length is the same as that of the input.
 
     Examples:
         .. code-block:: python
 
+            dict_dim, emb_dim = 128, 64
+            data = fluid.layers.data(name='sequence', shape=(128),
+                                     dtype='int32', lod_level=1)
+            emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
             hidden_dim = 512
-            x = fluid.layers.fc(input=data, size=hidden_dim * 3)
+            x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
             hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
     """
 
@@ -872,13 +881,13 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
     Drop or keep each element of `x` independently. Dropout is a regularization
     technique for reducing overfitting by preventing neuron co-adaptation during
-    training. The dropout operator randomly set (according to the given dropout
+    training. The dropout operator randomly sets (according to the given dropout
     probability) the outputs of some units to zero, while others remain
     unchanged.
 
     Args:
-        x (Variable): The input tensor.
-        dropout_prob (float): Probability of setting units to zero.
+        x (Variable): The input tensor variable.
+        dropout_prob (float): Probability of setting units to zero.
         is_test (bool): A flag indicating whether it is in test phase or not.
         seed (int): A Python integer used to create random seeds. If this
             parameter is set to None, a random seed is used.
         name (str|None): A name for this layer (optional). If set None, the layer
             will be named automatically.
 
     Returns:
-        Variable: A tensor variable.
+        Variable: A tensor variable with the same shape as `x`.
 
     Examples:
         .. code-block:: python
 
-            x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
-            droped = fluid.layers.dropout(input=x, dropout_rate=0.5)
+            x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
+            dropped = fluid.layers.dropout(input=x, dropout_rate=0.5)
     """
 
     helper = LayerHelper('dropout', **locals())
@@ -2547,17 +2556,16 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
 
-    Returns:
-        Variable: The output tensor variable.
+    Returns:
+        Variable: The output tensor variable has the same shape as `x`.
 
     Examples:
         .. code-block:: python
 
-            data = fluid.layers.data(name="data",
-                                     shape=(3, 17, 13),
-                                     dtype="float32")
-            normed = fluid.layers.l2_normalize(x=data, axis=1)
+            data = fluid.layers.data(name="data",
+                                     shape=(3, 17, 13),
+                                     dtype="float32")
+            normed = fluid.layers.l2_normalize(x=data, axis=1)
     """
 
     if len(x.shape) == 1:

diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 62b01d595a812..366bb2983dfb6 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -486,11 +486,26 @@ def save_combine(x, file_path, overwrite=True):
     Saves a list of variables into a single file.
 
     Args:
-        x(list): A list of Tensor/LoDTensor to be saved together in a single file.
+        x(list): A list of Tensor/LoDTensor variables to be saved together in
+            a single file.
         file_path(str): The file path where variables will be saved.
-        overwrite(bool): Whether or not cover the given file when it has already
+        overwrite(bool): Whether or not to overwrite the given file if it
+            already exists. If set to False and the file exists, a runtime
+            error will be thrown.
+
+    Returns:
+        There is no return value.
+
+    Examples:
+        .. code-block:: python
+
+            v1 = fluid.layers.data(name="data_1",
+                                   shape=(4, 6),
+                                   dtype="float32")
+            v2 = fluid.layers.data(name="data_2",
+                                   shape=(6, 8, 4),
+                                   dtype="float32")
+            fluid.layers.save_combine([v1, v2], file_path="output")
     """
     helper = LayerHelper("save_combine", **locals())
     helper.append_op(

From e7816dbac692743fc0630ba9b380c4d6e799057d Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Fri, 15 Jun 2018 13:07:18 +0800
Subject: [PATCH 2/5] Fix format.
---
 .../fluid/operators/detection/box_coder_op.cc | 41 +++++++++----
 python/paddle/fluid/layers/io.py              | 16 +++---
 python/paddle/fluid/layers/nn.py              | 53 ++++++++++---------
 python/paddle/fluid/layers/tensor.py          | 15 +++---
 4 files changed, 72 insertions(+), 53 deletions(-)

diff --git a/paddle/fluid/operators/detection/box_coder_op.cc b/paddle/fluid/operators/detection/box_coder_op.cc
index 8c4b4321b7582..d0f95f727fdbc 100644
--- a/paddle/fluid/operators/detection/box_coder_op.cc
+++ b/paddle/fluid/operators/detection/box_coder_op.cc
@@ -106,23 +106,36 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
                    "and M represents the number of decoded boxes.");
 
     AddComment(R"DOC(
-Bounding Box Coder Operator.
+
+Bounding Box Coder.
+
 Encode/Decode the target bounding box with the priorbox information.
+
 The Encoding schema is described below:
-ox = (tx - px) / pw / pxv
-oy = (ty - py) / ph / pyv
-ow = log(abs(tw / pw)) / pwv
-oh = log(abs(th / ph)) / phv
+
+    ox = (tx - px) / pw / pxv
+
+    oy = (ty - py) / ph / pyv
+
+    ow = log(abs(tw / pw)) / pwv
+
+    oh = log(abs(th / ph)) / phv
+
 The Decoding schema is described below:
-ox = (pw * pxv * tx * + px) - tw / 2
-oy = (ph * pyv * ty * + py) - th / 2
-ow = exp(pwv * tw) * pw + tw / 2
-oh = exp(phv * th) * ph + th / 2
-where tx, ty, tw, th denote the target box's center coordinates, width and
-height respectively. Similarly, px, py, pw, ph denote the priorbox's(anchor)
-center coordinates, width and height. pxv, pyv, pwv, phv denote the variance
-of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates,
-width and height.
+
+    ox = (pw * pxv * tx + px) - tw / 2
+
+    oy = (ph * pyv * ty + py) - th / 2
+
+    ow = exp(pwv * tw) * pw + tw / 2
+
+    oh = exp(phv * th) * ph + th / 2
+
+where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width
+and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the
+priorbox's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`,
+`phv` denote the variance of the priorbox and `ox`, `oy`, `ow`, `oh` denote the
+encoded/decoded coordinates, width and height.
 )DOC");
   }
 };

diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 8f5b22f50042d..59f6c29220cde 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -386,16 +386,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
         Variable: A Reader Variable from which we can get random data.
 
     Examples:
+
         .. code-block:: python
 
-            import paddle.fluid as fluid
-            reader = fluid.layers.io.random_data_generator(
+            reader = fluid.layers.random_data_generator(
                 low=0.0,
                 high=1.0,
                 shapes=[(3,224,224), (1)],
                 lod_levels=[0, 0])
             # Via the reader, we can use 'read_file' layer to get data:
-            image, label = fluid.layers.io.read_file(reader)
+            image, label = fluid.layers.read_file(reader)
     """
     dtypes = [core.VarDesc.VarType.FP32] * len(shapes)
     shape_concat = []

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index f8013b915a87d..205817aa5a38a 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -503,22 +503,23 @@ def dynamic_lstmp(input,
                the same LoD as the `input`.
 
     Examples:
+
         .. code-block:: python
 
             dict_dim, emb_dim = 128, 64
             data = fluid.layers.data(name='sequence', shape=(128),
                                      dtype='int32', lod_level=1)
             emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
             hidden_dim, proj_dim = 512, 256
             fc_out = fluid.layers.fc(input=hidden, size=hidden_dim * 4,
                                      act=None, bias_attr=None)
             proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
                                                      size=hidden_dim * 4,
                                                      proj_size=proj_dim,
                                                      use_peepholes=False,
                                                      is_reverse=True,
                                                      cell_activation="tanh",
                                                      proj_activation="tanh")
@@ -636,6 +637,7 @@ def dynamic_gru(input,
             and the sequence length is the same as that of the input.
 
     Examples:
+
         .. code-block:: python
 
             dict_dim, emb_dim = 128, 64
@@ -900,10 +902,11 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
         Variable: A tensor variable with the same shape as `x`.
 
     Examples:
+
         .. code-block:: python
 
             x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
             dropped = fluid.layers.dropout(input=x, dropout_rate=0.5)
     """
 
     helper = LayerHelper('dropout', **locals())
@@ -2541,31 +2544,33 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
     norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes
 
     .. math::
-        y = \frac{x}{ \sqrt{\sum {x^2} + epsion }}
+
+        y = \\frac{x}{ \sqrt{\sum {x^2} + \epsilon }}
 
     For `x` with more dimensions, this layer independently normalizes each 1-D
     slice along dimension `axis`.
 
     Args:
         x(Variable|list): The input tensor to l2_normalize layer.
-        axis(int): The axis on which to apply normalization. If `axis < 0`,
+        axis(int): The axis on which to apply normalization. If `axis < 0`, \
             the dimension to normalize is rank(X) + axis. -1 is the last
             dimension.
-        epsilon(float): The epsilon value is used to avoid division by zero,
+        epsilon(float): The epsilon value is used to avoid division by zero, \
             the default value is 1e-12.
-        name(str|None): A name for this layer(optional). If set None, the layer
+        name(str|None): A name for this layer(optional). If set None, the layer \
             will be named automatically.
 
     Returns:
         Variable: The output tensor variable has the same shape as `x`.
 
     Examples:
+
         .. code-block:: python
 
             data = fluid.layers.data(name="data",
                                      shape=(3, 17, 13),
                                      dtype="float32")
             normed = fluid.layers.l2_normalize(x=data, axis=1)
     """
 
     if len(x.shape) == 1:

diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 366bb2983dfb6..c0ea9bd8e9c47 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -497,15 +497,16 @@ def save_combine(x, file_path, overwrite=True):
         There is no return value.
 
     Examples:
+
         .. code-block:: python
 
             v1 = fluid.layers.data(name="data_1",
                                    shape=(4, 6),
                                    dtype="float32")
             v2 = fluid.layers.data(name="data_2",
                                    shape=(6, 8, 4),
                                    dtype="float32")
             fluid.layers.save_combine([v1, v2], file_path="output")
     """
     helper = LayerHelper("save_combine", **locals())
     helper.append_op(

From 7045775bbd1766f9316574b2bccd875dfa37862d Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Fri, 15 Jun 2018 13:38:37 +0800
Subject: [PATCH 3/5] Fix example bug in random_data_generator.

---
 python/paddle/fluid/layers/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 59f6c29220cde..c61cb83dcfa8a 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -392,7 +392,7 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
             reader = fluid.layers.random_data_generator(
                 low=0.0,
                 high=1.0,
-                shapes=[(3,224,224), (1)],
+                shapes=[[3,224,224], [1]],
                 lod_levels=[0, 0])
             # Via the reader, we can use 'read_file' layer to get data:
             image, label = fluid.layers.read_file(reader)

From b58eec85e57ae3aaba8527686fa3490cac14c1cb Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Fri, 15 Jun 2018 13:41:56 +0800
Subject: [PATCH 4/5] Fix example bug in dropout.

---
 python/paddle/fluid/layers/nn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 205817aa5a38a..8213502ba72d6 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -906,7 +906,7 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
 
             x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
-            dropped = fluid.layers.dropout(input=x, dropout_rate=0.5)
+            dropped = fluid.layers.dropout(x, dropout_prob=0.5)

From ccf743b61658b62afbf25daa03d6a2e9d8d5a73d Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Fri, 15 Jun 2018 14:17:32 +0800
Subject: [PATCH 5/5] Follow comments and small fix for some examples.

---
 paddle/fluid/operators/activation_op.cc | 2 +-
 python/paddle/fluid/layers/nn.py        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index af1d85047e519..ac0216ad8933f 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -112,7 +112,7 @@ Sigmoid Activation Operator
 __attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC(
 Logsigmoid Activation Operator
 
-$$out = \log \frac{1}{1 + e^{-x}}$$
+$$out = \\log \\frac{1}{1 + e^{-x}}$$
 
 )DOC";

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 8213502ba72d6..cc16d36b2ff9a 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -507,11 +507,11 @@ def dynamic_lstmp(input,
         .. code-block:: python
 
             dict_dim, emb_dim = 128, 64
-            data = fluid.layers.data(name='sequence', shape=(128),
+            data = fluid.layers.data(name='sequence', shape=[1],
                                      dtype='int32', lod_level=1)
             emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
             hidden_dim, proj_dim = 512, 256
-            fc_out = fluid.layers.fc(input=hidden, size=hidden_dim * 4,
+            fc_out = fluid.layers.fc(input=emb, size=hidden_dim * 4,
                                      act=None, bias_attr=None)
             proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
                                                      size=hidden_dim * 4,
                                                      proj_size=proj_dim,
                                                      use_peepholes=False,
                                                      is_reverse=True,
                                                      cell_activation="tanh",
                                                      proj_activation="tanh")
@@ -641,7 +641,7 @@ def dynamic_gru(input,
         .. code-block:: python
 
             dict_dim, emb_dim = 128, 64
-            data = fluid.layers.data(name='sequence', shape=(128),
+            data = fluid.layers.data(name='sequence', shape=[1],
                                      dtype='int32', lod_level=1)
             emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
             hidden_dim = 512
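
To make the box_coder encoding/decoding schema documented in PATCH 2 concrete, here is a small NumPy sketch. It is illustrative only, not part of the patch series and not the operator's kernel: it works purely in center-size form, skips the corner-coordinate output the real operator produces, and the function and variable names are invented for this note.

.. code-block:: python

    import numpy as np

    def encode(target, prior, prior_var):
        # Encoding schema from the box_coder comment:
        # ox = (tx - px) / pw / pxv, ..., ow = log(abs(tw / pw)) / pwv, ...
        tx, ty, tw, th = target         # target box: center x/y, width, height
        px, py, pw, ph = prior          # prior (anchor) box, same layout
        pxv, pyv, pwv, phv = prior_var  # per-coordinate variances
        return np.array([(tx - px) / pw / pxv,
                         (ty - py) / ph / pyv,
                         np.log(abs(tw / pw)) / pwv,
                         np.log(abs(th / ph)) / phv])

    def decode(offsets, prior, prior_var):
        # Exact inverse of encode(), recovering the center-size target box.
        ox, oy, ow, oh = offsets
        px, py, pw, ph = prior
        pxv, pyv, pwv, phv = prior_var
        return np.array([ox * pxv * pw + px,     # invert the center offsets
                         oy * pyv * ph + py,
                         np.exp(pwv * ow) * pw,  # invert the log-scaled sizes
                         np.exp(phv * oh) * ph])

    prior = np.array([10.0, 10.0, 4.0, 4.0])
    variance = np.array([0.1, 0.1, 0.2, 0.2])
    target = np.array([11.0, 9.0, 6.0, 3.0])

    # Round trip: decoding the encoded offsets recovers the target box.
    assert np.allclose(
        decode(encode(target, prior, variance), prior, variance), target)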
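
The l2_normalize formula from PATCH 2 can be checked numerically in the same spirit; a minimal sketch, assuming axis=1 and the epsilon default of 1e-12 visible in the function signature:

.. code-block:: python

    import numpy as np

    # y = x / sqrt(sum(x^2) + epsilon), with the sum taken over axis=1.
    x = np.random.rand(3, 17, 13).astype("float32")
    epsilon = 1e-12
    y = x / np.sqrt((x * x).sum(axis=1, keepdims=True) + epsilon)

    # Every 1-D slice y[i, :, k] now has (nearly) unit L2 norm.
    assert np.allclose(np.linalg.norm(y, axis=1), 1.0, atol=1e-5)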