From 16a3d88a20b4c8e029efb837e6523ac651722003 Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Thu, 14 Jun 2018 06:25:33 -0700
Subject: [PATCH 01/10] fix typo

---
 paddle/fluid/operators/clip_by_norm_op.cc  | 11 ++++++++++-
 python/paddle/fluid/layers/control_flow.py |  1 +
 python/paddle/fluid/layers/nn.py           |  6 ++----
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/operators/clip_by_norm_op.cc b/paddle/fluid/operators/clip_by_norm_op.cc
index c87bded034e38..eae86a373be27 100644
--- a/paddle/fluid/operators/clip_by_norm_op.cc
+++ b/paddle/fluid/operators/clip_by_norm_op.cc
@@ -54,10 +54,19 @@ be linearly scaled to make the L2 norm of $Out$ equal to $max\_norm$, as
 shown in the following formula:
 
 $$
-Out = \frac{max\_norm * X}{norm(X)},
+Out = \\frac{max\\_norm * X}{norm(X)},
 $$
 
 where $norm(X)$ represents the L2 norm of $X$.
+
+Examples:
+        .. code-block:: python
+
+            data = fluid.layer.data(
+                name='data', shape=[2, 4, 6], dtype='float32')
+            reshaped = fluid.layers.clip_by_norm(
+                x=data, max_norm=0.5)
+
 )DOC");
   }
 };
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 80e8ff484a4c0..be4dd41577cd7 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -866,6 +866,7 @@ def array_write(x, i, array=None):
         Variable: The output LOD_TENSOR_ARRAY where the input tensor is written.
 
     Examples:
+
         .. code-block::python
 
           tmp = fluid.layers.zeros(shape=[10], dtype='int32')
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 2c1f988828218..2c7e04c1e68d7 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -3159,8 +3159,6 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
 
     Examples:
 
-    As an example:
-
         .. code-block:: text
 
             Given:
@@ -3204,7 +3202,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
 
             output.lod = [[0, 4, 8]]
 
-        The simple usage is:
+     Examples:
 
         .. code-block:: python
 
@@ -3738,7 +3736,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
 
         Output(i, x, y) = Input(i, x, y) / \left(
         k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)}
-        (Input(j, x, y))^2 \right)^{\beta}
+        (Input(j, x, y))^2\right)^{\beta}
 
     In the above equation:
 

From 1f38cbf79b8369fbfc6fa626446b9c98a7314426 Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Thu, 14 Jun 2018 21:26:51 -0700
Subject: [PATCH 02/10] "fix some typo"

---
 paddle/fluid/operators/uniform_random_batch_size_like_op.cc | 2 +-
 python/paddle/fluid/layers/control_flow.py                  | 3 +--
 python/paddle/fluid/layers/nn.py                            | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
index 78fee77df8151..192366fd44ef4 100644
--- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
+++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
@@ -35,7 +35,7 @@ class UniformRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
  protected:
   void Apply() override {
     AddComment(R"DOC(
-Uniform random operator
+UniformRandomBatchSizeLike operator.
 
 This operator initializes a tensor with the same batch_size as the Input tensor
  with random values sampled from a uniform distribution.
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 850945001c75b..f4e95dd36390b 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -866,8 +866,7 @@ def array_write(x, i, array=None):
         Variable: The output LOD_TENSOR_ARRAY where the input tensor is written.
 
     Examples:
-
-        .. code-block::python
+        .. code-block:: python
 
           tmp = fluid.layers.zeros(shape=[10], dtype='int32')
           i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index b5745e20f1628..dc1124a512396 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2935,7 +2935,7 @@ def split(input, num_or_sections, dim=-1, name=None):
                        will be named automatically.
 
     Returns:
-        List: The list of segmented tensor variables.
+        list(Variable): The list of segmented tensor variables.
 
     Examples:
         .. code-block:: python

From dbe0fe6d181fb8e747856d93e60ae78216e6734c Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Thu, 14 Jun 2018 21:44:17 -0700
Subject: [PATCH 03/10] 'fix typo'

---
 python/paddle/fluid/layers/nn.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index dc1124a512396..3456a1bde86c1 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2051,15 +2051,25 @@ def layer_norm(input,
 
 def beam_search_decode(ids, scores, name=None):
     """
+    This layers is to pack the output of beam search layer into sentences and
+    associated scores. It is usually called after the beam search layer.
+
     ${beam_search_decode}
 
     Args:
         ids (Variable): ${ids_comment}
         scores (Variable): ${scores_comment}
         name (str): The name of this layer. It is optional.
-    
+
     Returns:
-        tuple: a tuple of two output variable: sentence_ids, sentence_scores
+        tuple(Variable): a tuple of two output variable: sentence_ids, sentence_scores
+
+    Examples:
+        .. code-block:: python
+            ids, scores = fluid.layers.beam_search(
+                pre_ids, ids, scores, beam_size, end_id)
+            sentence_ids, sentence_scores = fluid.layers.beam_search_decode(
+                ids, scores)
     """
     helper = LayerHelper('beam_search_decode', **locals())
     sentence_ids = helper.create_tmp_variable(dtype=ids.dtype)

From dd711c3754ddce6bf1e5ee9b52964870aaa7f944 Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Thu, 14 Jun 2018 21:55:21 -0700
Subject: [PATCH 04/10] "add beam search"

---
 python/paddle/fluid/layers/nn.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 3456a1bde86c1..bdbb6c47e257e 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -834,11 +834,14 @@ def linear_chain_crf(input, label, param_attr=None):
 
     Args:
         input(${emission_type}): ${emission_comment}
+        input(${transition_type}): ${transition_comment}
         label(${label_type}): ${label_comment}
         param_attr(ParamAttr): The attribute of the learnable parameter.
 
     Returns:
         ${log_likelihood_comment}
+        ${transitionexps_comment}
+        ${emissionexps_comment}
 
     """
     helper = LayerHelper('linear_chain_crf', **locals())
@@ -1170,10 +1173,6 @@ def sequence_conv(input,
         Variable: output of sequence_conv
     """
 
-    # FIXME(dzh) : want to unify the argument of python layer
-    # function. So we ignore some unecessary attributes.
-    # such as, padding_trainable, context_start.
-
     helper = LayerHelper('sequence_conv', **locals())
     dtype = helper.input_dtype()
     filter_shape = [filter_size * input.shape[1], num_filters]
@@ -2051,18 +2050,31 @@ def layer_norm(input,
 
 def beam_search_decode(ids, scores, name=None):
     """
+    Beam Search Decode
+
     This layers is to pack the output of beam search layer into sentences and
     associated scores. It is usually called after the beam search layer.
+    Typically, the output of beam search layer is a tensor of selected ids, with
+    a tensor of the score of each id. Beam search layer's output ids, however, 
+    are generated directly during the tree search, and they are stacked by each 
+    level of the search tree. Thus we need to reorganize them into sentences, 
+    based on the score of each id. This layer takes the output of beam search
+    layer as input and repack them into sentences.
 
     ${beam_search_decode}
 
     Args:
-        ids (Variable): ${ids_comment}
-        scores (Variable): ${scores_comment}
+        ids (Variable): The selected ids, output of beam search layer. 
+        scores (Variable): The associated scores of the ids, out put of beam
+            search layer.
         name (str): The name of this layer. It is optional.
 
     Returns:
-        tuple(Variable): a tuple of two output variable: sentence_ids, sentence_scores
+        tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores.
+        sentence_ids is a tensor with shape [size, length], where size is the
+        beam size of beam search, and length is the length of each sentence. 
+        Note that the length of sentences may vary.
+        sentence_scores is a tensor with the same shape as sentence_ids.
 
     Examples:
         .. code-block:: python

From 6e3d168b9d3b6147efa7740023286b866d7d6e4a Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Thu, 14 Jun 2018 22:00:59 -0700
Subject: [PATCH 05/10] "fix latex"

---
 python/paddle/fluid/layers/nn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index bdbb6c47e257e..283d3c4b2a9b6 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -4210,8 +4210,8 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
 
     .. math::
 
-        Output(i, x, y) = Input(i, x, y) / \left(
-        k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)}
+        Output(i, x, y) = Input(i, x, y) / \left( \\
+        k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)} \\
         (Input(j, x, y))^2\right)^{\beta}
 
     In the above equation:

From 541ddf7e24e263abbee3b342a69100b99ec413d7 Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Thu, 14 Jun 2018 22:05:36 -0700
Subject: [PATCH 06/10] squash to one pr

---
 python/paddle/fluid/framework.py     | 31 ++++++++++++++++++++++++++++
 python/paddle/fluid/layers/metric.py | 26 ++++++++++++++++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index f6438c82ac207..595f67961fe34 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1034,6 +1034,37 @@ def clone_variable(self, var):
 
 
 class Program(object):
+    """
+    Python Program. Beneath it is a ProgramDesc, which is used for
+    create c++ Program. A program is a self-contained programing
+    language like container. It has at least one Block, when the
+    control flow op like conditional_block, while_op is included,
+    it will contains nested block.
+    Please reference the framework.proto for details.
+
+    Notes: we have default_startup_program and default_main_program
+    by default, a pair of them will shared the parameters.
+    The default_startup_program only run once to initialize parameters,
+    default_main_program run in every minibatch and adjust the weights.
+
+    Args:
+        None
+
+    Returns:
+        Python Program
+
+    Examples:
+       .. code-block:: python
+
+         main_program = Program()
+         startup_program = Program()
+         with fluid.program_guard(main_program=main_program, startup_program=startup_program):
+            fluid.layers.data(name="x", shape=[-1, 784], dtype='float32')
+            fluid.layers.data(name="y", shape=[-1, 1], dtype='int32')
+            fluid.layers.fc(name="fc", shape=[10], dtype='float32', act="relu")
+
+    """
+
     def __init__(self):
         self.desc = core.ProgramDesc()
         self.blocks = [Block(self, 0)]
diff --git a/python/paddle/fluid/layers/metric.py b/python/paddle/fluid/layers/metric.py
index a1c64ce277152..069c060da4869 100644
--- a/python/paddle/fluid/layers/metric.py
+++ b/python/paddle/fluid/layers/metric.py
@@ -27,8 +27,32 @@
 
 def accuracy(input, label, k=1, correct=None, total=None):
     """
+    accuracy layer.
+    Refer to the https://en.wikipedia.org/wiki/Precision_and_recall
+
     This function computes the accuracy using the input and label.
-    The output is the top k inputs and their indices.
+    If the correct label occurs in top k predictions, then correct will increment by one.
+    Note: the dtype of accuracy is determined by input. the input and label dtype can be different.
+
+    Args:
+        input(Variable): The input of accuracy layer, which is the predictions of network.
+          Carry LoD information is supported.
+        label(Variable): The label of dataset.
+        k(int): The top k predictions for each class will be checked.
+        correct(Variable): The correct predictions count.
+        total(Variable): The total entries count.
+
+    Returns:
+        Variable: The correct rate.
+
+    Examples:
+        .. code-block:: python
+
+           data = fluid.layers.data(name="data", shape=[-1, 32, 32], dtype="float32")
+           label = fluid.layers.data(name="data", shape=[-1,1], dtype="int32")
+           predict = fluid.layers.fc(input=data, size=10)
+           acc = fluid.layers.accuracy(input=predict, label=label, k=5)
+
     """
     helper = LayerHelper("accuracy", **locals())
     topk_out, topk_indices = nn.topk(input, k=k)

From f2c9c33f158890e0cfba827db3a328c317962d4c Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Thu, 14 Jun 2018 23:24:32 -0700
Subject: [PATCH 07/10] "fix lrn"

---
 python/paddle/fluid/layers/nn.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 283d3c4b2a9b6..2d33742e73c64 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -839,9 +839,9 @@ def linear_chain_crf(input, label, param_attr=None):
         param_attr(ParamAttr): The attribute of the learnable parameter.
 
     Returns:
-        ${log_likelihood_comment}
-        ${transitionexps_comment}
-        ${emissionexps_comment}
+        output(${emission_exps_type}): ${emission_exps_comment} \n
+        output(${transition_exps_type}): ${transition_exps_comment} \n
+        output(${log_likelihood_type}): ${log_likelihood_comment}
 
     """
     helper = LayerHelper('linear_chain_crf', **locals())
@@ -4210,7 +4210,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
 
     .. math::
 
-        Output(i, x, y) = Input(i, x, y) / \left( \\
+      Output(i, x, y) = Input(i, x, y) / \left( \\
         k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)} \\
         (Input(j, x, y))^2\right)^{\beta}
 

From 0d86f13ce2455a1ae4f24d4e5550d6256530122f Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Thu, 14 Jun 2018 23:43:18 -0700
Subject: [PATCH 08/10] "fix math"

---
 python/paddle/fluid/layers/nn.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 2d33742e73c64..8c8d6328ebbd5 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -4210,9 +4210,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
 
     .. math::
 
-      Output(i, x, y) = Input(i, x, y) / \left( \\
-        k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)} \\
-        (Input(j, x, y))^2\right)^{\beta}
+      Output(i, x, y) = Input(i, x, y) / \\left(k + \\alpha \\sum\\limits^{\\min(C, c + n/2)}_{j = \\max(0, c - n/2)}(Input(j, x, y))^2\\right)^{\\beta}
 
     In the above equation:
 

From 6ac8383f28a572cbfcd6d2ec87a87174faa26a12 Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Fri, 15 Jun 2018 00:28:02 -0700
Subject: [PATCH 09/10] "fix based comments"

---
 .../operators/uniform_random_batch_size_like_op.cc     |  2 +-
 python/paddle/fluid/framework.py                       | 10 ++++++++--
 python/paddle/fluid/layers/control_flow.py             |  3 +--
 python/paddle/fluid/layers/nn.py                       |  2 --
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
index 192366fd44ef4..75d6181749e4e 100644
--- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
+++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc
@@ -38,7 +38,7 @@ class UniformRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
 UniformRandomBatchSizeLike operator.
 
 This operator initializes a tensor with the same batch_size as the Input tensor
- with random values sampled from a uniform distribution.
+with random values sampled from a uniform distribution.
 
 )DOC");
     AddAttr<float>("min",
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 595f67961fe34..df0625649d2cf 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1130,6 +1130,8 @@ def get_desc(self):
 
     def clone(self, for_test=False):
         """Clone the Program object
+        Args:
+           for_test(bool): indicate whether clone for test.
 
         Set for_test to False when we want to clone the program for training.
         Set for_test to True when we want to clone the program for testing.
@@ -1140,8 +1142,9 @@ def clone(self, for_test=False):
                 the is_test attributes in these operators will be set to True for
                 testing purposes, otherwise, they remain unchanged.
 
-        Returns(Program):
-            The cloned Program object.
+        Returns:
+            Program: The cloned Program object.
+
         """
         if for_test:
             p = self.inference_optimize()
@@ -1259,6 +1262,7 @@ def sync_with_cpp(self):
     def copy_param_info_from(self, other):
         """
         Copy the information of parameters from other program.
+
         Args:
             other(Program): Other program
 
@@ -1277,6 +1281,7 @@ def copy_param_info_from(self, other):
     def copy_data_info_from(self, other):
         """
         Copy the information of data variables from other program.
+
         Args:
             other(Program): Other program
 
@@ -1330,6 +1335,7 @@ def __str__(self):
     def to_string(self, throw_on_error, with_details=False):
         """
         To debug string.
+
         Args:
             throw_on_error(bool): raise exception when self is not initialized
                 when throw_on_error is True
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index f4e95dd36390b..df2f9dedabfd0 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -823,8 +823,7 @@ def increment(x, value=1.0, in_place=True):
         in_place (bool): If the increment should be performed in-place.
 
     Returns:
-        Variable: The tensor variable storing the transformation of
-                  element-wise increment of each value in the input.
+        Variable: The elementwise-incremented object.
 
     Examples:
         .. code-block:: python
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 8c8d6328ebbd5..4fd353171a00c 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2061,8 +2061,6 @@ def beam_search_decode(ids, scores, name=None):
     based on the score of each id. This layer takes the output of beam search
     layer as input and repack them into sentences.
 
-    ${beam_search_decode}
-
     Args:
         ids (Variable): The selected ids, output of beam search layer. 
         scores (Variable): The associated scores of the ids, out put of beam

From 7ad46ec03ccc1961bb1a8b22e178ce5406cfbee2 Mon Sep 17 00:00:00 2001
From: dzhwinter <dzhwinter@gmail.com>
Date: Fri, 15 Jun 2018 00:36:19 -0700
Subject: [PATCH 10/10] "show example"

---
 python/paddle/fluid/layers/nn.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 4fd353171a00c..b49560a526a9f 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2076,6 +2076,7 @@ def beam_search_decode(ids, scores, name=None):
 
     Examples:
         .. code-block:: python
+
             ids, scores = fluid.layers.beam_search(
                 pre_ids, ids, scores, beam_size, end_id)
             sentence_ids, sentence_scores = fluid.layers.beam_search_decode(