From ea4d4454ee09817864f4d10816934e98009e3f2e Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Thu, 14 Jun 2018 23:43:57 +0800
Subject: [PATCH 1/5] Update some doc about layers' API.

---
 .../gaussian_random_batch_size_like_op.cc |  9 ++-
 python/paddle/fluid/layers/io.py          | 18 ++---
 python/paddle/fluid/layers/nn.py          | 80 ++++++++++---------
 python/paddle/fluid/layers/tensor.py      | 19 ++++-
 4 files changed, 76 insertions(+), 50 deletions(-)

diff --git a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
index 8050f61d4546f..4a974281481c8 100644
--- a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
+++ b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
@@ -36,11 +36,12 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
   void Apply() override {
     AddAttr<float>("mean",
                    "(float, default 0.0) "
-                   "mean of random tensor.")
+                   "The mean (or center) of the gaussian distribution.")
         .SetDefault(.0f);
     AddAttr<float>("std",
                    "(float, default 1.0) "
-                   "std of random tensor.")
+                   "The standard deviation (std, or spread) of the "
+                   "gaussian distribution.")
         .SetDefault(1.0f);
     AddAttr<int>("seed",
                  "(int, default 0) "
@@ -55,9 +56,11 @@
         .SetDefault(framework::proto::VarType::FP32);
     AddComment(R"DOC(
-GaussianRandom Operator.
 
 Used to initialize tensors with a Gaussian random generator.
+The default mean of the distribution is 0.0 and the default standard
+deviation (std) is 1.0. Users can set the mean and std via the input
+arguments.
 )DOC");
   }
 };

diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 9de88e2c3205a..8f5b22f50042d 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -386,16 +386,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
         Variable: A Reader Variable from which we can get random data.
 
     Examples:
-      .. code-block:: python
-
-        reader = fluid.layers.io.random_data_generator(
-            low=0.0,
-            high=1.0,
-            shapes=[(3,224,224), (1)],
-            lod_levels=[0, 0])
+        .. code-block:: python
 
-        # Via the reader, we can use 'read_file' layer to get data:
-        image, label = fluid.layers.io.read_file(reader)
+            import paddle.fluid as fluid
+            reader = fluid.layers.io.random_data_generator(
+                low=0.0,
+                high=1.0,
+                shapes=[(3,224,224), (1)],
+                lod_levels=[0, 0])
+            # Via the reader, we can use 'read_file' layer to get data:
+            image, label = fluid.layers.io.read_file(reader)
     """
     dtypes = [core.VarDesc.VarType.FP32] * len(shapes)
     shape_concat = []

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 2c1f988828218..f8013b915a87d 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -312,8 +312,7 @@ def dynamic_lstm(input,
         cell_activation(str): The activation for cell output.
                               Choices = ["sigmoid", "tanh", "relu", "identity"],
                               default "tanh".
         candidate_activation(str): The activation for candidate hidden state.
-                                   Choices = ["sigmoid", "tanh",
-                                   "relu", "identity"],
+                                   Choices = ["sigmoid", "tanh", "relu", "identity"],
                                    default "tanh".
         dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
@@ -488,35 +487,38 @@ def dynamic_lstmp(input,
         cell_activation(str): The activation for cell output.
                               Choices = ["sigmoid", "tanh", "relu", "identity"],
                               default "tanh".
        candidate_activation(str): The activation for candidate hidden state.
-                                   Choices = ["sigmoid", "tanh",
-                                   "relu", "identity"],
+                                   Choices = ["sigmoid", "tanh", "relu", "identity"],
                                    default "tanh".
         proj_activation(str): The activation for projection output.
-                              Choices = ["sigmoid", "tanh",
-                              "relu", "identity"],
+                              Choices = ["sigmoid", "tanh", "relu", "identity"],
                               default "tanh".
         dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
 
     Returns:
-        tuple: The projection of hidden state, and cell state of LSTMP. The \
-               shape of projection is (T x P), for the cell state which is \
-               (T x D), and both LoD is the same with the `input`.
+        tuple: A tuple of two output variables: the projection of the hidden \
+               state and the cell state of LSTMP. The shape of the projection \
+               is (T x P) and that of the cell state is (T x D); both have \
+               the same LoD as the `input`.
 
     Examples:
         .. code-block:: python
 
-            hidden_dim, proj_dim = 512, 256
-            fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
-                                     act=None, bias_attr=None)
-            proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
-                                                     size=hidden_dim * 4,
-                                                     proj_size=proj_dim,
-                                                     use_peepholes=False,
-                                                     is_reverse=True,
-                                                     cell_activation="tanh",
-                                                     proj_activation="tanh")
+            dict_dim, emb_dim = 128, 64
+            data = fluid.layers.data(name='sequence', shape=(128),
+                                     dtype='int32', lod_level=1)
+            emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+            hidden_dim, proj_dim = 512, 256
+            fc_out = fluid.layers.fc(input=emb, size=hidden_dim * 4,
+                                     act=None, bias_attr=None)
+            proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
+                                                     size=hidden_dim * 4,
+                                                     proj_size=proj_dim,
+                                                     use_peepholes=False,
+                                                     is_reverse=True,
+                                                     cell_activation="tanh",
+                                                     proj_activation="tanh")
     """
 
     helper = LayerHelper('lstmp', **locals())
@@ -574,10 +576,10 @@ def dynamic_gru(input,
                 candidate_activation='tanh',
                 h_0=None):
     """
-    **Dynamic GRU Layer**
+    **Gated Recurrent Unit (GRU) Layer**
 
     Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
-    Sequence Modeling <https://arxiv.org/abs/1412.3555>`_
+    Sequence Modeling <https://arxiv.org/abs/1412.3555>`_ .
 
     The formula is as follows:
 
@@ -624,17 +626,24 @@ def dynamic_gru(input,
             Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
         candidate_activation(str): The activation for candidate hidden state.
             Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".
-        h_0 (Variable): The hidden output of the first time step.
+        h_0 (Variable): The initial hidden state. If not set, it defaults to
+            zero. It is a tensor with shape (N x D), where N is the batch
+            size and D is the hidden size.
 
     Returns:
         Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \
-            and lod is the same with the input.
+            and the sequence length is the same as that of the input.
 
     Examples:
         .. code-block:: python
 
+            dict_dim, emb_dim = 128, 64
+            data = fluid.layers.data(name='sequence', shape=(128),
+                                     dtype='int32', lod_level=1)
+            emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
             hidden_dim = 512
-            x = fluid.layers.fc(input=data, size=hidden_dim * 3)
+            x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
             hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
     """
 
@@ -872,13 +881,13 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
     Drop or keep each element of `x` independently. Dropout is a regularization
     technique for reducing overfitting by preventing neuron co-adaptation during
-    training. The dropout operator randomly set (according to the given dropout
+    training. The dropout operator randomly sets (according to the given dropout
     probability) the outputs of some units to zero, while others remain
     unchanged.
 
     Args:
-        x (Variable): The input tensor.
-        dropout_prob (float): Probability of setting units to zero.
+        x (Variable): The input tensor variable.
+        dropout_prob (float): Probability of setting units to zero.
         is_test (bool): A flag indicating whether it is in test phase or not.
         seed (int): A Python integer used to create random seeds. If this
             parameter is set to None, a random seed is used.
         name (str|None): A name for this layer (optional). If set None, the layer
             will be named automatically.
 
     Returns:
-        Variable: A tensor variable.
+        Variable: A tensor variable with the same shape as `x`.
 
     Examples:
         .. code-block:: python
 
-            x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
-            droped = fluid.layers.dropout(input=x, dropout_rate=0.5)
+            x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
+            dropped = fluid.layers.dropout(input=x, dropout_rate=0.5)
     """
 
     helper = LayerHelper('dropout', **locals())
@@ -2547,17 +2556,16 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
 
-    Returns:
-        Variable: The output tensor variable.
+    Returns:
+        Variable: The output tensor variable has the same shape as `x`.
 
     Examples:
         .. code-block:: python
 
-            data = fluid.layers.data(name="data",
-                                     shape=(3, 17, 13),
-                                     dtype="float32")
-            normed = fluid.layers.l2_normalize(x=data, axis=1)
+            data = fluid.layers.data(name="data",
+                                     shape=(3, 17, 13),
+                                     dtype="float32")
+            normed = fluid.layers.l2_normalize(x=data, axis=1)
     """
 
     if len(x.shape) == 1:

diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 62b01d595a812..366bb2983dfb6 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -486,11 +486,26 @@ def save_combine(x, file_path, overwrite=True):
     Saves a list of variables into a single file.
 
     Args:
-        x(list): A list of Tensor/LoDTensor to be saved together in a single file.
+        x(list): A list of Tensor/LoDTensor variables to be saved together in
+            a single file.
         file_path(str): The file path where variables will be saved.
-        overwrite(bool): Whether or not cover the given file when it has already
+        overwrite(bool): Whether or not to overwrite the given file if it
+            already exists. If set to False and the file exists, a runtime
+            error will be thrown.
+
+    Returns:
+        There is no return value.
+
+    Examples:
+        .. code-block:: python
+
+            v1 = fluid.layers.data(name="data_1",
+                                   shape=(4, 6),
+                                   dtype="float32")
+            v2 = fluid.layers.data(name="data_2",
+                                   shape=(6, 8, 4),
+                                   dtype="float32")
+            fluid.layers.save_combine([v1, v2], file_path="output")
     """
     helper = LayerHelper("save_combine", **locals())
     helper.append_op(

From e7816dbac692743fc0630ba9b380c4d6e799057d Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Fri, 15 Jun 2018 13:07:18 +0800
Subject: [PATCH 2/5] Fix format.
---
 .../fluid/operators/detection/box_coder_op.cc | 41 +++++++++----
 python/paddle/fluid/layers/io.py              | 16 +++---
 python/paddle/fluid/layers/nn.py              | 53 ++++++++++---------
 python/paddle/fluid/layers/tensor.py          | 15 +++---
 4 files changed, 72 insertions(+), 53 deletions(-)

diff --git a/paddle/fluid/operators/detection/box_coder_op.cc b/paddle/fluid/operators/detection/box_coder_op.cc
index 8c4b4321b7582..d0f95f727fdbc 100644
--- a/paddle/fluid/operators/detection/box_coder_op.cc
+++ b/paddle/fluid/operators/detection/box_coder_op.cc
@@ -106,23 +106,36 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
                    "and M represents the number of decoded boxes.");
 
     AddComment(R"DOC(
-Bounding Box Coder Operator.
+
+Bounding Box Coder.
+
 Encode/Decode the target bounding box with the priorbox information.
+
 The Encoding schema is described below:
-ox = (tx - px) / pw / pxv
-oy = (ty - py) / ph / pyv
-ow = log(abs(tw / pw)) / pwv
-oh = log(abs(th / ph)) / phv
+
+    ox = (tx - px) / pw / pxv
+
+    oy = (ty - py) / ph / pyv
+
+    ow = log(abs(tw / pw)) / pwv
+
+    oh = log(abs(th / ph)) / phv
+
 The Decoding schema is described below:
-ox = (pw * pxv * tx * + px) - tw / 2
-oy = (ph * pyv * ty * + py) - th / 2
-ow = exp(pwv * tw) * pw + tw / 2
-oh = exp(phv * th) * ph + th / 2
-where tx, ty, tw, th denote the target box's center coordinates, width and
-height respectively. Similarly, px, py, pw, ph denote the priorbox's(anchor)
-center coordinates, width and height. pxv, pyv, pwv, phv denote the variance
-of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates,
-width and height.
+
+    ox = (pw * pxv * tx + px) - tw / 2
+
+    oy = (ph * pyv * ty + py) - th / 2
+
+    ow = exp(pwv * tw) * pw + tw / 2
+
+    oh = exp(phv * th) * ph + th / 2
+
+where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width
+and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the
+priorbox's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`,
+`phv` denote the variance of the priorbox and `ox`, `oy`, `ow`, `oh` denote the
+encoded/decoded coordinates, width and height.
 )DOC");
   }
 };

diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 8f5b22f50042d..59f6c29220cde 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -386,16 +386,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
         Variable: A Reader Variable from which we can get random data.
 
     Examples:
+
         .. code-block:: python
 
-            import paddle.fluid as fluid
-            reader = fluid.layers.io.random_data_generator(
+            reader = fluid.layers.random_data_generator(
                 low=0.0,
                 high=1.0,
                 shapes=[(3,224,224), (1)],
                 lod_levels=[0, 0])
             # Via the reader, we can use 'read_file' layer to get data:
-            image, label = fluid.layers.io.read_file(reader)
+            image, label = fluid.layers.read_file(reader)
     """
     dtypes = [core.VarDesc.VarType.FP32] * len(shapes)
     shape_concat = []

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index f8013b915a87d..205817aa5a38a 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -503,22 +503,23 @@ def dynamic_lstmp(input,
                the same LoD as the `input`.
 
     Examples:
+
         .. code-block:: python
 
             dict_dim, emb_dim = 128, 64
             data = fluid.layers.data(name='sequence', shape=(128),
                                      dtype='int32', lod_level=1)
             emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
             hidden_dim, proj_dim = 512, 256
             fc_out = fluid.layers.fc(input=hidden, size=hidden_dim * 4,
                                      act=None, bias_attr=None)
             proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
                                                      size=hidden_dim * 4,
                                                      proj_size=proj_dim,
                                                      use_peepholes=False,
                                                      is_reverse=True,
                                                      cell_activation="tanh",
                                                      proj_activation="tanh")
@@ -636,6 +637,7 @@ def dynamic_gru(input,
             and the sequence length is the same as that of the input.
 
     Examples:
+
         .. code-block:: python
 
             dict_dim, emb_dim = 128, 64
@@ -900,10 +902,11 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
         Variable: A tensor variable with the same shape as `x`.
 
     Examples:
+
         .. code-block:: python
 
             x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
             dropped = fluid.layers.dropout(input=x, dropout_rate=0.5)
     """
 
     helper = LayerHelper('dropout', **locals())
@@ -2541,31 +2544,33 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
     norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes
 
     .. math::
-        y = \frac{x}{ \sqrt{\sum {x^2} + epsion }}
+
+        y = \\frac{x}{ \sqrt{\sum {x^2} + \epsilon }}
 
     For `x` with more dimensions, this layer independently normalizes each 1-D
     slice along dimension `axis`.
 
     Args:
         x(Variable|list): The input tensor to l2_normalize layer.
-        axis(int): The axis on which to apply normalization. If `axis < 0`,
+        axis(int): The axis on which to apply normalization. If `axis < 0`, \
             the dimension to normalize is rank(X) + axis. -1 is the last
             dimension.
-        epsilon(float): The epsilon value is used to avoid division by zero,
+        epsilon(float): The epsilon value is used to avoid division by zero, \
             the default value is 1e-12.
-        name(str|None): A name for this layer(optional). If set None, the layer
+        name(str|None): A name for this layer(optional). If set None, the layer \
             will be named automatically.
 
     Returns:
         Variable: The output tensor variable has the same shape as `x`.
 
     Examples:
+
         .. code-block:: python
 
             data = fluid.layers.data(name="data",
                                      shape=(3, 17, 13),
                                      dtype="float32")
             normed = fluid.layers.l2_normalize(x=data, axis=1)
     """
 
     if len(x.shape) == 1:

diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 366bb2983dfb6..c0ea9bd8e9c47 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -497,15 +497,16 @@ def save_combine(x, file_path, overwrite=True):
         There is no return value.
 
     Examples:
+
         .. code-block:: python
 
             v1 = fluid.layers.data(name="data_1",
                                    shape=(4, 6),
                                    dtype="float32")
             v2 = fluid.layers.data(name="data_2",
                                    shape=(6, 8, 4),
                                    dtype="float32")
             fluid.layers.save_combine([v1, v2], file_path="output")
     """
     helper = LayerHelper("save_combine", **locals())
     helper.append_op(

From 7045775bbd1766f9316574b2bccd875dfa37862d Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Fri, 15 Jun 2018 13:38:37 +0800
Subject: [PATCH 3/5] Fix example bug in random_data_generator.

---
 python/paddle/fluid/layers/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 59f6c29220cde..c61cb83dcfa8a 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -392,7 +392,7 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
             reader = fluid.layers.random_data_generator(
                 low=0.0,
                 high=1.0,
-                shapes=[(3,224,224), (1)],
+                shapes=[[3,224,224], [1]],
                 lod_levels=[0, 0])
             # Via the reader, we can use 'read_file' layer to get data:
             image, label = fluid.layers.read_file(reader)

From b58eec85e57ae3aaba8527686fa3490cac14c1cb Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Fri, 15 Jun 2018 13:41:56 +0800
Subject: [PATCH 4/5] Fix example bug in dropout.

---
 python/paddle/fluid/layers/nn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 205817aa5a38a..8213502ba72d6 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -906,7 +906,7 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
 
             x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
-            dropped = fluid.layers.dropout(input=x, dropout_rate=0.5)
+            dropped = fluid.layers.dropout(x, dropout_prob=0.5)

From ccf743b61658b62afbf25daa03d6a2e9d8d5a73d Mon Sep 17 00:00:00 2001
From: Dang Qingqing
Date: Fri, 15 Jun 2018 14:17:32 +0800
Subject: [PATCH 5/5] Follow comments and small fix for some examples.

---
 paddle/fluid/operators/activation_op.cc | 2 +-
 python/paddle/fluid/layers/nn.py        | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index af1d85047e519..ac0216ad8933f 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -112,7 +112,7 @@ Sigmoid Activation Operator
 __attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC(
 Logsigmoid Activation Operator
 
-$$out = \log \frac{1}{1 + e^{-x}}$$
+$$out = \\log \\frac{1}{1 + e^{-x}}$$
 
 )DOC";

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 8213502ba72d6..cc16d36b2ff9a 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -507,11 +507,11 @@ def dynamic_lstmp(input,
         .. code-block:: python
 
             dict_dim, emb_dim = 128, 64
-            data = fluid.layers.data(name='sequence', shape=(128),
+            data = fluid.layers.data(name='sequence', shape=[1],
                                      dtype='int32', lod_level=1)
             emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
             hidden_dim, proj_dim = 512, 256
-            fc_out = fluid.layers.fc(input=hidden, size=hidden_dim * 4,
+            fc_out = fluid.layers.fc(input=emb, size=hidden_dim * 4,
                                      act=None, bias_attr=None)
             proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
                                                      size=hidden_dim * 4,
                                                      proj_size=proj_dim,
                                                      use_peepholes=False,
                                                      is_reverse=True,
                                                      cell_activation="tanh",
                                                      proj_activation="tanh")
@@ -641,7 +641,7 @@ def dynamic_gru(input,
         .. code-block:: python
 
             dict_dim, emb_dim = 128, 64
-            data = fluid.layers.data(name='sequence', shape=(128),
+            data = fluid.layers.data(name='sequence', shape=[1],
                                      dtype='int32', lod_level=1)
             emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
             hidden_dim = 512
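
To make the box_coder encoding/decoding schema documented in PATCH 2 concrete, here is a small NumPy sketch. It is illustrative only, not part of the patch series and not the operator's kernel: it works purely in center-size form, skips the corner-coordinate output the real operator produces, and the function and variable names are invented for this note.

.. code-block:: python

    import numpy as np

    def encode(target, prior, prior_var):
        # Encoding schema from the box_coder comment:
        # ox = (tx - px) / pw / pxv, ..., ow = log(abs(tw / pw)) / pwv, ...
        tx, ty, tw, th = target         # target box: center x/y, width, height
        px, py, pw, ph = prior          # prior (anchor) box, same layout
        pxv, pyv, pwv, phv = prior_var  # per-coordinate variances
        return np.array([(tx - px) / pw / pxv,
                         (ty - py) / ph / pyv,
                         np.log(abs(tw / pw)) / pwv,
                         np.log(abs(th / ph)) / phv])

    def decode(offsets, prior, prior_var):
        # Exact inverse of encode(), recovering the center-size target box.
        ox, oy, ow, oh = offsets
        px, py, pw, ph = prior
        pxv, pyv, pwv, phv = prior_var
        return np.array([ox * pxv * pw + px,     # invert the center offsets
                         oy * pyv * ph + py,
                         np.exp(pwv * ow) * pw,  # invert the log-scaled sizes
                         np.exp(phv * oh) * ph])

    prior = np.array([10.0, 10.0, 4.0, 4.0])
    variance = np.array([0.1, 0.1, 0.2, 0.2])
    target = np.array([11.0, 9.0, 6.0, 3.0])

    # Round trip: decoding the encoded offsets recovers the target box.
    assert np.allclose(
        decode(encode(target, prior, variance), prior, variance), target)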
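
The l2_normalize formula from PATCH 2 can be checked numerically in the same spirit; a minimal sketch, assuming axis=1 and the epsilon default of 1e-12 visible in the function signature:

.. code-block:: python

    import numpy as np

    # y = x / sqrt(sum(x^2) + epsilon), with the sum taken over axis=1.
    x = np.random.rand(3, 17, 13).astype("float32")
    epsilon = 1e-12
    y = x / np.sqrt((x * x).sum(axis=1, keepdims=True) + epsilon)

    # Every 1-D slice y[i, :, k] now has (nearly) unit L2 norm.
    assert np.allclose(np.linalg.norm(y, axis=1), 1.0, atol=1e-5)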