From 44ae44da49f206af56d02816aff8e9b2920d0bf8 Mon Sep 17 00:00:00 2001
From: caoying03
Date: Mon, 14 Aug 2017 09:01:22 +0800
Subject: [PATCH 1/9] add configuration helpers.

---
 python/paddle/trainer/config_parser.py | 16 ++
 .../paddle/trainer_config_helpers/layers.py | 34 ++-
 .../tests/configs/file_list.sh | 2 +-
 .../test_cross_entropy_over_beam.protostr | 208 ++++++++++++++++++
 .../configs/test_cross_entropy_over_beam.py | 39 ++++
 5 files changed, 295 insertions(+), 4 deletions(-)
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index da99e5bd53458..a24299787bfd6 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1602,6 +1602,21 @@ def __init__(self, name, inputs, softmax_selfnorm_alpha=0.1, **xargs):
         self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha
 
 
+@config_layer('cross_entropy_over_beam')
+class CrossEntropyOverBeamLayer(LayerBase):
+    def __init__(self, name, inputs, **xargs):
+        config_assert(len(inputs) % 3 == 0, "Error input numbers.")
+        super(CrossEntropyOverBeamLayer, self).__init__(
+            name, 'cross_entropy_over_beam', 0, inputs, **xargs)
+        input_num = len(inputs) / 3
+        for i in range(input_num):
+            input_layer = self.get_input_layer(i * 2)
+            config_assert(
+                input_layer.size == 1, "Inputs for this layer are made up of "
+                "several pairs and the first one in a pair is scores for "
+                "all the candidates, so its size should be equal to 1.")
+
+
 @config_layer('fc')
 class FCLayer(LayerBase):
     layer_type = 'fc'
@@ -2249,6 +2264,7 @@ def init(cls, name, inputs, device=None, coeff=1.):
 
 
 define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy')
+define_cost('CrossEntropyOverBeamCostLayer', 'cross_entropy_over_beam')
 define_cost('RankingCost', 'rank-cost')
 define_cost('AucValidation', 'auc-validation')
 define_cost('PnpairValidation', 'pnpair-validation')
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 1bc55c8696015..2b01b6ad4d790 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import functools
 import collections
 import inspect
@@ -104,6 +103,7 @@
     'nce_layer',
     'cross_entropy_with_selfnorm',
     'cross_entropy',
+    'cross_entropy_over_beam',
     'multi_binary_label_cross_entropy',
     'sum_cost',
     'rank_cost',
@@ -219,6 +219,7 @@ class LayerType(object):
     HUBER = 'huber'
     CROSS_ENTROPY = 'multi-class-cross-entropy'
     CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
+    CROSS_ENTROPY_OVER_BEAM = 'cross_entropy_over_beam'
     SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy'
     MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy'
     SUM_COST = 'sum_cost'
@@ -4028,8 +4029,12 @@ def __cost_input__(input, label, weight=None):
     """
     inputs and parents for cost layers.
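    `input` and `label` may each be a single LayerOutput or a list of them;
    both are normalized into lists below, so that one cost layer can take
    multiple score/label inputs (as cross_entropy_over_beam does).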
""" - ipts = [Input(input.name), Input(label.name)] - parents = [input, label] + if isinstance(input, LayerOutput): + input = [input] + if isinstance(label, LayerOutput): + label = [label] + ipts = [Input(ipt.name) for ipt in (input + label)] + parents = [ipt for ipt in (input + label)] if weight is not None: assert weight.size == 1 ipts.append(Input(weight.name)) @@ -5692,6 +5697,29 @@ def multi_binary_label_cross_entropy(input, size=1) +@wrap_name_default() +@layer_support() +def cross_entropy_over_beam(input, label, name=None, coeff=1.0, weight=None): + """ + TODO(caoying) add comments. + """ + + assert len(input) / 2 == len(label), "Error input numbers." + for i in range(0, len(input), 2): + assert (input[i].size == 1), ( + "Inputs for this layer are made up of " + "several pairs and the first one in a pair is scores for " + "all the candidates, so its size should be equal to 1.") + + ipts, parents = __cost_input__(input, label, weight) + Layer( + name=name, + type=LayerType.CROSS_ENTROPY_OVER_BEAM, + inputs=ipts, + coeff=coeff) + return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) + + @wrap_name_default() @layer_support() def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index a61beb871ad06..130e6332a7cf5 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_kmax_seq_socre_layer test_seq_select_layers) +test_kmax_seq_socre_layer test_seq_select_layers test_cross_entropy_over_beam) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr new file mode 100644 index 0000000000000..e44478ec2ba1f --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr @@ -0,0 +1,208 @@ +type: "nn" +layers { + name: "sentence_states" + type: "data" + size: 32 + active_type: "" +} +layers { + name: "sentence_scores" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__kmax_sequence_score_layer_0__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__sub_nested_seq_layer_0__" + type: "sub_nested_seq" + size: 32 + active_type: "" + inputs { + input_layer_name: "sentence_states" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__sub_nested_seq_layer_0__" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_1__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__seq_slice_layer_0__" + type: "seq_slice" + size: 32 + active_type: "" + inputs { + 
input_layer_name: "__sub_nested_seq_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + select_first: true +} +layers { + name: "__fc_layer_1__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__seq_slice_layer_0__" + input_parameter_name: "___fc_layer_1__.w0" + } + bias_parameter_name: "___fc_layer_1__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_2__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "__fc_layer_1__" + } + beam_size: 5 +} +layers { + name: "sentences_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "start_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "end_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__cross_entropy_over_beam_0__" + type: "cross_entropy_over_beam" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + inputs { + input_layer_name: "__fc_layer_1__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_2__" + } + inputs { + input_layer_name: "sentences_ids" + } + inputs { + input_layer_name: "start_ids" + } + inputs { + input_layer_name: "end_ids" + } + coeff: 1.0 +} +parameters { + name: "___fc_layer_0__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_1__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_1__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "sentence_scores" +input_layer_names: "sentence_states" +input_layer_names: "sentences_ids" +input_layer_names: "start_ids" +input_layer_names: "end_ids" +output_layer_names: "__cross_entropy_over_beam_0__" +sub_models { + name: "root" + layer_names: "sentence_states" + layer_names: "sentence_scores" + layer_names: "__kmax_sequence_score_layer_0__" + layer_names: "__sub_nested_seq_layer_0__" + layer_names: "__fc_layer_0__" + layer_names: "__kmax_sequence_score_layer_1__" + layer_names: "__seq_slice_layer_0__" + layer_names: "__fc_layer_1__" + layer_names: "__kmax_sequence_score_layer_2__" + layer_names: "sentences_ids" + layer_names: "start_ids" + layer_names: "end_ids" + layer_names: "__cross_entropy_over_beam_0__" + input_layer_names: "sentence_scores" + input_layer_names: "sentence_states" + input_layer_names: "sentences_ids" + input_layer_names: "start_ids" + input_layer_names: "end_ids" + output_layer_names: "__cross_entropy_over_beam_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py new file mode 100644 index 0000000000000..edc2d32fca1c9 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +#coding=utf-8 + +from paddle.trainer_config_helpers import * +beam_size = 5 + +# the first 
beam expansion. +sentence_states = data_layer(name="sentence_states", size=32) +sentence_scores = data_layer(name="sentence_scores", size=1) +topk_sentence_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the second beam expansion. +topk_sen = sub_nested_seq_layer( + input=sentence_states, selected_indices=topk_sentence_ids) +start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation()) +topk_start_pos_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the final beam expansion. +topk_start_spans = seq_slice_layer( + input=topk_sen, starts=topk_start_pos_ids, ends=None) +end_pos_scores = fc_layer( + input=topk_start_spans, size=1, act=LinearActivation()) +topk_end_pos_ids = kmax_sequence_score_layer( + input=end_pos_scores, beam_size=beam_size) + +# define the cost +sentence_idx = data_layer(name="sentences_ids", size=1) +start_idx = data_layer(name="start_ids", size=1) +end_idx = data_layer(name="end_ids", size=1) +cost = cross_entropy_over_beam( + input=[ + sentence_scores, topk_sentence_ids, start_pos_scores, + topk_start_pos_ids, end_pos_scores, topk_end_pos_ids + ], + label=[sentence_idx, start_idx, end_idx]) + +outputs(cost) From 05e8a26b4bb093f9dccb9aeb533a5851aaed09b8 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 14 Aug 2017 10:33:28 +0800 Subject: [PATCH 2/9] add unittest. --- .../gserver/layers/CrossEntropyOverBeam.cpp | 35 +++++++ paddle/gserver/layers/CrossEntropyOverBeam.h | 31 ++++++ paddle/gserver/tests/CMakeLists.txt | 6 ++ paddle/gserver/tests/LayerGradUtil.cpp | 25 +++-- paddle/gserver/tests/LayerGradUtil.h | 18 ++++ .../tests/test_CrossEntropyOverBeamGrad.cpp | 94 +++++++++++++++++++ 6 files changed, 201 insertions(+), 8 deletions(-) create mode 100644 paddle/gserver/layers/CrossEntropyOverBeam.cpp create mode 100644 paddle/gserver/layers/CrossEntropyOverBeam.h create mode 100644 paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp new file mode 100644 index 0000000000000..8b6223ec6a826 --- /dev/null +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "CrossEntropyOverBeam.h" + +namespace paddle { + +REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam); + +bool CrossEntropyOverBeam::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + + setNeedSequenceInfo(false); + + return true; +} + +void CrossEntropyOverBeam::forward(PassType passType) {} + +void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {} + +} // namespace paddle diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h new file mode 100644 index 0000000000000..3106f9858b751 --- /dev/null +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "CrossEntropyOverBeam.h" +#include "Layer.h" + +namespace paddle { + +class CrossEntropyOverBeam : public Layer { +public: + explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index c2a2993620492..24df7e7220b0d 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -34,6 +34,12 @@ add_unittest_without_exec(test_CRFLayerGrad add_test(NAME test_CRFLayerGrad COMMAND test_CRFLayerGrad) +################ test_CrossEntropyOverBeam #################### +add_unittest_without_exec(test_CrossEntropyOverBeam + test_CrossEntropyOverBeamGrad.cpp + LayerGradUtil.cpp) +add_test(NAME test_CrossEntropyOverBeam + COMMAND test_CrossEntropyOverBeam) add_unittest_without_exec(test_ActivationGrad test_ActivationGrad.cpp diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index fd9cfa1dc7a90..a38880e14cdfc 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -388,14 +388,23 @@ void initDataLayer(TestConfig testConf, data.grad->zeroMem(); break; case INPUT_SELF_DEFINE_DATA: { - size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); - size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); - CHECK_GT(static_cast(height), 0); - CHECK_GT(static_cast(width), 0); - data.value = Matrix::create(height, width, false, useGpu); - data.grad = Matrix::create(height, width, false, useGpu); - data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); - data.grad->zeroMem(); + if (testConf.inputDefs[i].ids.size()) { + data.ids = IVector::create(testConf.inputDefs[i].ids.size(), useGpu); + data.ids->copyFrom(testConf.inputDefs[i].ids.data(), + testConf.inputDefs[i].ids.size()); + } else if (testConf.inputDefs[i].selfDefinedData) { + size_t height = 
testConf.inputDefs[i].selfDefinedData->getHeight(); + size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); + CHECK_GT(static_cast(height), 0); + CHECK_GT(static_cast(width), 0); + data.value = Matrix::create(height, width, false, useGpu); + data.grad = Matrix::create(height, width, false, useGpu); + data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); + data.grad->zeroMem(); + } else { + LOG(FATAL) << "No self-defined data are given."; + return; + } const std::vector& labelSeqStartPositions = testConf.inputDefs[i].labelSeqStartPositions; diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 5debedf5ef6a3..a35edd2b5e21a 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -68,6 +68,7 @@ struct InputDef { std::vector labelInitValue; std::vector labelSeqStartPositions; std::vector labelSubSeqStartPositions; + std::vector ids; MatrixPtr selfDefinedData; InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { @@ -95,6 +96,23 @@ struct InputDef { isStatic = false; } + InputDef(InputType type, + string nameIn, + std::vector ids, + std::vector selfDefinedSeqStartPos = {}, + std::vector selfDefinedSubSeqStartPos = {}) + : labelSeqStartPositions(selfDefinedSeqStartPos), + labelSubSeqStartPositions(selfDefinedSubSeqStartPos), + ids(ids) { + selfDefinedData = nullptr; + inputType = type; + name = nameIn; + dim = 0; + sparse = {""}; + paraSize = 0; + isStatic = false; + } + InputDef(InputType type, string nameIn, size_t dimIn, diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp new file mode 100644 index 0000000000000..54daba3656ef4 --- /dev/null +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -0,0 +1,94 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include + +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT + +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +struct SingleBeamExpansion { + vector seqStartPos; + vector subSeqStartPos; + + vector candidateScores; + // TODO(caoying): store this into Argument.ids + vector selectedIndices; + vector groundTruth; +}; + +void genRandomBeamExpansion(size_t expansionCount, + vector& beamExpansions) { + beamExpansions.clear(); +} + +void testCrossEntropyOverBeam() { + const size_t expansionCount = 3; + vector beams; + genRandomBeamExpansion(expansionCount, beams); + + for (size_t i = 0; i < beams.size(); ++i) { + const SingleBeamExpansion& beam = beams[i]; + // create scores for all the candidates + MatrixPtr candidateScorePtr = + Matrix::create(beam.candidateScores.size(), 1, false, false); + candidateScorePtr->copyFrom(candidateScores.data(), candidateScores.size()); + + ostringstream paramName; + paramName << "candidate_scores_" << i; + beam.subSeqStartPos.size() + ? config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + ostr.str(), + candidateScorePtr, + beam.seqStartPos, + beam.subSeqStartPos}) + : config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + ostr.str(), + candidateScorePtr, + beam.seqStartPos}); + // create indices for the selected candidates + + // create the ground truth + } +} + +TestConfig config; +config.layerConfig.set_type("cross_entropy_over_beam"); + +// testLayerGrad( +// config, "cross_entropy_over_beam", seqNum, false, useGpu, false); +} + +TEST(Layer, CrossEntropyOverBeam) { + for (bool useGpu : {false, true}) testCrossEntropyOverBeam(useGpu); +} + +int main(int argc, char** argv) { + initMain(argc, argv); + hl_start(); + hl_init(FLAGS_gpu_id); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From a037b099f7f4bf8370e882f397bd4c691b0e0986 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 14 Aug 2017 15:49:48 +0800 Subject: [PATCH 3/9] finish unittest. --- .../gserver/layers/CrossEntropyOverBeam.cpp | 1 + .../tests/test_CrossEntropyOverBeamGrad.cpp | 218 +++++++++++++++--- 2 files changed, 191 insertions(+), 28 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 8b6223ec6a826..88d80aa83af5c 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -22,6 +22,7 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); + CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; setNeedSequenceInfo(false); diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index e9ecebcfe5204..a5f06c15dc480 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include #include #include @@ -27,6 +28,10 @@ using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); +const size_t MAX_SEQ_NUM = 10; +const size_t MAX_SEQ_LEN = 27; +const size_t MAX_BEAM_SIZE = 10; + struct SingleBeamExpansion { vector seqStartPos; vector subSeqStartPos; @@ -34,37 +39,195 @@ struct SingleBeamExpansion { // TODO(caoying): store this into Argument.ids vector selectedIndices; + vector groundTruth; - vector labelSeqStartPos; + vector inBeam; + vector rowIdxInBeam; }; -void genCandidateScores(bool hasSubSeq, - vector& scores, +void genRand(real* numbers, size_t n) { + default_random_engine generator; + uniform_real_distribution distribution(0.0, 1.0); + for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); +} + +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +void genCandidateScores(bool hasSubseq, + size_t beamSize, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + vector& seqStartPos = curBeam.seqStartPos; + seqStartPos.resize(1, 0); + vector& subSeqStartPos = curBeam.subSeqStartPos; + subSeqStartPos.resize(1, 0); + + srand((size_t)(time(NULL))); + // srand(1); + if (prevBeam.selectedIndices.size()) { + if (prevBeam.subSeqStartPos.size() > 1) { + int seqIdx = 1; + // samples in previous beam are nested sequences. + for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { + for (size_t j = 0; j < beamSize; ++j) { + if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; + for (size_t k = 0; k < beamSize; ++k) + subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + + subSeqStartPos.back()); + } + if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { + seqStartPos.push_back(subSeqStartPos.back()); + seqIdx++; + } + } + } else { + // samples in previous beam are sequences. + for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { + if (i && i % beamSize == 0) { + seqStartPos.push_back(subSeqStartPos.back()); + if (i == prevBeam.selectedIndices.size()) break; + } + if (prevBeam.selectedIndices[i] == -1.) continue; + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } else { + // the first beam expansion + int seqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int i = 0; i < seqNum; ++i) { + if (hasSubseq) { + for (size_t j = 0; j < 1 + (rand() % MAX_SEQ_NUM); ++j) + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + seqStartPos.push_back(subSeqStartPos.back()); + } else { + seqStartPos.push_back(seqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } + + size_t totalSeqNum = hasSubseq ? subSeqStartPos.back() : seqStartPos.back(); + curBeam.candidateScores.resize(totalSeqNum, 0.); + genRand(curBeam.candidateScores.data(), totalSeqNum); +} + +void genSelectedIndices(size_t beamSize, vector& seqStartPos, - vector& subSeqStartPos) {} - -void genSelectedIndicesAndGroundtruth(size_t beamSize, - vector& seqStartPos, - vector& selectedIndices) {} - -SingleBeamExpansion genOneBeam(size_t beamSize, bool hasSubSeq) { - SingleBeamExpansion beam; - genCandidateScores( - hasSubSeq, beam.candidateScores, beam.seqStartPos, beam.subSeqStartPos); - genSelectedIndicesAndGroundtruth( - beamSize, - hasSubSeq ? 
beam.subSeqStartPos : beam.seqStartPos, - beam.selectedIndices); - return beam; + vector& selectedIndices) { + size_t selectedIdsCount = beamSize * (seqStartPos.size() - 1); + selectedIndices.resize(selectedIdsCount, -1.); + + for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + int n = min(seqLen, static_cast(beamSize)); + vector ids = randSampling(seqLen, n); + memcpy(selectedIndices.data() + i * beamSize, + ids.data(), + sizeof(real) * ids.size()); + } +} + +void genGroundTruth(vector& beamExpansions, + size_t beamSize) { + size_t seqNum = beamExpansions[1].seqStartPos.size() - 1; + for (size_t i = 2; i < beamExpansions.size(); ++i) + CHECK_EQ(seqNum, beamExpansions[i - 1].seqStartPos.size() - 1); + + // srand(1); + srand((size_t)(time(NULL))); + + // initialize the first beam. + SingleBeamExpansion& beam = beamExpansions[1]; + beam.groundTruth.resize(seqNum, 0); + beam.inBeam.resize(seqNum, 0); + beam.rowIdxInBeam.resize(seqNum, -1); + + auto begPos = beam.selectedIndices.begin(); + for (size_t i = 0; i < seqNum; ++i) { + int seqLen = beam.seqStartPos[i + 1] - beam.seqStartPos[i]; + int label = rand() % seqLen; + auto endPos = begPos + beamSize; + beam.groundTruth[i] = label; + if (find(begPos, endPos, real(label)) != endPos) beam.inBeam[i] = 1; + begPos = endPos; + beam.rowIdxInBeam[i] = i; + } + + // iterate over each beam expansions + for (size_t i = 2; i < beamExpansions.size(); ++i) { + SingleBeamExpansion& curBeam = beamExpansions[i]; + SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; + + curBeam.groundTruth.resize(seqNum, 0); + curBeam.inBeam.resize(seqNum, 0); + curBeam.rowIdxInBeam.resize(seqNum, -1); + + // iterate over each sequence + for (size_t j = 0; j < seqNum; ++j) { + if (prevBeam.inBeam[j]) { + // gold sequence falls in the beam in previous search. + + auto begPos = prevBeam.selectedIndices.begin(); + auto endPos = begPos + prevBeam.rowIdxInBeam[j] * beamSize; + size_t totalExpansion = + prevBeam.rowIdxInBeam[j] * beamSize - count(begPos, endPos, -1.); + curBeam.rowIdxInBeam[j] = totalExpansion + prevBeam.groundTruth[j]; + + CHECK_LE(curBeam.rowIdxInBeam[j] + 1, + curBeam.subSeqStartPos.size() - 1); + int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; + int end = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j] + 1]; + CHECK_GT(size_t(end), size_t(start)); + int label = rand() % (end - start); + + curBeam.groundTruth[j] = label; + auto findBeg = curBeam.selectedIndices.begin() + + curBeam.rowIdxInBeam[j] * beamSize; + auto findEnd = findBeg + beamSize; + if (find(findBeg, findEnd, real(label)) != findEnd) + curBeam.inBeam[j] = 1; + } else { + // in previous search, gold sequence has fallen off the beam, + // the beam search stops, here use -1 as a dummy label. + // It will not used in calculation the cost. + beamExpansions[i].groundTruth[j] = -1; + } + } + } +} + +void genOneBeam(size_t beamSize, + bool hasSubseq, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + genCandidateScores(hasSubseq, beamSize, prevBeam, curBeam); + genSelectedIndices(beamSize, + hasSubseq ? curBeam.subSeqStartPos : curBeam.seqStartPos, + curBeam.selectedIndices); } void genRandomBeamExpansion(size_t expansionCount, size_t beamSize, vector& beamExpansions) { beamExpansions.clear(); - for (size_t i = 0; i < expansionCount; ++i) { - beamExpansions.emplace_back(genOneBeam(beamSize, i)); - } + beamExpansions.resize(expansionCount + 1); + + // beamExpansions[0] is reserved. 
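+  // expansions are indexed from 1 so that expansion i can always read its
+  // predecessor at i - 1; the default-constructed beamExpansions[0] plays
+  // the role of the empty state before the first search step.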
+ for (size_t i = 1; i <= expansionCount; ++i) + genOneBeam(beamSize, bool(i - 1), beamExpansions[i - 1], beamExpansions[i]); + genGroundTruth(beamExpansions, beamSize); } void testCrossEntropyOverBeam(bool useGpu) { @@ -72,12 +235,12 @@ void testCrossEntropyOverBeam(bool useGpu) { config.layerConfig.set_type("cross_entropy_over_beam"); const size_t expansionCount = 3; - const size_t beamSize = 3; + const size_t beamSize = MAX_BEAM_SIZE; vector beams; genRandomBeamExpansion(expansionCount, beamSize, beams); size_t seqNum = 0; - for (size_t i = 0; i < beams.size(); ++i) { + for (size_t i = 1; i < beams.size(); ++i) { const SingleBeamExpansion& beam = beams[i]; // create scores for all the candidates MatrixPtr candidateScorePtr = @@ -88,7 +251,7 @@ void testCrossEntropyOverBeam(bool useGpu) { ostringstream paramName; paramName << "candidate_scores_" << i; - if (beam.subSeqStartPos.size()) { + if (beam.subSeqStartPos.size() > 1) { seqNum = beam.subSeqStartPos.size() - 1; config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, paramName.str(), @@ -118,10 +281,9 @@ void testCrossEntropyOverBeam(bool useGpu) { // create the ground truth paramName.clear(); paramName << "label_" << i; - config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - paramName.str(), - beam.groundTruth, - beam.labelSeqStartPos}); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, paramName.str(), beam.groundTruth}); + config.layerConfig.add_inputs(); } testLayerGrad( From 8f4ca2d12fffe38d5adff0ad74db6ba1bdc0d223 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 16 Aug 2017 15:34:02 +0800 Subject: [PATCH 4/9] add implementations. --- .../gserver/layers/CrossEntropyOverBeam.cpp | 344 +++++++++++++++++- paddle/gserver/layers/CrossEntropyOverBeam.h | 98 +++++ .../tests/test_CrossEntropyOverBeamGrad.cpp | 166 ++++++--- 3 files changed, 549 insertions(+), 59 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index 88d80aa83af5c..09258fb305990 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -16,6 +16,168 @@ limitations under the License. */ namespace paddle { +void CostForOneSequence::calValidExpandStep() { + validExpansionCount_ = 0; + goldAsExtraPath_ = true; + + for (size_t i = 0; i < beams_->expansionCount; ++i) { + real gold = static_cast(beams_->gold[i]); + if (i) { + real* start = beams_->candidateIds[i - 1]->getData(); + goldRowIds_[i] = std::count_if( + start, + start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1], + [](const real& val) { return val != -1.; }); + } else + goldRowIds_[i] = 0; + + real* start = + beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_; + real* findEnd = std::find(start, start + beamSize_, gold); + validExpansionCount_++; + + if (start + beamSize_ == findEnd) return; + goldColIds_[i] = findEnd - start; + } + + if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false; +} + +size_t CostForOneSequence::initLastExpansion() { + int beamId = validExpansionCount_ - 1; + const MatrixPtr candidates = beams_->candidateIds[beamId]; + size_t height = candidates->getHeight(); + + /* initialization the last expansion. */ + size_t pathCount = std::count_if(candidates->getData(), + candidates->getData() + height * beamSize_, + [](const real& val) { return val != -1; }); + /* + * if the gold sequence falls off the beam during search, + * add the gold sequence as the last path into all expanded paths. 
+ */ + if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++; + + pathRowIdsInEachBeam_.clear(); + pathRowIdsInEachBeam_.resize(validExpansionCount_, + std::vector(pathCount, 0)); + parentIdsInBeam_.clear(); + parentIdsInBeam_.resize(pathCount, 0); + + if (goldAsExtraPath_) { + /* add gold sequence into the total expansion. */ + pathRowIdsInEachBeam_[beamId].back() = + beams_->gold[beamId] + + getSeqStartPos(beamId, goldRowIds_[validExpansionCount_ - 1]); + parentIdsInBeam_.back() = goldRowIds_[validExpansionCount_ - 1]; + } else { + size_t goldOffset = goldRowIds_[beamId] * beamSize_ + goldColIds_[beamId]; + goldIdsInFinalExpansion_ = + std::count_if(candidates->getData(), + candidates->getData() + goldOffset, + [](const real& val) { return val != -1.; }); + } + + /* + * TODO(caoying): fix this, store the indices of selected candidate + * paths into Argument.ids + */ + real* ids = candidates->getData(); + size_t curIdx = 0; + for (size_t i = 0; i < height; ++i) { + int basePos = getSeqStartPos(beamId, i); + for (size_t j = 0; j < beamSize_; ++j) { + int id = ids[i * beamSize_ + j]; + if (id == -1) continue; + pathRowIdsInEachBeam_[beamId][curIdx] = id + basePos; + parentIdsInBeam_[curIdx++] = i; + } + } + return pathCount; +} + +void CostForOneSequence::constructTotalExpansion() { + /* + * construct the entire expanded beam by begining with the last search + * in which gold falls off the beam. + */ + size_t totalPathCount = initLastExpansion(); + + for (int beamId = validExpansionCount_ - 2; beamId >= 0; --beamId) { + const MatrixPtr candidates = beams_->candidateIds[beamId]; + real* ids = candidates->getData(); + + int lastParentIdInBeam = -1; + int basePos = -1; + for (size_t i = 0; + i < (goldAsExtraPath_ ? totalPathCount - 1 : totalPathCount); + ++i) { + int id = ids[parentIdsInBeam_[i]]; + int parentRowId = std::div(parentIdsInBeam_[i], beamSize_).quot; + if (parentIdsInBeam_[i] != lastParentIdInBeam) + basePos = getSeqStartPos(beamId, parentRowId); + + pathRowIdsInEachBeam_[beamId][i] = id + basePos; + lastParentIdInBeam = parentIdsInBeam_[i]; + parentIdsInBeam_[i] = parentRowId; + + if (goldAsExtraPath_) + pathRowIdsInEachBeam_[beamId][totalPathCount - 1] = + beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]); + } + } +} + +real CostForOneSequence::globallyNormalizedScore() { + expandedPathScores_.resize(validExpansionCount_); + + Matrix::resizeOrCreate( + softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false); + softmaxOut_->zero(); + MatrixPtr tmp = Matrix::create( + softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); + + for (size_t i = 0; i < validExpansionCount_; ++i) { + Matrix::resizeOrCreate(expandedPathScores_[i], + pathRowIdsInEachBeam_[i].size(), + 1, + false, + false); + IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), + pathRowIdsInEachBeam_[i].size(), + false); + expandedPathScores_[i]->selectRows(*(beams_->scores[i]), *rowIds); + tmp->add(*expandedPathScores_[i]); + } + + softmaxOut_->softmax(*softmaxOut_); + return -std::log(softmaxOut_->getData()[goldIdsInFinalExpansion_]); +} + +real CostForOneSequence::forward() { + calValidExpandStep(); + constructTotalExpansion(); + return globallyNormalizedScore(); +} + +void CostForOneSequence::backward() { + softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.; + MatrixPtr tmp = Matrix::create( + softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); + + for (size_t i = 0; i < validExpansionCount_; ++i) { + IVectorPtr rowIds = 
IVector::create(pathRowIdsInEachBeam_[i].data(), + pathRowIdsInEachBeam_[i].size(), + false); + /* + beams_->scoreGrad[i] has been intialized outside this class, this + class only keeps a pointer pointing to the original input gradients, + so here does not need to allocate or initalize the memory. + */ + tmp->addToRows(*beams_->scoreGrad[i], *rowIds); + } +} + REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam); bool CrossEntropyOverBeam::init(const LayerMap& layerMap, @@ -24,13 +186,189 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap, Layer::init(layerMap, parameterMap); CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; - setNeedSequenceInfo(false); + beamExpanCount_ = inputLayers_.size() / 3; + + candidateScores_.resize(beamExpanCount_); + candidateScoreGrad_.resize(beamExpanCount_); + candidateInBeam_.resize(beamExpanCount_); + goldSequence_.resize(beamExpanCount_); + gradToInputs_.resize(beamExpanCount_); + + setNeedSequenceInfo(false); return true; } -void CrossEntropyOverBeam::forward(PassType passType) {} +void CrossEntropyOverBeam::checkInputs() { + batchSize_ = 0; + for (size_t i = 0; i < beamExpanCount_; ++i) { + const Argument& scores = getInput(i * 3); + const Argument& selCandidates = getInput(i * 3 + 1); + const Argument& goldSeq = getInput(i * 3 + 2); + + if (i) { + CHECK(scores.hasSubseq()) << "Beam expansion expect the first one, " + "should be a nested sequence"; + CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_); + CHECK_EQ(scores.getNumSequences(), batchSize_); + CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize()); + } else { + CHECK(scores.hasSeq()) << "The first beam expansion should be a sequence"; + batchSize_ = scores.getNumSequences(); + beamSize_ = getInputValue(i * 3 + 1)->getWidth(); + CHECK_EQ(batchSize_, selCandidates.getBatchSize()); + } + CHECK_EQ(1U, scores.value->getWidth()); + CHECK_EQ(batchSize_, goldSeq.getBatchSize()); + } +} + +void CrossEntropyOverBeam::copyInputsToCpu() { + auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) { + if (dynamic_cast(src.get())) { + Matrix::resizeOrCreate( + trg, src->getHeight(), src->getWidth(), false, false); + trg->copyFrom(*src); + } else { + trg = std::move(src); + } + }; + + auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) { + if (dynamic_cast(src.get())) { + IVector::resizeOrCreate(trg, src->getSize(), false); + trg->copyFrom(*src); + } else { + trg = std::move(src); + } + }; + + beamSplitPos_.clear(); + beamSplitPos_.resize(batchSize_, std::vector(beamExpanCount_, 0)); + for (size_t i = 0; i < beamExpanCount_; ++i) { + copyValue(getInputValue(i * 3), candidateScores_[i]); + copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]); + copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]); + + if (i) { + ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions; + const int* seqStarts = seqInfo->getMutableData(false); + ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions; + const int* subSeqStarts = subSeqInfo->getMutableData(false); + + size_t seqId = 1; + for (size_t subSeqId = 0; subSeqId < subSeqInfo->getSize() - 1; + ++subSeqId) { + CHECK_LT(seqId, seqInfo->getSize()); + if (subSeqStarts[subSeqId] == seqStarts[seqId]) { + beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i]; + seqId++; + } + beamSplitPos_[seqId - 1][i]++; + } + } else { + for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1; + } + } +} + +void CrossEntropyOverBeam::splitBatchBeams() { + beamCosts_.resize(batchSize_); + 
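+  // one cost computer and one per-sequence BeamExpansion bundle for every
+  // sequence in the batch; BeamExpansion's int constructor sizes its
+  // members to beamExpanCount_.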
beamPerSeq_.resize(batchSize_, beamExpanCount_); + + for (size_t i = 0; i < beamExpanCount_; ++i) { + int* seqStarts = + getInput(i * 3).sequenceStartPositions->getMutableData(false); + + int* subSeqStarts = nullptr; + int maxLen = 0; + if (i) { + subSeqStarts = + getInput(i * 3).subSequenceStartPositions->getMutableData(false); + maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1; + } else + maxLen = getInput(i).sequenceStartPositions->getSize() - 1; + + for (size_t j = 0; j < batchSize_; ++j) { + beamPerSeq_[j].scores[i] = + Matrix::create(candidateScores_[i]->getData() + seqStarts[j], + seqStarts[j + 1] - seqStarts[j], + 1, + false, + false); + beamPerSeq_[j].scoreGrad[i] = + Matrix::create(candidateScoreGrad_[i]->getData() + seqStarts[j], + seqStarts[j + 1] - seqStarts[j], + 1, + false, + false); + + int offset = j ? beamSplitPos_[j - 1][i] : 0; + int height = beamSplitPos_[j][i] - (j ? beamSplitPos_[j - 1][i] : 0); + CHECK_GE(maxLen, offset + height); + beamPerSeq_[j].seqInfo[i] = IVector::create( + (i ? subSeqStarts : seqStarts) + offset, height + 1, false); -void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {} + beamPerSeq_[j].candidateIds[i] = + Matrix::create(candidateInBeam_[i]->getData() + offset * beamSize_, + height, + beamSize_, + false, + false); + beamPerSeq_[j].gold[i] = goldSequence_[i]->getData()[j]; + } + } +} + +void CrossEntropyOverBeam::resizeOutput() { + Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false); + output_.value->zero(); + + for (size_t i = 0; i < beamExpanCount_; ++i) { + MatrixPtr inGrad = getInputGrad(i * 3); + if (dynamic_cast(inGrad.get())) { + Matrix::resizeOrCreate(candidateScoreGrad_[i], + inGrad->getHeight(), + inGrad->getWidth(), + false, + false); + } else + candidateScoreGrad_[i] = std::move(inGrad); + candidateScoreGrad_[i]->zero(); + } +} + +void CrossEntropyOverBeam::copyGradToGpu(size_t copyCount) { + for (size_t i = 0; i < beamExpanCount_; ++i) { + if (dynamic_cast(getInputGrad(i * 3).get())) + getInputGrad(i * 3)->copyFrom(*candidateScoreGrad_[i]); + + if (i == copyCount - 1) break; + } +} + +void CrossEntropyOverBeam::forward(PassType passType) { + Layer::forward(passType); + + checkInputs(); + copyInputsToCpu(); + + resizeOutput(); + splitBatchBeams(); + + MatrixPtr outputValue = getOutputValue(); + for (size_t i = 0; i < batchSize_; ++i) { + beamCosts_[i].setData( + std::move(std::make_shared(beamPerSeq_[i])), beamSize_); + outputValue->getData()[i] = beamCosts_[i].forward(); + } +} + +void CrossEntropyOverBeam::backward(const UpdateCallback& callback) { + for (size_t i = 0; i < batchSize_; ++i) { + beamCosts_[i].backward(); + copyGradToGpu(beamCosts_[i].getValidExpansionCount()); + } +} } // namespace paddle diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h index 3106f9858b751..96a5df7dfbe46 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.h +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -19,6 +19,79 @@ limitations under the License. 
*/ namespace paddle { +struct BeamExpansion { + // store the entire beam expansion for a single sequence + std::vector scores; + std::vector seqInfo; + + std::vector candidateIds; + std::vector gold; + + std::vector scoreGrad; + + size_t expansionCount; + + BeamExpansion(int n) { + expansionCount = n; + scores.resize(expansionCount); + seqInfo.resize(expansionCount); + candidateIds.resize(expansionCount); + scoreGrad.resize(expansionCount); + + gold.resize(expansionCount); + }; +}; +typedef std::shared_ptr BeamExpansionPtr; + +class CostForOneSequence { +public: + CostForOneSequence() + : beamSize_(0), validExpansionCount_(0), goldAsExtraPath_(false) {} + void setData(const BeamExpansionPtr bPtr, size_t beamSize) { + beams_ = bPtr; + beamSize_ = beamSize; + + expandedPathScores_.clear(); + expandedPathScores_.resize(beams_->expansionCount); + + goldRowIds_.clear(); + goldRowIds_.resize(beams_->expansionCount, 0); + goldColIds_.clear(); + goldColIds_.resize(beams_->expansionCount, -1); + } + size_t getValidExpansionCount() { return validExpansionCount_; } + + real forward(); + void backward(); + +private: + void calValidExpandStep(); + void constructTotalExpansion(); + size_t initLastExpansion(); + real globallyNormalizedScore(); + + int getSeqStartPos(size_t beamId, size_t rowId) { + CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId); + int* starts = beams_->seqInfo[beamId]->getData(); + return starts[rowId] - starts[0]; + }; + + size_t beamSize_; + size_t validExpansionCount_; + bool goldAsExtraPath_; + std::vector goldRowIds_; + std::vector goldColIds_; + + BeamExpansionPtr beams_; + std::vector> pathRowIdsInEachBeam_; + std::vector parentIdsInBeam_; + size_t goldIdsInFinalExpansion_; + + std::vector expandedPathScores_; + + MatrixPtr softmaxOut_; +}; + class CrossEntropyOverBeam : public Layer { public: explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {} @@ -26,6 +99,31 @@ class CrossEntropyOverBeam : public Layer { const ParameterMap& parameterMap) override; void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; + +private: + void checkInputs(); + void copyInputsToCpu(); + void resizeOutput(); + void copyGradToGpu(size_t copyCount); + void splitBatchBeams(); + + size_t beamExpanCount_; + size_t batchSize_; + size_t beamSize_; + + // Currently, this layer only works on CPU, if its inputs is on GPU, + // copy them to CPU memory. + std::vector candidateScores_; + std::vector candidateScoreGrad_; + std::vector candidateInBeam_; + std::vector gradToInputs_; + std::vector goldSequence_; + std::vector> beamSplitPos_; + + // split entire bath of beams into beam per sequnence. + std::vector beamPerSeq_; + // beamCosts_ is used to propagate error in one sequence. 
+ std::vector beamCosts_; }; } // namespace paddle diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index a5f06c15dc480..506a4281df4f0 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -28,9 +28,17 @@ using namespace paddle; // NOLINT DECLARE_int32(gpu_id); DECLARE_bool(thread_local_rand_use_global_seed); -const size_t MAX_SEQ_NUM = 10; -const size_t MAX_SEQ_LEN = 27; -const size_t MAX_BEAM_SIZE = 10; +// const size_t MAX_SEQ_NUM = 5; +// const size_t MAX_SEQ_LEN = 10; +// const size_t MAX_BEAM_SIZE = 3; + +const size_t MAX_SEQ_NUM = 23; +const size_t MAX_SEQ_LEN = 50; +const size_t MAX_BEAM_SIZE = 27; + +// const size_t SEED = 1503391792; +// const size_t SEED = 1; +const size_t SEED = (size_t)(time(NULL)); struct SingleBeamExpansion { vector seqStartPos; @@ -43,11 +51,30 @@ struct SingleBeamExpansion { vector groundTruth; vector inBeam; vector rowIdxInBeam; + vector colIdxInBeam; + + void resetGroundTruth(size_t n) { + groundTruth.clear(); + groundTruth.resize(n, -1); + + inBeam.clear(); + inBeam.resize(n, 0); + + rowIdxInBeam.clear(); + rowIdxInBeam.resize(n, -1); + + colIdxInBeam.clear(); + colIdxInBeam.resize(n, -1); + } }; +inline float randFloat() { + return static_cast(rand()) / static_cast(RAND_MAX); +} + void genRand(real* numbers, size_t n) { default_random_engine generator; - uniform_real_distribution distribution(0.0, 1.0); + uniform_real_distribution distribution(0.0, 1.0); for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); } @@ -72,8 +99,7 @@ void genCandidateScores(bool hasSubseq, vector& subSeqStartPos = curBeam.subSeqStartPos; subSeqStartPos.resize(1, 0); - srand((size_t)(time(NULL))); - // srand(1); + srand(SEED); if (prevBeam.selectedIndices.size()) { if (prevBeam.subSeqStartPos.size() > 1) { int seqIdx = 1; @@ -81,9 +107,8 @@ void genCandidateScores(bool hasSubseq, for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { for (size_t j = 0; j < beamSize; ++j) { if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; - for (size_t k = 0; k < beamSize; ++k) - subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + - subSeqStartPos.back()); + subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + + subSeqStartPos.back()); } if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { seqStartPos.push_back(subSeqStartPos.back()); @@ -91,7 +116,6 @@ void genCandidateScores(bool hasSubseq, } } } else { - // samples in previous beam are sequences. for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { if (i && i % beamSize == 0) { seqStartPos.push_back(subSeqStartPos.back()); @@ -141,27 +165,41 @@ void genSelectedIndices(size_t beamSize, void genGroundTruth(vector& beamExpansions, size_t beamSize) { - size_t seqNum = beamExpansions[1].seqStartPos.size() - 1; + SingleBeamExpansion& beam = beamExpansions[1]; + size_t seqNum = beam.seqStartPos.size() - 1; for (size_t i = 2; i < beamExpansions.size(); ++i) - CHECK_EQ(seqNum, beamExpansions[i - 1].seqStartPos.size() - 1); + CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1); - // srand(1); - srand((size_t)(time(NULL))); + srand(SEED); // initialize the first beam. 
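  // each sequence draws one gold label; with probability 0.5 the label is
  // taken from the selected candidates so that it stays in the beam.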
- SingleBeamExpansion& beam = beamExpansions[1]; - beam.groundTruth.resize(seqNum, 0); - beam.inBeam.resize(seqNum, 0); - beam.rowIdxInBeam.resize(seqNum, -1); - - auto begPos = beam.selectedIndices.begin(); + beam.resetGroundTruth(seqNum); for (size_t i = 0; i < seqNum; ++i) { - int seqLen = beam.seqStartPos[i + 1] - beam.seqStartPos[i]; - int label = rand() % seqLen; - auto endPos = begPos + beamSize; - beam.groundTruth[i] = label; - if (find(begPos, endPos, real(label)) != endPos) beam.inBeam[i] = 1; - begPos = endPos; + if (randFloat() > 0.5) { + // force the randomly generated label falls in the beam by chance 0.5. + // otherwise, when sequence length is relatively long and beam size is + // relatively small, the gold sequences falls off the beam at in + // the first search. + real* begPos = beam.selectedIndices.data() + i * beamSize; + beam.colIdxInBeam[i] = + rand() % count_if(begPos, begPos + beamSize, [](const real& val) { + return val != -1.; + }); + beam.groundTruth[i] = + beam.selectedIndices[i * beamSize + beam.colIdxInBeam[i]]; + beam.inBeam[i] = 1; + } else { + int label = rand() % (beam.seqStartPos[i + 1] - beam.seqStartPos[i]); + beam.groundTruth[i] = label; + + real* begPos = beam.selectedIndices.data() + i * beamSize; + real* endPos = begPos + beamSize; + real* lblPos = find(begPos, endPos, real(label)); + if (lblPos != endPos) { + beam.inBeam[i] = 1; + beam.colIdxInBeam[i] = lblPos - begPos; + } + } beam.rowIdxInBeam[i] = i; } @@ -169,22 +207,33 @@ void genGroundTruth(vector& beamExpansions, for (size_t i = 2; i < beamExpansions.size(); ++i) { SingleBeamExpansion& curBeam = beamExpansions[i]; SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; - - curBeam.groundTruth.resize(seqNum, 0); - curBeam.inBeam.resize(seqNum, 0); - curBeam.rowIdxInBeam.resize(seqNum, -1); + curBeam.resetGroundTruth(seqNum); // iterate over each sequence for (size_t j = 0; j < seqNum; ++j) { - if (prevBeam.inBeam[j]) { - // gold sequence falls in the beam in previous search. - - auto begPos = prevBeam.selectedIndices.begin(); - auto endPos = begPos + prevBeam.rowIdxInBeam[j] * beamSize; - size_t totalExpansion = - prevBeam.rowIdxInBeam[j] * beamSize - count(begPos, endPos, -1.); - curBeam.rowIdxInBeam[j] = totalExpansion + prevBeam.groundTruth[j]; - + if (!prevBeam.inBeam[j]) continue; + + // gold sequence falls in the beam in previous search. + real* begPos = prevBeam.selectedIndices.data(); + int offset = + prevBeam.rowIdxInBeam[j] * beamSize + prevBeam.colIdxInBeam[j]; + curBeam.rowIdxInBeam[j] = count_if( + begPos, begPos + offset, [](const real& val) { return val != -1.; }); + + if (randFloat() > 0.5) { + // force the randomly generated label falls in the beam by chance 0.5. + // otherwise, when sequence length is relatively long and beam size is + // relatively small, the gold sequences falls off the beam at in + // the first search. 
+ real* start = + curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; + int n = rand() % count_if(start, start + beamSize, [](const real& val) { + return val != -1.; + }); + curBeam.colIdxInBeam[j] = n; + curBeam.groundTruth[j] = *(start + n); + curBeam.inBeam[j] = 1; + } else { CHECK_LE(curBeam.rowIdxInBeam[j] + 1, curBeam.subSeqStartPos.size() - 1); int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; @@ -193,16 +242,14 @@ void genGroundTruth(vector& beamExpansions, int label = rand() % (end - start); curBeam.groundTruth[j] = label; - auto findBeg = curBeam.selectedIndices.begin() + - curBeam.rowIdxInBeam[j] * beamSize; - auto findEnd = findBeg + beamSize; - if (find(findBeg, findEnd, real(label)) != findEnd) + real* findBeg = + curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; + real* lblPos = + find(findBeg, findBeg + beamSize, static_cast(label)); + if (lblPos != (findBeg + beamSize)) { curBeam.inBeam[j] = 1; - } else { - // in previous search, gold sequence has fallen off the beam, - // the beam search stops, here use -1 as a dummy label. - // It will not used in calculation the cost. - beamExpansions[i].groundTruth[j] = -1; + curBeam.colIdxInBeam[j] = lblPos - findBeg; + } } } } @@ -230,15 +277,12 @@ void genRandomBeamExpansion(size_t expansionCount, genGroundTruth(beamExpansions, beamSize); } -void testCrossEntropyOverBeam(bool useGpu) { +void testCrossEntropyOverBeam(bool useGpu, + size_t beamSize, + vector& beams) { TestConfig config; config.layerConfig.set_type("cross_entropy_over_beam"); - const size_t expansionCount = 3; - const size_t beamSize = MAX_BEAM_SIZE; - vector beams; - genRandomBeamExpansion(expansionCount, beamSize, beams); - size_t seqNum = 0; for (size_t i = 1; i < beams.size(); ++i) { const SingleBeamExpansion& beam = beams[i]; @@ -291,7 +335,17 @@ void testCrossEntropyOverBeam(bool useGpu) { } TEST(Layer, CrossEntropyOverBeam) { - for (bool useGpu : {false, true}) testCrossEntropyOverBeam(useGpu); + LOG(INFO) << "SEED = " << SEED; + const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE; + LOG(INFO) << "beamSize = " << beamSize; + + // TODO(caoying): test with more beam expansions. + const size_t expansionCount = 3; + vector beams; + genRandomBeamExpansion(expansionCount, beamSize, beams); + + for (bool useGpu : {false, true}) + testCrossEntropyOverBeam(useGpu, beamSize, beams); } int main(int argc, char** argv) { @@ -299,7 +353,7 @@ int main(int argc, char** argv) { hl_start(); hl_init(FLAGS_gpu_id); FLAGS_thread_local_rand_use_global_seed = true; - srand(1); + srand(SEED); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } From 5e59ca7ccc8232b2028cfc8b4cffe19ffc73ba18 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 23 Aug 2017 10:40:46 +0800 Subject: [PATCH 5/9] fix config helper. 
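Validate the score input of each triple with the right index (the inputs
of this layer come in triples, not pairs), group the three inputs of one
beam expansion into a BeamInput helper on the Python side, and polish the
comments in the layer and its unittest.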
---
 .../gserver/layers/CrossEntropyOverBeam.cpp | 10 ++
 paddle/gserver/layers/CrossEntropyOverBeam.h | 16 ++-
 .../tests/test_CrossEntropyOverBeamGrad.cpp | 22 ++-
 python/paddle/trainer/config_parser.py | 12 +-
 .../paddle/trainer_config_helpers/layers.py | 129 +++++++++++++++---
 .../test_cross_entropy_over_beam.protostr | 17 ++-
 .../configs/test_cross_entropy_over_beam.py | 18 ++-
 7 files changed, 162 insertions(+), 62 deletions(-)

diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp
index 09258fb305990..f7736f0ce905f 100644
--- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp
+++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp
@@ -161,7 +161,17 @@ real CostForOneSequence::forward() {
 }
 
 void CostForOneSequence::backward() {
+  /*
+   * when a softmax layer is the output layer and is combined with
+   * cross-entropy as the cost, the derivative with regard to softmax's
+   * input is simply:
+   *
+   *   grad_i = softmax_out_i - target_i,
+   *
+   * and here a hard label is used.
+   */
   softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.;
+
   MatrixPtr tmp = Matrix::create(
       softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false);
 
diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h
index 96a5df7dfbe46..5d0cffee3c159 100644
--- a/paddle/gserver/layers/CrossEntropyOverBeam.h
+++ b/paddle/gserver/layers/CrossEntropyOverBeam.h
@@ -19,8 +19,8 @@ limitations under the License. */
 
 namespace paddle {
 
+/* This struct stores the beams in all search steps for a single sequence. */
 struct BeamExpansion {
-  // store the entire beam expansion for a single sequence
   std::vector scores;
   std::vector seqInfo;
 
@@ -111,8 +111,11 @@ class CrossEntropyOverBeam : public Layer {
   size_t batchSize_;
   size_t beamSize_;
 
-  // Currently, this layer only works on CPU, if its inputs is on GPU,
-  // copy them to CPU memory.
+  /*
+   * the process of constructing beams is not friendly to GPU; currently this
+   * layer only runs on CPU. If any of its inputs is in GPU memory, it is
+   * first copied to CPU memory.
+   */
   std::vector candidateScores_;
   std::vector candidateScoreGrad_;
   std::vector candidateInBeam_;
@@ -120,9 +123,12 @@ class CrossEntropyOverBeam : public Layer {
   std::vector goldSequence_;
   std::vector> beamSplitPos_;
 
-  // split entire bath of beams into beam per sequnence.
+  /*
+   * split the entire batch of beams into per-sequence beams and store the
+   * result in this member.
+   */
   std::vector beamPerSeq_;
-  // beamCosts_ is used to propagate error in one sequence.
+  /* beamCosts_ is used to propagate error in one sequence.
+   */
   std::vector<CostForOneSequence> beamCosts_;
 };

diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
index 506a4281df4f0..538d18cdc3d26 100644
--- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
+++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
@@ -28,16 +28,10 @@ using namespace paddle;  // NOLINT
 DECLARE_int32(gpu_id);
 DECLARE_bool(thread_local_rand_use_global_seed);
 
-// const size_t MAX_SEQ_NUM = 5;
-// const size_t MAX_SEQ_LEN = 10;
-// const size_t MAX_BEAM_SIZE = 3;
 
 const size_t MAX_SEQ_NUM = 23;
 const size_t MAX_SEQ_LEN = 50;
 const size_t MAX_BEAM_SIZE = 27;
 
-// const size_t SEED = 1503391792;
-// const size_t SEED = 1;
 const size_t SEED = (size_t)(time(NULL));
 
 struct SingleBeamExpansion {
@@ -176,10 +170,12 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
   beam.resetGroundTruth(seqNum);
   for (size_t i = 0; i < seqNum; ++i) {
     if (randFloat() > 0.5) {
-      // force the randomly generated label falls in the beam by chance 0.5.
-      // otherwise, when sequence length is relatively long and beam size is
-      // relatively small, the gold sequences falls off the beam at in
-      // the first search.
+      /*
+       * Force the randomly generated label to fall in the beam with a
+       * chance of 0.5; otherwise, when the sequence is relatively long and
+       * the beam size is relatively small, the gold sequence falls off the
+       * beam in the very first search.
+       */
       real* begPos = beam.selectedIndices.data() + i * beamSize;
       beam.colIdxInBeam[i] =
           rand() % count_if(begPos, begPos + beamSize, [](const real& val) {
@@ -222,9 +218,7 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
 
       if (randFloat() > 0.5) {
         // force the randomly generated label falls in the beam by chance 0.5.
-        // otherwise, when sequence length is relatively long and beam size is
-        // relatively small, the gold sequences falls off the beam at in
-        // the first search.
+
         real* start =
             curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize;
         int n = rand() % count_if(start, start + beamSize, [](const real& val) {
@@ -339,7 +333,7 @@ TEST(Layer, CrossEntropyOverBeam) {
   const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE;
   LOG(INFO) << "beamSize = " << beamSize;
 
-  // TODO(caoying): test with more beam expansions.
+  // TODO(caoying): test with random beam expansions.
   const size_t expansionCount = 3;
   vector<SingleBeamExpansion> beams;
   genRandomBeamExpansion(expansionCount, beamSize, beams);

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 7707ece819c9e..579713546f15e 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1605,16 +1605,16 @@ def __init__(self, name, inputs, softmax_selfnorm_alpha=0.1, **xargs):
 @config_layer('cross_entropy_over_beam')
 class CrossEntropyOverBeamLayer(LayerBase):
     def __init__(self, name, inputs, **xargs):
-        config_assert(len(inputs) % 3 == 0, "Error input numbers.")
+        config_assert(len(inputs) % 3 == 0, "The number of inputs must be a multiple of 3.")
         super(CrossEntropyOverBeamLayer, self).__init__(
             name, 'cross_entropy_over_beam', 0, inputs, **xargs)
         input_num = len(inputs) / 3
         for i in range(input_num):
-            input_layer = self.get_input_layer(i * 2)
-            config_assert(
-                input_layer.size == 1, "Inputs for this layer are made up of "
-                "several pairs and the first one in a pair is scores for "
-                "all the candidates, so its size should be equal to 1.")
+            input_layer = self.get_input_layer(i * 3)
+            config_assert(input_layer.size == 1, (
+                "Inputs for this layer are made up of several triples, "
+                "in which the first one is the scores over all candidate "
+                "paths, so its size should be equal to 1."))
 
 
 @config_layer('fc')
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index b027f84b5d576..053c92d005f7a 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -103,6 +103,7 @@
     'nce_layer',
     'cross_entropy_with_selfnorm',
     'cross_entropy',
+    'BeamInput',
     'cross_entropy_over_beam',
     'multi_binary_label_cross_entropy',
     'sum_cost',
@@ -5681,10 +5682,10 @@ def multi_binary_label_cross_entropy(input,
 
     if input.activation is None or \
             not isinstance(input.activation, SigmoidActivation):
-        logger.log(
-            logging.WARN,
-            "%s is not recommend for multi_binary_label_cross_entropy's activation, "
-            "maybe the sigmoid is better" % repr(input.activation))
+        logger.log(logging.WARN,
+                   ("%s is not a recommended activation for "
+                    "multi_binary_label_cross_entropy, sigmoid is better") %
+                   repr(input.activation))
 
     Layer(
         name=name,
@@ -5699,26 +5700,110 @@ def multi_binary_label_cross_entropy(input,
         size=1)
 
 
+class BeamInput(object):
+    """
+    Define the input for the cross_entropy_over_beam layer.
+
+    A beam is made up of a triple: the first one is the scores over all
+    candidates; the second one is the indices of the top-k selected
+    candidates; the third one is the index of the ground truth, which is
+    also called the gold.
+    """
+
+    def __init__(self, candidate_scores, selected_candidates, gold):
+        assert isinstance(candidate_scores, LayerOutput)
+        self.candidate_scores = candidate_scores
+        assert candidate_scores.size == 1
+
+        assert isinstance(selected_candidates, LayerOutput)
+        self.selected_candidates = selected_candidates
+
+        assert isinstance(gold, LayerOutput)
+        self.gold = gold
+
+
 @wrap_name_default()
 @layer_support()
-def cross_entropy_over_beam(input, label, name=None, coeff=1.0, weight=None):
-    """
-    TODO(caoying) add comments.
+def cross_entropy_over_beam(input, name=None):
     """
+    This layer is used in learning-to-search models, which solve complex
+    joint prediction problems based on learning to search through a
+    problem-defined search space.
 
-    assert len(input) / 2 == len(label), "Error input numbers."
-    for i in range(0, len(input), 2):
-        assert (input[i].size == 1), (
-            "Inputs for this layer are made up of "
-            "several pairs and the first one in a pair is scores for "
-            "all the candidates, so its size should be equal to 1.")
+    Specifically, the learning-to-search process for this layer begins with
+    searching a target sequence from a nested sequence. In the first search
+    step, the top beam-size sequences with the highest scores, the indices
+    of these top-k sequences in the original nested sequence, and the
+    ground truth (also called the gold) together form a triple that makes
+    up the first beam.
 
-    ipts, parents = __cost_input__(input, label, weight)
-    Layer(
-        name=name,
-        type=LayerType.CROSS_ENTROPY_OVER_BEAM,
-        inputs=ipts,
-        coeff=coeff)
+    Then, several special positions, for example, the start and end
+    positions that define meaningful segments, are searched. In these
+    searches, the top-k positions with the highest scores are selected, and
+    the sub-sequences from the selected start positions to the ends of the
+    sequences (or to a fixed position) are taken as the input of the next
+    search.
+
+    We call the possible top-k results returned by one search a beam. The
+    search process can be repeated for a pre-defined number of turns, which
+    leads to several beam expansions.
+
+    Finally, the layer cross_entropy_over_beam takes all the beam
+    expansions, which contain the candidate targets found along the
+    multi-step search, and calculates the cross entropy over the expanded
+    beams, with all the candidates in the beam serving as the normalization
+    factor.
+
+    Note that if the gold falls off the beam at search step t, the cost is
+    calculated over the beam at step t.
+
+    This cost layer always works together with kmax_sequence_score_layer,
+    sub_nested_seq_layer, and sequence_slice_layer to trim the input to
+    form a sub-search space.
+
+
+    The example usage is:
+
+    .. code-block:: python
+
+       cost = cross_entropy_over_beam(input=[
+           BeamInput(
+               candidate_scores=beam1_candidates,
+               selected_candidates=beam1_topk,
+               gold=gold1),
+           BeamInput(
+               candidate_scores=beam2_candidates,
+               selected_candidates=beam2_topk,
+               gold=gold2),
+       ])
+
+
+    :param input: input beams for this layer.
+    :type input: BeamInput or a list of BeamInput
+    :param name: the name of this layer. It is optional.
+    :type name: basestring
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+
+    if isinstance(input, BeamInput):
+        input = [input]
+    else:
+        assert isinstance(input, list), (
+            'input for cross_entropy_over_beam should be a python list '
+            'of BeamInput objects.')
+        for ipt in input:
+            assert isinstance(ipt, BeamInput), (
+                'input for cross_entropy_over_beam '
+                'should be a BeamInput object.')
+
+    ipts = []
+    parents = []
+    for beam in input:
+        parents += [beam.candidate_scores, beam.selected_candidates, beam.gold]
+        ipts += [
+            beam.candidate_scores.name, beam.selected_candidates.name,
+            beam.gold.name
+        ]
+
+    Layer(name=name, type=LayerType.CROSS_ENTROPY_OVER_BEAM, inputs=ipts)
     return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1)
 
 
@@ -6247,11 +6332,11 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
 @wrap_bias_attr_default()
 def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
     """
-    A layer applies a linear transformation to each element in each row of
-    the input matrix. For each element, the layer first re-scale it and then
+    A layer applies a linear transformation to each element in each row of
+    the input matrix. For each element, the layer first re-scales it and then
     adds a bias to it.
-    This layer is very like the SlopeInterceptLayer, except the scale and
+    This layer is very similar to the SlopeInterceptLayer, except that the scale and
     bias are trainable.
 
     .. math::

diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr
index e44478ec2ba1f..c43fc48e22204 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr
@@ -114,27 +114,26 @@ layers {
     input_layer_name: "__kmax_sequence_score_layer_0__"
   }
   inputs {
-    input_layer_name: "__fc_layer_0__"
+    input_layer_name: "sentences_ids"
   }
   inputs {
-    input_layer_name: "__kmax_sequence_score_layer_1__"
+    input_layer_name: "__fc_layer_0__"
   }
   inputs {
-    input_layer_name: "__fc_layer_1__"
+    input_layer_name: "__kmax_sequence_score_layer_1__"
   }
   inputs {
-    input_layer_name: "__kmax_sequence_score_layer_2__"
+    input_layer_name: "start_ids"
   }
   inputs {
-    input_layer_name: "sentences_ids"
+    input_layer_name: "__fc_layer_1__"
   }
   inputs {
-    input_layer_name: "start_ids"
+    input_layer_name: "__kmax_sequence_score_layer_2__"
   }
   inputs {
     input_layer_name: "end_ids"
   }
-  coeff: 1.0
 }
 parameters {
   name: "___fc_layer_0__.w0"
@@ -177,8 +176,8 @@ parameters {
   initial_smart: false
 }
 input_layer_names: "sentence_scores"
-input_layer_names: "sentence_states"
 input_layer_names: "sentences_ids"
+input_layer_names: "sentence_states"
 input_layer_names: "start_ids"
 input_layer_names: "end_ids"
 output_layer_names: "__cross_entropy_over_beam_0__"
@@ -198,8 +197,8 @@ sub_models {
   layer_names: "end_ids"
   layer_names: "__cross_entropy_over_beam_0__"
   input_layer_names: "sentence_scores"
-  input_layer_names: "sentence_states"
   input_layer_names: "sentences_ids"
+  input_layer_names: "sentence_states"
   input_layer_names: "start_ids"
   input_layer_names: "end_ids"
   output_layer_names: "__cross_entropy_over_beam_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
index edc2d32fca1c9..240e703dc904e 100644
--- .../gserver/layers/CrossEntropyOverBeam.cpp | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp index f7736f0ce905f..b7c2a44626595 100644 --- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -53,8 +53,8 @@ size_t CostForOneSequence::initLastExpansion() { candidates->getData() + height * beamSize_, [](const real& val) { return val != -1; }); /* - * if the gold sequence falls off the beam during search, - * add the gold sequence as the last path into all expanded paths. + * if the gold sequence falls off the beam during search, add the gold + * sequence as the last path into the all expanded candidates. */ if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++; @@ -133,7 +133,7 @@ real CostForOneSequence::globallyNormalizedScore() { Matrix::resizeOrCreate( softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false); - softmaxOut_->zero(); + softmaxOut_->zeroMem(); MatrixPtr tmp = Matrix::create( softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); @@ -143,6 +143,8 @@ real CostForOneSequence::globallyNormalizedScore() { 1, false, false); + expandedPathScores_[i]->zeroMem(); + IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), pathRowIdsInEachBeam_[i].size(), false); @@ -217,13 +219,16 @@ void CrossEntropyOverBeam::checkInputs() { const Argument& goldSeq = getInput(i * 3 + 2); if (i) { - CHECK(scores.hasSubseq()) << "Beam expansion expect the first one, " - "should be a nested sequence"; + CHECK(scores.hasSubseq()) << "input " << i << " " + << inputLayers_[i * 3]->getName() + << " should be a nested sequence"; CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_); CHECK_EQ(scores.getNumSequences(), batchSize_); CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize()); } else { - CHECK(scores.hasSeq()) << "The first beam expansion should be a sequence"; + CHECK(scores.hasSeq()) << "input " << i << " " + << inputLayers_[i]->getName() + << " should be a sequence"; batchSize_ = scores.getNumSequences(); beamSize_ = getInputValue(i * 3 + 1)->getWidth(); CHECK_EQ(batchSize_, selCandidates.getBatchSize()); @@ -332,7 +337,7 @@ void CrossEntropyOverBeam::splitBatchBeams() { void CrossEntropyOverBeam::resizeOutput() { Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false); - output_.value->zero(); + output_.value->zeroMem(); for (size_t i = 0; i < beamExpanCount_; ++i) { MatrixPtr inGrad = getInputGrad(i * 3); @@ -344,7 +349,7 @@ void CrossEntropyOverBeam::resizeOutput() { false); } else candidateScoreGrad_[i] = std::move(inGrad); - candidateScoreGrad_[i]->zero(); + candidateScoreGrad_[i]->zeroMem(); } } From 7035bb63e91a2dcf1f91df5e440d2c3e45bdd2e8 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Fri, 25 Aug 2017 20:44:04 +0800 Subject: [PATCH 7/9] fix a bug. 
---
 paddle/parameter/Argument.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 2b945de18a4cd..b0e9e740c84e6 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -677,6 +677,7 @@ void Argument::reorganizeSeqInfo(
     const ICpuGpuVectorPtr subSeqStartPos,
     std::vector<std::vector<int>>& reorganizedSeqInfo) {
   CHECK(seqStartPos);
+  reorganizedSeqInfo.clear();
 
   int seqNum = seqStartPos->getSize() - 1;
   int* seqStarts = seqStartPos->getMutableData(false);

From 09e903eb9417745952ced6db532594fd4a759d74 Mon Sep 17 00:00:00 2001
From: caoying03
Date: Tue, 29 Aug 2017 13:44:51 +0800
Subject: [PATCH 8/9] fix v2 infer interface.
---
 paddle/gserver/layers/CrossEntropyOverBeam.cpp | 1 -
 python/paddle/v2/inference.py                  | 7 +++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp
index 500cd6ff8ccc6..bffcc30154370 100644
--- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp
+++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp
@@ -39,7 +39,6 @@ void CostForOneSequence::calValidExpandStep() {
     if (start + beamSize_ == findEnd) return;
     goldColIds_[i] = findEnd - start;
   }
-
   if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false;
 }
 
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
index 4dcc3ab57e7e6..8acea6155c588 100644
--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -70,7 +70,7 @@ def iter_infer_field(self, field, **kwargs):
                 item = [each_result[each_field] for each_field in field]
                 yield item
 
-    def infer(self, input, field='value', **kwargs):
+    def infer(self, input, field='value', flatten_result=True, **kwargs):
         """
         Infer a data by model.
         :param input: input data batch. Should be python iterable object.
@@ -83,7 +83,10 @@ def infer(self, input, field='value', **kwargs):
         retv = [[] for i in xrange(len(result))]
         for i, item in enumerate(result):
             retv[i].append(item)
-        retv = [numpy.concatenate(out) for out in retv]
+
+        if flatten_result:
+            retv = [numpy.concatenate(out) for out in retv]
+
         if len(retv) == 1:
             return retv[0]
         else:

From 36f0aa7390e3044b8e26d1787f99ed5edaf27ed0 Mon Sep 17 00:00:00 2001
From: caoying03
Date: Thu, 31 Aug 2017 13:06:22 +0800
Subject: [PATCH 9/9] fix code style to pass CI.
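Style fixes flagged by the CI linter, all visible in the diff below: add
braces around single-statement else branches, mark the one-argument
BeamExpansion constructor explicit so that an integer is no longer
implicitly converted to a BeamExpansion (the resize call in
splitBatchBeams now constructs the fill value explicitly), and drop the
stray semicolons after function bodies.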
---
 paddle/gserver/layers/CrossEntropyOverBeam.cpp | 11 +++++++----
 paddle/gserver/layers/CrossEntropyOverBeam.h   |  6 +++---
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp
index bffcc30154370..4acc077035b17 100644
--- a/paddle/gserver/layers/CrossEntropyOverBeam.cpp
+++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp
@@ -28,8 +28,9 @@ void CostForOneSequence::calValidExpandStep() {
           start,
           start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1],
           [](const real& val) { return val != -1.; });
-    } else
+    } else {
       goldRowIds_[i] = 0;
+    }
 
     real* start =
         beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_;
@@ -288,7 +289,7 @@ void CrossEntropyOverBeam::copyInputsToCpu() {
 
 void CrossEntropyOverBeam::splitBatchBeams() {
   beamCosts_.resize(batchSize_);
-  beamPerSeq_.resize(batchSize_, beamExpanCount_);
+  beamPerSeq_.resize(batchSize_, BeamExpansion(beamExpanCount_));
 
   for (size_t i = 0; i < beamExpanCount_; ++i) {
     int* seqStarts =
@@ -300,8 +301,9 @@ void CrossEntropyOverBeam::splitBatchBeams() {
       subSeqStarts =
           getInput(i * 3).subSequenceStartPositions->getMutableData(false);
       maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1;
-    } else
+    } else {
       maxLen = getInput(i).sequenceStartPositions->getSize() - 1;
+    }
 
     for (size_t j = 0; j < batchSize_; ++j) {
       beamPerSeq_[j].scores[i] =
@@ -348,8 +350,9 @@ void CrossEntropyOverBeam::resizeOutput() {
                              inGrad->getWidth(),
                              false,
                              false);
-    } else
+    } else {
       candidateScoreGrad_[i] = std::move(inGrad);
+    }
     candidateScoreGrad_[i]->zeroMem();
   }
 }
diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h
index 5d0cffee3c159..5643556f43370 100644
--- a/paddle/gserver/layers/CrossEntropyOverBeam.h
+++ b/paddle/gserver/layers/CrossEntropyOverBeam.h
@@ -31,7 +31,7 @@ struct BeamExpansion {
 
   size_t expansionCount;
 
-  BeamExpansion(int n) {
+  explicit BeamExpansion(int n) {
     expansionCount = n;
     scores.resize(expansionCount);
     seqInfo.resize(expansionCount);
@@ -39,7 +39,7 @@ struct BeamExpansion {
     scoreGrad.resize(expansionCount);
 
     gold.resize(expansionCount);
-  };
+  }
 };
 typedef std::shared_ptr<BeamExpansion> BeamExpansionPtr;

@@ -74,7 +74,7 @@ class CostForOneSequence {
     CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId);
     int* starts = beams_->seqInfo[beamId]->getData();
     return starts[rowId] - starts[0];
-  };
+  }
 
   size_t beamSize_;
   size_t validExpansionCount_;