Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BeamSearchDecodeOp #5498

Merged
merged 31 commits into from
Nov 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
c59f569
init trieconcat_op
jacquesqiao Nov 6, 2017
4ef08e0
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Nov 6, 2017
cf4287b
add basic implementation
jacquesqiao Nov 7, 2017
e0bdab9
add test
jacquesqiao Nov 7, 2017
067b30e
add more test
jacquesqiao Nov 7, 2017
c2c5147
update unit test
jacquesqiao Nov 8, 2017
03ec48a
add PackAllSteps test
jacquesqiao Nov 8, 2017
4b09874
fix PackAllSteps
jacquesqiao Nov 8, 2017
79dcbd4
all test passed
jacquesqiao Nov 8, 2017
799d6db
clean code
jacquesqiao Nov 8, 2017
beaf643
remove state inside helper
jacquesqiao Nov 8, 2017
549d8fe
rename prob to score
jacquesqiao Nov 8, 2017
e802ebc
optimize RemoveFromEnd
jacquesqiao Nov 8, 2017
96959ff
use deconstructor to delete BeamNode recursively
jacquesqiao Nov 8, 2017
31b871e
optimize interface
jacquesqiao Nov 9, 2017
37bb50c
add comment to interface
jacquesqiao Nov 9, 2017
d4341dd
optimizer data structure
jacquesqiao Nov 9, 2017
66ba330
use template to define the type of score
jacquesqiao Nov 10, 2017
c2079cc
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Nov 10, 2017
cdb6f8a
use template parameter for BeamHelper
jacquesqiao Nov 10, 2017
3bf4c81
change father to parent
jacquesqiao Nov 10, 2017
0705b54
rename TrieConcat to BeamSearchOutConcat
jacquesqiao Nov 10, 2017
89b8d32
use LoDTensorArray
jacquesqiao Nov 10, 2017
1c038bd
rename BeamSearchOutConcat to BeamSearchDecode
jacquesqiao Nov 10, 2017
fa1c74b
refine code
jacquesqiao Nov 10, 2017
2f1cba2
remain all candidate sentence in beam_search_decode_op, do not consid…
jacquesqiao Nov 10, 2017
1e0263f
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Nov 13, 2017
90e93b8
use unique_ptr
jacquesqiao Nov 13, 2017
08e0ef4
fix compare bug
jacquesqiao Nov 13, 2017
794be6e
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jacquesqiao Nov 13, 2017
1f76e94
fix lod compile problem
jacquesqiao Nov 13, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions paddle/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc
rnn/recurrent_op_utils.cc
Expand Down
110 changes: 110 additions & 0 deletions paddle/operators/beam_search_decode_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/beam_search_decode_op.h"

namespace paddle {
namespace operators {

class BeamSearchDecodeOp : public framework::OperatorBase {
public:
BeamSearchDecodeOp(const std::string& type,
const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
framework::ExecutionContext ctx(*this, scope, dev_ctx);
const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
const size_t step_num = ids->size();
PADDLE_ENFORCE_GT(step_num, 0UL,
"beam search steps should be larger than 0");
const size_t source_num = ids->at(0).lod().at(0).size() - 1;
PADDLE_ENFORCE_GT(source_num, 0UL, "source num should be larger than 0");

for (size_t i = 0; i < step_num; ++i) {
PADDLE_ENFORCE_EQ(ids->at(i).lod().size(), 2UL,
"Level of LodTensor should be 2");
}

// prepare output
LoDTensor* sentenceIds = ctx.Output<LoDTensor>("SentenceIds");
LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores");

BeamSearchDecoder<float> beam_search_decoder;
beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds,
sentenceScores);
}
};

class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
BeamSearchDecodeOpProtoMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Ids",
"(LodTensorArray)"
"score of the candidate words in each step");
AddInput("Scores",
"(LodTensorArray)"
"score of the candidate words in each step");
AddOutput("SentenceIds",
"(LodTensor)"
"All possible result sentences of word ids");
AddOutput("SentenceScores",
"(LodTensor)"
"All possible result sentences of word scores");
AddComment(R"DOC(
Pack the result of Beam search op into SentenceIds and SentenceScores.
)DOC");
}
};

class BeamSearchDecodeInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext* context) const override {
PADDLE_ENFORCE(context->HasInput("Ids"),
"BeamSearchDecodeOp must has input Ids");
PADDLE_ENFORCE(context->HasInput("Scores"),
"BeamSearchDecodeOp must has input Scores");
PADDLE_ENFORCE(context->HasOutput("SentenceIds"),
"BeamSearchDecodeOp must has output SentenceIds");
PADDLE_ENFORCE(context->HasOutput("SentenceScores"),
"BeamSearchDecodeOp must has output SentenceScores");
}
};

class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind& op_desc,
framework::BlockDescBind* block) const override {
for (auto& o : op_desc.Output("SentenceIds")) {
block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
}
for (auto& o : op_desc.Output("SentenceScores")) {
block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
}
}
};

} // namespace operators
} // namespace paddle

REGISTER_OPERATOR(beam_search_decode, paddle::operators::BeamSearchDecodeOp,
paddle::operators::BeamSearchDecodeOpProtoMaker,
paddle::operators::BeamSearchDecodeInferShape,
paddle::operators::BeamSearchDecodeInferVarType,
paddle::framework::EmptyGradOpMaker);
280 changes: 280 additions & 0 deletions paddle/operators/beam_search_decode_op.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

using LoDTensor = framework::LoDTensor;
using LoDTensorArray = framework::LoDTensorArray;

// all the lod have 2 levels.
// The First is source level, the second is sentence level.
// source level describe how many candidate words for this source.
// sentence level describe these candidates belong to which prefix
const size_t kSourceLevel = 0;
const size_t kSentenceLevel = 1;

template <typename T>
struct BeamNode {
BeamNode(int64_t word_id, T score) : word_id_(word_id), score_(score) {}

~BeamNode() {
if (parent_) {
parent_->DropKid(this);
if (parent_->kids_.size() == 0UL) {
delete parent_;
}
}
VLOG(3) << "Delete BeamNode root with word_id:" << this->word_id_;
}

void AppendTo(BeamNode* parent) {
parent_ = parent;
parent->kids_.insert(this);
}

void DropKid(BeamNode* kid) { kids_.erase(kid); }

BeamNode* parent_ = nullptr;
std::unordered_set<BeamNode*> kids_;
int64_t word_id_;
T score_;
};

template <typename T>
using BeamNodeVector = std::vector<std::unique_ptr<BeamNode<T>>>;

template <typename T>
struct Sentence {
std::vector<int64_t> word_ids;
std::vector<T> scores;
};

template <typename T>
using SentenceVector = std::vector<Sentence<T>>;

template <typename T>
struct BeamSearchDecoder {
/**
* make a BeamNode and all it's related prefix BeanNode into a Sentence.
*/
Sentence<T> MakeSentence(const BeamNode<T>* node) const;

/**
* Param:
* cur_ids: LoDTensor of One step for word ID
* cur_scores: LoDTensor of One Step for word score
* prefixes_list: prefixes for each source sentence.
* sentence_vector_list: result sentence_vector for each source sentence.
* Return:
* a new prefixes list for each source of current step
*/
std::vector<BeamNodeVector<T>> PackTwoSteps(
const LoDTensor& cur_ids, const LoDTensor& cur_scores,
std::vector<BeamNodeVector<T>>& prefixes_list,
std::vector<SentenceVector<T>>* sentence_vector_list) const;

/**
* convert the result sentence_vector for each source sentence into two
* LodTensor.
* One is all candidate sentences with word id, one is all candidate sentences
* with word score.
* Param:
* sentence_vector_list: sentence_vector for each source sentence.
* id_tensor: result LoDTensor for sentences of id.
* score_tensor: result LoDTensor for sentences of score.
*/
void ConvertSentenceVectorToLodTensor(
std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
LoDTensor* score_tensor) const;

/**
* Pack all steps of id/score LodTensor into sentence LoDTensor
* it's main logic is:
* ```python
* prefix
* result_sentence
* result_lod_tensor
*
* for (step in steps):
* prefix = PackTwoSteps(prefix, step, &result_sentence)
* ConvertSentenceVector<T>ToLodTensor(result_sentence, &result_lod_tensor)
* ```
*/
void PackAllSteps(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores, LoDTensor* id_tensor,
LoDTensor* score_tensor) const;
};

template <typename T>
Sentence<T> BeamSearchDecoder<T>::MakeSentence(const BeamNode<T>* node) const {
Sentence<T> sentence;
while (node != nullptr) {
sentence.word_ids.emplace_back(node->word_id_);
sentence.scores.emplace_back(node->score_);
node = node->parent_;
}

std::reverse(std::begin(sentence.word_ids), std::end(sentence.word_ids));
std::reverse(std::begin(sentence.scores), std::end(sentence.scores));

return sentence;
}

template <typename T>
std::vector<BeamNodeVector<T>> BeamSearchDecoder<T>::PackTwoSteps(
const LoDTensor& cur_ids, const LoDTensor& cur_scores,
std::vector<BeamNodeVector<T>>& prefixes_list,
std::vector<SentenceVector<T>>* sentence_vector_list) const {
std::vector<BeamNodeVector<T>> result;

for (size_t src_idx = 0; src_idx < cur_ids.lod()[kSourceLevel].size() - 1;
++src_idx) {
size_t src_start = cur_ids.lod().at(kSourceLevel)[src_idx];
size_t src_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1];

BeamNodeVector<T> beam_nodes;

// if prefixes size is 0, it means this is the first step. In this step,
// all candidate id is the start of candidate sentences.
if (prefixes_list.empty()) {
PADDLE_ENFORCE_EQ(cur_ids.lod().at(kSourceLevel).back(),
cur_ids.lod().at(kSentenceLevel).back(),
"in the first step");
for (size_t id_idx = src_start; id_idx < src_end; ++id_idx) {
beam_nodes.push_back(std::unique_ptr<BeamNode<T>>(new BeamNode<T>(
cur_ids.data<int64_t>()[id_idx], cur_scores.data<T>()[id_idx])));
}
} else {
BeamNodeVector<T>& prefixes = prefixes_list[src_idx];
SentenceVector<T>& sentence_vector = (*sentence_vector_list)[src_idx];

PADDLE_ENFORCE_EQ(src_end - src_start, prefixes.size(),
"prefix and candidate set number should be the same");

auto candidate_offset = cur_ids.lod()[kSentenceLevel];
for (size_t prefix_idx = 0; prefix_idx < prefixes.size(); ++prefix_idx) {
std::unique_ptr<BeamNode<T>>& prefix = prefixes[prefix_idx];
size_t candidate_start = candidate_offset[src_start + prefix_idx];
size_t candidate_end = candidate_offset[src_start + prefix_idx + 1];
if (candidate_start == candidate_end) {
VLOG(3) << "this sentence has no more candidate, "
"add to result sentence and rm it from beam tree";
sentence_vector.push_back(MakeSentence(prefix.get()));
prefix.reset();
} else {
for (size_t candidate_idx = candidate_start;
candidate_idx < candidate_end; ++candidate_idx) {
auto* candidate =
new BeamNode<T>(cur_ids.data<int64_t>()[candidate_idx],
cur_scores.data<T>()[candidate_idx]);
candidate->AppendTo(prefix.get());
beam_nodes.push_back(std::unique_ptr<BeamNode<T>>(candidate));
}
prefix.release();
}
}
}
result.push_back(std::move(beam_nodes));
}
return result;
}

template <typename T>
void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
LoDTensor* score_tensor) const {
size_t src_num = sentence_vector_list.size();

PADDLE_ENFORCE_NE(src_num, 0, "src_num should not be 0");

std::vector<size_t> source_level_lod = {0};
std::vector<size_t> sentence_level_lod = {0};
std::vector<int64_t> id_data;
std::vector<T> score_data;

for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
for (Sentence<T>& sentence : sentence_vector_list[src_idx]) {
id_data.insert(id_data.end(), sentence.word_ids.begin(),
sentence.word_ids.end());
score_data.insert(score_data.end(), sentence.scores.begin(),
sentence.scores.end());
sentence_level_lod.push_back(sentence_level_lod.back() +
sentence.word_ids.size());
}
source_level_lod.push_back(source_level_lod.back() +
sentence_vector_list[src_idx].size());
}

auto cpu_place = new paddle::platform::CPUPlace();
paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place);

framework::LoD lod;
lod.push_back(source_level_lod);
lod.push_back(sentence_level_lod);

id_tensor->set_lod(lod);
id_tensor->Resize({static_cast<int64_t>(id_data.size())});
id_tensor->mutable_data<int64_t>(paddle::platform::CPUPlace());
id_tensor->CopyFromVector<int64_t>(id_data, cpu_ctx);

score_tensor->set_lod(lod);
score_tensor->Resize({static_cast<int64_t>(score_data.size())});
score_tensor->mutable_data<T>(paddle::platform::CPUPlace());
score_tensor->CopyFromVector<T>(score_data, cpu_ctx);
}

template <typename T>
void BeamSearchDecoder<T>::PackAllSteps(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores,
LoDTensor* id_tensor,
LoDTensor* score_tensor) const {
PADDLE_ENFORCE(!step_ids.empty(), "step num should be larger than 0");
PADDLE_ENFORCE_EQ(step_ids.size(), step_scores.size(),
"step_ids and step_scores should be the same");
const size_t step_num = step_ids.size();
const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1;

PADDLE_ENFORCE_GT(src_num, 0UL, "source num should be larger than 0");

// previous prefixes for each step,
// the init length is 0, means this is the first step.
std::vector<BeamNodeVector<T>> beamnode_vector_list(0);
std::vector<SentenceVector<T>> sentence_vector_list(src_num);

// pack all steps for one batch first, then another batch
for (size_t step_id = 0; step_id < step_num; ++step_id) {
beamnode_vector_list =
PackTwoSteps(step_ids.at(step_id), step_scores.at(step_id),
beamnode_vector_list, &sentence_vector_list);
}
// append last beam_node to result
for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
for (auto& beam_node : beamnode_vector_list.at(src_idx)) {
sentence_vector_list[src_idx].push_back(MakeSentence(beam_node.get()));
beam_node.reset();
}
}

ConvertSentenceVectorToLodTensor(sentence_vector_list, id_tensor,
score_tensor);
}

} // namespace operators
} // namespace paddle
Loading