
Commit

Feature/dynamic recurrent op forward test (#4729)
Superjomn committed Oct 13, 2017
1 parent 6316b40 commit 1c1f73b
Showing 9 changed files with 323 additions and 69 deletions.
28 changes: 17 additions & 11 deletions paddle/framework/tensor_array.cc
@@ -76,6 +76,17 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
const std::vector<DySeqMeta>& meta, const LoD& lod,
size_t level);

std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch& meta, int batch_id) {
// collect the indices that need to be copied into this batch
std::vector<size_t> indice;
for (const auto& seq : meta) {
size_t id = seq.begin + batch_id;
if (id >= seq.end) break;
indice.push_back(id);
}
return indice;
}

} // namespace detail
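
For reference, a minimal standalone sketch of the index-gathering logic above, using a hypothetical SeqRange stand-in for DySeqMeta and assuming the sequences are packed back to back and sorted by descending length:

#include <cstddef>
#include <vector>

// Hypothetical, simplified stand-in for DySeqMeta: the [begin, end) range of
// one sequence's records in the packed buffer.
struct SeqRange {
  size_t begin;
  size_t end;
};

// For time step `batch_id`, collect the record index of every sequence that is
// still alive; the early break relies on descending-length order.
std::vector<size_t> GatherBatchIndices(const std::vector<SeqRange>& metas,
                                       size_t batch_id) {
  std::vector<size_t> indices;
  for (const auto& seq : metas) {
    size_t id = seq.begin + batch_id;
    if (id >= seq.end) break;  // all remaining sequences are even shorter
    indices.push_back(id);
  }
  return indices;
}

// With lengths 3, 2, 1 packed as {0,3}, {3,5}, {5,6}, batch 1 gathers {1, 4}.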

const LoDTensor& TensorArray::Read(size_t index) const {
@@ -113,8 +124,8 @@ LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta,
return detail::PackDynamicBatch(values_, meta, lod, level);
}

std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level,
bool length_desend) {
DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level,
bool length_desend) {
detail::DynamicBatchUnpacker unpacker(source, level,
length_desend /*descend*/);

@@ -129,6 +140,7 @@ std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level,
Write(batch_id, unpacker.GetBatch(batch_id));
}

PADDLE_ENFORCE(!unpacker.meta.empty());
return unpacker.meta;
}
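
Conceptually, Unpack transposes sequence-major data into batch-major time steps: the sequences are sorted by descending length, and batch t collects the t-th element of every sequence that is still alive. A self-contained sketch of that transposition on plain vectors (not the Paddle API):

#include <algorithm>
#include <cstddef>
#include <vector>

// Turn variable-length sequences (sequence-major) into per-time-step batches
// (batch-major). Sorting by descending length makes every batch a contiguous
// prefix of the sequence list.
std::vector<std::vector<int>> UnpackToBatches(
    std::vector<std::vector<int>> seqs) {
  std::sort(seqs.begin(), seqs.end(),
            [](const std::vector<int>& a, const std::vector<int>& b) {
              return a.size() > b.size();
            });
  size_t num_steps = seqs.empty() ? 0 : seqs.front().size();
  std::vector<std::vector<int>> batches(num_steps);
  for (size_t t = 0; t < num_steps; ++t) {
    for (const auto& seq : seqs) {
      if (t >= seq.size()) break;  // remaining sequences are even shorter
      batches[t].push_back(seq[t]);
    }
  }
  return batches;
}

// E.g. {{1,2,3}, {4,5}, {6}} -> step 0: {1,4,6}, step 1: {2,5}, step 2: {3}.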

@@ -218,13 +230,7 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
PADDLE_ENFORCE(!meta.empty(), "should build meta first");
LoDTensor result;

// collect indice need to copy to the batch
std::vector<size_t> indice;
for (const auto& seq : meta) {
size_t id = seq.begin + index;
if (id >= seq.end) break;
indice.push_back(id);
}
auto indice = detail::GenDyBatchIndice(meta, index);
PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index);

// copy the indice of records in LoDTensor
@@ -237,9 +243,9 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
for (size_t i = 0; i < indice.size(); i++) {
auto index = indice[i];
auto target = result.Slice<value_type>(i, i + 1);
auto source_ = source->Slice<value_type>(index, index + 1);
auto slice = source->Slice<value_type>(index, index + 1);

target.CopyFrom<value_type>(source_, platform::CPUPlace(),
target.CopyFrom<value_type>(slice, platform::CPUPlace(),
platform::CPUDeviceContext());
}

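The copy loop above materializes one batch by gathering the selected records row by row; the same idea on a plain row-major buffer, assuming `width` values per record (illustration only, not the LoDTensor API):

#include <algorithm>
#include <cstddef>
#include <vector>

// Gather the rows named by `indices` from a row-major buffer with `width`
// values per record into a densely packed batch.
std::vector<float> GatherRows(const std::vector<float>& source,
                              const std::vector<size_t>& indices,
                              size_t width) {
  std::vector<float> batch(indices.size() * width);
  for (size_t i = 0; i < indices.size(); ++i) {
    std::copy(source.begin() + indices[i] * width,
              source.begin() + (indices[i] + 1) * width,
              batch.begin() + i * width);
  }
  return batch;
}
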
12 changes: 9 additions & 3 deletions paddle/framework/tensor_array.h
@@ -34,6 +34,13 @@ struct DySeqMeta {
size_t ori_idx;
};

using DySeqMetaBatch = std::vector<DySeqMeta>;

/*
* Extract the indices of the instances that belong to the batch `batch_id`.
*/
std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id);

/*
* TensorArray is a C-array-like array of tensors, it is meant to be used with
* dynamic iteration primitives such as while_loop. It is used to segment inputs
@@ -69,16 +76,15 @@ class TensorArray {
* Recover the original LoD-arranged LoDTensor with the `values`, `level` and
* `indice_map`.
*/
LoDTensor Pack(size_t level, const std::vector<DySeqMeta> &meta,
LoDTensor Pack(size_t level, const DySeqMetaBatch &meta,
const LoD &lod) const;

/*
* Split the LoDTensor at the given `level` and write the generated batches to
* `values`; if `desend` is set, sort by length in descending order, otherwise
* in ascending order.
*/
std::vector<DySeqMeta> Unpack(const LoDTensor &source, int level,
bool length_desend);
DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend);

/*
* Pack the values into a tensor with rank one higher than each tensor in
162 changes: 110 additions & 52 deletions paddle/operators/dynamic_recurrent_op.cc
@@ -23,6 +23,7 @@ using framework::Scope;
using framework::TensorArray;
using framework::LoDTensor;
using framework::Variable;
using framework::DySeqMetaBatch;

namespace detail {

@@ -33,6 +34,29 @@ inline void CreateVariables(Scope& scope,
}
}

/*
* The sequence inputs are reordered when they are split, so the boot states
* must be reordered in the same way.
*
* NOTE This requires that the `pre_state` of the first time step copies the
* `boot_state` rather than references it, because its content has to be
* reordered, while the RNN op must not modify `boot_state`, which is an input
* variable.
*/
template <typename T>
inline void ReorderBootState(const DySeqMetaBatch& metas,
const LoDTensor& boot_state, LoDTensor* tensor,
const platform::Place& dst_place) {
for (size_t seq_id = 0; seq_id < metas.size(); seq_id++) {
auto slice = tensor->Slice<T>(seq_id, seq_id + 1);
auto boot_slice =
boot_state.Slice<T>(metas[seq_id].ori_idx, metas[seq_id].ori_idx + 1);
// TODO(superjom) pass in device context as an argument
slice.template CopyFrom<T>(boot_slice, dst_place,
platform::CPUDeviceContext());
}
}

} // namespace detail
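
Because Unpack sorts the sequences by length, the boot-state rows (one per original sequence) must be permuted by `ori_idx` before they can act as the first pre-state. A minimal sketch of that permutation on plain rows, with explicit copies in place of Paddle's Slice/CopyFrom:

#include <cstddef>
#include <vector>

// Permute boot-state rows into the sorted-sequence order produced by Unpack.
// `ori_idx[k]` is the original index of the k-th (sorted) sequence, mirroring
// DySeqMeta::ori_idx.
std::vector<std::vector<float>> ReorderRows(
    const std::vector<std::vector<float>>& boot_rows,
    const std::vector<size_t>& ori_idx) {
  std::vector<std::vector<float>> reordered(ori_idx.size());
  for (size_t seq_id = 0; seq_id < ori_idx.size(); ++seq_id) {
    reordered[seq_id] = boot_rows[ori_idx[seq_id]];  // copy, do not alias
  }
  return reordered;
}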

class DynamicRecurrentOpProtoAndCheckerMaker
@@ -69,26 +93,26 @@ void DynamicRecurrentOp::Run(const Scope& scope,
CreateScopes();
WriteStepInputs();
InitStates();
WriteStepOutputs();

// call stepnet in all the time steps
for (size_t step = 0; step < cache_.num_steps; step++) {
auto& step_scope = cache_.GetScope(step);
stepnet_->Run(step_scope, dev_ctx);
}

WriteStepOutputs();
ConcatOutputs();
}

void DynamicRecurrentOp::SplitInputs() const {
// TODO(superjom) make level a config
// TODO(superjom) check all the inputs has the same LoD
int level = 0;
const auto& inlinks = cache_.inlinks;
for (const auto& item : inlinks) {
for (const auto& item : cache_.inlinks) {
const auto& var = item.second;
const auto& tensor = var->Get<LoDTensor>();
TensorArray& ta = step_inputs_[item.first];

dy_seq_metas_[item.first] =
ta.Unpack(tensor, level, true /*length_descend*/);

@@ -120,17 +144,11 @@ void DynamicRecurrentOp::WriteStepInputs() const {
}

void DynamicRecurrentOp::WriteStepOutputs() const {
for (size_t step = 0; step < cache_.scopes->size(); step++) {
auto& scope = cache_.GetScope(step);
for (auto& item : step_outputs_) {
auto* var = scope.FindVar(item.first);
if (var == nullptr) {
var = scope.NewVar(item.first);
}
auto* tensor = var->GetMutable<LoDTensor>();
item.second.WriteShared(step, *tensor);
}
// initialize step outputs
for (const auto& item : cache_.outlinks) {
step_outputs_.emplace(item.first, TensorArray());
}
PADDLE_ENFORCE_GT(step_outputs_.size(), 0UL);
}

void DynamicRecurrentOp::CreateScopes() const {
@@ -145,73 +163,107 @@ void DynamicRecurrentOp::CreateScopes() const {
PADDLE_ENFORCE_NOT_NULL(stepnet_, "stepnet should be set first");
std::vector<std::string> memories;
std::vector<std::string> pre_memories;
std::vector<std::string> stepnet_outputs;
std::transform(arg_.memories.begin(), arg_.memories.end(),
std::back_inserter(memories),
[](const rnn::MemoryAttr& m) { return m.var; });
std::transform(arg_.memories.begin(), arg_.memories.end(),
std::back_inserter(pre_memories),
[](const rnn::MemoryAttr& m) { return m.pre_var; });
for (const auto& item : stepnet_->Outputs()) {
for (const auto& var : item.second) {
stepnet_outputs.push_back(var);
}
}

for (size_t step = 0; step < cache_.num_steps; step++) {
auto& scope = cache_.GetScope(step);
detail::CreateVariables(scope, arg_.inlinks);
detail::CreateVariables(scope, arg_.outlinks);
detail::CreateVariables(scope, memories);
detail::CreateVariables(scope, pre_memories);
detail::CreateVariables(scope, stepnet_outputs);
}
}
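
The std::transform calls above pull the state and pre-state variable names out of the memory descriptors; the same pattern on a simplified attribute struct (hypothetical names, for illustration):

#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

// Simplified stand-in for rnn::MemoryAttr.
struct MemAttr {
  std::string var;
  std::string pre_var;
  std::string boot_var;
};

// Collect one field of every memory descriptor, e.g. all `var` or all
// `pre_var` names, so they can be created in each step scope.
std::vector<std::string> CollectNames(const std::vector<MemAttr>& mems,
                                      std::string MemAttr::*field) {
  std::vector<std::string> names;
  std::transform(mems.begin(), mems.end(), std::back_inserter(names),
                 [field](const MemAttr& m) { return m.*field; });
  return names;
}

// Usage: CollectNames(mems, &MemAttr::var) or CollectNames(mems, &MemAttr::pre_var).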

void DynamicRecurrentOp::ConcatOutputs() const {
// TODO(superjom) transform this to a config
int level = 0;
// TODO(superjom) pass in some lod
// just a placeholder
framework::LoD lod;
for (size_t step = 0; step < cache_.num_steps; step++) {
auto& scope = cache_.GetScope(step);
for (auto& item : step_outputs_) {
auto* var = scope.FindVar(item.first);
PADDLE_ENFORCE_NOT_NULL(var);
auto* tensor = var->GetMutable<LoDTensor>();
tensor->mutable_data<value_type>(platform::CPUPlace());
item.second.WriteShared(step, *tensor);
}
}
// the inlinks' lods should be the same, so randomly get one lod.
const auto& some_lod =
cache_.scope->FindVar(arg_.inlinks.front())->Get<LoDTensor>().lod();
const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()];
for (auto& item : step_outputs_) {
auto tensor = item.second.Pack(level, dy_seq_metas_[item.first], lod);
auto& output = cache_.outlinks[item.first]->Get<LoDTensor>();
const_cast<LoDTensor*>(&output)->ShareDataWith<value_type>(tensor);
auto tensor = item.second.Pack(level, some_meta, some_lod);
auto* output = cache_.outlinks[item.first]->GetMutable<LoDTensor>();
const_cast<LoDTensor*>(output)->ShareDataWith<value_type>(tensor);
}
}
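
ConcatOutputs inverts the unpack step: per-step output batches are stitched back into per-sequence outputs using the input's metadata and LoD, so the result keeps the caller's segmentation. A conceptual sketch of that inverse transposition (rows stay in sorted order; `ori_idx` would map them back to the original input order):

#include <cstddef>
#include <vector>

// Stitch per-time-step batches back into per-sequence outputs. Because batch
// k's entries always belong to the k-th (sorted) sequence, appending them in
// step order reconstructs each sequence.
std::vector<std::vector<int>> PackFromBatches(
    const std::vector<std::vector<int>>& batches, size_t num_seqs) {
  std::vector<std::vector<int>> seqs(num_seqs);
  for (const auto& batch : batches) {
    for (size_t k = 0; k < batch.size(); ++k) {
      seqs[k].push_back(batch[k]);
    }
  }
  return seqs;
}

// E.g. {{1,4,6}, {2,5}, {3}} with 3 sequences -> {{1,2,3}, {4,5}, {6}}.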

void DynamicRecurrentOp::InitStates() const {
// init the first state
// TODO(superjom) prepare for the scenario where the boot state does not exist
for (auto memory : arg_.memories) {
auto* boot_state_var = cache_.scope->FindVar(memory.boot_var);
PADDLE_ENFORCE_NOT_NULL(boot_state_var);
auto& boot_state = boot_state_var->Get<LoDTensor>();
const auto& dims = boot_state.dims();

for (size_t step = 0; step < cache_.num_steps; step++) {
auto& cur_scope = cache_.GetScope(step);
// link pre-state to boot_state
// init state and pre-state
auto* pre_state = cur_scope.FindVar(memory.pre_var);
PADDLE_ENFORCE_NOT_NULL(pre_state);
pre_state->GetMutable<LoDTensor>();

auto* state = cur_scope.FindVar(memory.var);
PADDLE_ENFORCE_NOT_NULL(state);
state->GetMutable<LoDTensor>()->Resize(dims);
state->GetMutable<LoDTensor>()->mutable_data<value_type>(
platform::CPUPlace());

if (step == 0) {
auto* pre_state_tensor = pre_state->GetMutable<LoDTensor>();
pre_state_tensor->Resize(boot_state.dims());
pre_state_tensor->ShareDataWith<value_type>(boot_state);
} else {
auto& pre_scope = cache_.GetScope(step - 1);
auto* state_pre = pre_scope.FindVar(memory.var);
PADDLE_ENFORCE_NOT_NULL(state_pre);
pre_state->GetMutable<LoDTensor>()->ShareDataWith<value_type>(
*state_pre->GetMutable<LoDTensor>());
}
for (size_t step = 0; step < cache_.num_steps; step++) {
for (const auto& memory : arg_.memories) {
CreateState(memory, step);
LinkState(memory, step);
}
}
}

void DynamicRecurrentOp::CreateState(const rnn::MemoryAttr& memory,
size_t step) const {
auto& scope = cache_.GetScope(step);
auto& state = *cache_.GetTensor(scope, memory.var);
auto& boot_state = *cache_.GetTensor(*cache_.scope, memory.boot_var);

size_t num_instances =
step_inputs_[arg_.inlinks.front()].Read(step).dims()[0];
auto dims = boot_state.dims();
dims[0] = num_instances;

state.Resize(dims);
state.mutable_data<value_type>(platform::CPUPlace());
states_[memory.var].WriteShared(step, state);
}

void DynamicRecurrentOp::LinkState(const rnn::MemoryAttr& memory,
size_t step) const {
auto& scope = cache_.GetScope(step);
auto& state_pre = *cache_.GetTensor(scope, memory.pre_var);

// all the step_inputs' metas should be the same, just randomly select one
// and get the dyseq meta.
const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()];
size_t num_instances =
step_inputs_[arg_.inlinks.front()].Read(step).dims()[0];

LoDTensor* pre_state{nullptr};
if (step == 0) {
pre_state = cache_.GetTensor(*cache_.scope, memory.boot_var);
pre_state->mutable_data<float>(platform::CPUPlace());
// allocate memory
state_pre.Resize(pre_state->dims());
state_pre.mutable_data<value_type>(platform::CPUPlace());
detail::ReorderBootState<value_type>(some_meta, *pre_state, &state_pre,
pre_state->place());
} else {
pre_state = cache_.GetTensor(cache_.GetScope(step - 1), memory.var);
}

// shrink and share from the previous state
auto shrinked_pre_state = pre_state->Slice<value_type>(0, num_instances);
state_pre.ShareDataWith<value_type>(shrinked_pre_state);
}
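
Because the sequences are sorted by descending length, the instances alive at step t are a prefix of those alive at step t-1, so step t's pre-state can simply be the leading `num_instances` rows of the previous state. A sketch of that shrink on plain rows (this copies, whereas the op shares storage via ShareDataWith):

#include <cstddef>
#include <vector>

// Keep only the first `num_instances` rows of the previous step's state: the
// sequences that are still running at the current step.
std::vector<std::vector<float>> ShrinkPreState(
    const std::vector<std::vector<float>>& prev_state, size_t num_instances) {
  return std::vector<std::vector<float>>(
      prev_state.begin(), prev_state.begin() + num_instances);
}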

void DynamicRecurrentOp::ArgCache::Init(
const rnn::ArgumentName& name, const paddle::framework::OperatorBase& op,
const paddle::framework::Scope& scope, rnn::Argument* arg) {
@@ -261,6 +313,12 @@ Variable* DynamicRecurrentOp::ArgCache::GetVariable(const Scope& scope,
return var;
}

LoDTensor* DynamicRecurrentOp::ArgCache::GetTensor(
const framework::Scope& scope, const std::string& name) {
auto* var = GetVariable(scope, name);
return var->GetMutable<LoDTensor>();
}

const rnn::ArgumentName DynamicRecurrentOp::kArgName{
"step_net", "step_scopes", "inlinks", "outlinks",
"memories", "pre_memories", "boot_memories"};
