diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/gserver/layers/CrossEntropyOverBeam.cpp new file mode 100644 index 0000000000000..4acc077035b17 --- /dev/null +++ b/paddle/gserver/layers/CrossEntropyOverBeam.cpp @@ -0,0 +1,393 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "CrossEntropyOverBeam.h" + +namespace paddle { + +void CostForOneSequence::calValidExpandStep() { + validExpansionCount_ = 0; + goldAsExtraPath_ = true; + + for (size_t i = 0; i < beams_->expansionCount; ++i) { + real gold = static_cast(beams_->gold[i]); + if (i) { + real* start = beams_->candidateIds[i - 1]->getData(); + goldRowIds_[i] = std::count_if( + start, + start + goldRowIds_[i - 1] * beamSize_ + goldColIds_[i - 1], + [](const real& val) { return val != -1.; }); + } else { + goldRowIds_[i] = 0; + } + + real* start = + beams_->candidateIds[i]->getData() + goldRowIds_[i] * beamSize_; + real* findEnd = std::find(start, start + beamSize_, gold); + validExpansionCount_++; + + if (start + beamSize_ == findEnd) return; + goldColIds_[i] = findEnd - start; + } + if (goldColIds_[beams_->expansionCount - 1] != -1) goldAsExtraPath_ = false; +} + +size_t CostForOneSequence::initLastExpansion() { + int beamId = validExpansionCount_ - 1; + const MatrixPtr candidates = beams_->candidateIds[beamId]; + size_t height = candidates->getHeight(); + + /* initialization the last expansion. */ + size_t pathCount = std::count_if(candidates->getData(), + candidates->getData() + height * beamSize_, + [](const real& val) { return val != -1; }); + /* + * if the gold sequence falls off the beam during search, add the gold + * sequence as the last path into the all expanded candidates. + */ + if (goldAsExtraPath_) goldIdsInFinalExpansion_ = pathCount++; + + pathRowIdsInEachBeam_.clear(); + pathRowIdsInEachBeam_.resize(validExpansionCount_, + std::vector(pathCount, 0)); + parentIdsInBeam_.clear(); + parentIdsInBeam_.resize(pathCount, 0); + + if (goldAsExtraPath_) { + /* add gold sequence into the total expansion. */ + pathRowIdsInEachBeam_[beamId].back() = + beams_->gold[beamId] + + getSeqStartPos(beamId, goldRowIds_[validExpansionCount_ - 1]); + parentIdsInBeam_.back() = goldRowIds_[validExpansionCount_ - 1]; + } else { + size_t goldOffset = goldRowIds_[beamId] * beamSize_ + goldColIds_[beamId]; + goldIdsInFinalExpansion_ = + std::count_if(candidates->getData(), + candidates->getData() + goldOffset, + [](const real& val) { return val != -1.; }); + } + + /* + * TODO(caoying): fix this, store the indices of selected candidate + * paths into Argument.ids + */ + real* ids = candidates->getData(); + size_t curIdx = 0; + for (size_t i = 0; i < height; ++i) { + int basePos = getSeqStartPos(beamId, i); + for (size_t j = 0; j < beamSize_; ++j) { + int id = ids[i * beamSize_ + j]; + if (id == -1) continue; + pathRowIdsInEachBeam_[beamId][curIdx] = id + basePos; + parentIdsInBeam_[curIdx++] = i; + } + } + return pathCount; +} + +void CostForOneSequence::constructTotalExpansion() { + /* + * construct the entire expanded beam by begining with the last search + * in which gold falls off the beam. + */ + size_t totalPathCount = initLastExpansion(); + + for (int beamId = validExpansionCount_ - 2; beamId >= 0; --beamId) { + const MatrixPtr candidates = beams_->candidateIds[beamId]; + real* ids = candidates->getData(); + + int lastParentIdInBeam = -1; + int basePos = -1; + for (size_t i = 0; + i < (goldAsExtraPath_ ? totalPathCount - 1 : totalPathCount); + ++i) { + int id = ids[parentIdsInBeam_[i]]; + int parentRowId = std::div(parentIdsInBeam_[i], beamSize_).quot; + if (parentIdsInBeam_[i] != lastParentIdInBeam) + basePos = getSeqStartPos(beamId, parentRowId); + + pathRowIdsInEachBeam_[beamId][i] = id + basePos; + lastParentIdInBeam = parentIdsInBeam_[i]; + parentIdsInBeam_[i] = parentRowId; + + if (goldAsExtraPath_) + pathRowIdsInEachBeam_[beamId][totalPathCount - 1] = + beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]); + } + } +} + +real CostForOneSequence::globallyNormalizedScore() { + expandedPathScores_.resize(validExpansionCount_); + + Matrix::resizeOrCreate( + softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false); + softmaxOut_->zeroMem(); + MatrixPtr tmp = Matrix::create( + softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); + + for (size_t i = 0; i < validExpansionCount_; ++i) { + Matrix::resizeOrCreate(expandedPathScores_[i], + pathRowIdsInEachBeam_[i].size(), + 1, + false, + false); + expandedPathScores_[i]->zeroMem(); + + IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), + pathRowIdsInEachBeam_[i].size(), + false); + expandedPathScores_[i]->selectRows(*(beams_->scores[i]), *rowIds); + tmp->add(*expandedPathScores_[i]); + } + + softmaxOut_->softmax(*softmaxOut_); + return -std::log(softmaxOut_->getData()[goldIdsInFinalExpansion_]); +} + +real CostForOneSequence::forward() { + calValidExpandStep(); + constructTotalExpansion(); + return globallyNormalizedScore(); +} + +void CostForOneSequence::backward() { + /* + * when softmax layer is the output layer, and it is combined with + * cross-entropy as cost. The derivate with regard to softmax's input + * is simply: + * + * grad_i = softmax_out_i - target_i, + * + * and here hard label is used. + */ + softmaxOut_->getData()[goldIdsInFinalExpansion_] -= 1.; + + MatrixPtr tmp = Matrix::create( + softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false); + + for (size_t i = 0; i < validExpansionCount_; ++i) { + IVectorPtr rowIds = IVector::create(pathRowIdsInEachBeam_[i].data(), + pathRowIdsInEachBeam_[i].size(), + false); + /* + beams_->scoreGrad[i] has been intialized outside this class, this + class only keeps a pointer pointing to the original input gradients, + so here does not need to allocate or initalize the memory. + */ + tmp->addToRows(*beams_->scoreGrad[i], *rowIds); + } +} + +REGISTER_LAYER(cross_entropy_over_beam, CrossEntropyOverBeam); + +bool CrossEntropyOverBeam::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + CHECK_EQ(0U, inputLayers_.size() % 3) << "Error input number."; + + beamExpanCount_ = inputLayers_.size() / 3; + + candidateScores_.resize(beamExpanCount_); + candidateScoreGrad_.resize(beamExpanCount_); + + candidateInBeam_.resize(beamExpanCount_); + goldSequence_.resize(beamExpanCount_); + gradToInputs_.resize(beamExpanCount_); + + setNeedSequenceInfo(false); + return true; +} + +void CrossEntropyOverBeam::checkInputs() { + batchSize_ = 0; + for (size_t i = 0; i < beamExpanCount_; ++i) { + const Argument& scores = getInput(i * 3); + const Argument& selCandidates = getInput(i * 3 + 1); + const Argument& goldSeq = getInput(i * 3 + 2); + + if (i) { + CHECK(scores.hasSubseq()) << "input " << i << " " + << inputLayers_[i * 3]->getName() + << " should be a nested sequence"; + CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_); + CHECK_EQ(scores.getNumSequences(), batchSize_); + CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize()); + } else { + CHECK(scores.hasSeq()) << "input " << i << " " + << inputLayers_[i]->getName() + << " should be a sequence"; + batchSize_ = scores.getNumSequences(); + beamSize_ = getInputValue(i * 3 + 1)->getWidth(); + CHECK_EQ(batchSize_, selCandidates.getBatchSize()); + } + CHECK_EQ(1U, scores.value->getWidth()); + CHECK_EQ(batchSize_, goldSeq.getBatchSize()); + } +} + +void CrossEntropyOverBeam::copyInputsToCpu() { + auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) { + if (dynamic_cast(src.get())) { + Matrix::resizeOrCreate( + trg, src->getHeight(), src->getWidth(), false, false); + trg->copyFrom(*src); + } else { + trg = std::move(src); + } + }; + + auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) { + if (dynamic_cast(src.get())) { + IVector::resizeOrCreate(trg, src->getSize(), false); + trg->copyFrom(*src); + } else { + trg = std::move(src); + } + }; + + beamSplitPos_.clear(); + beamSplitPos_.resize(batchSize_, std::vector(beamExpanCount_, 0)); + for (size_t i = 0; i < beamExpanCount_; ++i) { + copyValue(getInputValue(i * 3), candidateScores_[i]); + copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]); + copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]); + + if (i) { + ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions; + const int* seqStarts = seqInfo->getMutableData(false); + ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions; + const int* subSeqStarts = subSeqInfo->getMutableData(false); + + size_t seqId = 1; + for (size_t subSeqId = 0; subSeqId < subSeqInfo->getSize() - 1; + ++subSeqId) { + CHECK_LT(seqId, seqInfo->getSize()); + if (subSeqStarts[subSeqId] == seqStarts[seqId]) { + beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i]; + seqId++; + } + beamSplitPos_[seqId - 1][i]++; + } + } else { + for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1; + } + } +} + +void CrossEntropyOverBeam::splitBatchBeams() { + beamCosts_.resize(batchSize_); + beamPerSeq_.resize(batchSize_, BeamExpansion(beamExpanCount_)); + + for (size_t i = 0; i < beamExpanCount_; ++i) { + int* seqStarts = + getInput(i * 3).sequenceStartPositions->getMutableData(false); + + int* subSeqStarts = nullptr; + int maxLen = 0; + if (i) { + subSeqStarts = + getInput(i * 3).subSequenceStartPositions->getMutableData(false); + maxLen = getInput(i * 3).subSequenceStartPositions->getSize() - 1; + } else { + maxLen = getInput(i).sequenceStartPositions->getSize() - 1; + } + + for (size_t j = 0; j < batchSize_; ++j) { + beamPerSeq_[j].scores[i] = + Matrix::create(candidateScores_[i]->getData() + seqStarts[j], + seqStarts[j + 1] - seqStarts[j], + 1, + false, + false); + beamPerSeq_[j].scoreGrad[i] = + Matrix::create(candidateScoreGrad_[i]->getData() + seqStarts[j], + seqStarts[j + 1] - seqStarts[j], + 1, + false, + false); + + int offset = j ? beamSplitPos_[j - 1][i] : 0; + int height = beamSplitPos_[j][i] - (j ? beamSplitPos_[j - 1][i] : 0); + CHECK_GE(maxLen, offset + height); + beamPerSeq_[j].seqInfo[i] = IVector::create( + (i ? subSeqStarts : seqStarts) + offset, height + 1, false); + + beamPerSeq_[j].candidateIds[i] = + Matrix::create(candidateInBeam_[i]->getData() + offset * beamSize_, + height, + beamSize_, + false, + false); + beamPerSeq_[j].gold[i] = goldSequence_[i]->getData()[j]; + + CHECK_LE(beamPerSeq_[j].gold[i], seqStarts[j + 1] - seqStarts[j]); + } + } +} + +void CrossEntropyOverBeam::resizeOutput() { + Matrix::resizeOrCreate(output_.value, batchSize_, 1, false, false); + output_.value->zeroMem(); + + for (size_t i = 0; i < beamExpanCount_; ++i) { + MatrixPtr inGrad = getInputGrad(i * 3); + if (dynamic_cast(inGrad.get())) { + Matrix::resizeOrCreate(candidateScoreGrad_[i], + inGrad->getHeight(), + inGrad->getWidth(), + false, + false); + } else { + candidateScoreGrad_[i] = std::move(inGrad); + } + candidateScoreGrad_[i]->zeroMem(); + } +} + +void CrossEntropyOverBeam::copyGradToGpu(size_t copyCount) { + for (size_t i = 0; i < beamExpanCount_; ++i) { + if (dynamic_cast(getInputGrad(i * 3).get())) + getInputGrad(i * 3)->copyFrom(*candidateScoreGrad_[i]); + + if (i == copyCount - 1) break; + } +} + +void CrossEntropyOverBeam::forward(PassType passType) { + Layer::forward(passType); + + checkInputs(); + copyInputsToCpu(); + + resizeOutput(); + splitBatchBeams(); + + MatrixPtr outputValue = getOutputValue(); + for (size_t i = 0; i < batchSize_; ++i) { + beamCosts_[i].setData( + std::move(std::make_shared(beamPerSeq_[i])), beamSize_); + outputValue->getData()[i] = beamCosts_[i].forward(); + } +} + +void CrossEntropyOverBeam::backward(const UpdateCallback& callback) { + for (size_t i = 0; i < batchSize_; ++i) { + beamCosts_[i].backward(); + copyGradToGpu(beamCosts_[i].getValidExpansionCount()); + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/gserver/layers/CrossEntropyOverBeam.h new file mode 100644 index 0000000000000..5643556f43370 --- /dev/null +++ b/paddle/gserver/layers/CrossEntropyOverBeam.h @@ -0,0 +1,135 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "CrossEntropyOverBeam.h" +#include "Layer.h" + +namespace paddle { + +/* This struct stores the beams in all search steps for a single sequence. */ +struct BeamExpansion { + std::vector scores; + std::vector seqInfo; + + std::vector candidateIds; + std::vector gold; + + std::vector scoreGrad; + + size_t expansionCount; + + explicit BeamExpansion(int n) { + expansionCount = n; + scores.resize(expansionCount); + seqInfo.resize(expansionCount); + candidateIds.resize(expansionCount); + scoreGrad.resize(expansionCount); + + gold.resize(expansionCount); + } +}; +typedef std::shared_ptr BeamExpansionPtr; + +class CostForOneSequence { +public: + CostForOneSequence() + : beamSize_(0), validExpansionCount_(0), goldAsExtraPath_(false) {} + void setData(const BeamExpansionPtr bPtr, size_t beamSize) { + beams_ = bPtr; + beamSize_ = beamSize; + + expandedPathScores_.clear(); + expandedPathScores_.resize(beams_->expansionCount); + + goldRowIds_.clear(); + goldRowIds_.resize(beams_->expansionCount, 0); + goldColIds_.clear(); + goldColIds_.resize(beams_->expansionCount, -1); + } + size_t getValidExpansionCount() { return validExpansionCount_; } + + real forward(); + void backward(); + +private: + void calValidExpandStep(); + void constructTotalExpansion(); + size_t initLastExpansion(); + real globallyNormalizedScore(); + + int getSeqStartPos(size_t beamId, size_t rowId) { + CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId); + int* starts = beams_->seqInfo[beamId]->getData(); + return starts[rowId] - starts[0]; + } + + size_t beamSize_; + size_t validExpansionCount_; + bool goldAsExtraPath_; + std::vector goldRowIds_; + std::vector goldColIds_; + + BeamExpansionPtr beams_; + std::vector> pathRowIdsInEachBeam_; + std::vector parentIdsInBeam_; + size_t goldIdsInFinalExpansion_; + + std::vector expandedPathScores_; + + MatrixPtr softmaxOut_; +}; + +class CrossEntropyOverBeam : public Layer { +public: + explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; + +private: + void checkInputs(); + void copyInputsToCpu(); + void resizeOutput(); + void copyGradToGpu(size_t copyCount); + void splitBatchBeams(); + + size_t beamExpanCount_; + size_t batchSize_; + size_t beamSize_; + + /* + * the process of constructing beams is not friendly to GPU, currently, this + * layer only runs on CPU, if any of its inputs is on GPU memory, then copy + * it to CPU memory. + */ + std::vector candidateScores_; + std::vector candidateScoreGrad_; + std::vector candidateInBeam_; + std::vector gradToInputs_; + std::vector goldSequence_; + std::vector> beamSplitPos_; + + /* + * split entire bath of beams into beam per sequnence and store the result + * into this member. + */ + std::vector beamPerSeq_; + /* beamCosts_ is used to propagate error in one sequence. */ + std::vector beamCosts_; +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 346c01ced648e..de9b8e63dfc42 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -34,6 +34,13 @@ add_unittest_without_exec(test_CRFLayerGrad add_test(NAME test_CRFLayerGrad COMMAND test_CRFLayerGrad) +################ test_CrossEntropyOverBeam #################### +add_unittest_without_exec(test_CrossEntropyOverBeam + test_CrossEntropyOverBeamGrad.cpp + LayerGradUtil.cpp) +add_test(NAME test_CrossEntropyOverBeam + COMMAND test_CrossEntropyOverBeam) + ################ test_SeqSliceLayerGrad #################### add_unittest_without_exec(test_SeqSliceLayerGrad test_SeqSliceLayerGrad.cpp diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp new file mode 100644 index 0000000000000..538d18cdc3d26 --- /dev/null +++ b/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -0,0 +1,353 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/trainer/Trainer.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT + +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +const size_t MAX_SEQ_NUM = 23; +const size_t MAX_SEQ_LEN = 50; +const size_t MAX_BEAM_SIZE = 27; + +const size_t SEED = (size_t)(time(NULL)); + +struct SingleBeamExpansion { + vector seqStartPos; + vector subSeqStartPos; + vector candidateScores; + + // TODO(caoying): store this into Argument.ids + vector selectedIndices; + + vector groundTruth; + vector inBeam; + vector rowIdxInBeam; + vector colIdxInBeam; + + void resetGroundTruth(size_t n) { + groundTruth.clear(); + groundTruth.resize(n, -1); + + inBeam.clear(); + inBeam.resize(n, 0); + + rowIdxInBeam.clear(); + rowIdxInBeam.resize(n, -1); + + colIdxInBeam.clear(); + colIdxInBeam.resize(n, -1); + } +}; + +inline float randFloat() { + return static_cast(rand()) / static_cast(RAND_MAX); +} + +void genRand(real* numbers, size_t n) { + default_random_engine generator; + uniform_real_distribution distribution(0.0, 1.0); + for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator); +} + +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; +} + +void genCandidateScores(bool hasSubseq, + size_t beamSize, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + vector& seqStartPos = curBeam.seqStartPos; + seqStartPos.resize(1, 0); + vector& subSeqStartPos = curBeam.subSeqStartPos; + subSeqStartPos.resize(1, 0); + + srand(SEED); + if (prevBeam.selectedIndices.size()) { + if (prevBeam.subSeqStartPos.size() > 1) { + int seqIdx = 1; + // samples in previous beam are nested sequences. + for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) { + for (size_t j = 0; j < beamSize; ++j) { + if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break; + subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) + + subSeqStartPos.back()); + } + if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) { + seqStartPos.push_back(subSeqStartPos.back()); + seqIdx++; + } + } + } else { + for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) { + if (i && i % beamSize == 0) { + seqStartPos.push_back(subSeqStartPos.back()); + if (i == prevBeam.selectedIndices.size()) break; + } + if (prevBeam.selectedIndices[i] == -1.) continue; + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } else { + // the first beam expansion + int seqNum = 1 + (rand() % MAX_SEQ_NUM); + for (int i = 0; i < seqNum; ++i) { + if (hasSubseq) { + for (size_t j = 0; j < 1 + (rand() % MAX_SEQ_NUM); ++j) + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + seqStartPos.push_back(subSeqStartPos.back()); + } else { + seqStartPos.push_back(seqStartPos.back() + + (1 + (rand() % MAX_SEQ_LEN))); + } + } + } + + size_t totalSeqNum = hasSubseq ? subSeqStartPos.back() : seqStartPos.back(); + curBeam.candidateScores.resize(totalSeqNum, 0.); + genRand(curBeam.candidateScores.data(), totalSeqNum); +} + +void genSelectedIndices(size_t beamSize, + vector& seqStartPos, + vector& selectedIndices) { + size_t selectedIdsCount = beamSize * (seqStartPos.size() - 1); + selectedIndices.resize(selectedIdsCount, -1.); + + for (size_t i = 0; i < seqStartPos.size() - 1; ++i) { + int seqLen = seqStartPos[i + 1] - seqStartPos[i]; + int n = min(seqLen, static_cast(beamSize)); + vector ids = randSampling(seqLen, n); + memcpy(selectedIndices.data() + i * beamSize, + ids.data(), + sizeof(real) * ids.size()); + } +} + +void genGroundTruth(vector& beamExpansions, + size_t beamSize) { + SingleBeamExpansion& beam = beamExpansions[1]; + size_t seqNum = beam.seqStartPos.size() - 1; + for (size_t i = 2; i < beamExpansions.size(); ++i) + CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1); + + srand(SEED); + + // initialize the first beam. + beam.resetGroundTruth(seqNum); + for (size_t i = 0; i < seqNum; ++i) { + if (randFloat() > 0.5) { + /* + * force the randomly generated label falls in the beam by chance 0.5. + * otherwise, when sequence length is relatively long and beam size is + * relatively small, the gold sequences falls off the beam at in the + * first search. + */ + real* begPos = beam.selectedIndices.data() + i * beamSize; + beam.colIdxInBeam[i] = + rand() % count_if(begPos, begPos + beamSize, [](const real& val) { + return val != -1.; + }); + beam.groundTruth[i] = + beam.selectedIndices[i * beamSize + beam.colIdxInBeam[i]]; + beam.inBeam[i] = 1; + } else { + int label = rand() % (beam.seqStartPos[i + 1] - beam.seqStartPos[i]); + beam.groundTruth[i] = label; + + real* begPos = beam.selectedIndices.data() + i * beamSize; + real* endPos = begPos + beamSize; + real* lblPos = find(begPos, endPos, real(label)); + if (lblPos != endPos) { + beam.inBeam[i] = 1; + beam.colIdxInBeam[i] = lblPos - begPos; + } + } + beam.rowIdxInBeam[i] = i; + } + + // iterate over each beam expansions + for (size_t i = 2; i < beamExpansions.size(); ++i) { + SingleBeamExpansion& curBeam = beamExpansions[i]; + SingleBeamExpansion& prevBeam = beamExpansions[i - 1]; + curBeam.resetGroundTruth(seqNum); + + // iterate over each sequence + for (size_t j = 0; j < seqNum; ++j) { + if (!prevBeam.inBeam[j]) continue; + + // gold sequence falls in the beam in previous search. + real* begPos = prevBeam.selectedIndices.data(); + int offset = + prevBeam.rowIdxInBeam[j] * beamSize + prevBeam.colIdxInBeam[j]; + curBeam.rowIdxInBeam[j] = count_if( + begPos, begPos + offset, [](const real& val) { return val != -1.; }); + + if (randFloat() > 0.5) { + // force the randomly generated label falls in the beam by chance 0.5. + + real* start = + curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; + int n = rand() % count_if(start, start + beamSize, [](const real& val) { + return val != -1.; + }); + curBeam.colIdxInBeam[j] = n; + curBeam.groundTruth[j] = *(start + n); + curBeam.inBeam[j] = 1; + } else { + CHECK_LE(curBeam.rowIdxInBeam[j] + 1, + curBeam.subSeqStartPos.size() - 1); + int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j]]; + int end = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[j] + 1]; + CHECK_GT(size_t(end), size_t(start)); + int label = rand() % (end - start); + + curBeam.groundTruth[j] = label; + real* findBeg = + curBeam.selectedIndices.data() + curBeam.rowIdxInBeam[j] * beamSize; + real* lblPos = + find(findBeg, findBeg + beamSize, static_cast(label)); + if (lblPos != (findBeg + beamSize)) { + curBeam.inBeam[j] = 1; + curBeam.colIdxInBeam[j] = lblPos - findBeg; + } + } + } + } +} + +void genOneBeam(size_t beamSize, + bool hasSubseq, + SingleBeamExpansion& prevBeam, + SingleBeamExpansion& curBeam) { + genCandidateScores(hasSubseq, beamSize, prevBeam, curBeam); + genSelectedIndices(beamSize, + hasSubseq ? curBeam.subSeqStartPos : curBeam.seqStartPos, + curBeam.selectedIndices); +} + +void genRandomBeamExpansion(size_t expansionCount, + size_t beamSize, + vector& beamExpansions) { + beamExpansions.clear(); + beamExpansions.resize(expansionCount + 1); + + // beamExpansions[0] is reserved. + for (size_t i = 1; i <= expansionCount; ++i) + genOneBeam(beamSize, bool(i - 1), beamExpansions[i - 1], beamExpansions[i]); + genGroundTruth(beamExpansions, beamSize); +} + +void testCrossEntropyOverBeam(bool useGpu, + size_t beamSize, + vector& beams) { + TestConfig config; + config.layerConfig.set_type("cross_entropy_over_beam"); + + size_t seqNum = 0; + for (size_t i = 1; i < beams.size(); ++i) { + const SingleBeamExpansion& beam = beams[i]; + // create scores for all the candidates + MatrixPtr candidateScorePtr = + Matrix::create(beam.candidateScores.size(), 1, false, false); + candidateScorePtr->copyFrom(beam.candidateScores.data(), + beam.candidateScores.size()); + + ostringstream paramName; + paramName << "candidate_scores_" << i; + + if (beam.subSeqStartPos.size() > 1) { + seqNum = beam.subSeqStartPos.size() - 1; + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + paramName.str(), + candidateScorePtr, + beam.seqStartPos, + beam.subSeqStartPos}); + } else { + seqNum = beam.seqStartPos.size() - 1; + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + paramName.str(), + candidateScorePtr, + beam.seqStartPos}); + } + config.layerConfig.add_inputs(); + + // create indices for the selected candidates + MatrixPtr selectedCandidates = + Matrix::create(seqNum, beamSize, false, false); + selectedCandidates->copyFrom(beam.selectedIndices.data(), + beam.selectedIndices.size()); + paramName.clear(); + paramName << "selected_candidates_" << i; + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, paramName.str(), selectedCandidates}); + config.layerConfig.add_inputs(); + + // create the ground truth + paramName.clear(); + paramName << "label_" << i; + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, paramName.str(), beam.groundTruth}); + config.layerConfig.add_inputs(); + } + + testLayerGrad( + config, "cross_entropy_over_beam", seqNum, false, useGpu, false); +} + +TEST(Layer, CrossEntropyOverBeam) { + LOG(INFO) << "SEED = " << SEED; + const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE; + LOG(INFO) << "beamSize = " << beamSize; + + // TODO(caoying): test with random beam expansions. + const size_t expansionCount = 3; + vector beams; + genRandomBeamExpansion(expansionCount, beamSize, beams); + + for (bool useGpu : {false, true}) + testCrossEntropyOverBeam(useGpu, beamSize, beams); +} + +int main(int argc, char** argv) { + initMain(argc, argv); + hl_start(); + hl_init(FLAGS_gpu_id); + FLAGS_thread_local_rand_use_global_seed = true; + srand(SEED); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 2b945de18a4cd..b0e9e740c84e6 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -677,6 +677,7 @@ void Argument::reorganizeSeqInfo( const ICpuGpuVectorPtr subSeqStartPos, std::vector>& reorganizedSeqInfo) { CHECK(seqStartPos); + reorganizedSeqInfo.clear(); int seqNum = seqStartPos->getSize() - 1; int* seqStarts = seqStartPos->getMutableData(false); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index c11037c3c8b03..0788e3994ebd9 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1607,6 +1607,21 @@ def __init__(self, name, inputs, softmax_selfnorm_alpha=0.1, **xargs): self.config.softmax_selfnorm_alpha = softmax_selfnorm_alpha +@config_layer('cross_entropy_over_beam') +class CrossEntropyOverBeamLayer(LayerBase): + def __init__(self, name, inputs, **xargs): + config_assert(len(inputs) % 3 == 0, "Error input number.") + super(CrossEntropyOverBeamLayer, self).__init__( + name, 'cross_entropy_over_beam', 0, inputs, **xargs) + input_num = len(inputs) / 3 + for i in range(input_num): + input_layer = self.get_input_layer(i * 3) + config_assert(input_layer.size == 1, ( + "Inputs for this layer are made up of " + "several triples, in which the first one is scores over " + "all candidate paths, whose size should be equal to 1.")) + + @config_layer('fc') class FCLayer(LayerBase): layer_type = 'fc' @@ -2268,6 +2283,7 @@ def init(cls, name, inputs, device=None, coeff=1.): define_cost('MultiClassCrossEntropy', 'multi-class-cross-entropy') +define_cost('CrossEntropyOverBeamCostLayer', 'cross_entropy_over_beam') define_cost('RankingCost', 'rank-cost') define_cost('AucValidation', 'auc-validation') define_cost('PnpairValidation', 'pnpair-validation') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index a525ce71d0f40..e73098910cb8a 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import functools import collections import inspect @@ -106,6 +105,8 @@ 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', + 'BeamInput', + 'cross_entropy_over_beam', 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', @@ -225,6 +226,7 @@ class LayerType(object): HUBER_CLASSIFICATION = 'huber_classification' CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' + CROSS_ENTROPY_OVER_BEAM = 'cross_entropy_over_beam' SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy' SUM_COST = 'sum_cost' @@ -4071,8 +4073,12 @@ def __cost_input__(input, label, weight=None): """ inputs and parents for cost layers. """ - ipts = [Input(input.name), Input(label.name)] - parents = [input, label] + if isinstance(input, LayerOutput): + input = [input] + if isinstance(label, LayerOutput): + label = [label] + ipts = [Input(ipt.name) for ipt in (input + label)] + parents = [ipt for ipt in (input + label)] if weight is not None: assert weight.size == 1 ipts.append(Input(weight.name)) @@ -5777,10 +5783,10 @@ def multi_binary_label_cross_entropy(input, if input.activation is None or \ not isinstance(input.activation, SigmoidActivation): - logger.log( - logging.WARN, - "%s is not recommend for multi_binary_label_cross_entropy's activation, " - "maybe the sigmoid is better" % repr(input.activation)) + logger.log(logging.WARN, + ("%s is not a recommended activation for " + "multi_binary_label_cross_entropy, sigmoid is better") % + repr(input.activation)) Layer( name=name, @@ -5795,6 +5801,113 @@ def multi_binary_label_cross_entropy(input, size=1) +class BeamInput(object): + """ + Define the input for cross_entropy_over_beam layer. + + A beam is made up of a triple: the first one is scores over all + candidates; the second one is indices of top k selected candidates; the + third one is the index of ground truth, which is also always called + gold. + """ + + def __init__(self, candidate_scores, selected_candidates, gold): + assert isinstance(candidate_scores, LayerOutput) + self.candidate_scores = candidate_scores + assert candidate_scores.size == 1 + + assert isinstance(selected_candidates, LayerOutput) + self.selected_candidates = selected_candidates + + assert isinstance(gold, LayerOutput) + self.gold = gold + + +@wrap_name_default() +@layer_support() +def cross_entropy_over_beam(input, name=None): + """ + This layer is used in learning to search models, which is to solve complex + joint prediction problems based on learning to search through a + problem-defined search space. + + Specifically, the learning to search process for this layer begins with + searching a target sequence from a nested sequence. In the first search + step, top beam size sequences with highest scores, indices of these top k + sequences in the original nested sequence, and the ground truth (also + called gold) altogether (a triple) make up of the first beam. + + Then, several special positions, for example, start and end positions + that define meaningful segments are searched. In these searches, top k + positions with highest scores are selected, and then sequence, starting + from the selected starts till ends of the sequences (or a fixed position) + are taken to search next. + + We call the possible top k results returned in one search the beam. This + search process can be repeated for pre-defined turns and leads to several + beam expansions. + + Finally, the layer cross_entropy_over_beam takes all the beam expansions + which contain several candidate targets found along the multi-step search. + cross_entropy_over_beam calculates cross entropy over the expanded beams + which all the candidates in the beam as the normalized factor. + + Note that, if gold falls off the beam at search step t, then the cost is + calculated over the beam at step t. + + This cost layer always works together with kmax_sequence_score_layer, + sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a + sub-search space. + + + The example usage is: + + .. code-block:: python + + cost = cross_entropy_over_beam(input=[ + BeamInput( + candidate_scores=beam1_candidates, + selected_candidates=beam1_topk, + gold=gold1), + BeamInput( + candidate_scores=beam2_candidates, + selected_candidates=beam2_topk, + gold=gold2), + ]) + + + :param input: input beams for this layer. + :type input: BeamInput + :param name: input beams for this layer. + :type name: basestring + :return: LayerOutput object. + :rtype: LayerOutput + """ + + if isinstance(input, BeamInput): + input = [input] + else: + assert isinstance(input, list), ( + 'input for cross_entropy_over_beam shold be a python list ' + 'of BeamInput object.') + for ipt in input: + assert isinstance(ipt, BeamInput), ( + 'input for cross_entropy_over_beam ' + 'should be a BeamInput object.') + + ipts = [] + parents = [] + for beam in input: + parents += [beam.candidate_scores, beam.selected_candidates, beam.gold] + ipts += [ + beam.candidate_scores.name, beam.selected_candidates.name, + beam.gold.name + ] + + Layer(name=name, type=LayerType.CROSS_ENTROPY_OVER_BEAM, inputs=ipts) + return LayerOutput(name, LayerType.CROSS_ENTROPY, parents=parents, size=1) + + @wrap_name_default() @layer_support() def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 1ca5c8a07ebb7..e87ee6a432592 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -9,6 +9,6 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer -test_seq_slice_layer) +test_seq_slice_layer test_cross_entropy_over_beam) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr new file mode 100644 index 0000000000000..c43fc48e22204 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr @@ -0,0 +1,207 @@ +type: "nn" +layers { + name: "sentence_states" + type: "data" + size: 32 + active_type: "" +} +layers { + name: "sentence_scores" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__kmax_sequence_score_layer_0__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__sub_nested_seq_layer_0__" + type: "sub_nested_seq" + size: 32 + active_type: "" + inputs { + input_layer_name: "sentence_states" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__sub_nested_seq_layer_0__" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_1__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + beam_size: 5 +} +layers { + name: "__seq_slice_layer_0__" + type: "seq_slice" + size: 32 + active_type: "" + inputs { + input_layer_name: "__sub_nested_seq_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + select_first: true +} +layers { + name: "__fc_layer_1__" + type: "fc" + size: 1 + active_type: "" + inputs { + input_layer_name: "__seq_slice_layer_0__" + input_parameter_name: "___fc_layer_1__.w0" + } + bias_parameter_name: "___fc_layer_1__.wbias" +} +layers { + name: "__kmax_sequence_score_layer_2__" + type: "kmax_seq_score" + active_type: "" + inputs { + input_layer_name: "__fc_layer_1__" + } + beam_size: 5 +} +layers { + name: "sentences_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "start_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "end_ids" + type: "data" + size: 1 + active_type: "" +} +layers { + name: "__cross_entropy_over_beam_0__" + type: "cross_entropy_over_beam" + active_type: "" + inputs { + input_layer_name: "sentence_scores" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_0__" + } + inputs { + input_layer_name: "sentences_ids" + } + inputs { + input_layer_name: "__fc_layer_0__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_1__" + } + inputs { + input_layer_name: "start_ids" + } + inputs { + input_layer_name: "__fc_layer_1__" + } + inputs { + input_layer_name: "__kmax_sequence_score_layer_2__" + } + inputs { + input_layer_name: "end_ids" + } +} +parameters { + name: "___fc_layer_0__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___fc_layer_1__.w0" + size: 32 + initial_mean: 0.0 + initial_std: 0.176776695297 + dims: 32 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_1__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "sentence_scores" +input_layer_names: "sentences_ids" +input_layer_names: "sentence_states" +input_layer_names: "start_ids" +input_layer_names: "end_ids" +output_layer_names: "__cross_entropy_over_beam_0__" +sub_models { + name: "root" + layer_names: "sentence_states" + layer_names: "sentence_scores" + layer_names: "__kmax_sequence_score_layer_0__" + layer_names: "__sub_nested_seq_layer_0__" + layer_names: "__fc_layer_0__" + layer_names: "__kmax_sequence_score_layer_1__" + layer_names: "__seq_slice_layer_0__" + layer_names: "__fc_layer_1__" + layer_names: "__kmax_sequence_score_layer_2__" + layer_names: "sentences_ids" + layer_names: "start_ids" + layer_names: "end_ids" + layer_names: "__cross_entropy_over_beam_0__" + input_layer_names: "sentence_scores" + input_layer_names: "sentences_ids" + input_layer_names: "sentence_states" + input_layer_names: "start_ids" + input_layer_names: "end_ids" + output_layer_names: "__cross_entropy_over_beam_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py new file mode 100644 index 0000000000000..240e703dc904e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +#coding=utf-8 + +from paddle.trainer_config_helpers import * +beam_size = 5 + +# the first beam expansion. +sentence_states = data_layer(name="sentence_states", size=32) +sentence_scores = data_layer(name="sentence_scores", size=1) +topk_sentence_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the second beam expansion. +topk_sen = sub_nested_seq_layer( + input=sentence_states, selected_indices=topk_sentence_ids) +start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation()) +topk_start_pos_ids = kmax_sequence_score_layer( + input=sentence_scores, beam_size=beam_size) + +# the final beam expansion. +topk_start_spans = seq_slice_layer( + input=topk_sen, starts=topk_start_pos_ids, ends=None) +end_pos_scores = fc_layer( + input=topk_start_spans, size=1, act=LinearActivation()) +topk_end_pos_ids = kmax_sequence_score_layer( + input=end_pos_scores, beam_size=beam_size) + +# define the cost +sentence_idx = data_layer(name="sentences_ids", size=1) +start_idx = data_layer(name="start_ids", size=1) +end_idx = data_layer(name="end_ids", size=1) +cost = cross_entropy_over_beam(input=[ + BeamInput( + candidate_scores=sentence_scores, + selected_candidates=topk_sentence_ids, + gold=sentence_idx), BeamInput( + candidate_scores=start_pos_scores, + selected_candidates=topk_start_pos_ids, + gold=start_idx), BeamInput( + candidate_scores=end_pos_scores, + selected_candidates=topk_end_pos_ids, + gold=end_idx) +]) + +outputs(cost) diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 4dcc3ab57e7e6..8acea6155c588 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -70,7 +70,7 @@ def iter_infer_field(self, field, **kwargs): item = [each_result[each_field] for each_field in field] yield item - def infer(self, input, field='value', **kwargs): + def infer(self, input, field='value', flatten_result=True, **kwargs): """ Infer a data by model. :param input: input data batch. Should be python iterable object. @@ -83,7 +83,10 @@ def infer(self, input, field='value', **kwargs): retv = [[] for i in xrange(len(result))] for i, item in enumerate(result): retv[i].append(item) - retv = [numpy.concatenate(out) for out in retv] + + if flatten_result: + retv = [numpy.concatenate(out) for out in retv] + if len(retv) == 1: return retv[0] else: