From 1841cea19b23970086293f29f97f4aa8e0f32961 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 21 Mar 2018 07:48:01 -0700 Subject: [PATCH 1/9] Add decoder for deep asr model --- fluid/DeepASR/decoder/decoder.cc | 21 --- fluid/DeepASR/decoder/post_decode_faster.cc | 165 ++++++++++++++++++ .../{decoder.h => post_decode_faster.h} | 7 +- fluid/DeepASR/decoder/pybind.cc | 4 +- fluid/DeepASR/decoder/setup.py | 33 +++- fluid/DeepASR/decoder/setup.sh | 6 +- 6 files changed, 206 insertions(+), 30 deletions(-) delete mode 100644 fluid/DeepASR/decoder/decoder.cc create mode 100644 fluid/DeepASR/decoder/post_decode_faster.cc rename fluid/DeepASR/decoder/{decoder.h => post_decode_faster.h} (61%) diff --git a/fluid/DeepASR/decoder/decoder.cc b/fluid/DeepASR/decoder/decoder.cc deleted file mode 100644 index a99f972e2f..0000000000 --- a/fluid/DeepASR/decoder/decoder.cc +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "decoder.h" - -std::string decode(std::vector> probs_mat) { - // Add decoding logic here - - return "example decoding result"; -} diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc new file mode 100644 index 0000000000..d3f20a6ea3 --- /dev/null +++ b/fluid/DeepASR/decoder/post_decode_faster.cc @@ -0,0 +1,165 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "post_decode_faster.h" +#include "base/kaldi-common.h" +#include "base/timer.h" +#include "decoder/decodable-matrix.h" +#include "decoder/faster-decoder.h" +#include "fstext/fstext-lib.h" +#include "hmm/transition-model.h" +#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc +#include "tree/context-dep.h" +#include "util/common-utils.h" + +std::vector decode(std::string word_syms_filename, + std::string fst_in_filename, + std::string logprior_rxfilename, + std::string posterior_rspecifier, + std::string words_wspecifier, + std::string alignment_wspecifier) { + std::vector decoding_results; + + try { + using namespace kaldi; + typedef kaldi::int32 int32; + using fst::SymbolTable; + using fst::VectorFst; + using fst::StdArc; + + const char *usage = + "Decode, reading log-likelihoods (of transition-ids or whatever symbol " + "is on the graph) as matrices."; + ParseOptions po(usage); + bool binary = true; + BaseFloat acoustic_scale = 1.5; + bool allow_partial = true; + FasterDecoderOptions decoder_opts; + decoder_opts.Register(&po, true); // true == include obscure settings. 
+ po.Register("binary", &binary, "Write output in binary mode"); + po.Register("allow-partial", + &allow_partial, + "Produce output even when final state was not reached"); + po.Register("acoustic-scale", + &acoustic_scale, + "Scaling factor for acoustic likelihoods"); + + Int32VectorWriter words_writer(words_wspecifier); + + Int32VectorWriter alignment_writer(alignment_wspecifier); + fst::SymbolTable *word_syms = NULL; + if (word_syms_filename != "") { + word_syms = fst::SymbolTable::ReadText(word_syms_filename); + if (!word_syms) + KALDI_ERR << "Could not read symbol table from file " + << word_syms_filename; + } + + SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier); + std::ifstream is_logprior(logprior_rxfilename); + Vector logprior; + logprior.Read(is_logprior, false); + + // It's important that we initialize decode_fst after loglikes_reader, as it + // can prevent crashes on systems installed without enough virtual memory. + // It has to do with what happens on UNIX systems if you call fork() on a + // large process: the page-table entries are duplicated, which requires a + // lot of virtual memory. + VectorFst *decode_fst = fst::ReadFstKaldi(fst_in_filename); + + BaseFloat tot_like = 0.0; + kaldi::int64 frame_count = 0; + int num_success = 0, num_fail = 0; + FasterDecoder decoder(*decode_fst, decoder_opts); + + Timer timer; + + for (; !posterior_reader.Done(); posterior_reader.Next()) { + std::string key = posterior_reader.Key(); + Matrix loglikes(posterior_reader.Value()); + KALDI_LOG << key << " " << loglikes.NumRows() << " x " + << loglikes.NumCols(); + + if (loglikes.NumRows() == 0) { + KALDI_WARN << "Zero-length utterance: " << key; + num_fail++; + continue; + } + KALDI_ASSERT(loglikes.NumCols() == logprior.Dim()); + + loglikes.ApplyLog(); + loglikes.AddVecToRows(-1.0, logprior); + + DecodableMatrixScaled decodable(loglikes, acoustic_scale); + decoder.Decode(&decodable); + + VectorFst decoded; // linear FST. 
+ + if ((allow_partial || decoder.ReachedFinal()) && + decoder.GetBestPath(&decoded)) { + num_success++; + if (!decoder.ReachedFinal()) + KALDI_WARN << "Decoder did not reach end-state, outputting partial " + "traceback."; + + std::vector alignment; + std::vector words; + LatticeWeight weight; + frame_count += loglikes.NumRows(); + + GetLinearSymbolSequence(decoded, &alignment, &words, &weight); + + words_writer.Write(key, words); + if (alignment_writer.IsOpen()) alignment_writer.Write(key, alignment); + if (word_syms != NULL) { + std::string res; + for (size_t i = 0; i < words.size(); i++) { + std::string s = word_syms->Find(words[i]); + res += s; + if (s == "") + KALDI_ERR << "Word-id " << words[i] << " not in symbol table."; + std::cerr << s << ' '; + } + decoding_results.push_back(res); + } + BaseFloat like = -weight.Value1() - weight.Value2(); + tot_like += like; + KALDI_LOG << "Log-like per frame for utterance " << key << " is " + << (like / loglikes.NumRows()) << " over " + << loglikes.NumRows() << " frames."; + + } else { + num_fail++; + KALDI_WARN << "Did not successfully decode utterance " << key + << ", len = " << loglikes.NumRows(); + } + } + + double elapsed = timer.Elapsed(); + KALDI_LOG << "Time taken [excluding initialization] " << elapsed + << "s: real-time factor assuming 100 frames/sec is " + << (elapsed * 100.0 / frame_count); + KALDI_LOG << "Done " << num_success << " utterances, failed for " + << num_fail; + KALDI_LOG << "Overall log-likelihood per frame is " + << (tot_like / frame_count) << " over " << frame_count + << " frames."; + + delete word_syms; + delete decode_fst; + } catch (const std::exception &e) { + std::cerr << e.what(); + } + return decoding_results; +} diff --git a/fluid/DeepASR/decoder/decoder.h b/fluid/DeepASR/decoder/post_decode_faster.h similarity index 61% rename from fluid/DeepASR/decoder/decoder.h rename to fluid/DeepASR/decoder/post_decode_faster.h index 4a67fa366a..04983a3b93 100644 --- 
a/fluid/DeepASR/decoder/decoder.h +++ b/fluid/DeepASR/decoder/post_decode_faster.h @@ -15,4 +15,9 @@ limitations under the License. */ #include #include -std::string decode(std::vector> probs_mat); +std::vector decode(std::string word_syms_filename, + std::string fst_in_filename, + std::string logprior_rxfilename, + std::string posterior_respecifier, + std::string words_wspecifier, + std::string alignment_wspecifier = ""); diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc index 8cd65903ea..a8744ee2ac 100644 --- a/fluid/DeepASR/decoder/pybind.cc +++ b/fluid/DeepASR/decoder/pybind.cc @@ -15,11 +15,11 @@ limitations under the License. */ #include #include -#include "decoder.h" +#include "post_decode_faster.h" namespace py = pybind11; -PYBIND11_MODULE(decoder, m) { +PYBIND11_MODULE(post_decode_faster, m) { m.doc() = "Decode function for Deep ASR model"; m.def("decode", diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py index cedd5d644e..e1c74fcb0d 100644 --- a/fluid/DeepASR/decoder/setup.py +++ b/fluid/DeepASR/decoder/setup.py @@ -13,27 +13,50 @@ # limitations under the License. 
import os +import glob from distutils.core import setup, Extension from distutils.sysconfig import get_config_vars -args = ['-std=c++11'] +args = [ + '-std=c++11', '-Wno-sign-compare', '-Wno-unused-variable', + '-Wno-unused-local-typedefs', '-Wno-unused-but-set-variable', + '-Wno-deprecated-declarations', '-Wno-unused-function' +] # remove warning about -Wstrict-prototypes (opt, ) = get_config_vars('OPT') os.environ['OPT'] = " ".join(flag for flag in opt.split() if flag != '-Wstrict-prototypes') +os.environ['CC'] = 'g++' + +LIBS = [ + 'fst', 'kaldi-base', 'kaldi-util', 'kaldi-matrix', 'kaldi-tree', + 'kaldi-hmm', 'kaldi-fstext', 'kaldi-decoder', 'kaldi-lat' +] + +LIB_DIRS = [ + 'kaldi/tools/openfst/lib', 'kaldi/src/base', 'kaldi/src/matrix', + 'kaldi/src/util', 'kaldi/src/tree', 'kaldi/src/hmm', 'kaldi/src/fstext', + 'kaldi/src/decoder', 'kaldi/src/lat' +] ext_modules = [ Extension( - 'decoder', - ['pybind.cc', 'decoder.cc'], - include_dirs=['pybind11/include', '.'], + 'post_decode_faster', + ['pybind.cc', 'post_decode_faster.cc'], + include_dirs=[ + 'pybind11/include', '.', 'kaldi/src/', + 'kaldi/tools/openfst/src/include' + ], + libraries=LIBS, language='c++', + library_dirs=LIB_DIRS, + runtime_library_dirs=LIB_DIRS, extra_compile_args=args, ), ] setup( - name='decoder', + name='post_decode_faster', version='0.0.1', author='Paddle', author_email='', diff --git a/fluid/DeepASR/decoder/setup.sh b/fluid/DeepASR/decoder/setup.sh index 71fd6626ef..74cec0a482 100644 --- a/fluid/DeepASR/decoder/setup.sh +++ b/fluid/DeepASR/decoder/setup.sh @@ -1,7 +1,11 @@ - +set -e if [ ! -d pybind11 ]; then git clone https://github.com/pybind/pybind11.git fi +if [ ! 
-d kaldi ]; then + git clone https://github.com/kaldi-asr/kaldi.git +fi + python setup.py build_ext -i From b33396315f3f374d24dfb3c2a691e6eb60e83c02 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 21 Mar 2018 19:36:33 -0700 Subject: [PATCH 2/9] Reimpl in class to separate init and decoding --- fluid/DeepASR/decoder/post_decode_faster.cc | 275 ++++++++++---------- fluid/DeepASR/decoder/post_decode_faster.h | 38 ++- fluid/DeepASR/decoder/pybind.cc | 14 +- 3 files changed, 174 insertions(+), 153 deletions(-) diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc index d3f20a6ea3..876c0d8ebb 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.cc +++ b/fluid/DeepASR/decoder/post_decode_faster.cc @@ -13,153 +13,146 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "post_decode_faster.h" -#include "base/kaldi-common.h" -#include "base/timer.h" -#include "decoder/decodable-matrix.h" -#include "decoder/faster-decoder.h" -#include "fstext/fstext-lib.h" -#include "hmm/transition-model.h" -#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc -#include "tree/context-dep.h" -#include "util/common-utils.h" - -std::vector decode(std::string word_syms_filename, - std::string fst_in_filename, - std::string logprior_rxfilename, - std::string posterior_rspecifier, - std::string words_wspecifier, - std::string alignment_wspecifier) { + +using namespace kaldi; +typedef kaldi::int32 int32; +using fst::SymbolTable; +using fst::VectorFst; +using fst::StdArc; + +Decoder::Decoder(std::string word_syms_filename, + std::string fst_in_filename, + std::string logprior_rxfilename) { + const char *usage = + "Decode, reading log-likelihoods (of transition-ids or whatever symbol " + "is on the graph) as matrices."; + + std::string words_wspecifier = "ark,t:out.ark"; + std::string alignment_wspecifier = ""; + ParseOptions po(usage); + binary = true; + acoustic_scale = 1.5; +
allow_partial = true; + FasterDecoderOptions decoder_opts; + decoder_opts.Register(&po, true); // true == include obscure settings. + po.Register("binary", &binary, "Write output in binary mode"); + po.Register("allow-partial", + &allow_partial, + "Produce output even when final state was not reached"); + po.Register("acoustic-scale", + &acoustic_scale, + "Scaling factor for acoustic likelihoods"); + + words_writer = new Int32VectorWriter(words_wspecifier); + + alignment_writer = new Int32VectorWriter(alignment_wspecifier); + word_syms = NULL; + if (word_syms_filename != "") { + word_syms = fst::SymbolTable::ReadText(word_syms_filename); + if (!word_syms) + KALDI_ERR << "Could not read symbol table from file " + << word_syms_filename; + } + + std::ifstream is_logprior(logprior_rxfilename); + logprior.Read(is_logprior, false); + + // It's important that we initialize decode_fst after loglikes_reader, as it + // can prevent crashes on systems installed without enough virtual memory. + // It has to do with what happens on UNIX systems if you call fork() on a + // large process: the page-table entries are duplicated, which requires a + // lot of virtual memory. 
+ decode_fst = fst::ReadFstKaldi(fst_in_filename); + + decoder = new FasterDecoder(*decode_fst, decoder_opts); +} + + +Decoder::~Decoder() { + if (!word_syms) delete word_syms; + delete decode_fst; + delete decoder; + delete words_writer; + delete alignment_writer; +} + +std::vector Decoder::decode(std::string posterior_rspecifier) { + SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier); std::vector decoding_results; - try { - using namespace kaldi; - typedef kaldi::int32 int32; - using fst::SymbolTable; - using fst::VectorFst; - using fst::StdArc; - - const char *usage = - "Decode, reading log-likelihoods (of transition-ids or whatever symbol " - "is on the graph) as matrices."; - ParseOptions po(usage); - bool binary = true; - BaseFloat acoustic_scale = 1.5; - bool allow_partial = true; - FasterDecoderOptions decoder_opts; - decoder_opts.Register(&po, true); // true == include obscure settings. - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("allow-partial", - &allow_partial, - "Produce output even when final state was not reached"); - po.Register("acoustic-scale", - &acoustic_scale, - "Scaling factor for acoustic likelihoods"); - - Int32VectorWriter words_writer(words_wspecifier); - - Int32VectorWriter alignment_writer(alignment_wspecifier); - fst::SymbolTable *word_syms = NULL; - if (word_syms_filename != "") { - word_syms = fst::SymbolTable::ReadText(word_syms_filename); - if (!word_syms) - KALDI_ERR << "Could not read symbol table from file " - << word_syms_filename; + BaseFloat tot_like = 0.0; + kaldi::int64 frame_count = 0; + int num_success = 0, num_fail = 0; + + Timer timer; + for (; !posterior_reader.Done(); posterior_reader.Next()) { + std::string key = posterior_reader.Key(); + Matrix loglikes(posterior_reader.Value()); + KALDI_LOG << key << " " << loglikes.NumRows() << " x " + << loglikes.NumCols(); + + if (loglikes.NumRows() == 0) { + KALDI_WARN << "Zero-length utterance: " << key; + num_fail++; + 
continue; } - - SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier); - std::ifstream is_logprior(logprior_rxfilename); - Vector logprior; - logprior.Read(is_logprior, false); - - // It's important that we initialize decode_fst after loglikes_reader, as it - // can prevent crashes on systems installed without enough virtual memory. - // It has to do with what happens on UNIX systems if you call fork() on a - // large process: the page-table entries are duplicated, which requires a - // lot of virtual memory. - VectorFst *decode_fst = fst::ReadFstKaldi(fst_in_filename); - - BaseFloat tot_like = 0.0; - kaldi::int64 frame_count = 0; - int num_success = 0, num_fail = 0; - FasterDecoder decoder(*decode_fst, decoder_opts); - - Timer timer; - - for (; !posterior_reader.Done(); posterior_reader.Next()) { - std::string key = posterior_reader.Key(); - Matrix loglikes(posterior_reader.Value()); - KALDI_LOG << key << " " << loglikes.NumRows() << " x " - << loglikes.NumCols(); - - if (loglikes.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << key; - num_fail++; - continue; - } - KALDI_ASSERT(loglikes.NumCols() == logprior.Dim()); - - loglikes.ApplyLog(); - loglikes.AddVecToRows(-1.0, logprior); - - DecodableMatrixScaled decodable(loglikes, acoustic_scale); - decoder.Decode(&decodable); - - VectorFst decoded; // linear FST. 
- - if ((allow_partial || decoder.ReachedFinal()) && - decoder.GetBestPath(&decoded)) { - num_success++; - if (!decoder.ReachedFinal()) - KALDI_WARN << "Decoder did not reach end-state, outputting partial " - "traceback."; - - std::vector alignment; - std::vector words; - LatticeWeight weight; - frame_count += loglikes.NumRows(); - - GetLinearSymbolSequence(decoded, &alignment, &words, &weight); - - words_writer.Write(key, words); - if (alignment_writer.IsOpen()) alignment_writer.Write(key, alignment); - if (word_syms != NULL) { - std::string res; - for (size_t i = 0; i < words.size(); i++) { - std::string s = word_syms->Find(words[i]); - res += s; - if (s == "") - KALDI_ERR << "Word-id " << words[i] << " not in symbol table."; - std::cerr << s << ' '; - } - decoding_results.push_back(res); + KALDI_ASSERT(loglikes.NumCols() == logprior.Dim()); + + loglikes.ApplyLog(); + loglikes.AddVecToRows(-1.0, logprior); + + DecodableMatrixScaled decodable(loglikes, acoustic_scale); + decoder->Decode(&decodable); + + VectorFst decoded; // linear FST. 
+ + if ((allow_partial || decoder->ReachedFinal()) && + decoder->GetBestPath(&decoded)) { + num_success++; + if (!decoder->ReachedFinal()) + KALDI_WARN << "Decoder did not reach end-state, outputting partial " + "traceback."; + + std::vector alignment; + std::vector words; + LatticeWeight weight; + frame_count += loglikes.NumRows(); + + GetLinearSymbolSequence(decoded, &alignment, &words, &weight); + + words_writer->Write(key, words); + if (alignment_writer->IsOpen()) alignment_writer->Write(key, alignment); + if (word_syms != NULL) { + std::string res; + for (size_t i = 0; i < words.size(); i++) { + std::string s = word_syms->Find(words[i]); + res += s; + if (s == "") + KALDI_ERR << "Word-id " << words[i] << " not in symbol table."; + std::cerr << s << ' '; } - BaseFloat like = -weight.Value1() - weight.Value2(); - tot_like += like; - KALDI_LOG << "Log-like per frame for utterance " << key << " is " - << (like / loglikes.NumRows()) << " over " - << loglikes.NumRows() << " frames."; - - } else { - num_fail++; - KALDI_WARN << "Did not successfully decode utterance " << key - << ", len = " << loglikes.NumRows(); + decoding_results.push_back(res); } + BaseFloat like = -weight.Value1() - weight.Value2(); + tot_like += like; + KALDI_LOG << "Log-like per frame for utterance " << key << " is " + << (like / loglikes.NumRows()) << " over " << loglikes.NumRows() + << " frames."; + + } else { + num_fail++; + KALDI_WARN << "Did not successfully decode utterance " << key + << ", len = " << loglikes.NumRows(); } - - double elapsed = timer.Elapsed(); - KALDI_LOG << "Time taken [excluding initialization] " << elapsed - << "s: real-time factor assuming 100 frames/sec is " - << (elapsed * 100.0 / frame_count); - KALDI_LOG << "Done " << num_success << " utterances, failed for " - << num_fail; - KALDI_LOG << "Overall log-likelihood per frame is " - << (tot_like / frame_count) << " over " << frame_count - << " frames."; - - delete word_syms; - delete decode_fst; - } catch (const 
std::exception &e) { - std::cerr << e.what(); } + + double elapsed = timer.Elapsed(); + KALDI_LOG << "Time taken [excluding initialization] " << elapsed + << "s: real-time factor assuming 100 frames/sec is " + << (elapsed * 100.0 / frame_count); + KALDI_LOG << "Done " << num_success << " utterances, failed for " << num_fail; + KALDI_LOG << "Overall log-likelihood per frame is " + << (tot_like / frame_count) << " over " << frame_count + << " frames."; return decoding_results; } diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h index 04983a3b93..c1c6ac6930 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.h +++ b/fluid/DeepASR/decoder/post_decode_faster.h @@ -14,10 +14,36 @@ limitations under the License. */ #include #include +#include "base/kaldi-common.h" +#include "base/timer.h" +#include "decoder/decodable-matrix.h" +#include "decoder/faster-decoder.h" +#include "fstext/fstext-lib.h" +#include "hmm/transition-model.h" +#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc +#include "tree/context-dep.h" +#include "util/common-utils.h" -std::vector decode(std::string word_syms_filename, - std::string fst_in_filename, - std::string logprior_rxfilename, - std::string posterior_respecifier, - std::string words_wspecifier, - std::string alignment_wspecifier = ""); + +class Decoder { +public: + Decoder(std::string word_syms_filename, + std::string fst_in_filename, + std::string logprior_rxfilename); + ~Decoder(); + + std::vector decode(std::string posterior_rspecifier); + +private: + fst::SymbolTable *word_syms; + fst::VectorFst *decode_fst; + kaldi::FasterDecoder *decoder; + kaldi::Vector logprior; + + kaldi::Int32VectorWriter *words_writer; + kaldi::Int32VectorWriter *alignment_writer; + + bool binary; + kaldi::BaseFloat acoustic_scale; + bool allow_partial; +}; diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc index a8744ee2ac..efa37d5d51 100644 --- 
a/fluid/DeepASR/decoder/pybind.cc +++ b/fluid/DeepASR/decoder/pybind.cc @@ -20,10 +20,12 @@ limitations under the License. */ namespace py = pybind11; PYBIND11_MODULE(post_decode_faster, m) { - m.doc() = "Decode function for Deep ASR model"; - - m.def("decode", - &decode, - "Decode one input probability matrix " - "and return the transcription"); + m.doc() = "Decoder for Deep ASR model"; + + py::class_(m, "Decoder") + .def(py::init()) + .def("decode", + &Decoder::decode, + "Decode one input probability matrix " + "and return the transcription"); } From 185ff858da8352ba7dc978057cdcb0e4de90556c Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Wed, 21 Mar 2018 19:57:57 -0700 Subject: [PATCH 3/9] Remove unused variables and output --- fluid/DeepASR/decoder/post_decode_faster.cc | 16 ---------------- fluid/DeepASR/decoder/post_decode_faster.h | 3 --- 2 files changed, 19 deletions(-) diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc index 876c0d8ebb..140099bea7 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.cc +++ b/fluid/DeepASR/decoder/post_decode_faster.cc @@ -27,8 +27,6 @@ Decoder::Decoder(std::string word_syms_filename, "Decode, reading log-likelihoods (of transition-ids or whatever symbol " "is on the graph) as matrices."; - std::string words_wspecifier = "ark,t:out.ark"; - std::string alignment_wspecifier = ""; ParseOptions po(usage); binary = true; acoustic_scale = 1.5; @@ -43,9 +41,6 @@ Decoder::Decoder(std::string word_syms_filename, &acoustic_scale, "Scaling factor for acoustic likelihoods"); - words_writer = new Int32VectorWriter(words_wspecifier); - - alignment_writer = new Int32VectorWriter(alignment_wspecifier); word_syms = NULL; if (word_syms_filename != "") { word_syms = fst::SymbolTable::ReadText(word_syms_filename); @@ -72,8 +67,6 @@ Decoder::~Decoder() { if (!word_syms) delete word_syms; delete decode_fst; delete decoder; - delete words_writer; - delete alignment_writer; } std::vector 
Decoder::decode(std::string posterior_rspecifier) { @@ -88,8 +81,6 @@ std::vector Decoder::decode(std::string posterior_rspecifier) { for (; !posterior_reader.Done(); posterior_reader.Next()) { std::string key = posterior_reader.Key(); Matrix loglikes(posterior_reader.Value()); - KALDI_LOG << key << " " << loglikes.NumRows() << " x " - << loglikes.NumCols(); if (loglikes.NumRows() == 0) { KALDI_WARN << "Zero-length utterance: " << key; @@ -120,8 +111,6 @@ std::vector Decoder::decode(std::string posterior_rspecifier) { GetLinearSymbolSequence(decoded, &alignment, &words, &weight); - words_writer->Write(key, words); - if (alignment_writer->IsOpen()) alignment_writer->Write(key, alignment); if (word_syms != NULL) { std::string res; for (size_t i = 0; i < words.size(); i++) { @@ -129,16 +118,11 @@ std::vector Decoder::decode(std::string posterior_rspecifier) { res += s; if (s == "") KALDI_ERR << "Word-id " << words[i] << " not in symbol table."; - std::cerr << s << ' '; } decoding_results.push_back(res); } BaseFloat like = -weight.Value1() - weight.Value2(); tot_like += like; - KALDI_LOG << "Log-like per frame for utterance " << key << " is " - << (like / loglikes.NumRows()) << " over " << loglikes.NumRows() - << " frames."; - } else { num_fail++; KALDI_WARN << "Did not successfully decode utterance " << key diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h index c1c6ac6930..49d680c58a 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.h +++ b/fluid/DeepASR/decoder/post_decode_faster.h @@ -40,9 +40,6 @@ class Decoder { kaldi::FasterDecoder *decoder; kaldi::Vector logprior; - kaldi::Int32VectorWriter *words_writer; - kaldi::Int32VectorWriter *alignment_writer; - bool binary; kaldi::BaseFloat acoustic_scale; bool allow_partial; From 77ce7a9202f7ea90c6030ed3d468ca36fb6543fc Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 22 Mar 2018 00:11:18 -0700 Subject: [PATCH 4/9] Avoid using namespace kaldi --- 
fluid/DeepASR/decoder/post_decode_faster.cc | 23 ++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc index 140099bea7..5c0027c4ba 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.cc +++ b/fluid/DeepASR/decoder/post_decode_faster.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include "post_decode_faster.h" -using namespace kaldi; typedef kaldi::int32 int32; using fst::SymbolTable; using fst::VectorFst; @@ -27,11 +26,11 @@ Decoder::Decoder(std::string word_syms_filename, "Decode, reading log-likelihoods (of transition-ids or whatever symbol " "is on the graph) as matrices."; - ParseOptions po(usage); + kaldi::ParseOptions po(usage); binary = true; acoustic_scale = 1.5; allow_partial = true; - FasterDecoderOptions decoder_opts; + kaldi::FasterDecoderOptions decoder_opts; decoder_opts.Register(&po, true); // true == include obscure settings. po.Register("binary", &binary, "Write output in binary mode"); po.Register("allow-partial", @@ -59,7 +58,7 @@ Decoder::Decoder(std::string word_syms_filename, // lot of virtual memory. 
decode_fst = fst::ReadFstKaldi(fst_in_filename); - decoder = new FasterDecoder(*decode_fst, decoder_opts); + decoder = new kaldi::FasterDecoder(*decode_fst, decoder_opts); } @@ -70,17 +69,17 @@ Decoder::~Decoder() { } std::vector Decoder::decode(std::string posterior_rspecifier) { - SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier); + kaldi::SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier); std::vector decoding_results; - BaseFloat tot_like = 0.0; + kaldi::BaseFloat tot_like = 0.0; kaldi::int64 frame_count = 0; int num_success = 0, num_fail = 0; - Timer timer; + kaldi::Timer timer; for (; !posterior_reader.Done(); posterior_reader.Next()) { std::string key = posterior_reader.Key(); - Matrix loglikes(posterior_reader.Value()); + kaldi::Matrix loglikes(posterior_reader.Value()); if (loglikes.NumRows() == 0) { KALDI_WARN << "Zero-length utterance: " << key; @@ -92,10 +91,10 @@ std::vector Decoder::decode(std::string posterior_rspecifier) { loglikes.ApplyLog(); loglikes.AddVecToRows(-1.0, logprior); - DecodableMatrixScaled decodable(loglikes, acoustic_scale); + kaldi::DecodableMatrixScaled decodable(loglikes, acoustic_scale); decoder->Decode(&decodable); - VectorFst decoded; // linear FST. + VectorFst decoded; // linear FST. 
if ((allow_partial || decoder->ReachedFinal()) && decoder->GetBestPath(&decoded)) { @@ -106,7 +105,7 @@ std::vector Decoder::decode(std::string posterior_rspecifier) { std::vector alignment; std::vector words; - LatticeWeight weight; + kaldi::LatticeWeight weight; frame_count += loglikes.NumRows(); GetLinearSymbolSequence(decoded, &alignment, &words, &weight); @@ -121,7 +120,7 @@ std::vector Decoder::decode(std::string posterior_rspecifier) { } decoding_results.push_back(res); } - BaseFloat like = -weight.Value1() - weight.Value2(); + kaldi::BaseFloat like = -weight.Value1() - weight.Value2(); tot_like += like; } else { num_fail++; From e3e9bb43eebd7af13238c212fadd03606812b31e Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 22 Mar 2018 01:10:49 -0700 Subject: [PATCH 5/9] Make the kaldi root directory flexible --- fluid/DeepASR/decoder/setup.py | 18 ++++++++++++------ fluid/DeepASR/decoder/setup.sh | 4 ---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py index e1c74fcb0d..eaaf746132 100644 --- a/fluid/DeepASR/decoder/setup.py +++ b/fluid/DeepASR/decoder/setup.py @@ -17,6 +17,12 @@ from distutils.core import setup, Extension from distutils.sysconfig import get_config_vars +try: + kaldi_root = os.environ['KALDI_ROOT'] +except: + raise ValueError("Enviroment variable 'KALDI_ROOT' is not defined. 
Please " + "install kaldi and export KALDI_ROOT= .") + args = [ '-std=c++11', '-Wno-sign-compare', '-Wno-unused-variable', '-Wno-unused-local-typedefs', '-Wno-unused-but-set-variable', @@ -35,21 +41,21 @@ ] LIB_DIRS = [ - 'kaldi/tools/openfst/lib', 'kaldi/src/base', 'kaldi/src/matrix', - 'kaldi/src/util', 'kaldi/src/tree', 'kaldi/src/hmm', 'kaldi/src/fstext', - 'kaldi/src/decoder', 'kaldi/src/lat' + 'tools/openfst/lib', 'src/base', 'src/matrix', 'src/util', 'src/tree', + 'src/hmm', 'src/fstext', 'src/decoder', 'src/lat' ] +LIB_DIRS = [os.path.join(kaldi_root, path) for path in LIB_DIRS] ext_modules = [ Extension( 'post_decode_faster', ['pybind.cc', 'post_decode_faster.cc'], include_dirs=[ - 'pybind11/include', '.', 'kaldi/src/', - 'kaldi/tools/openfst/src/include' + 'pybind11/include', '.', os.path.join(kaldi_root, 'src'), + os.path.join(kaldi_root, 'tools/openfst/src/include') ], - libraries=LIBS, language='c++', + libraries=LIBS, library_dirs=LIB_DIRS, runtime_library_dirs=LIB_DIRS, extra_compile_args=args, ), diff --git a/fluid/DeepASR/decoder/setup.sh b/fluid/DeepASR/decoder/setup.sh index 74cec0a482..1471f85f41 100644 --- a/fluid/DeepASR/decoder/setup.sh +++ b/fluid/DeepASR/decoder/setup.sh @@ -4,8 +4,4 @@ if [ ! -d pybind11 ]; then git clone https://github.com/pybind/pybind11.git fi -if [ ! 
-d kaldi ]; then - git clone https://github.com/kaldi-asr/kaldi.git -fi - python setup.py build_ext -i From 4faf79a6d101f9f13b6d0dfe42e125c6bfd10f50 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sat, 24 Mar 2018 05:01:34 -0700 Subject: [PATCH 6/9] Support input scores in ndarray format --- fluid/DeepASR/decoder/post_decode_faster.cc | 62 ++++++++++++++++++++- fluid/DeepASR/decoder/post_decode_faster.h | 6 ++ fluid/DeepASR/decoder/pybind.cc | 9 ++- 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc index 5c0027c4ba..957af550e3 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.cc +++ b/fluid/DeepASR/decoder/post_decode_faster.cc @@ -22,7 +22,7 @@ using fst::StdArc; Decoder::Decoder(std::string word_syms_filename, std::string fst_in_filename, std::string logprior_rxfilename) { - const char *usage = + const char* usage = "Decode, reading log-likelihoods (of transition-ids or whatever symbol " "is on the graph) as matrices."; @@ -68,6 +68,23 @@ Decoder::~Decoder() { delete decoder; } +std::string Decoder::decode( + std::string key, std::vector>& log_probs) { + size_t num_frames = log_probs.size(); + size_t dim_label = log_probs[0].size(); + + kaldi::Matrix loglikes( + num_frames, dim_label, kaldi::kSetZero, kaldi::kStrideEqualNumCols); + for (size_t i = 0; i < num_frames; ++i) { + memcpy(loglikes.Data() + i * dim_label, + log_probs[i].data(), + sizeof(kaldi::BaseFloat) * dim_label); + } + + return decode(key, loglikes); +} + + std::vector Decoder::decode(std::string posterior_rspecifier) { kaldi::SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier); std::vector decoding_results; @@ -139,3 +156,46 @@ std::vector Decoder::decode(std::string posterior_rspecifier) { << " frames."; return decoding_results; } + + +std::string Decoder::decode(std::string key, + kaldi::Matrix& loglikes) { + std::string decoding_result; + + if (loglikes.NumRows() == 0) { 
+ KALDI_WARN << "Zero-length utterance: " << key; + } + KALDI_ASSERT(loglikes.NumCols() == logprior.Dim()); + + loglikes.ApplyLog(); + loglikes.AddVecToRows(-1.0, logprior); + + kaldi::DecodableMatrixScaled decodable(loglikes, acoustic_scale); + decoder->Decode(&decodable); + + VectorFst<kaldi::LatticeArc> decoded; // linear FST. + + if ((allow_partial || decoder->ReachedFinal()) && + decoder->GetBestPath(&decoded)) { + if (!decoder->ReachedFinal()) + KALDI_WARN << "Decoder did not reach end-state, outputting partial " + "traceback."; + + std::vector<int32> alignment; + std::vector<int32> words; + kaldi::LatticeWeight weight; + + GetLinearSymbolSequence(decoded, &alignment, &words, &weight); + + if (word_syms != NULL) { + for (size_t i = 0; i < words.size(); i++) { + std::string s = word_syms->Find(words[i]); + decoding_result += s; + if (s == "") + KALDI_ERR << "Word-id " << words[i] << " not in symbol table."; + } + } + } + + return decoding_result; +} diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h index 49d680c58a..6a5830e296 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.h +++ b/fluid/DeepASR/decoder/post_decode_faster.h @@ -34,7 +34,13 @@ class Decoder { std::vector<std::string> decode(std::string posterior_rspecifier); + std::string decode(std::string key, + std::vector<std::vector<kaldi::BaseFloat>> &log_probs); + private: + std::string decode(std::string key, + kaldi::Matrix<kaldi::BaseFloat> &loglikes); + fst::SymbolTable *word_syms; fst::VectorFst<fst::StdArc> *decode_fst; kaldi::FasterDecoder *decoder; diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc index efa37d5d51..1b91f02b89 100644 --- a/fluid/DeepASR/decoder/pybind.cc +++ b/fluid/DeepASR/decoder/pybind.cc @@ -25,7 +25,14 @@ PYBIND11_MODULE(post_decode_faster, m) { py::class_<Decoder>(m, "Decoder") .def(py::init<std::string, std::string, std::string>()) .def("decode", - &Decoder::decode, + (std::vector<std::string> (Decoder::*)(std::string)) & + Decoder::decode, + "Decode one input probability matrix " + "and return the transcription") + .def("decode", + (std::string 
(Decoder::*)( + std::string, std::vector<std::vector<kaldi::BaseFloat>>&)) & + Decoder::decode, "Decode one input probability matrix " "and return the transcription"); } From f029f93181764555b211bf2a056b2e6dd75ba547 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sat, 24 Mar 2018 05:15:53 -0700 Subject: [PATCH 7/9] Clean code and add some comments --- fluid/DeepASR/decoder/post_decode_faster.cc | 60 +-------------------- fluid/DeepASR/decoder/post_decode_faster.h | 4 ++ 2 files changed, 5 insertions(+), 59 deletions(-) diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc index 957af550e3..6b318000e3 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.cc +++ b/fluid/DeepASR/decoder/post_decode_faster.cc @@ -89,71 +89,13 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) { kaldi::SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier); std::vector<std::string> decoding_results; - kaldi::BaseFloat tot_like = 0.0; - kaldi::int64 frame_count = 0; - int num_success = 0, num_fail = 0; - - kaldi::Timer timer; for (; !posterior_reader.Done(); posterior_reader.Next()) { std::string key = posterior_reader.Key(); kaldi::Matrix<kaldi::BaseFloat> loglikes(posterior_reader.Value()); - if (loglikes.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << key; - num_fail++; - continue; - } - KALDI_ASSERT(loglikes.NumCols() == logprior.Dim()); - - loglikes.ApplyLog(); - loglikes.AddVecToRows(-1.0, logprior); - - kaldi::DecodableMatrixScaled decodable(loglikes, acoustic_scale); - decoder->Decode(&decodable); - - VectorFst<kaldi::LatticeArc> decoded; // linear FST. 
- - if ((allow_partial || decoder->ReachedFinal()) && - decoder->GetBestPath(&decoded)) { - num_success++; - if (!decoder->ReachedFinal()) - KALDI_WARN << "Decoder did not reach end-state, outputting partial " - "traceback."; - - std::vector<int32> alignment; - std::vector<int32> words; - kaldi::LatticeWeight weight; - frame_count += loglikes.NumRows(); - - GetLinearSymbolSequence(decoded, &alignment, &words, &weight); - - if (word_syms != NULL) { - std::string res; - for (size_t i = 0; i < words.size(); i++) { - std::string s = word_syms->Find(words[i]); - res += s; - if (s == "") - KALDI_ERR << "Word-id " << words[i] << " not in symbol table."; - } - decoding_results.push_back(res); - } - kaldi::BaseFloat like = -weight.Value1() - weight.Value2(); - tot_like += like; - } else { - num_fail++; - KALDI_WARN << "Did not successfully decode utterance " << key - << ", len = " << loglikes.NumRows(); - } + decoding_results.push_back(decode(key, loglikes)); } - double elapsed = timer.Elapsed(); - KALDI_LOG << "Time taken [excluding initialization] " << elapsed - << "s: real-time factor assuming 100 frames/sec is " - << (elapsed * 100.0 / frame_count); - KALDI_LOG << "Done " << num_success << " utterances, failed for " << num_fail; - KALDI_LOG << "Overall log-likelihood per frame is " - << (tot_like / frame_count) << " over " << frame_count - << " frames."; return decoding_results; } diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h index 6a5830e296..c0b54cdf9f 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.h +++ b/fluid/DeepASR/decoder/post_decode_faster.h @@ -32,12 +32,16 @@ class Decoder { std::string logprior_rxfilename); ~Decoder(); + // Interface to accept the scores read from specifier and return + // the batch decoding results std::vector<std::string> decode(std::string posterior_rspecifier); + // Accept the scores of one utterance and return the decoding result std::string decode(std::string key, std::vector<std::vector<kaldi::BaseFloat>> &log_probs); private: + 
// For decoding one utterance std::string decode(std::string key, kaldi::Matrix<kaldi::BaseFloat> &loglikes); From 49c9cc80a428ccbbcbd91f186a40bcb9797770a3 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sun, 25 Mar 2018 19:30:06 -0700 Subject: [PATCH 8/9] Enable decoder in infer_by_ckpt --- fluid/DeepASR/decoder/setup.py | 1 + fluid/DeepASR/infer_by_ckpt.py | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py index eaaf746132..1818ecbf00 100644 --- a/fluid/DeepASR/decoder/setup.py +++ b/fluid/DeepASR/decoder/setup.py @@ -45,6 +45,7 @@ 'src/hmm', 'src/fstext', 'src/decoder', 'src/lat' ] LIB_DIRS = [os.path.join(kaldi_root, path) for path in LIB_DIRS] +LIB_DIRS = [os.path.abspath(path) for path in LIB_DIRS] ext_modules = [ Extension( diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py index 68dd573647..300f736b3c 100644 --- a/fluid/DeepASR/infer_by_ckpt.py +++ b/fluid/DeepASR/infer_by_ckpt.py @@ -13,7 +13,7 @@ import data_utils.augmentor.trans_add_delta as trans_add_delta import data_utils.augmentor.trans_splice as trans_splice import data_utils.async_data_reader as reader -import decoder.decoder as decoder +from decoder.post_decode_faster import Decoder from data_utils.util import lodtensor_to_ndarray from model_utils.model import stacked_lstmp_model from data_utils.util import split_infer_result @@ -81,6 +81,21 @@ def parse_args(): type=str, default='./checkpoint', help="The checkpoint path to init model. (default: %(default)s)") + parser.add_argument( + '--vocabulary', + type=str, + default='./decoder/graph/words.txt', + help="The path to vocabulary. (default: %(default)s)") + parser.add_argument( + '--graphs', + type=str, + default='./decoder/graph/TLG.fst', + help="The path to TLG graphs for decoding. (default: %(default)s)") + parser.add_argument( + '--log_prior', + type=str, + default="./decoder/logprior", + help="The log prior probs for training data. 
(default: %(default)s)") args = parser.parse_args() return args @@ -154,8 +169,8 @@ def infer_from_ckpt(args): probs, lod = lodtensor_to_ndarray(results[0]) infer_batch = split_infer_result(probs, lod) for index, sample in enumerate(infer_batch): - print("Decoding %d: " % (batch_id * args.batch_size + index), - decoder.decode(sample)) + key = "utter#%d" % (batch_id * args.batch_size + index) + print(key, ": ", decoder.decode(key, sample), "\n") print(np.mean(infer_costs), np.mean(infer_accs)) From 193c7e2de594bd0b31726dae884010b60620be83 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 26 Mar 2018 19:36:18 -0700 Subject: [PATCH 9/9] Format license and comments --- fluid/DeepASR/decoder/post_decode_faster.cc | 5 +++-- fluid/DeepASR/decoder/post_decode_faster.h | 7 ++++--- fluid/DeepASR/decoder/pybind.cc | 19 ++++++++++--------- fluid/DeepASR/decoder/setup.py | 2 +- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc index 6b318000e3..d7f1d1ab34 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.cc +++ b/fluid/DeepASR/decoder/post_decode_faster.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -69,7 +69,8 @@ Decoder::~Decoder() { } std::string Decoder::decode( - std::string key, std::vector<std::vector<kaldi::BaseFloat>>& log_probs) { + std::string key, + const std::vector<std::vector<kaldi::BaseFloat>>& log_probs) { size_t num_frames = log_probs.size(); size_t dim_label = log_probs[0].size(); diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h index c0b54cdf9f..2e31a1c19e 100644 --- a/fluid/DeepASR/decoder/post_decode_faster.h +++ b/fluid/DeepASR/decoder/post_decode_faster.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved. +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -37,8 +37,9 @@ class Decoder { std::vector<std::string> decode(std::string posterior_rspecifier); // Accept the scores of one utterance and return the decoding result - std::string decode(std::string key, - std::vector<std::vector<kaldi::BaseFloat>> &log_probs); + std::string decode( + std::string key, + const std::vector<std::vector<kaldi::BaseFloat>> &log_probs); private: // For decoding one utterance diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc index 1b91f02b89..56439d1802 100644 --- a/fluid/DeepASR/decoder/pybind.cc +++ b/fluid/DeepASR/decoder/pybind.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -27,12 +27,13 @@ PYBIND11_MODULE(post_decode_faster, m) { .def("decode", (std::vector<std::string> (Decoder::*)(std::string)) & Decoder::decode, - "Decode one input probability matrix " - "and return the transcription") - .def("decode", - (std::string (Decoder::*)( - std::string, std::vector<std::vector<kaldi::BaseFloat>>&)) & - Decoder::decode, - "Decode one input probability matrix " - "and return the transcription"); + "Decode for the probability matrices in specifier " + "and return the transcriptions.") + .def( + "decode", + (std::string (Decoder::*)( + std::string, const std::vector<std::vector<kaldi::BaseFloat>>&)) & + Decoder::decode, + "Decode one input probability matrix " + "and return the transcription."); } diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py index 1818ecbf00..a98c0b4cc1 100644 --- a/fluid/DeepASR/decoder/setup.py +++ b/fluid/DeepASR/decoder/setup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.