From 1841cea19b23970086293f29f97f4aa8e0f32961 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Wed, 21 Mar 2018 07:48:01 -0700
Subject: [PATCH 1/9] Add decoder for deep asr model

---
 fluid/DeepASR/decoder/decoder.cc              |  21 ---
 fluid/DeepASR/decoder/post_decode_faster.cc   | 165 ++++++++++++++++++
 .../{decoder.h => post_decode_faster.h}       |   7 +-
 fluid/DeepASR/decoder/pybind.cc               |   4 +-
 fluid/DeepASR/decoder/setup.py                |  33 +++-
 fluid/DeepASR/decoder/setup.sh                |   6 +-
 6 files changed, 206 insertions(+), 30 deletions(-)
 delete mode 100644 fluid/DeepASR/decoder/decoder.cc
 create mode 100644 fluid/DeepASR/decoder/post_decode_faster.cc
 rename fluid/DeepASR/decoder/{decoder.h => post_decode_faster.h} (61%)
diff --git a/fluid/DeepASR/decoder/decoder.cc b/fluid/DeepASR/decoder/decoder.cc
deleted file mode 100644
index a99f972e2f..0000000000
--- a/fluid/DeepASR/decoder/decoder.cc
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "decoder.h"
-
-std::string decode(std::vector<std::vector<float>> probs_mat) {
-  // Add decoding logic here
-
-  return "example decoding result";
-}
diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc
new file mode 100644
index 0000000000..d3f20a6ea3
--- /dev/null
+++ b/fluid/DeepASR/decoder/post_decode_faster.cc
@@ -0,0 +1,165 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "post_decode_faster.h"
+#include "base/kaldi-common.h"
+#include "base/timer.h"
+#include "decoder/decodable-matrix.h"
+#include "decoder/faster-decoder.h"
+#include "fstext/fstext-lib.h"
+#include "hmm/transition-model.h"
+#include "lat/kaldi-lattice.h"  // for {Compact}LatticeArc
+#include "tree/context-dep.h"
+#include "util/common-utils.h"
+
+std::vector<std::string> decode(std::string word_syms_filename,
+                                std::string fst_in_filename,
+                                std::string logprior_rxfilename,
+                                std::string posterior_rspecifier,
+                                std::string words_wspecifier,
+                                std::string alignment_wspecifier) {
+  std::vector<std::string> decoding_results;
+
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    using fst::SymbolTable;
+    using fst::VectorFst;
+    using fst::StdArc;
+
+    const char *usage =
+        "Decode, reading log-likelihoods (of transition-ids or whatever symbol "
+        "is on the graph) as matrices.";
+    ParseOptions po(usage);
+    bool binary = true;
+    BaseFloat acoustic_scale = 1.5;
+    bool allow_partial = true;
+    FasterDecoderOptions decoder_opts;
+    decoder_opts.Register(&po, true);  // true == include obscure settings.
+    po.Register("binary", &binary, "Write output in binary mode");
+    po.Register("allow-partial",
+                &allow_partial,
+                "Produce output even when final state was not reached");
+    po.Register("acoustic-scale",
+                &acoustic_scale,
+                "Scaling factor for acoustic likelihoods");
+
+    Int32VectorWriter words_writer(words_wspecifier);
+
+    Int32VectorWriter alignment_writer(alignment_wspecifier);
+    fst::SymbolTable *word_syms = NULL;
+    if (word_syms_filename != "") {
+      word_syms = fst::SymbolTable::ReadText(word_syms_filename);
+      if (!word_syms)
+        KALDI_ERR << "Could not read symbol table from file "
+                  << word_syms_filename;
+    }
+
+    SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier);
+    std::ifstream is_logprior(logprior_rxfilename);
+    Vector<BaseFloat> logprior;
+    logprior.Read(is_logprior, false);
+
+    // It's important that we initialize decode_fst after loglikes_reader, as it
+    // can prevent crashes on systems installed without enough virtual memory.
+    // It has to do with what happens on UNIX systems if you call fork() on a
+    // large process: the page-table entries are duplicated, which requires a
+    // lot of virtual memory.
+    VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_filename);
+
+    BaseFloat tot_like = 0.0;
+    kaldi::int64 frame_count = 0;
+    int num_success = 0, num_fail = 0;
+    FasterDecoder decoder(*decode_fst, decoder_opts);
+
+    Timer timer;
+
+    for (; !posterior_reader.Done(); posterior_reader.Next()) {
+      std::string key = posterior_reader.Key();
+      Matrix<BaseFloat> loglikes(posterior_reader.Value());
+      KALDI_LOG << key << " " << loglikes.NumRows() << " x "
+                << loglikes.NumCols();
+
+      if (loglikes.NumRows() == 0) {
+        KALDI_WARN << "Zero-length utterance: " << key;
+        num_fail++;
+        continue;
+      }
+      KALDI_ASSERT(loglikes.NumCols() == logprior.Dim());
+
+      loglikes.ApplyLog();
+      loglikes.AddVecToRows(-1.0, logprior);
+
+      DecodableMatrixScaled decodable(loglikes, acoustic_scale);
+      decoder.Decode(&decodable);
+
+      VectorFst<LatticeArc> decoded;  // linear FST.
+
+      if ((allow_partial || decoder.ReachedFinal()) &&
+          decoder.GetBestPath(&decoded)) {
+        num_success++;
+        if (!decoder.ReachedFinal())
+          KALDI_WARN << "Decoder did not reach end-state, outputting partial "
+                        "traceback.";
+
+        std::vector<int32> alignment;
+        std::vector<int32> words;
+        LatticeWeight weight;
+        frame_count += loglikes.NumRows();
+
+        GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
+
+        words_writer.Write(key, words);
+        if (alignment_writer.IsOpen()) alignment_writer.Write(key, alignment);
+        if (word_syms != NULL) {
+          std::string res;
+          for (size_t i = 0; i < words.size(); i++) {
+            std::string s = word_syms->Find(words[i]);
+            res += s;
+            if (s == "")
+              KALDI_ERR << "Word-id " << words[i] << " not in symbol table.";
+            std::cerr << s << ' ';
+          }
+          decoding_results.push_back(res);
+        }
+        BaseFloat like = -weight.Value1() - weight.Value2();
+        tot_like += like;
+        KALDI_LOG << "Log-like per frame for utterance " << key << " is "
+                  << (like / loglikes.NumRows()) << " over "
+                  << loglikes.NumRows() << " frames.";
+
+      } else {
+        num_fail++;
+        KALDI_WARN << "Did not successfully decode utterance " << key
+                   << ", len = " << loglikes.NumRows();
+      }
+    }
+
+    double elapsed = timer.Elapsed();
+    KALDI_LOG << "Time taken [excluding initialization] " << elapsed
+              << "s: real-time factor assuming 100 frames/sec is "
+              << (elapsed * 100.0 / frame_count);
+    KALDI_LOG << "Done " << num_success << " utterances, failed for "
+              << num_fail;
+    KALDI_LOG << "Overall log-likelihood per frame is "
+              << (tot_like / frame_count) << " over " << frame_count
+              << " frames.";
+
+    delete word_syms;
+    delete decode_fst;
+  } catch (const std::exception &e) {
+    std::cerr << e.what();
+  }
+  return decoding_results;
+}
diff --git a/fluid/DeepASR/decoder/decoder.h b/fluid/DeepASR/decoder/post_decode_faster.h
similarity index 61%
rename from fluid/DeepASR/decoder/decoder.h
rename to fluid/DeepASR/decoder/post_decode_faster.h
index 4a67fa366a..04983a3b93 100644
--- a/fluid/DeepASR/decoder/decoder.h
+++ b/fluid/DeepASR/decoder/post_decode_faster.h
@@ -15,4 +15,9 @@ limitations under the License. */
 #include <string>
 #include <vector>
 
-std::string decode(std::vector<std::vector<float>> probs_mat);
+std::vector<std::string> decode(std::string word_syms_filename,
+                                std::string fst_in_filename,
+                                std::string logprior_rxfilename,
+                                std::string posterior_respecifier,
+                                std::string words_wspecifier,
+                                std::string alignment_wspecifier = "");
diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc
index 8cd65903ea..a8744ee2ac 100644
--- a/fluid/DeepASR/decoder/pybind.cc
+++ b/fluid/DeepASR/decoder/pybind.cc
@@ -15,11 +15,11 @@ limitations under the License. */
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 
-#include "decoder.h"
+#include "post_decode_faster.h"
 
 namespace py = pybind11;
 
-PYBIND11_MODULE(decoder, m) {
+PYBIND11_MODULE(post_decode_faster, m) {
   m.doc() = "Decode function for Deep ASR model";
 
   m.def("decode",
diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py
index cedd5d644e..e1c74fcb0d 100644
--- a/fluid/DeepASR/decoder/setup.py
+++ b/fluid/DeepASR/decoder/setup.py
@@ -13,27 +13,50 @@
 # limitations under the License.
 
 import os
+import glob
 from distutils.core import setup, Extension
 from distutils.sysconfig import get_config_vars
 
-args = ['-std=c++11']
+args = [
+    '-std=c++11', '-Wno-sign-compare', '-Wno-unused-variable',
+    '-Wno-unused-local-typedefs', '-Wno-unused-but-set-variable',
+    '-Wno-deprecated-declarations', '-Wno-unused-function'
+]
 
 # remove warning about -Wstrict-prototypes
 (opt, ) = get_config_vars('OPT')
 os.environ['OPT'] = " ".join(flag for flag in opt.split()
                              if flag != '-Wstrict-prototypes')
+os.environ['CC'] = 'g++'
+
+LIBS = [
+    'fst', 'kaldi-base', 'kaldi-util', 'kaldi-matrix', 'kaldi-tree',
+    'kaldi-hmm', 'kaldi-fstext', 'kaldi-decoder', 'kaldi-lat'
+]
+
+LIB_DIRS = [
+    'kaldi/tools/openfst/lib', 'kaldi/src/base', 'kaldi/src/matrix',
+    'kaldi/src/util', 'kaldi/src/tree', 'kaldi/src/hmm', 'kaldi/src/fstext',
+    'kaldi/src/decoder', 'kaldi/src/lat'
+]
 
 ext_modules = [
     Extension(
-        'decoder',
-        ['pybind.cc', 'decoder.cc'],
-        include_dirs=['pybind11/include', '.'],
+        'post_decode_faster',
+        ['pybind.cc', 'post_decode_faster.cc'],
+        include_dirs=[
+            'pybind11/include', '.', 'kaldi/src/',
+            'kaldi/tools/openfst/src/include'
+        ],
+        libraries=LIBS,
         language='c++',
+        library_dirs=LIB_DIRS,
+        runtime_library_dirs=LIB_DIRS,
         extra_compile_args=args, ),
 ]
 
 setup(
-    name='decoder',
+    name='post_decode_faster',
     version='0.0.1',
     author='Paddle',
     author_email='',
diff --git a/fluid/DeepASR/decoder/setup.sh b/fluid/DeepASR/decoder/setup.sh
index 71fd6626ef..74cec0a482 100644
--- a/fluid/DeepASR/decoder/setup.sh
+++ b/fluid/DeepASR/decoder/setup.sh
@@ -1,7 +1,11 @@
-
+set -e
 
 if [ ! -d pybind11 ]; then
     git clone https://github.com/pybind/pybind11.git
 fi 
 
+if [ ! -d kaldi ]; then
+    git clone https://github.com/kaldi-asr/kaldi.git
+fi 
+
 python setup.py build_ext -i 

From b33396315f3f374d24dfb3c2a691e6eb60e83c02 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Wed, 21 Mar 2018 19:36:33 -0700
Subject: [PATCH 2/9] Reimpl in class to separate init and decoding

---
 fluid/DeepASR/decoder/post_decode_faster.cc | 275 ++++++++++----------
 fluid/DeepASR/decoder/post_decode_faster.h  |  38 ++-
 fluid/DeepASR/decoder/pybind.cc             |  14 +-
 3 files changed, 174 insertions(+), 153 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc
index d3f20a6ea3..876c0d8ebb 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.cc
+++ b/fluid/DeepASR/decoder/post_decode_faster.cc
@@ -13,153 +13,146 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "post_decode_faster.h"
-#include "base/kaldi-common.h"
-#include "base/timer.h"
-#include "decoder/decodable-matrix.h"
-#include "decoder/faster-decoder.h"
-#include "fstext/fstext-lib.h"
-#include "hmm/transition-model.h"
-#include "lat/kaldi-lattice.h"  // for {Compact}LatticeArc
-#include "tree/context-dep.h"
-#include "util/common-utils.h"
-
-std::vector<std::string> decode(std::string word_syms_filename,
-                                std::string fst_in_filename,
-                                std::string logprior_rxfilename,
-                                std::string posterior_rspecifier,
-                                std::string words_wspecifier,
-                                std::string alignment_wspecifier) {
+
+using namespace kaldi;
+typedef kaldi::int32 int32;
+using fst::SymbolTable;
+using fst::VectorFst;
+using fst::StdArc;
+
+Decoder::Decoder(std::string word_syms_filename,
+                 std::string fst_in_filename,
+                 std::string logprior_rxfilename) {
+  const char *usage =
+      "Decode, reading log-likelihoods (of transition-ids or whatever symbol "
+      "is on the graph) as matrices.";
+
+  std::string words_wspecifier = "ark,t:out.ark";
+  std::string alignment_wspecifier = "";
+  ParseOptions po(usage);
+  binary = true;
+  acoustic_scale = 1.5;
+  allow_partial = true;
+  FasterDecoderOptions decoder_opts;
+  decoder_opts.Register(&po, true);  // true == include obscure settings.
+  po.Register("binary", &binary, "Write output in binary mode");
+  po.Register("allow-partial",
+              &allow_partial,
+              "Produce output even when final state was not reached");
+  po.Register("acoustic-scale",
+              &acoustic_scale,
+              "Scaling factor for acoustic likelihoods");
+
+  words_writer = new Int32VectorWriter(words_wspecifier);
+
+  alignment_writer = new Int32VectorWriter(alignment_wspecifier);
+  word_syms = NULL;
+  if (word_syms_filename != "") {
+    word_syms = fst::SymbolTable::ReadText(word_syms_filename);
+    if (!word_syms)
+      KALDI_ERR << "Could not read symbol table from file "
+                << word_syms_filename;
+  }
+
+  std::ifstream is_logprior(logprior_rxfilename);
+  logprior.Read(is_logprior, false);
+
+  // It's important that we initialize decode_fst after loglikes_reader, as it
+  // can prevent crashes on systems installed without enough virtual memory.
+  // It has to do with what happens on UNIX systems if you call fork() on a
+  // large process: the page-table entries are duplicated, which requires a
+  // lot of virtual memory.
+  decode_fst = fst::ReadFstKaldi(fst_in_filename);
+
+  decoder = new FasterDecoder(*decode_fst, decoder_opts);
+}
+
+
+Decoder::~Decoder() {
+  if (!word_syms) delete word_syms;
+  delete decode_fst;
+  delete decoder;
+  delete words_writer;
+  delete alignment_writer;
+}
+
+std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
+  SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier);
   std::vector<std::string> decoding_results;
 
-  try {
-    using namespace kaldi;
-    typedef kaldi::int32 int32;
-    using fst::SymbolTable;
-    using fst::VectorFst;
-    using fst::StdArc;
-
-    const char *usage =
-        "Decode, reading log-likelihoods (of transition-ids or whatever symbol "
-        "is on the graph) as matrices.";
-    ParseOptions po(usage);
-    bool binary = true;
-    BaseFloat acoustic_scale = 1.5;
-    bool allow_partial = true;
-    FasterDecoderOptions decoder_opts;
-    decoder_opts.Register(&po, true);  // true == include obscure settings.
-    po.Register("binary", &binary, "Write output in binary mode");
-    po.Register("allow-partial",
-                &allow_partial,
-                "Produce output even when final state was not reached");
-    po.Register("acoustic-scale",
-                &acoustic_scale,
-                "Scaling factor for acoustic likelihoods");
-
-    Int32VectorWriter words_writer(words_wspecifier);
-
-    Int32VectorWriter alignment_writer(alignment_wspecifier);
-    fst::SymbolTable *word_syms = NULL;
-    if (word_syms_filename != "") {
-      word_syms = fst::SymbolTable::ReadText(word_syms_filename);
-      if (!word_syms)
-        KALDI_ERR << "Could not read symbol table from file "
-                  << word_syms_filename;
+  BaseFloat tot_like = 0.0;
+  kaldi::int64 frame_count = 0;
+  int num_success = 0, num_fail = 0;
+
+  Timer timer;
+  for (; !posterior_reader.Done(); posterior_reader.Next()) {
+    std::string key = posterior_reader.Key();
+    Matrix<BaseFloat> loglikes(posterior_reader.Value());
+    KALDI_LOG << key << " " << loglikes.NumRows() << " x "
+              << loglikes.NumCols();
+
+    if (loglikes.NumRows() == 0) {
+      KALDI_WARN << "Zero-length utterance: " << key;
+      num_fail++;
+      continue;
     }
-
-    SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier);
-    std::ifstream is_logprior(logprior_rxfilename);
-    Vector<BaseFloat> logprior;
-    logprior.Read(is_logprior, false);
-
-    // It's important that we initialize decode_fst after loglikes_reader, as it
-    // can prevent crashes on systems installed without enough virtual memory.
-    // It has to do with what happens on UNIX systems if you call fork() on a
-    // large process: the page-table entries are duplicated, which requires a
-    // lot of virtual memory.
-    VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_filename);
-
-    BaseFloat tot_like = 0.0;
-    kaldi::int64 frame_count = 0;
-    int num_success = 0, num_fail = 0;
-    FasterDecoder decoder(*decode_fst, decoder_opts);
-
-    Timer timer;
-
-    for (; !posterior_reader.Done(); posterior_reader.Next()) {
-      std::string key = posterior_reader.Key();
-      Matrix<BaseFloat> loglikes(posterior_reader.Value());
-      KALDI_LOG << key << " " << loglikes.NumRows() << " x "
-                << loglikes.NumCols();
-
-      if (loglikes.NumRows() == 0) {
-        KALDI_WARN << "Zero-length utterance: " << key;
-        num_fail++;
-        continue;
-      }
-      KALDI_ASSERT(loglikes.NumCols() == logprior.Dim());
-
-      loglikes.ApplyLog();
-      loglikes.AddVecToRows(-1.0, logprior);
-
-      DecodableMatrixScaled decodable(loglikes, acoustic_scale);
-      decoder.Decode(&decodable);
-
-      VectorFst<LatticeArc> decoded;  // linear FST.
-
-      if ((allow_partial || decoder.ReachedFinal()) &&
-          decoder.GetBestPath(&decoded)) {
-        num_success++;
-        if (!decoder.ReachedFinal())
-          KALDI_WARN << "Decoder did not reach end-state, outputting partial "
-                        "traceback.";
-
-        std::vector<int32> alignment;
-        std::vector<int32> words;
-        LatticeWeight weight;
-        frame_count += loglikes.NumRows();
-
-        GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
-
-        words_writer.Write(key, words);
-        if (alignment_writer.IsOpen()) alignment_writer.Write(key, alignment);
-        if (word_syms != NULL) {
-          std::string res;
-          for (size_t i = 0; i < words.size(); i++) {
-            std::string s = word_syms->Find(words[i]);
-            res += s;
-            if (s == "")
-              KALDI_ERR << "Word-id " << words[i] << " not in symbol table.";
-            std::cerr << s << ' ';
-          }
-          decoding_results.push_back(res);
+    KALDI_ASSERT(loglikes.NumCols() == logprior.Dim());
+
+    loglikes.ApplyLog();
+    loglikes.AddVecToRows(-1.0, logprior);
+
+    DecodableMatrixScaled decodable(loglikes, acoustic_scale);
+    decoder->Decode(&decodable);
+
+    VectorFst<LatticeArc> decoded;  // linear FST.
+
+    if ((allow_partial || decoder->ReachedFinal()) &&
+        decoder->GetBestPath(&decoded)) {
+      num_success++;
+      if (!decoder->ReachedFinal())
+        KALDI_WARN << "Decoder did not reach end-state, outputting partial "
+                      "traceback.";
+
+      std::vector<int32> alignment;
+      std::vector<int32> words;
+      LatticeWeight weight;
+      frame_count += loglikes.NumRows();
+
+      GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
+
+      words_writer->Write(key, words);
+      if (alignment_writer->IsOpen()) alignment_writer->Write(key, alignment);
+      if (word_syms != NULL) {
+        std::string res;
+        for (size_t i = 0; i < words.size(); i++) {
+          std::string s = word_syms->Find(words[i]);
+          res += s;
+          if (s == "")
+            KALDI_ERR << "Word-id " << words[i] << " not in symbol table.";
+          std::cerr << s << ' ';
         }
-        BaseFloat like = -weight.Value1() - weight.Value2();
-        tot_like += like;
-        KALDI_LOG << "Log-like per frame for utterance " << key << " is "
-                  << (like / loglikes.NumRows()) << " over "
-                  << loglikes.NumRows() << " frames.";
-
-      } else {
-        num_fail++;
-        KALDI_WARN << "Did not successfully decode utterance " << key
-                   << ", len = " << loglikes.NumRows();
+        decoding_results.push_back(res);
       }
+      BaseFloat like = -weight.Value1() - weight.Value2();
+      tot_like += like;
+      KALDI_LOG << "Log-like per frame for utterance " << key << " is "
+                << (like / loglikes.NumRows()) << " over " << loglikes.NumRows()
+                << " frames.";
+
+    } else {
+      num_fail++;
+      KALDI_WARN << "Did not successfully decode utterance " << key
+                 << ", len = " << loglikes.NumRows();
     }
-
-    double elapsed = timer.Elapsed();
-    KALDI_LOG << "Time taken [excluding initialization] " << elapsed
-              << "s: real-time factor assuming 100 frames/sec is "
-              << (elapsed * 100.0 / frame_count);
-    KALDI_LOG << "Done " << num_success << " utterances, failed for "
-              << num_fail;
-    KALDI_LOG << "Overall log-likelihood per frame is "
-              << (tot_like / frame_count) << " over " << frame_count
-              << " frames.";
-
-    delete word_syms;
-    delete decode_fst;
-  } catch (const std::exception &e) {
-    std::cerr << e.what();
   }
+
+  double elapsed = timer.Elapsed();
+  KALDI_LOG << "Time taken [excluding initialization] " << elapsed
+            << "s: real-time factor assuming 100 frames/sec is "
+            << (elapsed * 100.0 / frame_count);
+  KALDI_LOG << "Done " << num_success << " utterances, failed for " << num_fail;
+  KALDI_LOG << "Overall log-likelihood per frame is "
+            << (tot_like / frame_count) << " over " << frame_count
+            << " frames.";
   return decoding_results;
 }
diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h
index 04983a3b93..c1c6ac6930 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.h
+++ b/fluid/DeepASR/decoder/post_decode_faster.h
@@ -14,10 +14,36 @@ limitations under the License. */
 
 #include <string>
 #include <vector>
+#include "base/kaldi-common.h"
+#include "base/timer.h"
+#include "decoder/decodable-matrix.h"
+#include "decoder/faster-decoder.h"
+#include "fstext/fstext-lib.h"
+#include "hmm/transition-model.h"
+#include "lat/kaldi-lattice.h"  // for {Compact}LatticeArc
+#include "tree/context-dep.h"
+#include "util/common-utils.h"
 
-std::vector<std::string> decode(std::string word_syms_filename,
-                                std::string fst_in_filename,
-                                std::string logprior_rxfilename,
-                                std::string posterior_respecifier,
-                                std::string words_wspecifier,
-                                std::string alignment_wspecifier = "");
+
+class Decoder {
+public:
+  Decoder(std::string word_syms_filename,
+          std::string fst_in_filename,
+          std::string logprior_rxfilename);
+  ~Decoder();
+
+  std::vector<std::string> decode(std::string posterior_rspecifier);
+
+private:
+  fst::SymbolTable *word_syms;
+  fst::VectorFst<fst::StdArc> *decode_fst;
+  kaldi::FasterDecoder *decoder;
+  kaldi::Vector<kaldi::BaseFloat> logprior;
+
+  kaldi::Int32VectorWriter *words_writer;
+  kaldi::Int32VectorWriter *alignment_writer;
+
+  bool binary;
+  kaldi::BaseFloat acoustic_scale;
+  bool allow_partial;
+};
diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc
index a8744ee2ac..efa37d5d51 100644
--- a/fluid/DeepASR/decoder/pybind.cc
+++ b/fluid/DeepASR/decoder/pybind.cc
@@ -20,10 +20,12 @@ limitations under the License. */
 namespace py = pybind11;
 
 PYBIND11_MODULE(post_decode_faster, m) {
-  m.doc() = "Decode function for Deep ASR model";
-
-  m.def("decode",
-        &decode,
-        "Decode one input probability matrix "
-        "and return the transcription");
+  m.doc() = "Decoder for Deep ASR model";
+
+  py::class_<Decoder>(m, "Decoder")
+      .def(py::init<std::string, std::string, std::string>())
+      .def("decode",
+           &Decoder::decode,
+           "Decode one input probability matrix "
+           "and return the transcription");
 }

From 185ff858da8352ba7dc978057cdcb0e4de90556c Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Wed, 21 Mar 2018 19:57:57 -0700
Subject: [PATCH 3/9] Remove unused variables and output

---
 fluid/DeepASR/decoder/post_decode_faster.cc | 16 ----------------
 fluid/DeepASR/decoder/post_decode_faster.h  |  3 ---
 2 files changed, 19 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc
index 876c0d8ebb..140099bea7 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.cc
+++ b/fluid/DeepASR/decoder/post_decode_faster.cc
@@ -27,8 +27,6 @@ Decoder::Decoder(std::string word_syms_filename,
       "Decode, reading log-likelihoods (of transition-ids or whatever symbol "
       "is on the graph) as matrices.";
 
-  std::string words_wspecifier = "ark,t:out.ark";
-  std::string alignment_wspecifier = "";
   ParseOptions po(usage);
   binary = true;
   acoustic_scale = 1.5;
@@ -43,9 +41,6 @@ Decoder::Decoder(std::string word_syms_filename,
               &acoustic_scale,
               "Scaling factor for acoustic likelihoods");
 
-  words_writer = new Int32VectorWriter(words_wspecifier);
-
-  alignment_writer = new Int32VectorWriter(alignment_wspecifier);
   word_syms = NULL;
   if (word_syms_filename != "") {
     word_syms = fst::SymbolTable::ReadText(word_syms_filename);
@@ -72,8 +67,6 @@ Decoder::~Decoder() {
   if (!word_syms) delete word_syms;
   delete decode_fst;
   delete decoder;
-  delete words_writer;
-  delete alignment_writer;
 }
 
 std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
@@ -88,8 +81,6 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
   for (; !posterior_reader.Done(); posterior_reader.Next()) {
     std::string key = posterior_reader.Key();
     Matrix<BaseFloat> loglikes(posterior_reader.Value());
-    KALDI_LOG << key << " " << loglikes.NumRows() << " x "
-              << loglikes.NumCols();
 
     if (loglikes.NumRows() == 0) {
       KALDI_WARN << "Zero-length utterance: " << key;
@@ -120,8 +111,6 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
 
       GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
 
-      words_writer->Write(key, words);
-      if (alignment_writer->IsOpen()) alignment_writer->Write(key, alignment);
       if (word_syms != NULL) {
         std::string res;
         for (size_t i = 0; i < words.size(); i++) {
@@ -129,16 +118,11 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
           res += s;
           if (s == "")
             KALDI_ERR << "Word-id " << words[i] << " not in symbol table.";
-          std::cerr << s << ' ';
         }
         decoding_results.push_back(res);
       }
       BaseFloat like = -weight.Value1() - weight.Value2();
       tot_like += like;
-      KALDI_LOG << "Log-like per frame for utterance " << key << " is "
-                << (like / loglikes.NumRows()) << " over " << loglikes.NumRows()
-                << " frames.";
-
     } else {
       num_fail++;
       KALDI_WARN << "Did not successfully decode utterance " << key
diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h
index c1c6ac6930..49d680c58a 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.h
+++ b/fluid/DeepASR/decoder/post_decode_faster.h
@@ -40,9 +40,6 @@ class Decoder {
   kaldi::FasterDecoder *decoder;
   kaldi::Vector<kaldi::BaseFloat> logprior;
 
-  kaldi::Int32VectorWriter *words_writer;
-  kaldi::Int32VectorWriter *alignment_writer;
-
   bool binary;
   kaldi::BaseFloat acoustic_scale;
   bool allow_partial;

From 77ce7a9202f7ea90c6030ed3d468ca36fb6543fc Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Thu, 22 Mar 2018 00:11:18 -0700
Subject: [PATCH 4/9] Avoid using namespace kaldi

---
 fluid/DeepASR/decoder/post_decode_faster.cc | 23 ++++++++++-----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc
index 140099bea7..5c0027c4ba 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.cc
+++ b/fluid/DeepASR/decoder/post_decode_faster.cc
@@ -14,7 +14,6 @@ limitations under the License. */
 
 #include "post_decode_faster.h"
 
-using namespace kaldi;
 typedef kaldi::int32 int32;
 using fst::SymbolTable;
 using fst::VectorFst;
@@ -27,11 +26,11 @@ Decoder::Decoder(std::string word_syms_filename,
       "Decode, reading log-likelihoods (of transition-ids or whatever symbol "
       "is on the graph) as matrices.";
 
-  ParseOptions po(usage);
+  kaldi::ParseOptions po(usage);
   binary = true;
   acoustic_scale = 1.5;
   allow_partial = true;
-  FasterDecoderOptions decoder_opts;
+  kaldi::FasterDecoderOptions decoder_opts;
   decoder_opts.Register(&po, true);  // true == include obscure settings.
   po.Register("binary", &binary, "Write output in binary mode");
   po.Register("allow-partial",
@@ -59,7 +58,7 @@ Decoder::Decoder(std::string word_syms_filename,
   // lot of virtual memory.
   decode_fst = fst::ReadFstKaldi(fst_in_filename);
 
-  decoder = new FasterDecoder(*decode_fst, decoder_opts);
+  decoder = new kaldi::FasterDecoder(*decode_fst, decoder_opts);
 }
 
 
@@ -70,17 +69,17 @@ Decoder::~Decoder() {
 }
 
 std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
-  SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier);
+  kaldi::SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier);
   std::vector<std::string> decoding_results;
 
-  BaseFloat tot_like = 0.0;
+  kaldi::BaseFloat tot_like = 0.0;
   kaldi::int64 frame_count = 0;
   int num_success = 0, num_fail = 0;
 
-  Timer timer;
+  kaldi::Timer timer;
   for (; !posterior_reader.Done(); posterior_reader.Next()) {
     std::string key = posterior_reader.Key();
-    Matrix<BaseFloat> loglikes(posterior_reader.Value());
+    kaldi::Matrix<kaldi::BaseFloat> loglikes(posterior_reader.Value());
 
     if (loglikes.NumRows() == 0) {
       KALDI_WARN << "Zero-length utterance: " << key;
@@ -92,10 +91,10 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
     loglikes.ApplyLog();
     loglikes.AddVecToRows(-1.0, logprior);
 
-    DecodableMatrixScaled decodable(loglikes, acoustic_scale);
+    kaldi::DecodableMatrixScaled decodable(loglikes, acoustic_scale);
     decoder->Decode(&decodable);
 
-    VectorFst<LatticeArc> decoded;  // linear FST.
+    VectorFst<kaldi::LatticeArc> decoded;  // linear FST.
 
     if ((allow_partial || decoder->ReachedFinal()) &&
         decoder->GetBestPath(&decoded)) {
@@ -106,7 +105,7 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
 
       std::vector<int32> alignment;
       std::vector<int32> words;
-      LatticeWeight weight;
+      kaldi::LatticeWeight weight;
       frame_count += loglikes.NumRows();
 
       GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
@@ -121,7 +120,7 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
         }
         decoding_results.push_back(res);
       }
-      BaseFloat like = -weight.Value1() - weight.Value2();
+      kaldi::BaseFloat like = -weight.Value1() - weight.Value2();
       tot_like += like;
     } else {
       num_fail++;

From e3e9bb43eebd7af13238c212fadd03606812b31e Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Thu, 22 Mar 2018 01:10:49 -0700
Subject: [PATCH 5/9] Make the kaldi root directory flexible

---
 fluid/DeepASR/decoder/setup.py | 18 ++++++++++++------
 fluid/DeepASR/decoder/setup.sh |  4 ----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py
index e1c74fcb0d..eaaf746132 100644
--- a/fluid/DeepASR/decoder/setup.py
+++ b/fluid/DeepASR/decoder/setup.py
@@ -17,6 +17,12 @@
 from distutils.core import setup, Extension
 from distutils.sysconfig import get_config_vars
 
+try:
+    kaldi_root = os.environ['KALDI_ROOT']
+except:
+    raise ValueError("Environment variable 'KALDI_ROOT' is not defined. Please "
+                     "install kaldi and export KALDI_ROOT=<kaldi's root dir> .")
+
 args = [
     '-std=c++11', '-Wno-sign-compare', '-Wno-unused-variable',
     '-Wno-unused-local-typedefs', '-Wno-unused-but-set-variable',
@@ -35,21 +41,21 @@
 ]
 
 LIB_DIRS = [
-    'kaldi/tools/openfst/lib', 'kaldi/src/base', 'kaldi/src/matrix',
-    'kaldi/src/util', 'kaldi/src/tree', 'kaldi/src/hmm', 'kaldi/src/fstext',
-    'kaldi/src/decoder', 'kaldi/src/lat'
+    'tools/openfst/lib', 'src/base', 'src/matrix', 'src/util', 'src/tree',
+    'src/hmm', 'src/fstext', 'src/decoder', 'src/lat'
 ]
+LIB_DIRS = [os.path.join(kaldi_root, path) for path in LIB_DIRS]
 
 ext_modules = [
     Extension(
         'post_decode_faster',
         ['pybind.cc', 'post_decode_faster.cc'],
         include_dirs=[
-            'pybind11/include', '.', 'kaldi/src/',
-            'kaldi/tools/openfst/src/include'
+            'pybind11/include', '.', os.path.join(kaldi_root, 'src'),
+            os.path.join(kaldi_root, 'tools/openfst/src/include')
         ],
-        libraries=LIBS,
         language='c++',
+        libraries=LIBS,
         library_dirs=LIB_DIRS,
         runtime_library_dirs=LIB_DIRS,
         extra_compile_args=args, ),
diff --git a/fluid/DeepASR/decoder/setup.sh b/fluid/DeepASR/decoder/setup.sh
index 74cec0a482..1471f85f41 100644
--- a/fluid/DeepASR/decoder/setup.sh
+++ b/fluid/DeepASR/decoder/setup.sh
@@ -4,8 +4,4 @@ if [ ! -d pybind11 ]; then
     git clone https://github.com/pybind/pybind11.git
 fi 
 
-if [ ! -d kaldi ]; then
-    git clone https://github.com/kaldi-asr/kaldi.git
-fi 
-
 python setup.py build_ext -i 

From 4faf79a6d101f9f13b6d0dfe42e125c6bfd10f50 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Sat, 24 Mar 2018 05:01:34 -0700
Subject: [PATCH 6/9] Support input scores in ndarray format

---
 fluid/DeepASR/decoder/post_decode_faster.cc | 62 ++++++++++++++++++++-
 fluid/DeepASR/decoder/post_decode_faster.h  |  6 ++
 fluid/DeepASR/decoder/pybind.cc             |  9 ++-
 3 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc
index 5c0027c4ba..957af550e3 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.cc
+++ b/fluid/DeepASR/decoder/post_decode_faster.cc
@@ -22,7 +22,7 @@ using fst::StdArc;
 Decoder::Decoder(std::string word_syms_filename,
                  std::string fst_in_filename,
                  std::string logprior_rxfilename) {
-  const char *usage =
+  const char* usage =
       "Decode, reading log-likelihoods (of transition-ids or whatever symbol "
       "is on the graph) as matrices.";
 
@@ -68,6 +68,23 @@ Decoder::~Decoder() {
   delete decoder;
 }
 
+std::string Decoder::decode(
+    std::string key, std::vector<std::vector<kaldi::BaseFloat>>& log_probs) {
+  size_t num_frames = log_probs.size();
+  size_t dim_label = log_probs[0].size();
+
+  kaldi::Matrix<kaldi::BaseFloat> loglikes(
+      num_frames, dim_label, kaldi::kSetZero, kaldi::kStrideEqualNumCols);
+  for (size_t i = 0; i < num_frames; ++i) {
+    memcpy(loglikes.Data() + i * dim_label,
+           log_probs[i].data(),
+           sizeof(kaldi::BaseFloat) * dim_label);
+  }
+
+  return decode(key, loglikes);
+}
+
+
 std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
   kaldi::SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier);
   std::vector<std::string> decoding_results;
@@ -139,3 +156,46 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
             << " frames.";
   return decoding_results;
 }
+
+
+std::string Decoder::decode(std::string key,
+                            kaldi::Matrix<kaldi::BaseFloat>& loglikes) {
+  std::string decoding_result;
+
+  if (loglikes.NumRows() == 0) {
+    KALDI_WARN << "Zero-length utterance: " << key;
+  }
+  KALDI_ASSERT(loglikes.NumCols() == logprior.Dim());
+
+  loglikes.ApplyLog();
+  loglikes.AddVecToRows(-1.0, logprior);
+
+  kaldi::DecodableMatrixScaled decodable(loglikes, acoustic_scale);
+  decoder->Decode(&decodable);
+
+  VectorFst<kaldi::LatticeArc> decoded;  // linear FST.
+
+  if ((allow_partial || decoder->ReachedFinal()) &&
+      decoder->GetBestPath(&decoded)) {
+    if (!decoder->ReachedFinal())
+      KALDI_WARN << "Decoder did not reach end-state, outputting partial "
+                    "traceback.";
+
+    std::vector<int32> alignment;
+    std::vector<int32> words;
+    kaldi::LatticeWeight weight;
+
+    GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
+
+    if (word_syms != NULL) {
+      for (size_t i = 0; i < words.size(); i++) {
+        std::string s = word_syms->Find(words[i]);
+        decoding_result += s;
+        if (s == "")
+          KALDI_ERR << "Word-id " << words[i] << " not in symbol table.";
+      }
+    }
+  }
+
+  return decoding_result;
+}
diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h
index 49d680c58a..6a5830e296 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.h
+++ b/fluid/DeepASR/decoder/post_decode_faster.h
@@ -34,7 +34,13 @@ class Decoder {
 
   std::vector<std::string> decode(std::string posterior_rspecifier);
 
+  std::string decode(std::string key,
+                     std::vector<std::vector<kaldi::BaseFloat>> &log_probs);
+
 private:
+  std::string decode(std::string key,
+                     kaldi::Matrix<kaldi::BaseFloat> &loglikes);
+
   fst::SymbolTable *word_syms;
   fst::VectorFst<fst::StdArc> *decode_fst;
   kaldi::FasterDecoder *decoder;
diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc
index efa37d5d51..1b91f02b89 100644
--- a/fluid/DeepASR/decoder/pybind.cc
+++ b/fluid/DeepASR/decoder/pybind.cc
@@ -25,7 +25,14 @@ PYBIND11_MODULE(post_decode_faster, m) {
   py::class_<Decoder>(m, "Decoder")
       .def(py::init<std::string, std::string, std::string>())
       .def("decode",
-           &Decoder::decode,
+           (std::vector<std::string> (Decoder::*)(std::string)) &
+               Decoder::decode,
+           "Decode one input probability matrix "
+           "and return the transcription")
+      .def("decode",
+           (std::string (Decoder::*)(
+               std::string, std::vector<std::vector<kaldi::BaseFloat>>&)) &
+               Decoder::decode,
            "Decode one input probability matrix "
            "and return the transcription");
 }

From f029f93181764555b211bf2a056b2e6dd75ba547 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Sat, 24 Mar 2018 05:15:53 -0700
Subject: [PATCH 7/9] Clean code and add some comments

---
 fluid/DeepASR/decoder/post_decode_faster.cc | 60 +--------------------
 fluid/DeepASR/decoder/post_decode_faster.h  |  4 ++
 2 files changed, 5 insertions(+), 59 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc
index 957af550e3..6b318000e3 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.cc
+++ b/fluid/DeepASR/decoder/post_decode_faster.cc
@@ -89,71 +89,13 @@ std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
   kaldi::SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier);
   std::vector<std::string> decoding_results;
 
-  kaldi::BaseFloat tot_like = 0.0;
-  kaldi::int64 frame_count = 0;
-  int num_success = 0, num_fail = 0;
-
-  kaldi::Timer timer;
   for (; !posterior_reader.Done(); posterior_reader.Next()) {
     std::string key = posterior_reader.Key();
     kaldi::Matrix<kaldi::BaseFloat> loglikes(posterior_reader.Value());
 
-    if (loglikes.NumRows() == 0) {
-      KALDI_WARN << "Zero-length utterance: " << key;
-      num_fail++;
-      continue;
-    }
-    KALDI_ASSERT(loglikes.NumCols() == logprior.Dim());
-
-    loglikes.ApplyLog();
-    loglikes.AddVecToRows(-1.0, logprior);
-
-    kaldi::DecodableMatrixScaled decodable(loglikes, acoustic_scale);
-    decoder->Decode(&decodable);
-
-    VectorFst<kaldi::LatticeArc> decoded;  // linear FST.
-
-    if ((allow_partial || decoder->ReachedFinal()) &&
-        decoder->GetBestPath(&decoded)) {
-      num_success++;
-      if (!decoder->ReachedFinal())
-        KALDI_WARN << "Decoder did not reach end-state, outputting partial "
-                      "traceback.";
-
-      std::vector<int32> alignment;
-      std::vector<int32> words;
-      kaldi::LatticeWeight weight;
-      frame_count += loglikes.NumRows();
-
-      GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
-
-      if (word_syms != NULL) {
-        std::string res;
-        for (size_t i = 0; i < words.size(); i++) {
-          std::string s = word_syms->Find(words[i]);
-          res += s;
-          if (s == "")
-            KALDI_ERR << "Word-id " << words[i] << " not in symbol table.";
-        }
-        decoding_results.push_back(res);
-      }
-      kaldi::BaseFloat like = -weight.Value1() - weight.Value2();
-      tot_like += like;
-    } else {
-      num_fail++;
-      KALDI_WARN << "Did not successfully decode utterance " << key
-                 << ", len = " << loglikes.NumRows();
-    }
+    decoding_results.push_back(decode(key, loglikes));
   }
 
-  double elapsed = timer.Elapsed();
-  KALDI_LOG << "Time taken [excluding initialization] " << elapsed
-            << "s: real-time factor assuming 100 frames/sec is "
-            << (elapsed * 100.0 / frame_count);
-  KALDI_LOG << "Done " << num_success << " utterances, failed for " << num_fail;
-  KALDI_LOG << "Overall log-likelihood per frame is "
-            << (tot_like / frame_count) << " over " << frame_count
-            << " frames.";
   return decoding_results;
 }
 
diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h
index 6a5830e296..c0b54cdf9f 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.h
+++ b/fluid/DeepASR/decoder/post_decode_faster.h
@@ -32,12 +32,16 @@ class Decoder {
           std::string logprior_rxfilename);
   ~Decoder();
 
+  // Interface to accept the scores read from specifier and return
+  // the batch decoding results
   std::vector<std::string> decode(std::string posterior_rspecifier);
 
+  // Accept the scores of one utterance and return the decoding result
   std::string decode(std::string key,
                      std::vector<std::vector<kaldi::BaseFloat>> &log_probs);
 
 private:
+  // For decoding one utterance
   std::string decode(std::string key,
                      kaldi::Matrix<kaldi::BaseFloat> &loglikes);
 

From 49c9cc80a428ccbbcbd91f186a40bcb9797770a3 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Sun, 25 Mar 2018 19:30:06 -0700
Subject: [PATCH 8/9] Enable decoder in infer_by_ckpt

---
 fluid/DeepASR/decoder/setup.py |  1 +
 fluid/DeepASR/infer_by_ckpt.py | 21 ++++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py
index eaaf746132..1818ecbf00 100644
--- a/fluid/DeepASR/decoder/setup.py
+++ b/fluid/DeepASR/decoder/setup.py
@@ -45,6 +45,7 @@
     'src/hmm', 'src/fstext', 'src/decoder', 'src/lat'
 ]
 LIB_DIRS = [os.path.join(kaldi_root, path) for path in LIB_DIRS]
+LIB_DIRS = [os.path.abspath(path) for path in LIB_DIRS]
 
 ext_modules = [
     Extension(
diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py
index 68dd573647..300f736b3c 100644
--- a/fluid/DeepASR/infer_by_ckpt.py
+++ b/fluid/DeepASR/infer_by_ckpt.py
@@ -13,7 +13,7 @@
 import data_utils.augmentor.trans_add_delta as trans_add_delta
 import data_utils.augmentor.trans_splice as trans_splice
 import data_utils.async_data_reader as reader
-import decoder.decoder as decoder
+from decoder.post_decode_faster import Decoder
 from data_utils.util import lodtensor_to_ndarray
 from model_utils.model import stacked_lstmp_model
 from data_utils.util import split_infer_result
@@ -81,6 +81,21 @@ def parse_args():
         type=str,
         default='./checkpoint',
         help="The checkpoint path to init model. (default: %(default)s)")
+    parser.add_argument(
+        '--vocabulary',
+        type=str,
+        default='./decoder/graph/words.txt',
+        help="The path to vocabulary. (default: %(default)s)")
+    parser.add_argument(
+        '--graphs',
+        type=str,
+        default='./decoder/graph/TLG.fst',
+        help="The path to TLG graphs for decoding. (default: %(default)s)")
+    parser.add_argument(
+        '--log_prior',
+        type=str,
+        default="./decoder/logprior",
+        help="The log prior probs for training data. (default: %(default)s)")
     args = parser.parse_args()
     return args
 
@@ -154,8 +169,8 @@ def infer_from_ckpt(args):
         probs, lod = lodtensor_to_ndarray(results[0])
         infer_batch = split_infer_result(probs, lod)
         for index, sample in enumerate(infer_batch):
-            print("Decoding %d: " % (batch_id * args.batch_size + index),
-                  decoder.decode(sample))
+            key = "utter#%d" % (batch_id * args.batch_size + index)
+            print(key, ": ", decoder.decode(key, sample), "\n")
 
     print(np.mean(infer_costs), np.mean(infer_accs))
 

From 193c7e2de594bd0b31726dae884010b60620be83 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Mon, 26 Mar 2018 19:36:18 -0700
Subject: [PATCH 9/9] Format license and comments

---
 fluid/DeepASR/decoder/post_decode_faster.cc |  5 +++--
 fluid/DeepASR/decoder/post_decode_faster.h  |  7 ++++---
 fluid/DeepASR/decoder/pybind.cc             | 19 ++++++++++---------
 fluid/DeepASR/decoder/setup.py              |  2 +-
 4 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc
index 6b318000e3..d7f1d1ab34 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.cc
+++ b/fluid/DeepASR/decoder/post_decode_faster.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -69,7 +69,8 @@ Decoder::~Decoder() {
 }
 
 std::string Decoder::decode(
-    std::string key, std::vector<std::vector<kaldi::BaseFloat>>& log_probs) {
+    std::string key,
+    const std::vector<std::vector<kaldi::BaseFloat>>& log_probs) {
   size_t num_frames = log_probs.size();
   size_t dim_label = log_probs[0].size();
 
diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h
index c0b54cdf9f..2e31a1c19e 100644
--- a/fluid/DeepASR/decoder/post_decode_faster.h
+++ b/fluid/DeepASR/decoder/post_decode_faster.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -37,8 +37,9 @@ class Decoder {
   std::vector<std::string> decode(std::string posterior_rspecifier);
 
   // Accept the scores of one utterance and return the decoding result
-  std::string decode(std::string key,
-                     std::vector<std::vector<kaldi::BaseFloat>> &log_probs);
+  std::string decode(
+      std::string key,
+      const std::vector<std::vector<kaldi::BaseFloat>> &log_probs);
 
 private:
   // For decoding one utterance
diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc
index 1b91f02b89..56439d1802 100644
--- a/fluid/DeepASR/decoder/pybind.cc
+++ b/fluid/DeepASR/decoder/pybind.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -27,12 +27,13 @@ PYBIND11_MODULE(post_decode_faster, m) {
       .def("decode",
            (std::vector<std::string> (Decoder::*)(std::string)) &
                Decoder::decode,
-           "Decode one input probability matrix "
-           "and return the transcription")
-      .def("decode",
-           (std::string (Decoder::*)(
-               std::string, std::vector<std::vector<kaldi::BaseFloat>>&)) &
-               Decoder::decode,
-           "Decode one input probability matrix "
-           "and return the transcription");
+           "Decode for the probability matrices in specifier "
+           "and return the transcriptions.")
+      .def(
+          "decode",
+          (std::string (Decoder::*)(
+              std::string, const std::vector<std::vector<kaldi::BaseFloat>>&)) &
+              Decoder::decode,
+          "Decode one input probability matrix "
+          "and return the transcription.");
 }
diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py
index 1818ecbf00..a98c0b4cc1 100644
--- a/fluid/DeepASR/decoder/setup.py
+++ b/fluid/DeepASR/decoder/setup.py
@@ -1,4 +1,4 @@
-#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.