From 8268ddcf16edf97326182200564e66e7a99631ed Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Mon, 26 Mar 2018 04:36:16 -0700
Subject: [PATCH 1/2] Solve the problem of fetching prediction

---
 fluid/DeepASR/infer_by_ckpt.py     | 14 ++++++++++++--
 fluid/DeepASR/model_utils/model.py | 11 +++++++----
 fluid/DeepASR/tools/profile.py     | 13 ++++++++++++-
 fluid/DeepASR/train.py             | 13 ++++++++++++-
 4 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py
index 68dd573647..edaa2b5ac4 100644
--- a/fluid/DeepASR/infer_by_ckpt.py
+++ b/fluid/DeepASR/infer_by_ckpt.py
@@ -32,6 +32,11 @@ def parse_args():
         default=1,
         help='The minimum sequence number of a batch data. '
         '(default: %(default)d)')
+    parser.add_argument(
+        '--frame_dim',
+        type=int,
+        default=120 * 11,
+        help='Frame dimension of feature data. (default: %(default)d)')
     parser.add_argument(
         '--stacked_num',
         type=int,
@@ -47,6 +52,11 @@ def parse_args():
         type=int,
         default=1024,
         help='Hidden size of lstmp unit. (default: %(default)d)')
+    parser.add_argument(
+        '--class_num',
+        type=int,
+        default=1749,
+        help='Number of classes in label. (default: %(default)d)')
     parser.add_argument(
         '--learning_rate',
         type=float,
@@ -99,10 +109,11 @@ def infer_from_ckpt(args):
         raise IOError("Invalid checkpoint!")
 
     prediction, avg_cost, accuracy = stacked_lstmp_model(
+        frame_dim=args.frame_dim,
         hidden_dim=args.hidden_dim,
         proj_dim=args.proj_dim,
         stacked_num=args.stacked_num,
-        class_num=1749,
+        class_num=args.class_num,
         parallel=args.parallel)
 
     infer_program = fluid.default_main_program().clone()
@@ -156,7 +167,6 @@ def infer_from_ckpt(args):
         for index, sample in enumerate(infer_batch):
             print("Decoding %d: " % (batch_id * args.batch_size + index),
                   decoder.decode(sample))
-
     print(np.mean(infer_costs), np.mean(infer_accs))
 
 
diff --git a/fluid/DeepASR/model_utils/model.py b/fluid/DeepASR/model_utils/model.py
index 541f869c72..e138783ca2 100644
--- a/fluid/DeepASR/model_utils/model.py
+++ b/fluid/DeepASR/model_utils/model.py
@@ -6,7 +6,8 @@
 import paddle.fluid as fluid
 
 
-def stacked_lstmp_model(hidden_dim,
+def stacked_lstmp_model(frame_dim,
+                        hidden_dim,
                         proj_dim,
                         stacked_num,
                         class_num,
@@ -20,6 +21,7 @@ def stacked_lstmp_model(hidden_dim,
         label data respectively. And in inference, only `feature` is needed.
 
     Args:
+    frame_dim(int): The frame dimension of feature data.
 	hidden_dim(int): The hidden state's dimension of the LSTMP layer.
 	proj_dim(int): The projection size of the LSTMP layer.
 	stacked_num(int): The number of stacked LSTMP layers.
@@ -78,7 +80,7 @@ def _net_conf(feature, label):
 
     # data feeder
     feature = fluid.layers.data(
-        name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
+        name="feature", shape=[-1, frame_dim], dtype="float32", lod_level=1)
     label = fluid.layers.data(
         name="label", shape=[-1, 1], dtype="int64", lod_level=1)
 
@@ -92,11 +94,12 @@ def _net_conf(feature, label):
             feat_ = pd.read_input(feature)
             label_ = pd.read_input(label)
             prediction, avg_cost, acc = _net_conf(feat_, label_)
-            for out in [avg_cost, acc]:
+            for out in [prediction, avg_cost, acc]:
                 pd.write_output(out)
 
         # get mean loss and acc through every devices.
-        avg_cost, acc = pd()
+        prediction, avg_cost, acc = pd()
+        prediction.stop_gradient = True
         avg_cost = fluid.layers.mean(x=avg_cost)
         acc = fluid.layers.mean(x=acc)
     else:
diff --git a/fluid/DeepASR/tools/profile.py b/fluid/DeepASR/tools/profile.py
index 77dff3cb37..cf73294453 100644
--- a/fluid/DeepASR/tools/profile.py
+++ b/fluid/DeepASR/tools/profile.py
@@ -31,6 +31,11 @@ def parse_args():
         default=1,
         help='The minimum sequence number of a batch data. '
         '(default: %(default)d)')
+    parser.add_argument(
+        '--frame_dim',
+        type=int,
+        default=120 * 11,
+        help='Frame dimension of feature data. (default: %(default)d)')
     parser.add_argument(
         '--stacked_num',
         type=int,
@@ -46,6 +51,11 @@ def parse_args():
         type=int,
         default=1024,
         help='Hidden size of lstmp unit. (default: %(default)d)')
+    parser.add_argument(
+        '--class_num',
+        type=int,
+        default=1749,
+        help='Number of classes in label. (default: %(default)d)')
     parser.add_argument(
         '--learning_rate',
         type=float,
@@ -119,10 +129,11 @@ def profile(args):
             "arg 'first_batches_to_skip' must not be smaller than 0.")
 
     _, avg_cost, accuracy = stacked_lstmp_model(
+        frame_dim=args.frame_dim,
         hidden_dim=args.hidden_dim,
         proj_dim=args.proj_dim,
         stacked_num=args.stacked_num,
-        class_num=1749,
+        class_num=args.class_num,
         parallel=args.parallel)
 
     optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
diff --git a/fluid/DeepASR/train.py b/fluid/DeepASR/train.py
index 8297ab7403..446e9e0ab1 100644
--- a/fluid/DeepASR/train.py
+++ b/fluid/DeepASR/train.py
@@ -30,6 +30,11 @@ def parse_args():
         default=1,
         help='The minimum sequence number of a batch data. '
         '(default: %(default)d)')
+    parser.add_argument(
+        '--frame_dim',
+        type=int,
+        default=120 * 11,
+        help='Frame dimension of feature data. (default: %(default)d)')
     parser.add_argument(
         '--stacked_num',
         type=int,
@@ -45,6 +50,11 @@ def parse_args():
         type=int,
         default=1024,
         help='Hidden size of lstmp unit. (default: %(default)d)')
+    parser.add_argument(
+        '--class_num',
+        type=int,
+        default=1749,
+        help='Number of classes in label. (default: %(default)d)')
     parser.add_argument(
         '--pass_num',
         type=int,
@@ -137,10 +147,11 @@ def train(args):
         os.mkdir(args.infer_models)
 
     prediction, avg_cost, accuracy = stacked_lstmp_model(
+        frame_dim=args.frame_dim,
         hidden_dim=args.hidden_dim,
         proj_dim=args.proj_dim,
         stacked_num=args.stacked_num,
-        class_num=1749,
+        class_num=args.class_num,
         parallel=args.parallel)
 
     # program for test

From b6baf323b376eb26b0c098651dcdd79dc404c604 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Mon, 26 Mar 2018 18:33:54 -0700
Subject: [PATCH 2/2] Format the indent of args list

---
 fluid/DeepASR/model_utils/model.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fluid/DeepASR/model_utils/model.py b/fluid/DeepASR/model_utils/model.py
index e138783ca2..8fb7596e12 100644
--- a/fluid/DeepASR/model_utils/model.py
+++ b/fluid/DeepASR/model_utils/model.py
@@ -21,13 +21,13 @@ def stacked_lstmp_model(frame_dim,
         label data respectively. And in inference, only `feature` is needed.
 
     Args:
-    frame_dim(int): The frame dimension of feature data.
-	hidden_dim(int): The hidden state's dimension of the LSTMP layer.
-	proj_dim(int): The projection size of the LSTMP layer.
-	stacked_num(int): The number of stacked LSTMP layers.
-	parallel(bool): Run in parallel or not, default `False`.
-	is_train(bool): Run in training phase or not, default `True`.
-	class_dim(int): The number of output classes.
+        frame_dim(int): The frame dimension of feature data.
+        hidden_dim(int): The hidden state's dimension of the LSTMP layer.
+        proj_dim(int): The projection size of the LSTMP layer.
+        stacked_num(int): The number of stacked LSTMP layers.
+        parallel(bool): Run in parallel or not, default `False`.
+        is_train(bool): Run in training phase or not, default `True`.
+        class_num(int): The number of output classes.
     """
 
     # network configuration