From 8268ddcf16edf97326182200564e66e7a99631ed Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 26 Mar 2018 04:36:16 -0700 Subject: [PATCH 1/2] Solve the problem of fetching prediction --- fluid/DeepASR/infer_by_ckpt.py | 14 ++++++++++++-- fluid/DeepASR/model_utils/model.py | 11 +++++++---- fluid/DeepASR/tools/profile.py | 13 ++++++++++++- fluid/DeepASR/train.py | 13 ++++++++++++- 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/fluid/DeepASR/infer_by_ckpt.py b/fluid/DeepASR/infer_by_ckpt.py index 68dd573647..edaa2b5ac4 100644 --- a/fluid/DeepASR/infer_by_ckpt.py +++ b/fluid/DeepASR/infer_by_ckpt.py @@ -32,6 +32,11 @@ def parse_args(): default=1, help='The minimum sequence number of a batch data. ' '(default: %(default)d)') + parser.add_argument( + '--frame_dim', + type=int, + default=120 * 11, + help='Frame dimension of feature data. (default: %(default)d)') parser.add_argument( '--stacked_num', type=int, @@ -47,6 +52,11 @@ def parse_args(): type=int, default=1024, help='Hidden size of lstmp unit. (default: %(default)d)') + parser.add_argument( + '--class_num', + type=int, + default=1749, + help='Number of classes in label. (default: %(default)d)') parser.add_argument( '--learning_rate', type=float, @@ -99,10 +109,11 @@ def infer_from_ckpt(args): raise IOError("Invalid checkpoint!") prediction, avg_cost, accuracy = stacked_lstmp_model( + frame_dim=args.frame_dim, hidden_dim=args.hidden_dim, proj_dim=args.proj_dim, stacked_num=args.stacked_num, - class_num=1749, + class_num=args.class_num, parallel=args.parallel) infer_program = fluid.default_main_program().clone() @@ -156,7 +167,6 @@ def infer_from_ckpt(args): for index, sample in enumerate(infer_batch): print("Decoding %d: " % (batch_id * args.batch_size + index), decoder.decode(sample)) - print(np.mean(infer_costs), np.mean(infer_accs)) diff --git a/fluid/DeepASR/model_utils/model.py b/fluid/DeepASR/model_utils/model.py index 541f869c72..e138783ca2 100644 --- a/fluid/DeepASR/model_utils/model.py +++ b/fluid/DeepASR/model_utils/model.py @@ -6,7 +6,8 @@ import paddle.fluid as fluid -def stacked_lstmp_model(hidden_dim, +def stacked_lstmp_model(frame_dim, + hidden_dim, proj_dim, stacked_num, class_num, @@ -20,6 +21,7 @@ def stacked_lstmp_model(hidden_dim, label data respectively. And in inference, only `feature` is needed. Args: + frame_dim(int): The frame dimension of feature data. hidden_dim(int): The hidden state's dimension of the LSTMP layer. proj_dim(int): The projection size of the LSTMP layer. stacked_num(int): The number of stacked LSTMP layers. @@ -78,7 +80,7 @@ def _net_conf(feature, label): # data feeder feature = fluid.layers.data( - name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1) + name="feature", shape=[-1, frame_dim], dtype="float32", lod_level=1) label = fluid.layers.data( name="label", shape=[-1, 1], dtype="int64", lod_level=1) @@ -92,11 +94,12 @@ def _net_conf(feature, label): feat_ = pd.read_input(feature) label_ = pd.read_input(label) prediction, avg_cost, acc = _net_conf(feat_, label_) - for out in [avg_cost, acc]: + for out in [prediction, avg_cost, acc]: pd.write_output(out) # get mean loss and acc through every devices. - avg_cost, acc = pd() + prediction, avg_cost, acc = pd() + prediction.stop_gradient = True avg_cost = fluid.layers.mean(x=avg_cost) acc = fluid.layers.mean(x=acc) else: diff --git a/fluid/DeepASR/tools/profile.py b/fluid/DeepASR/tools/profile.py index 77dff3cb37..cf73294453 100644 --- a/fluid/DeepASR/tools/profile.py +++ b/fluid/DeepASR/tools/profile.py @@ -31,6 +31,11 @@ def parse_args(): default=1, help='The minimum sequence number of a batch data. ' '(default: %(default)d)') + parser.add_argument( + '--frame_dim', + type=int, + default=120 * 11, + help='Frame dimension of feature data. (default: %(default)d)') parser.add_argument( '--stacked_num', type=int, @@ -46,6 +51,11 @@ def parse_args(): type=int, default=1024, help='Hidden size of lstmp unit. (default: %(default)d)') + parser.add_argument( + '--class_num', + type=int, + default=1749, + help='Number of classes in label. (default: %(default)d)') parser.add_argument( '--learning_rate', type=float, @@ -119,10 +129,11 @@ def profile(args): "arg 'first_batches_to_skip' must not be smaller than 0.") _, avg_cost, accuracy = stacked_lstmp_model( + frame_dim=args.frame_dim, hidden_dim=args.hidden_dim, proj_dim=args.proj_dim, stacked_num=args.stacked_num, - class_num=1749, + class_num=args.class_num, parallel=args.parallel) optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) diff --git a/fluid/DeepASR/train.py b/fluid/DeepASR/train.py index 8297ab7403..446e9e0ab1 100644 --- a/fluid/DeepASR/train.py +++ b/fluid/DeepASR/train.py @@ -30,6 +30,11 @@ def parse_args(): default=1, help='The minimum sequence number of a batch data. ' '(default: %(default)d)') + parser.add_argument( + '--frame_dim', + type=int, + default=120 * 11, + help='Frame dimension of feature data. (default: %(default)d)') parser.add_argument( '--stacked_num', type=int, @@ -45,6 +50,11 @@ def parse_args(): type=int, default=1024, help='Hidden size of lstmp unit. (default: %(default)d)') + parser.add_argument( + '--class_num', + type=int, + default=1749, + help='Number of classes in label. (default: %(default)d)') parser.add_argument( '--pass_num', type=int, @@ -137,10 +147,11 @@ def train(args): os.mkdir(args.infer_models) prediction, avg_cost, accuracy = stacked_lstmp_model( + frame_dim=args.frame_dim, hidden_dim=args.hidden_dim, proj_dim=args.proj_dim, stacked_num=args.stacked_num, - class_num=1749, + class_num=args.class_num, parallel=args.parallel) # program for test From b6baf323b376eb26b0c098651dcdd79dc404c604 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 26 Mar 2018 18:33:54 -0700 Subject: [PATCH 2/2] Format the indent of args list --- fluid/DeepASR/model_utils/model.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fluid/DeepASR/model_utils/model.py b/fluid/DeepASR/model_utils/model.py index e138783ca2..8fb7596e12 100644 --- a/fluid/DeepASR/model_utils/model.py +++ b/fluid/DeepASR/model_utils/model.py @@ -21,13 +21,13 @@ def stacked_lstmp_model(frame_dim, label data respectively. And in inference, only `feature` is needed. Args: - frame_dim(int): The frame dimension of feature data. - hidden_dim(int): The hidden state's dimension of the LSTMP layer. - proj_dim(int): The projection size of the LSTMP layer. - stacked_num(int): The number of stacked LSTMP layers. - parallel(bool): Run in parallel or not, default `False`. - is_train(bool): Run in training phase or not, default `True`. - class_dim(int): The number of output classes. + frame_dim(int): The frame dimension of feature data. + hidden_dim(int): The hidden state's dimension of the LSTMP layer. + proj_dim(int): The projection size of the LSTMP layer. + stacked_num(int): The number of stacked LSTMP layers. + parallel(bool): Run in parallel or not, default `False`. + is_train(bool): Run in training phase or not, default `True`. + class_dim(int): The number of output classes. """ # network configuration