From 67a05a2536d678d970529263d9fc9a710e3e2a7e Mon Sep 17 00:00:00 2001 From: phlrain Date: Wed, 4 Nov 2020 12:56:44 +0000 Subject: [PATCH 1/3] change speed to ips; and add reader cost; test=develop --- dygraph/transformer/train.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dygraph/transformer/train.py b/dygraph/transformer/train.py index 7e0c8942b1..6b2e099d16 100644 --- a/dygraph/transformer/train.py +++ b/dygraph/transformer/train.py @@ -156,10 +156,13 @@ def do_train(args): batch_id = 0 batch_start = time.time() interval_word_num = 0.0 + total_reader_costs = 0.0 + batch_reader_start = time.time() for input_data in train_loader(): if args.max_iter and step_idx == args.max_iter: #NOTE: used for benchmark return batch_reader_end = time.time() + total_reader_costs += batch_reader_end - batch_reader_start (src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, lbl_word, @@ -201,13 +204,16 @@ def do_train(args): logger.info( "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, " "normalized loss: %f, ppl: %f, avg_speed: %.2f step/s, " - "words speed: %0.2f words/s" % + "reader cost: %0.2f sec, ips: %0.2f words/s" % (step_idx, pass_id, batch_id, total_avg_cost, total_avg_cost - loss_normalizer, np.exp([min(total_avg_cost, 100)]), - train_avg_batch_cost, word_speed)) - batch_start = time.time() + train_avg_batch_cost, + total_reader_costs / args.print_step, word_speed)) + interval_word_num = 0.0 + total_reader_costs = 0.0 + batch_start = time.time() if step_idx % args.save_step == 0 and step_idx != 0: # validation @@ -250,6 +256,7 @@ def do_train(args): batch_id += 1 step_idx += 1 + batch_reader_start = time.time() train_epoch_cost = time.time() - epoch_start ce_time.append(train_epoch_cost) From 8e40e77d521948e961c50da8afeefb8924fc6e3e Mon Sep 17 00:00:00 2001 From: phlrain Date: Wed, 11 Nov 2020 13:32:55 +0000 Subject: [PATCH 2/3] s to sec; test=develop --- dygraph/transformer/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dygraph/transformer/train.py b/dygraph/transformer/train.py index 6b2e099d16..a7d62bff37 100644 --- a/dygraph/transformer/train.py +++ b/dygraph/transformer/train.py @@ -204,7 +204,7 @@ def do_train(args): logger.info( "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, " "normalized loss: %f, ppl: %f, avg_speed: %.2f step/s, " - "reader cost: %0.2f sec, ips: %0.2f words/s" % + "reader cost: %0.2f sec, ips: %0.2f words/sec" % (step_idx, pass_id, batch_id, total_avg_cost, total_avg_cost - loss_normalizer, np.exp([min(total_avg_cost, 100)]), From 2be6a7cb55032d156fd353487c50f586b5afe88b Mon Sep 17 00:00:00 2001 From: phlrain Date: Wed, 11 Nov 2020 13:39:18 +0000 Subject: [PATCH 3/3] change seq2seq s to sec; test=develop --- dygraph/seq2seq/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dygraph/seq2seq/train.py b/dygraph/seq2seq/train.py index 18fdee1da9..575ee4fb30 100644 --- a/dygraph/seq2seq/train.py +++ b/dygraph/seq2seq/train.py @@ -180,7 +180,7 @@ def eval(data, epoch_id=0): batch_times.append(train_batch_cost) if batch_id > 0 and batch_id % 100 == 0: print( - "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, batch_cost: %.5f s, reader_cost: %.5f s, ips: %.5f words/s" + "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, batch_cost: %.5f s, reader_cost: %.5f s, ips: %.5f words/sec" % (epoch_id, batch_id, np.exp(total_loss.numpy() / word_count), train_batch_cost, total_reader_cost / 100,