In [1]:
from mnmt.encoder import BasicEncoder
from mnmt.decoder import BasicFeedForwardDecoder
from mnmt.decoder import GreedyDecoder
from mnmt.attention import AdditiveAttention
from mnmt.decoder import BridgeLayer
from mnmt.model import Seq2Seq
from mnmt.datasets import *
from mnmt.inputter import ArgsFeeder
from mnmt.inputter import ModuleArgsFeeder
from mnmt.trainer.utils import *
from mnmt.trainer import Trainer
import sys

Loading DICT dataset...
Loading the Dataset into the container...
Field names: ['en', 'ch', 'pinyin_str', 'pinyin_char']
Data sizes: [(Train, 46620), (Valid, 5828), (Test, 5828)]
Loading NEWS dataset...
Loading the Dataset into the container...
Field names: ['en', 'ch', 'pinyin_str', 'pinyin_char']
Data sizes: [(Train, 81252), (Valid, 513), (Test, 1000)]


In [2]:
def set_up_args(data_container, exp_num, src_lang='en', trg_lang='pinyin_str', trg_vocab_lang='ch'):
    """Build vocabularies and assemble the ``ArgsFeeder`` for one experiment.

    Vocabularies are built on ``data_container`` with a minimum frequency of 1
    for the 'en', 'ch', 'pinyin_str' and 'pinyin_char' fields. The encoder is
    sized from the vocabulary of ``src_lang`` and the decoder from the
    vocabulary of ``trg_vocab_lang``; the padding indices are read from the
    same two fields.

    Args:
        data_container: object exposing ``fields`` as an iterable of
            ``(name, field)`` pairs; it is also handed to ``build_vocabs`` and
            to the returned ``ArgsFeeder``.
        exp_num: experiment number; forwarded to ``ArgsFeeder`` and used to
            form the ``experiments/exp{exp_num}/`` output paths.
        src_lang: name of the source field (default ``'en'``).
        trg_lang: target field name forwarded to ``ArgsFeeder``
            (default ``'pinyin_str'``).
        trg_vocab_lang: name of the field whose vocabulary sizes the decoder
            and supplies the target padding index (default ``'ch'``).

    Returns:
        The configured ``ArgsFeeder``.

    Raises:
        KeyError: if ``src_lang`` or ``trg_vocab_lang`` is not a field of
            ``data_container``.
    """
    # Only these four field names get an explicit minimum frequency; a custom
    # src_lang / trg_vocab_lang outside this set relies on build_vocabs' own
    # handling of unlisted fields.
    build_vocabs(data_container, dict_min_freqs={'en': 1, 'ch': 1, 'pinyin_str': 1, 'pinyin_char': 1})

    fields_by_name = dict(data_container.fields)
    missing = [name for name in (src_lang, trg_vocab_lang) if name not in fields_by_name]
    if missing:
        # Fail here with a clear message instead of an UnboundLocalError further down.
        raise KeyError(f"data_container has no field(s) {missing}; available fields: {sorted(fields_by_name)}")

    src_field = fields_by_name[src_lang]
    input_dim = len(src_field.vocab)
    src_pad_idx = src_field.vocab.stoi[src_field.pad_token]

    # NOTE(review): the decoder vocabulary defaults to the 'ch' field although
    # trg_lang defaults to a pinyin field (and callers may overwrite
    # ArgsFeeder.trg_lang afterwards). This default reproduces the original
    # behaviour; confirm whether trg_vocab_lang should simply follow trg_lang.
    trg_field = fields_by_name[trg_vocab_lang]
    output_dim = len(trg_field.vocab)
    # Same lookup style as the source side (was ``field.vocab[...]``).
    trg_pad_idx = trg_field.vocab.stoi[trg_field.pad_token]

    enc_args_feeder = ModuleArgsFeeder(input_dim=input_dim, embedding_dim=256, hidden_dim=512,
                                       embedding_dropout=0.1, rnn_type='LSTM',
                                       num_layers=2, rnn_dropout=0.2)
    dec_args_feeder = ModuleArgsFeeder(input_dim=output_dim, embedding_dim=128, hidden_dim=256,
                                       embedding_dropout=0.1, rnn_type='LSTM',
                                       num_layers=2, rnn_dropout=0.2)
    return ArgsFeeder(enc_args_feeder, [dec_args_feeder],
                      batch_size=64, src_pad_idx=src_pad_idx, trg_pad_idx=trg_pad_idx,
                      optim_choice='Adam', learning_rate=0.003, decay_patience=0,
                      lr_decay_factor=0.9, valid_criterion='ACC', early_stopping_patience=1000,
                      total_epochs=100, report_interval=50, exp_num=exp_num,
                      multi_task_ratio=0, data_container=data_container,
                      src_lang=src_lang, trg_lang=trg_lang, auxiliary_name=None, quiet_translate=True,
                      valid_out_path=f"experiments/exp{exp_num}/valid.out",
                      test_out_path=f"experiments/exp{exp_num}/test.out")


def test_seq2seq(args_feeder):
    """Assemble the attention-based ``Seq2Seq`` model described by ``args_feeder``.

    Components are created in this order: encoder, additive attention,
    feed-forward decoder (decoder index 0), bridge layer (2 states), greedy
    decoder, and finally the ``Seq2Seq`` wrapper with a teacher forcing ratio
    of 0.8. The finished model is moved to ``args_feeder.device``.

    Args:
        args_feeder: configuration object providing ``encoder_args_feeder``,
            ``decoder_args_feeders`` and ``device``.

    Returns:
        The ``Seq2Seq`` model placed on ``args_feeder.device``.
    """
    dec_hidden_dim = args_feeder.decoder_args_feeders[0].hidden_dim

    # Keep the construction order stable: with a fixed seed, reordering module
    # creation would change the parameter initialisation.
    encoder = BasicEncoder(args_feeder)
    enc_hidden_dim = args_feeder.encoder_args_feeder.hidden_dim

    attention = AdditiveAttention(encoder_hidden_dim=enc_hidden_dim,
                                  decoder_hidden_dim=dec_hidden_dim)
    step_decoder = BasicFeedForwardDecoder(args_feeder, attention, decoder_index=0)
    bridge = BridgeLayer(encoder_hidden_dim=enc_hidden_dim,
                         decoder_hidden_dim=dec_hidden_dim,
                         num_of_states=2)

    target_device = args_feeder.device
    greedy_decoder = GreedyDecoder(step_decoder, bridge, device=target_device)
    seq2seq = Seq2Seq(args_feeder, encoder, greedy_decoder, teacher_forcing_ratio=0.8)
    return seq2seq.to(target_device)


if __name__ == '__main__':
    # Called once for the whole sequence, so experiments 501-503 start from
    # whatever state the preceding runs left behind (presumably RNG state;
    # confirm against set_reproducibility).
    set_reproducibility(seed=1234)

    try:
        dict_dataset = DICT['data_container']
        news_dataset = NEWS['data_container']

        # One row per experiment: (data container, target field, experiment number).
        experiment_plan = (
            (dict_dataset, 'pinyin_str', 500),   # DICT pinyin-str
            (dict_dataset, 'pinyin_char', 501),  # DICT pinyin-char
            (news_dataset, 'pinyin_str', 502),   # NEWS pinyin-str
            (news_dataset, 'pinyin_char', 503),  # NEWS pinyin-char
        )

        for experiment_data, experiment_trg_lang, experiment_num in experiment_plan:
            # The names seq2seq_args_feeder / test_model / test_trainer stay at
            # module level so the objects of the last run remain inspectable.
            seq2seq_args_feeder = set_up_args(experiment_data, exp_num=experiment_num)
            seq2seq_args_feeder.trg_lang = experiment_trg_lang
            test_model = test_seq2seq(seq2seq_args_feeder)
            test_trainer = Trainer(seq2seq_args_feeder, test_model)
            test_trainer.run(burning_epoch=0)
            test_trainer.best_model_output()
    except KeyboardInterrupt:
        # Allow a manual interrupt to stop the remaining experiments cleanly.
        print("Exiting loop")

The current device for PyTorch is cuda
Seq2Seq(
  (encoder): BasicEncoder(
    (embedding): Sequential(
      (0): Embedding(30, 256)
      (1): Dropout(p=0.1, inplace=False)
    )
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.2, bidirectional=True)
  )
  (decoder): GreedyDecoder(
    (feed_forward_decoder): BasicFeedForwardDecoder(
      (attention): AdditiveAttention(
        (additive_mapping): Linear(in_features=1280, out_features=256, bias=True)
        (v): Linear(in_features=256, out_features=1, bias=False)
      )
      (embedding): Sequential(
        (0): Embedding(437, 128)
        (1): Dropout(p=0.1, inplace=False)
      )
      (rnn): LSTM(1152, 256, num_layers=2, dropout=0.2)
      (prediction): Sequential(
        (0): Linear(in_features=1408, out_features=437, bias=True)
        (1): LogSoftmax()
      )
    )
    (bridge_layer): BridgeLayer(
      (bridge_layer): ModuleList(
        (0): Linear(in_features=1024, out_features=256, bias=True)
        (1): Linear(in_f

[Epoch: 2][#examples: 34688/46620][#steps: 2000]
	Train Loss: 0.299 | Train PPL:   1.348 | lr: 2.700e-03
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 3993/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4031/5828

---------------------------------------
[Epoch: 2][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 1000/1000
	 Val. Loss: 0.453 | Val. Acc: 0.685 | Val. PPL:   1.574
	 BEST. Val. Loss: 0.453 | BEST. Val. Acc: 0.685 | Val. Loss: 0.453 | BEST. Val. Epoch: 2 | BEST. Val. Step: 2000
---------------------------------------

[Epoch: 2][#examples: 37888/46620][#steps: 2050]
	Train Loss: 0.300 | Train PPL:   1.349 | lr: 2.700e-03
[Epoch: 2][#examples: 41088/46620][#steps: 2100]
	Train Loss: 0.300 | Train PPL:   1.350 | lr: 2.700e-03
[Epoch: 2][#examples: 44288/46620][#steps: 2150]
	Train Loss: 0.300 | Train PPL:   1.350 | lr: 2.700e-03
[VAL]: The 

[VAL]: The number of correct predictions (aux-task (single)): 4121/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4129/5828

---------------------------------------
[Epoch: 5][Validatiing...]
	 Early Stopping Patience: 998/1000
	 Val. Loss: 0.473 | Val. Acc: 0.707 | Val. PPL:   1.605
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.711 | Val. Loss: 0.430 | BEST. Val. Epoch: 4 | BEST. Val. Step: 3645
---------------------------------------

[Epoch: 5][#examples: 25920/46620][#steps: 4050]
	Train Loss: 0.146 | Train PPL:   1.157 | lr: 1.968e-03
[Epoch: 5][#examples: 29120/46620][#steps: 4100]
	Train Loss: 0.147 | Train PPL:   1.159 | lr: 1.968e-03
[Epoch: 5][#examples: 32320/46620][#steps: 4150]
	Train Loss: 0.147 | Train PPL:   1.158 | lr: 1.968e-03
[Epoch: 5][#examples: 35520/46620][#steps: 4200]
	Train Loss: 0.148 | Train PPL:   1.160 | lr: 1.968e-03
[Epoch: 5][#examples: 38720/46620][#steps: 4250]
	Train Loss: 0.150 | Train PPL:   1.162 | lr: 1.968e-03

[Epoch: 8][#examples: 13952/46620][#steps: 6050]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 1.291e-03
[Epoch: 8][#examples: 17152/46620][#steps: 6100]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 1.291e-03
[Epoch: 8][#examples: 20352/46620][#steps: 6150]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 1.291e-03
[Epoch: 8][#examples: 23552/46620][#steps: 6200]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.291e-03
[Epoch: 8][#examples: 26752/46620][#steps: 6250]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 1.291e-03
[Epoch: 8][#examples: 29952/46620][#steps: 6300]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.291e-03
[Epoch: 8][#examples: 33152/46620][#steps: 6350]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.291e-03
[Epoch: 8][#examples: 36352/46620][#steps: 6400]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.291e-03
[Epoch: 8][#examples: 39552/46620][#steps: 6450]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 1.291e-03
[Epoch: 8][#examples: 42752/46620][#steps: 6500]
	Train

[Epoch: 11][#examples: 8384/46620][#steps: 8150]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.414e-04
[Epoch: 11][#examples: 11584/46620][#steps: 8200]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.414e-04
[Epoch: 11][#examples: 14784/46620][#steps: 8250]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.414e-04
[Epoch: 11][#examples: 17984/46620][#steps: 8300]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.414e-04
[Epoch: 11][#examples: 21184/46620][#steps: 8350]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.414e-04
[Epoch: 11][#examples: 24384/46620][#steps: 8400]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.414e-04
[Epoch: 11][#examples: 27584/46620][#steps: 8450]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.414e-04
[Epoch: 11][#examples: 30784/46620][#steps: 8500]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.414e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4256/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task

[Epoch: 14][#examples: 2816/46620][#steps: 10250]
	Train Loss: 0.008 | Train PPL:   1.008 | lr: 6.863e-04
[Epoch: 14][#examples: 6016/46620][#steps: 10300]
	Train Loss: 0.008 | Train PPL:   1.008 | lr: 6.863e-04
[Epoch: 14][#examples: 9216/46620][#steps: 10350]
	Train Loss: 0.009 | Train PPL:   1.009 | lr: 6.863e-04
[Epoch: 14][#examples: 12416/46620][#steps: 10400]
	Train Loss: 0.008 | Train PPL:   1.008 | lr: 6.863e-04
[Epoch: 14][#examples: 15616/46620][#steps: 10450]
	Train Loss: 0.009 | Train PPL:   1.009 | lr: 6.863e-04
[Epoch: 14][#examples: 18816/46620][#steps: 10500]
	Train Loss: 0.009 | Train PPL:   1.009 | lr: 6.863e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4315/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4286/5828

---------------------------------------
[Epoch: 14][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 976/1000
	 Val. Loss: 0.600 | Val. Acc: 0.740 | Val.

[Epoch: 17][#examples: 448/46620][#steps: 12400]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 4.053e-04
[Epoch: 17][#examples: 3648/46620][#steps: 12450]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 4.053e-04
[Epoch: 17][#examples: 6848/46620][#steps: 12500]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 4.053e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4315/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4308/5828

---------------------------------------
[Epoch: 17][Validatiing...]
	 Early Stopping Patience: 969/1000
	 Val. Loss: 0.623 | Val. Acc: 0.740 | Val. PPL:   1.864
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.741 | Val. Loss: 0.606 | BEST. Val. Epoch: 15 | BEST. Val. Step: 11500
---------------------------------------

[Epoch: 17][#examples: 10048/46620][#steps: 12550]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 3.647e-04
[Epoch: 17][#examples: 13248/46620][#steps: 12600]
	Train Loss: 0.004 | Train PPL:

[VAL]: The number of correct predictions (aux-task (single)): 4315/5828

---------------------------------------
[Epoch: 19][Validatiing...]
	 Early Stopping Patience: 962/1000
	 Val. Loss: 0.658 | Val. Acc: 0.740 | Val. PPL:   1.931
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.741 | Val. Loss: 0.606 | BEST. Val. Epoch: 15 | BEST. Val. Step: 11500
---------------------------------------

Epoch: 20 | Time: 1m 12s
	Train Loss: 0.002 | Train PPL:   1.002
	 Val. Loss: 0.658 | Val. Acc: 0.740 | Val. PPL:   1.931
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 20][#examples: 1280/46620][#steps: 14600]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.744e-04
[Epoch: 20][#examples: 4480/46620][#steps: 14650]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.744e-04
[Epoch: 20][#examples: 7680/46620][#steps: 14700]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.744e-04
[Epoch: 20][#examples: 10880/46620][#steps: 14750]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.744e-04
[Epoch: 20][#exa

[Epoch: 22][#examples: 42368/46620][#steps: 16700]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.145e-04
[Epoch: 22][#examples: 45568/46620][#steps: 16750]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.145e-04
[VAL]: The number of correct predictions (aux-task (single)): 4314/5828

---------------------------------------
[Epoch: 22][Validatiing...]
	 Early Stopping Patience: 955/1000
	 Val. Loss: 0.679 | Val. Acc: 0.740 | Val. PPL:   1.972
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

Epoch: 23 | Time: 1m 4s
	Train Loss: 0.002 | Train PPL:   1.002
	 Val. Loss: 0.679 | Val. Acc: 0.740 | Val. PPL:   1.972
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 23][#examples: 2112/46620][#steps: 16800]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.030e-04
[Epoch: 23][#examples: 5312/46620][#steps: 16850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.030e-04
[Epoch: 23][#exa

[Epoch: 25][#examples: 40000/46620][#steps: 18850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.474e-05
[Epoch: 25][#examples: 43200/46620][#steps: 18900]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.474e-05
[Epoch: 25][#examples: 46400/46620][#steps: 18950]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.474e-05
[VAL]: The number of correct predictions (aux-task (single)): 4310/5828

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 948/1000
	 Val. Loss: 0.697 | Val. Acc: 0.740 | Val. PPL:   2.008
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

Epoch: 26 | Time: 1m 5s
	Train Loss: 0.001 | Train PPL:   1.001
	 Val. Loss: 0.697 | Val. Acc: 0.740 | Val. PPL:   2.008
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 26][#examples: 2944/46620][#steps: 19000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.927e-05
-----Val------


[Epoch: 28][#examples: 37632/46620][#steps: 21000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.618e-05
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4319/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4301/5828

---------------------------------------
[Epoch: 28][Validatiing...]
	 Early Stopping Patience: 941/1000
	 Val. Loss: 0.703 | Val. Acc: 0.741 | Val. PPL:   2.021
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 28][#examples: 40832/46620][#steps: 21050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.357e-05
[Epoch: 28][#examples: 44032/46620][#steps: 21100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.357e-05
[VAL]: The number of correct predictions (aux-task (single)): 4319/5828

---------------------------------------
[Epoch: 28][Validatiing...]
	 Early Stopping Patience: 940/1000
	 Val. L

[Epoch: 31][#examples: 28864/46620][#steps: 23050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.127e-05
[Epoch: 31][#examples: 32064/46620][#steps: 23100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.127e-05
[Epoch: 31][#examples: 35264/46620][#steps: 23150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.127e-05
[Epoch: 31][#examples: 38464/46620][#steps: 23200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.127e-05
[Epoch: 31][#examples: 41664/46620][#steps: 23250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.127e-05
[Epoch: 31][#examples: 44864/46620][#steps: 23300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.127e-05
[VAL]: The number of correct predictions (aux-task (single)): 4310/5828

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 933/1000
	 Val. Loss: 0.710 | Val. Acc: 0.740 | Val. PPL:   2.034
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
--------

[Epoch: 34][#examples: 26496/46620][#steps: 25200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 29696/46620][#steps: 25250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 32896/46620][#steps: 25300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 36096/46620][#steps: 25350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 39296/46620][#steps: 25400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 42496/46620][#steps: 25450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 45696/46620][#steps: 25500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4319/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4300/5828

---------------------------------------
[Epoch: 34][Validatiing...]
	 Early

[Epoch: 37][#examples: 24128/46620][#steps: 27350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 27328/46620][#steps: 27400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 30528/46620][#steps: 27450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 33728/46620][#steps: 27500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4301/5828

---------------------------------------
[Epoch: 37][Validatiing...]
	 Early Stopping Patience: 919/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 37][#examples: 36928/46620][#steps: 27550]
	Train Loss: 0.001 | Train 

[Epoch: 40][#examples: 21760/46620][#steps: 29500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.233e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4317/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4302/5828

---------------------------------------
[Epoch: 40][Validatiing...]
	 Early Stopping Patience: 912/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 40][#examples: 24960/46620][#steps: 29550]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 40][#examples: 28160/46620][#steps: 29600]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 40][#examples: 31360/46620][#steps: 29650]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 40][#examples: 34560/46620][#steps: 29700]
	Train Loss: 0.001 | Train 

[Epoch: 43][#examples: 12992/46620][#steps: 31550]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 16192/46620][#steps: 31600]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 19392/46620][#steps: 31650]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 22592/46620][#steps: 31700]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 25792/46620][#steps: 31750]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 28992/46620][#steps: 31800]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 32192/46620][#steps: 31850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 35392/46620][#steps: 31900]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 38592/46620][#steps: 31950]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 41792/46620][#

[Epoch: 46][#examples: 10624/46620][#steps: 33700]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.539e-07
[Epoch: 46][#examples: 13824/46620][#steps: 33750]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.539e-07
[Epoch: 46][#examples: 17024/46620][#steps: 33800]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.539e-07
[Epoch: 46][#examples: 20224/46620][#steps: 33850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.539e-07
[Epoch: 46][#examples: 23424/46620][#steps: 33900]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.539e-07
[Epoch: 46][#examples: 26624/46620][#steps: 33950]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.539e-07
[Epoch: 46][#examples: 29824/46620][#steps: 34000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.539e-07
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 46][Validatiing...]
	 Early

[Epoch: 49][#examples: 8256/46620][#steps: 35850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.215e-07
[Epoch: 49][#examples: 11456/46620][#steps: 35900]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.215e-07
[Epoch: 49][#examples: 14656/46620][#steps: 35950]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.215e-07
[Epoch: 49][#examples: 17856/46620][#steps: 36000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.215e-07
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 49][Validatiing...]
	 Early Stopping Patience: 890/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 49][#examples: 21056/46620][#steps: 36050]
	Train Loss: 0.001 | Train P

[Epoch: 52][#examples: 5888/46620][#steps: 38000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 52][Validatiing...]
	 Early Stopping Patience: 883/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 52][#examples: 9088/46620][#steps: 38050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 52][#examples: 12288/46620][#steps: 38100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 52][#examples: 15488/46620][#steps: 38150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 52][#examples: 18688/46620][#steps: 38200]
	Train Loss: 0.001 | Train PP

[Epoch: 55][#examples: 320/46620][#steps: 40100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 55][#examples: 3520/46620][#steps: 40150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 55][#examples: 6720/46620][#steps: 40200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 55][#examples: 9920/46620][#steps: 40250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 55][#examples: 13120/46620][#steps: 40300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 55][#examples: 16320/46620][#steps: 40350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 55][#examples: 19520/46620][#steps: 40400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 55][#examples: 22720/46620][#steps: 40450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 55][#examples: 25920/46620][#steps: 40500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correc

[Epoch: 58][#examples: 1152/46620][#steps: 42300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 58][#examples: 4352/46620][#steps: 42350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 58][#examples: 7552/46620][#steps: 42400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 58][#examples: 10752/46620][#steps: 42450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 58][#examples: 13952/46620][#steps: 42500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4302/5828

---------------------------------------
[Epoch: 58][Validatiing...]
	 Early Stopping Patience: 868/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------

[Epoch: 61][#examples: 1984/46620][#steps: 44500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4302/5828

---------------------------------------
[Epoch: 61][Validatiing...]
	 Early Stopping Patience: 861/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 61][#examples: 5184/46620][#steps: 44550]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 61][#examples: 8384/46620][#steps: 44600]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 61][#examples: 11584/46620][#steps: 44650]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 61][#examples: 14784/46620][#steps: 44700]
	Train Loss: 0.001 | Train PPL

[VAL]: The number of correct predictions (aux-task (single)): 4316/5828

---------------------------------------
[Epoch: 63][Validatiing...]
	 Early Stopping Patience: 854/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

Epoch: 64 | Time: 1m 11s
	Train Loss: 0.001 | Train PPL:   1.001
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 64][#examples: 2816/46620][#steps: 46700]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 64][#examples: 6016/46620][#steps: 46750]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 64][#examples: 9216/46620][#steps: 46800]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 64][#examples: 12416/46620][#steps: 46850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 64][#exa

[VAL]: The number of correct predictions (aux-task (single)): 4316/5828

---------------------------------------
[Epoch: 66][Validatiing...]
	 Early Stopping Patience: 847/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

Epoch: 67 | Time: 1m 6s
	Train Loss: 0.001 | Train PPL:   1.001
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 67][#examples: 448/46620][#steps: 48850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 67][#examples: 3648/46620][#steps: 48900]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 67][#examples: 6848/46620][#steps: 48950]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 67][#examples: 10048/46620][#steps: 49000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VA

[Epoch: 69][#examples: 44736/46620][#steps: 51000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4302/5828

---------------------------------------
[Epoch: 69][Validatiing...]
	 Early Stopping Patience: 840/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[VAL]: The number of correct predictions (aux-task (single)): 4316/5828

---------------------------------------
[Epoch: 69][Validatiing...]
	 Early Stopping Patience: 839/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

Epoch: 7

[Epoch: 72][#examples: 35968/46620][#steps: 53050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 72][#examples: 39168/46620][#steps: 53100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 72][#examples: 42368/46620][#steps: 53150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 72][#examples: 45568/46620][#steps: 53200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828

---------------------------------------
[Epoch: 72][Validatiing...]
	 Early Stopping Patience: 832/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

Epoch: 73 | Time: 1m 14s
	Train Loss: 0.001 | Train PPL:   1.001
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 73][#

[Epoch: 75][#examples: 33600/46620][#steps: 55200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 75][#examples: 36800/46620][#steps: 55250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 75][#examples: 40000/46620][#steps: 55300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 75][#examples: 43200/46620][#steps: 55350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 75][#examples: 46400/46620][#steps: 55400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828

---------------------------------------
[Epoch: 75][Validatiing...]
	 Early Stopping Patience: 825/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.040
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

Epoch: 76 | Time: 1m 4s
	Train Loss: 0.001 | Train PPL:   1.001
	 Val. Los

[Epoch: 78][#examples: 31232/46620][#steps: 57350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 78][#examples: 34432/46620][#steps: 57400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 78][#examples: 37632/46620][#steps: 57450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 78][#examples: 40832/46620][#steps: 57500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4302/5828

---------------------------------------
[Epoch: 78][Validatiing...]
	 Early Stopping Patience: 818/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.041
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 78][#examples: 44032/46620][#steps: 57550]
	Train Loss: 0.001 | Train 

[Epoch: 81][#examples: 28864/46620][#steps: 59500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4302/5828

---------------------------------------
[Epoch: 81][Validatiing...]
	 Early Stopping Patience: 811/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.041
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 81][#examples: 32064/46620][#steps: 59550]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 81][#examples: 35264/46620][#steps: 59600]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 81][#examples: 38464/46620][#steps: 59650]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 81][#examples: 41664/46620][#steps: 59700]
	Train Loss: 0.001 | Train 

[Epoch: 84][#examples: 20096/46620][#steps: 61550]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 84][#examples: 23296/46620][#steps: 61600]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 84][#examples: 26496/46620][#steps: 61650]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 84][#examples: 29696/46620][#steps: 61700]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 84][#examples: 32896/46620][#steps: 61750]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 84][#examples: 36096/46620][#steps: 61800]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 84][#examples: 39296/46620][#steps: 61850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 84][#examples: 42496/46620][#steps: 61900]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 84][#examples: 45696/46620][#steps: 61950]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[VAL]: The number of correct predicti

[Epoch: 87][#examples: 17728/46620][#steps: 63700]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 87][#examples: 20928/46620][#steps: 63750]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 87][#examples: 24128/46620][#steps: 63800]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 87][#examples: 27328/46620][#steps: 63850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 87][#examples: 30528/46620][#steps: 63900]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 87][#examples: 33728/46620][#steps: 63950]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 87][#examples: 36928/46620][#steps: 64000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 87][Validatiing...]
	 Early

[Epoch: 90][#examples: 15360/46620][#steps: 65850]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 90][#examples: 18560/46620][#steps: 65900]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 90][#examples: 21760/46620][#steps: 65950]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 90][#examples: 24960/46620][#steps: 66000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 90][Validatiing...]
	 Early Stopping Patience: 789/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.041
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 90][#examples: 28160/46620][#steps: 66050]
	Train Loss: 0.001 | Train 

[Epoch: 93][#examples: 12992/46620][#steps: 68000]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 93][Validatiing...]
	 Early Stopping Patience: 782/1000
	 Val. Loss: 0.713 | Val. Acc: 0.741 | Val. PPL:   2.041
	 BEST. Val. Loss: 0.423 | BEST. Val. Acc: 0.742 | Val. Loss: 0.676 | BEST. Val. Epoch: 21 | BEST. Val. Step: 16038
---------------------------------------

[Epoch: 93][#examples: 16192/46620][#steps: 68050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 93][#examples: 19392/46620][#steps: 68100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 93][#examples: 22592/46620][#steps: 68150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 93][#examples: 25792/46620][#steps: 68200]
	Train Loss: 0.001 | Train 

[Epoch: 96][#examples: 4224/46620][#steps: 70050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 96][#examples: 7424/46620][#steps: 70100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 96][#examples: 10624/46620][#steps: 70150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 96][#examples: 13824/46620][#steps: 70200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 96][#examples: 17024/46620][#steps: 70250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 96][#examples: 20224/46620][#steps: 70300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 96][#examples: 23424/46620][#steps: 70350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 96][#examples: 26624/46620][#steps: 70400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 96][#examples: 29824/46620][#steps: 70450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 96][#examples: 33024/46620][#st

[Epoch: 99][#examples: 1856/46620][#steps: 72200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 99][#examples: 5056/46620][#steps: 72250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 99][#examples: 8256/46620][#steps: 72300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 99][#examples: 11456/46620][#steps: 72350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 99][#examples: 14656/46620][#steps: 72400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 99][#examples: 17856/46620][#steps: 72450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
[Epoch: 99][#examples: 21056/46620][#steps: 72500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4316/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 99][Validatiing...]
	 Early St

[Epoch: 1][#examples: 30144/46620][#steps: 1200]
	Train Loss: 0.343 | Train PPL:   1.409 | lr: 3.000e-03
[Epoch: 1][#examples: 33344/46620][#steps: 1250]
	Train Loss: 0.339 | Train PPL:   1.404 | lr: 3.000e-03
[Epoch: 1][#examples: 36544/46620][#steps: 1300]
	Train Loss: 0.337 | Train PPL:   1.401 | lr: 3.000e-03
[Epoch: 1][#examples: 39744/46620][#steps: 1350]
	Train Loss: 0.332 | Train PPL:   1.394 | lr: 3.000e-03
[Epoch: 1][#examples: 42944/46620][#steps: 1400]
	Train Loss: 0.331 | Train PPL:   1.392 | lr: 3.000e-03
[Epoch: 1][#examples: 46144/46620][#steps: 1450]
	Train Loss: 0.329 | Train PPL:   1.390 | lr: 3.000e-03
[VAL]: The number of correct predictions (aux-task (single)): 3495/5828

---------------------------------------
[Epoch: 1][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 1000/1000
	 Val. Loss: 0.837 | Val. Acc: 0.600 | Val. PPL:   2.310
	 BEST. Val. Loss: 0.837 | BEST. Val. Acc: 0.600 | Val. Los

[Epoch: 4][#examples: 18176/46620][#steps: 3200]
	Train Loss: 0.199 | Train PPL:   1.220 | lr: 2.700e-03
[Epoch: 4][#examples: 21376/46620][#steps: 3250]
	Train Loss: 0.203 | Train PPL:   1.224 | lr: 2.700e-03
[Epoch: 4][#examples: 24576/46620][#steps: 3300]
	Train Loss: 0.201 | Train PPL:   1.223 | lr: 2.700e-03
[Epoch: 4][#examples: 27776/46620][#steps: 3350]
	Train Loss: 0.203 | Train PPL:   1.225 | lr: 2.700e-03
[Epoch: 4][#examples: 30976/46620][#steps: 3400]
	Train Loss: 0.202 | Train PPL:   1.224 | lr: 2.700e-03
[Epoch: 4][#examples: 34176/46620][#steps: 3450]
	Train Loss: 0.204 | Train PPL:   1.226 | lr: 2.700e-03
[Epoch: 4][#examples: 37376/46620][#steps: 3500]
	Train Loss: 0.202 | Train PPL:   1.224 | lr: 2.700e-03
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4021/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4026/5828

---------------------------------------
[Epoch: 4][Validatiing...]
		 Better Valid Loss! 

[Epoch: 7][#examples: 9408/46620][#steps: 5250]
	Train Loss: 0.136 | Train PPL:   1.145 | lr: 1.771e-03
[Epoch: 7][#examples: 12608/46620][#steps: 5300]
	Train Loss: 0.133 | Train PPL:   1.142 | lr: 1.771e-03
[Epoch: 7][#examples: 15808/46620][#steps: 5350]
	Train Loss: 0.132 | Train PPL:   1.141 | lr: 1.771e-03
[Epoch: 7][#examples: 19008/46620][#steps: 5400]
	Train Loss: 0.130 | Train PPL:   1.139 | lr: 1.771e-03
[Epoch: 7][#examples: 22208/46620][#steps: 5450]
	Train Loss: 0.130 | Train PPL:   1.139 | lr: 1.771e-03
[Epoch: 7][#examples: 25408/46620][#steps: 5500]
	Train Loss: 0.130 | Train PPL:   1.139 | lr: 1.771e-03
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4130/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4140/5828

---------------------------------------
[Epoch: 7][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 1000/1000
	 Val. Loss: 0

[Epoch: 10][#examples: 640/46620][#steps: 7300]
	Train Loss: 0.064 | Train PPL:   1.066 | lr: 1.291e-03
[Epoch: 10][#examples: 3840/46620][#steps: 7350]
	Train Loss: 0.067 | Train PPL:   1.069 | lr: 1.291e-03
[Epoch: 10][#examples: 7040/46620][#steps: 7400]
	Train Loss: 0.067 | Train PPL:   1.069 | lr: 1.291e-03
[Epoch: 10][#examples: 10240/46620][#steps: 7450]
	Train Loss: 0.065 | Train PPL:   1.067 | lr: 1.291e-03
[Epoch: 10][#examples: 13440/46620][#steps: 7500]
	Train Loss: 0.066 | Train PPL:   1.068 | lr: 1.291e-03
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4194/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4189/5828

---------------------------------------
[Epoch: 10][Validatiing...]
	 Early Stopping Patience: 996/1000
	 Val. Loss: 0.666 | Val. Acc: 0.720 | Val. PPL:   1.946
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.725 | Val. Loss: 0.620 | BEST. Val. Epoch: 9 | BEST. Val. Step: 7290
-----------------------

[Epoch: 13][#examples: 1472/46620][#steps: 9500]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 7.626e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4237/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4269/5828

---------------------------------------
[Epoch: 13][Validatiing...]
	 Early Stopping Patience: 989/1000
	 Val. Loss: 0.703 | Val. Acc: 0.727 | Val. PPL:   2.020
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.729 | Val. Loss: 0.680 | BEST. Val. Epoch: 11 | BEST. Val. Step: 8748
---------------------------------------

[Epoch: 13][#examples: 4672/46620][#steps: 9550]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 6.863e-04
[Epoch: 13][#examples: 7872/46620][#steps: 9600]
	Train Loss: 0.036 | Train PPL:   1.037 | lr: 6.863e-04
[Epoch: 13][#examples: 11072/46620][#steps: 9650]
	Train Loss: 0.035 | Train PPL:   1.036 | lr: 6.863e-04
[Epoch: 13][#examples: 14272/46620][#steps: 9700]
	Train Loss: 0.035 | Train PPL:   1.

[Epoch: 15][#examples: 45760/46620][#steps: 11650]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 4.503e-04
[VAL]: The number of correct predictions (aux-task (single)): 4263/5828

---------------------------------------
[Epoch: 15][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 982/1000
	 Val. Loss: 0.769 | Val. Acc: 0.731 | Val. PPL:   2.158
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.731 | Val. Loss: 0.769 | BEST. Val. Epoch: 15 | BEST. Val. Step: 11664
---------------------------------------

Epoch: 16 | Time: 1m 47s
	Train Loss: 0.024 | Train PPL:   1.024
	 Val. Loss: 0.769 | Val. Acc: 0.731 | Val. PPL:   2.158
[Train]: Current Teacher Forcing Ratio: 0.320
[Epoch: 16][#examples: 2304/46620][#steps: 11700]
	Train Loss: 0.017 | Train PPL:   1.018 | lr: 4.503e-04
[Epoch: 16][#examples: 5504/46620][#steps: 11750]
	Train Loss: 0.016 | Train PPL:   1.017 | lr: 4.503e-04
[Epoch: 16][#examples: 8704/46620][#steps: 11800]
	Train Loss: 0.015 | Train PPL:   

[Epoch: 18][#examples: 40192/46620][#steps: 13750]
	Train Loss: 0.012 | Train PPL:   1.012 | lr: 2.954e-04
[Epoch: 18][#examples: 43392/46620][#steps: 13800]
	Train Loss: 0.012 | Train PPL:   1.012 | lr: 2.954e-04
[Epoch: 18][#examples: 46592/46620][#steps: 13850]
	Train Loss: 0.012 | Train PPL:   1.012 | lr: 2.954e-04
[VAL]: The number of correct predictions (aux-task (single)): 4266/5828

---------------------------------------
[Epoch: 18][Validatiing...]
	 Early Stopping Patience: 975/1000
	 Val. Loss: 0.853 | Val. Acc: 0.732 | Val. PPL:   2.346
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.732 | Val. Loss: 0.842 | BEST. Val. Epoch: 17 | BEST. Val. Step: 13000
---------------------------------------

Epoch: 19 | Time: 1m 41s
	Train Loss: 0.012 | Train PPL:   1.012
	 Val. Loss: 0.853 | Val. Acc: 0.732 | Val. PPL:   2.346
[Train]: Current Teacher Forcing Ratio: 0.230
[Epoch: 19][#examples: 3136/46620][#steps: 13900]
	Train Loss: 0.010 | Train PPL:   1.010 | lr: 2.659e-04
[Epoch: 19][#e

[Epoch: 21][#examples: 34624/46620][#steps: 15850]
	Train Loss: 0.007 | Train PPL:   1.007 | lr: 1.744e-04
[Epoch: 21][#examples: 37824/46620][#steps: 15900]
	Train Loss: 0.007 | Train PPL:   1.007 | lr: 1.744e-04
[Epoch: 21][#examples: 41024/46620][#steps: 15950]
	Train Loss: 0.007 | Train PPL:   1.007 | lr: 1.744e-04
[Epoch: 21][#examples: 44224/46620][#steps: 16000]
	Train Loss: 0.008 | Train PPL:   1.008 | lr: 1.744e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4275/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4271/5828

---------------------------------------
[Epoch: 21][Validatiing...]
	 Early Stopping Patience: 968/1000
	 Val. Loss: 0.896 | Val. Acc: 0.734 | Val. PPL:   2.450
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.735 | Val. Loss: 0.869 | BEST. Val. Epoch: 20 | BEST. Val. Step: 15309
---------------------------------------

[VAL]: The number of correct predictions (aux-task (single)): 4275/5828

-----

[Epoch: 24][#examples: 29056/46620][#steps: 17950]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 1.030e-04
[Epoch: 24][#examples: 32256/46620][#steps: 18000]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 1.030e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4287/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4298/5828

---------------------------------------
[Epoch: 24][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 961/1000
	 Val. Loss: 0.929 | Val. Acc: 0.736 | Val. PPL:   2.531
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.736 | Val. Loss: 0.929 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18000
---------------------------------------

[Epoch: 24][#examples: 35456/46620][#steps: 18050]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 1.030e-04
[Epoch: 24][#examples: 38656/46620][#steps: 18100]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 1.030e-04
[Epoch: 24][#examples: 41856/46620][#ste

[VAL]: The number of correct predictions (aux-task (single)): 4288/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4289/5828

---------------------------------------
[Epoch: 27][Validatiing...]
	 Early Stopping Patience: 954/1000
	 Val. Loss: 0.955 | Val. Acc: 0.736 | Val. PPL:   2.598
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 27][#examples: 23488/46620][#steps: 20050]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 5.474e-05
[Epoch: 27][#examples: 26688/46620][#steps: 20100]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 5.474e-05
[Epoch: 27][#examples: 29888/46620][#steps: 20150]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 5.474e-05
[Epoch: 27][#examples: 33088/46620][#steps: 20200]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 5.474e-05
[Epoch: 27][#examples: 36288/46620][#steps: 20250]
	Train Loss: 0.005 | Train PPL:   1.005 | 

[Epoch: 30][#examples: 11520/46620][#steps: 22050]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.618e-05
[Epoch: 30][#examples: 14720/46620][#steps: 22100]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.618e-05
[Epoch: 30][#examples: 17920/46620][#steps: 22150]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 2.618e-05
[Epoch: 30][#examples: 21120/46620][#steps: 22200]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.618e-05
[Epoch: 30][#examples: 24320/46620][#steps: 22250]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.618e-05
[Epoch: 30][#examples: 27520/46620][#steps: 22300]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.618e-05
[Epoch: 30][#examples: 30720/46620][#steps: 22350]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.618e-05
[Epoch: 30][#examples: 33920/46620][#steps: 22400]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.618e-05
[Epoch: 30][#examples: 37120/46620][#steps: 22450]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.618e-05
[Epoch: 30][#examples: 40320/46620][#

[Epoch: 33][#examples: 9152/46620][#steps: 24200]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 1.252e-05
[Epoch: 33][#examples: 12352/46620][#steps: 24250]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 1.252e-05
[Epoch: 33][#examples: 15552/46620][#steps: 24300]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 1.252e-05
[Epoch: 33][#examples: 18752/46620][#steps: 24350]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 1.252e-05
[Epoch: 33][#examples: 21952/46620][#steps: 24400]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 1.252e-05
[Epoch: 33][#examples: 25152/46620][#steps: 24450]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.252e-05
[Epoch: 33][#examples: 28352/46620][#steps: 24500]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.252e-05
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4288/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4297/5828

---------------------------------------
[Epoch: 33][Validatiing...]
	 Early 

[Epoch: 36][#examples: 6784/46620][#steps: 26350]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 5.990e-06
[Epoch: 36][#examples: 9984/46620][#steps: 26400]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 5.990e-06
[Epoch: 36][#examples: 13184/46620][#steps: 26450]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 5.990e-06
[Epoch: 36][#examples: 16384/46620][#steps: 26500]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 5.990e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4291/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4301/5828

---------------------------------------
[Epoch: 36][Validatiing...]
	 Early Stopping Patience: 932/1000
	 Val. Loss: 0.971 | Val. Acc: 0.736 | Val. PPL:   2.642
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 36][#examples: 19584/46620][#steps: 26550]
	Train Loss: 0.004 | Train PP

[Epoch: 39][#examples: 4416/46620][#steps: 28500]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 2.865e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4291/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 39][Validatiing...]
	 Early Stopping Patience: 925/1000
	 Val. Loss: 0.973 | Val. Acc: 0.736 | Val. PPL:   2.646
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 39][#examples: 7616/46620][#steps: 28550]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.579e-06
[Epoch: 39][#examples: 10816/46620][#steps: 28600]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.579e-06
[Epoch: 39][#examples: 14016/46620][#steps: 28650]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 2.579e-06
[Epoch: 39][#examples: 17216/46620][#steps: 28700]
	Train Loss: 0.004 | Train PP

[Epoch: 42][#examples: 2048/46620][#steps: 30650]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.233e-06
[Epoch: 42][#examples: 5248/46620][#steps: 30700]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.233e-06
[Epoch: 42][#examples: 8448/46620][#steps: 30750]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.233e-06
[Epoch: 42][#examples: 11648/46620][#steps: 30800]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.233e-06
[Epoch: 42][#examples: 14848/46620][#steps: 30850]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.233e-06
[Epoch: 42][#examples: 18048/46620][#steps: 30900]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.233e-06
[Epoch: 42][#examples: 21248/46620][#steps: 30950]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.233e-06
[Epoch: 42][#examples: 24448/46620][#steps: 31000]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.233e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4293/5828
-----Tst------
[TEST]: The number of correct predictions (au

[Epoch: 45][#examples: 2880/46620][#steps: 32850]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 5.899e-07
[Epoch: 45][#examples: 6080/46620][#steps: 32900]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 5.899e-07
[Epoch: 45][#examples: 9280/46620][#steps: 32950]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 5.899e-07
[Epoch: 45][#examples: 12480/46620][#steps: 33000]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 5.899e-07
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4304/5828

---------------------------------------
[Epoch: 45][Validatiing...]
	 Early Stopping Patience: 910/1000
	 Val. Loss: 0.972 | Val. Acc: 0.737 | Val. PPL:   2.644
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 45][#examples: 15680/46620][#steps: 33050]
	Train Loss: 0.004 | Train PPL

[Epoch: 48][#examples: 512/46620][#steps: 35000]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 2.821e-07
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4304/5828

---------------------------------------
[Epoch: 48][Validatiing...]
	 Early Stopping Patience: 903/1000
	 Val. Loss: 0.972 | Val. Acc: 0.737 | Val. PPL:   2.644
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 48][#examples: 3712/46620][#steps: 35050]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 2.539e-07
[Epoch: 48][#examples: 6912/46620][#steps: 35100]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.539e-07
[Epoch: 48][#examples: 10112/46620][#steps: 35150]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.539e-07
[Epoch: 48][#examples: 13312/46620][#steps: 35200]
	Train Loss: 0.004 | Train PPL:

[VAL]: The number of correct predictions (aux-task (single)): 4294/5828

---------------------------------------
[Epoch: 50][Validatiing...]
	 Early Stopping Patience: 896/1000
	 Val. Loss: 0.972 | Val. Acc: 0.737 | Val. PPL:   2.644
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

Epoch: 51 | Time: 1m 58s
	Train Loss: 0.003 | Train PPL:   1.003
	 Val. Loss: 0.972 | Val. Acc: 0.737 | Val. PPL:   2.644
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 51][#examples: 1344/46620][#steps: 37200]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 1.215e-07
[Epoch: 51][#examples: 4544/46620][#steps: 37250]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.215e-07
[Epoch: 51][#examples: 7744/46620][#steps: 37300]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.215e-07
[Epoch: 51][#examples: 10944/46620][#steps: 37350]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.215e-07
[Epoch: 51][#exa

[Epoch: 53][#examples: 45632/46620][#steps: 39350]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828

---------------------------------------
[Epoch: 53][Validatiing...]
	 Early Stopping Patience: 889/1000
	 Val. Loss: 0.972 | Val. Acc: 0.737 | Val. PPL:   2.645
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

Epoch: 54 | Time: 1m 46s
	Train Loss: 0.003 | Train PPL:   1.003
	 Val. Loss: 0.972 | Val. Acc: 0.737 | Val. PPL:   2.645
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 54][#examples: 2176/46620][#steps: 39400]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 9.838e-08
[Epoch: 54][#examples: 5376/46620][#steps: 39450]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 9.838e-08
[Epoch: 54][#examples: 8576/46620][#steps: 39500]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 9.838e-08
-----Val------
[

[Epoch: 56][#examples: 43264/46620][#steps: 41500]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 56][Validatiing...]
	 Early Stopping Patience: 882/1000
	 Val. Loss: 0.973 | Val. Acc: 0.737 | Val. PPL:   2.645
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 56][#examples: 46464/46620][#steps: 41550]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828

---------------------------------------
[Epoch: 56][Validatiing...]
	 Early Stopping Patience: 881/1000
	 Val. Loss: 0.973 | Val. Acc: 0.737 | Val. PPL:   2.645
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Lo

[Epoch: 59][#examples: 34496/46620][#steps: 43550]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 59][#examples: 37696/46620][#steps: 43600]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 59][#examples: 40896/46620][#steps: 43650]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 59][#examples: 44096/46620][#steps: 43700]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828

---------------------------------------
[Epoch: 59][Validatiing...]
	 Early Stopping Patience: 874/1000
	 Val. Loss: 0.973 | Val. Acc: 0.737 | Val. PPL:   2.645
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

Epoch: 60 | Time: 1m 45s
	Train Loss: 0.003 | Train PPL:   1.003
	 Val. Loss: 0.973 | Val. Acc: 0.737 | Val. PPL:   2.645
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 60][#

[Epoch: 62][#examples: 32128/46620][#steps: 45700]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 62][#examples: 35328/46620][#steps: 45750]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 62][#examples: 38528/46620][#steps: 45800]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 62][#examples: 41728/46620][#steps: 45850]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 62][#examples: 44928/46620][#steps: 45900]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828

---------------------------------------
[Epoch: 62][Validatiing...]
	 Early Stopping Patience: 867/1000
	 Val. Loss: 0.973 | Val. Acc: 0.737 | Val. PPL:   2.645
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

Epoch: 63 | Time: 1m 45s
	Train Loss: 0.003 | Train PPL:   1.003
	 Val. Lo

[Epoch: 65][#examples: 29760/46620][#steps: 47850]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 65][#examples: 32960/46620][#steps: 47900]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 65][#examples: 36160/46620][#steps: 47950]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 65][#examples: 39360/46620][#steps: 48000]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4303/5828

---------------------------------------
[Epoch: 65][Validatiing...]
	 Early Stopping Patience: 860/1000
	 Val. Loss: 0.973 | Val. Acc: 0.737 | Val. PPL:   2.645
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 65][#examples: 42560/46620][#steps: 48050]
	Train Loss: 0.003 | Train 

[Epoch: 68][#examples: 27392/46620][#steps: 50000]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4304/5828

---------------------------------------
[Epoch: 68][Validatiing...]
	 Early Stopping Patience: 853/1000
	 Val. Loss: 0.973 | Val. Acc: 0.737 | Val. PPL:   2.645
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 68][#examples: 30592/46620][#steps: 50050]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 68][#examples: 33792/46620][#steps: 50100]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 68][#examples: 36992/46620][#steps: 50150]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 68][#examples: 40192/46620][#steps: 50200]
	Train Loss: 0.003 | Train 

[Epoch: 71][#examples: 18624/46620][#steps: 52050]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 71][#examples: 21824/46620][#steps: 52100]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 71][#examples: 25024/46620][#steps: 52150]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 71][#examples: 28224/46620][#steps: 52200]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 71][#examples: 31424/46620][#steps: 52250]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 71][#examples: 34624/46620][#steps: 52300]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 71][#examples: 37824/46620][#steps: 52350]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 71][#examples: 41024/46620][#steps: 52400]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 71][#examples: 44224/46620][#steps: 52450]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[VAL]: The number of correct predicti

[Epoch: 74][#examples: 16256/46620][#steps: 54200]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 74][#examples: 19456/46620][#steps: 54250]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 74][#examples: 22656/46620][#steps: 54300]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 74][#examples: 25856/46620][#steps: 54350]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 74][#examples: 29056/46620][#steps: 54400]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 74][#examples: 32256/46620][#steps: 54450]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 74][#examples: 35456/46620][#steps: 54500]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4304/5828

---------------------------------------
[Epoch: 74][Validatiing...]
	 Early

[Epoch: 77][#examples: 13888/46620][#steps: 56350]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 77][#examples: 17088/46620][#steps: 56400]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 77][#examples: 20288/46620][#steps: 56450]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 77][#examples: 23488/46620][#steps: 56500]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4304/5828

---------------------------------------
[Epoch: 77][Validatiing...]
	 Early Stopping Patience: 831/1000
	 Val. Loss: 0.974 | Val. Acc: 0.737 | Val. PPL:   2.648
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 77][#examples: 26688/46620][#steps: 56550]
	Train Loss: 0.003 | Train 

[Epoch: 80][#examples: 11520/46620][#steps: 58500]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4304/5828

---------------------------------------
[Epoch: 80][Validatiing...]
	 Early Stopping Patience: 824/1000
	 Val. Loss: 0.974 | Val. Acc: 0.737 | Val. PPL:   2.648
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 80][#examples: 14720/46620][#steps: 58550]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 80][#examples: 17920/46620][#steps: 58600]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 80][#examples: 21120/46620][#steps: 58650]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 80][#examples: 24320/46620][#steps: 58700]
	Train Loss: 0.004 | Train 

[Epoch: 83][#examples: 2752/46620][#steps: 60550]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 83][#examples: 5952/46620][#steps: 60600]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 9.838e-08
[Epoch: 83][#examples: 9152/46620][#steps: 60650]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 9.838e-08
[Epoch: 83][#examples: 12352/46620][#steps: 60700]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 9.838e-08
[Epoch: 83][#examples: 15552/46620][#steps: 60750]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 83][#examples: 18752/46620][#steps: 60800]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 83][#examples: 21952/46620][#steps: 60850]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 83][#examples: 25152/46620][#steps: 60900]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 83][#examples: 28352/46620][#steps: 60950]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 83][#examples: 31552/46620][#ste

[Epoch: 86][#examples: 384/46620][#steps: 62700]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 86][#examples: 3584/46620][#steps: 62750]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 86][#examples: 6784/46620][#steps: 62800]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 86][#examples: 9984/46620][#steps: 62850]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 9.838e-08
[Epoch: 86][#examples: 13184/46620][#steps: 62900]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 86][#examples: 16384/46620][#steps: 62950]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 86][#examples: 19584/46620][#steps: 63000]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4304/5828

---------------------------------------
[Epoch: 86][Validatiing...]
	 Early Stop

[Epoch: 89][#examples: 1216/46620][#steps: 64900]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 9.838e-08
[Epoch: 89][#examples: 4416/46620][#steps: 64950]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 89][#examples: 7616/46620][#steps: 65000]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 4304/5828

---------------------------------------
[Epoch: 89][Validatiing...]
	 Early Stopping Patience: 802/1000
	 Val. Loss: 0.974 | Val. Acc: 0.737 | Val. PPL:   2.648
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

[Epoch: 89][#examples: 10816/46620][#steps: 65050]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 89][#examples: 14016/46620][#steps: 65100]
	Train Loss: 0.003 | Train PPL

[VAL]: The number of correct predictions (aux-task (single)): 4294/5828

---------------------------------------
[Epoch: 91][Validatiing...]
	 Early Stopping Patience: 795/1000
	 Val. Loss: 0.974 | Val. Acc: 0.737 | Val. PPL:   2.648
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

Epoch: 92 | Time: 1m 32s
	Train Loss: 0.003 | Train PPL:   1.003
	 Val. Loss: 0.974 | Val. Acc: 0.737 | Val. PPL:   2.648
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 92][#examples: 2048/46620][#steps: 67100]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 9.838e-08
[Epoch: 92][#examples: 5248/46620][#steps: 67150]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 9.838e-08
[Epoch: 92][#examples: 8448/46620][#steps: 67200]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 92][#examples: 11648/46620][#steps: 67250]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 9.838e-08
[Epoch: 92][#exa

[Epoch: 94][#examples: 46336/46620][#steps: 69250]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828

---------------------------------------
[Epoch: 94][Validatiing...]
	 Early Stopping Patience: 788/1000
	 Val. Loss: 0.974 | Val. Acc: 0.737 | Val. PPL:   2.648
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

Epoch: 95 | Time: 1m 21s
	Train Loss: 0.003 | Train PPL:   1.003
	 Val. Loss: 0.974 | Val. Acc: 0.737 | Val. PPL:   2.648
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 95][#examples: 2880/46620][#steps: 69300]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 95][#examples: 6080/46620][#steps: 69350]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 95][#examples: 9280/46620][#steps: 69400]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 95][#exa

[Epoch: 97][#examples: 43968/46620][#steps: 71400]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828

---------------------------------------
[Epoch: 97][Validatiing...]
	 Early Stopping Patience: 781/1000
	 Val. Loss: 0.974 | Val. Acc: 0.737 | Val. PPL:   2.648
	 BEST. Val. Loss: 0.604 | BEST. Val. Acc: 0.737 | Val. Loss: 0.921 | BEST. Val. Epoch: 24 | BEST. Val. Step: 18225
---------------------------------------

Epoch: 98 | Time: 1m 7s
	Train Loss: 0.003 | Train PPL:   1.003
	 Val. Loss: 0.974 | Val. Acc: 0.737 | Val. PPL:   2.648
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 98][#examples: 512/46620][#steps: 71450]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
[Epoch: 98][#examples: 3712/46620][#steps: 71500]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 4294/5828
-----Tst------
[TEST]: The number of 

[Epoch: 0][#examples: 12800/81252][#steps: 200]
	Train Loss: 3.747 | Train PPL:  42.378 | lr: 3.000e-03
[Epoch: 0][#examples: 16000/81252][#steps: 250]
	Train Loss: 3.615 | Train PPL:  37.144 | lr: 3.000e-03
[Epoch: 0][#examples: 19200/81252][#steps: 300]
	Train Loss: 3.452 | Train PPL:  31.565 | lr: 3.000e-03
[Epoch: 0][#examples: 22400/81252][#steps: 350]
	Train Loss: 3.264 | Train PPL:  26.157 | lr: 3.000e-03
[Epoch: 0][#examples: 25600/81252][#steps: 400]
	Train Loss: 3.061 | Train PPL:  21.340 | lr: 3.000e-03
[Epoch: 0][#examples: 28800/81252][#steps: 450]
	Train Loss: 2.854 | Train PPL:  17.356 | lr: 3.000e-03
[Epoch: 0][#examples: 32000/81252][#steps: 500]
	Train Loss: 2.678 | Train PPL:  14.560 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 169/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 387/1000

---------------------------------------
[Epoch: 0][Validatiing...]
		 Better Valid Loss! (at least 

[VAL]: The number of correct predictions (aux-task (single)): 319/513

---------------------------------------
[Epoch: 1][Validatiing...]
	 Early Stopping Patience: 998/1000
	 Val. Loss: 1.230 | Val. Acc: 0.622 | Val. PPL:   3.420
	 BEST. Val. Loss: 0.935 | BEST. Val. Acc: 0.639 | Val. Loss: 0.935 | BEST. Val. Epoch: 1 | BEST. Val. Step: 2000
---------------------------------------

Epoch: 02 | Time: 1m 13s
	Train Loss: 0.398 | Train PPL:   1.489
	 Val. Loss: 1.230 | Val. Acc: 0.622 | Val. PPL:   3.420
[Train]: Current Teacher Forcing Ratio: 0.740
[Epoch: 2][#examples: 640/81252][#steps: 2550]
	Train Loss: 0.258 | Train PPL:   1.295 | lr: 2.187e-03
[Epoch: 2][#examples: 3840/81252][#steps: 2600]
	Train Loss: 0.290 | Train PPL:   1.337 | lr: 2.187e-03
[Epoch: 2][#examples: 7040/81252][#steps: 2650]
	Train Loss: 0.290 | Train PPL:   1.337 | lr: 2.187e-03
[Epoch: 2][#examples: 10240/81252][#steps: 2700]
	Train Loss: 0.287 | Train PPL:   1.333 | lr: 2.187e-03
[Epoch: 2][#examples: 13440/81

[Epoch: 3][#examples: 76160/81252][#steps: 5000]
	Train Loss: 0.191 | Train PPL:   1.210 | lr: 1.435e-03
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 339/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 723/1000

---------------------------------------
[Epoch: 3][Validatiing...]
	 Early Stopping Patience: 999/1000
	 Val. Loss: 1.120 | Val. Acc: 0.661 | Val. PPL:   3.065
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.663 | Val. Loss: 1.136 | BEST. Val. Epoch: 2 | BEST. Val. Step: 3500
---------------------------------------

[Epoch: 3][#examples: 79360/81252][#steps: 5050]
	Train Loss: 0.190 | Train PPL:   1.209 | lr: 1.291e-03
[VAL]: The number of correct predictions (aux-task (single)): 347/513

---------------------------------------
[Epoch: 3][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 998/1000
	 Val. Loss: 1.175 | Val. Acc: 0.676 | Val. PPL:   3.237
	 BEST. Val. Loss: 0.912 | BEST. 

[Epoch: 5][#examples: 54400/81252][#steps: 7200]
	Train Loss: 0.089 | Train PPL:   1.093 | lr: 9.414e-04
[Epoch: 5][#examples: 57600/81252][#steps: 7250]
	Train Loss: 0.090 | Train PPL:   1.094 | lr: 9.414e-04
[Epoch: 5][#examples: 60800/81252][#steps: 7300]
	Train Loss: 0.090 | Train PPL:   1.094 | lr: 9.414e-04
[Epoch: 5][#examples: 64000/81252][#steps: 7350]
	Train Loss: 0.090 | Train PPL:   1.094 | lr: 9.414e-04
[Epoch: 5][#examples: 67200/81252][#steps: 7400]
	Train Loss: 0.090 | Train PPL:   1.094 | lr: 9.414e-04
[Epoch: 5][#examples: 70400/81252][#steps: 7450]
	Train Loss: 0.090 | Train PPL:   1.094 | lr: 9.414e-04
[Epoch: 5][#examples: 73600/81252][#steps: 7500]
	Train Loss: 0.091 | Train PPL:   1.095 | lr: 9.414e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 356/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 730/1000

---------------------------------------
[Epoch: 5][Validatiing...]
		 Better Valid Acc! (at 

[Epoch: 7][#examples: 42240/81252][#steps: 9550]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 5.559e-04
[Epoch: 7][#examples: 45440/81252][#steps: 9600]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 5.559e-04
[Epoch: 7][#examples: 48640/81252][#steps: 9650]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 5.559e-04
[Epoch: 7][#examples: 51840/81252][#steps: 9700]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 5.559e-04
[Epoch: 7][#examples: 55040/81252][#steps: 9750]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 5.559e-04
[Epoch: 7][#examples: 58240/81252][#steps: 9800]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 5.559e-04
[Epoch: 7][#examples: 61440/81252][#steps: 9850]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 5.559e-04
[Epoch: 7][#examples: 64640/81252][#steps: 9900]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 5.559e-04
[Epoch: 7][#examples: 67840/81252][#steps: 9950]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 5.559e-04
[Epoch: 7][#examples: 71040/81252][#steps: 10000]
	Trai

[Epoch: 9][#examples: 36480/81252][#steps: 12000]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 2.954e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 351/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 736/1000

---------------------------------------
[Epoch: 9][Validatiing...]
	 Early Stopping Patience: 979/1000
	 Val. Loss: 1.371 | Val. Acc: 0.684 | Val. PPL:   3.940
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 9][#examples: 39680/81252][#steps: 12050]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 2.659e-04
[Epoch: 9][#examples: 42880/81252][#steps: 12100]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 2.659e-04
[Epoch: 9][#examples: 46080/81252][#steps: 12150]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 2.659e-04
[Epoch: 9][#examples: 49280/81252][#steps: 12200]
	Train Loss: 0.029 | Train PPL:   1.0

[Epoch: 11][#examples: 14720/81252][#steps: 14200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.413e-04
[Epoch: 11][#examples: 17920/81252][#steps: 14250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.413e-04
[Epoch: 11][#examples: 21120/81252][#steps: 14300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.413e-04
[Epoch: 11][#examples: 24320/81252][#steps: 14350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.413e-04
[Epoch: 11][#examples: 27520/81252][#steps: 14400]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 1.413e-04
[Epoch: 11][#examples: 30720/81252][#steps: 14450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.413e-04
[Epoch: 11][#examples: 33920/81252][#steps: 14500]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 1.413e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 347/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 11][Validatiing...]
	 Early St

[Epoch: 13][#examples: 2560/81252][#steps: 16550]
	Train Loss: 0.016 | Train PPL:   1.016 | lr: 6.759e-05
[Epoch: 13][#examples: 5760/81252][#steps: 16600]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 6.759e-05
[Epoch: 13][#examples: 8960/81252][#steps: 16650]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 6.759e-05
[Epoch: 13][#examples: 12160/81252][#steps: 16700]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 6.759e-05
[Epoch: 13][#examples: 15360/81252][#steps: 16750]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 6.759e-05
[Epoch: 13][#examples: 18560/81252][#steps: 16800]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 6.759e-05
[Epoch: 13][#examples: 21760/81252][#steps: 16850]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 6.759e-05
[Epoch: 13][#examples: 24960/81252][#steps: 16900]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 6.759e-05
[Epoch: 13][#examples: 28160/81252][#steps: 16950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 6.759e-05
[Epoch: 13][#examples: 31360/81252][#ste

[Epoch: 14][#examples: 81280/81252][#steps: 19050]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 3.592e-05
[VAL]: The number of correct predictions (aux-task (single)): 348/513

---------------------------------------
[Epoch: 14][Validatiing...]
	 Early Stopping Patience: 959/1000
	 Val. Loss: 1.472 | Val. Acc: 0.678 | Val. PPL:   4.358
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

Epoch: 15 | Time: 1m 14s
	Train Loss: 0.021 | Train PPL:   1.021
	 Val. Loss: 1.472 | Val. Acc: 0.678 | Val. PPL:   4.358
[Train]: Current Teacher Forcing Ratio: 0.350
[Epoch: 15][#examples: 3200/81252][#steps: 19100]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 3.233e-05
[Epoch: 15][#examples: 6400/81252][#steps: 19150]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 3.233e-05
[Epoch: 15][#examples: 9600/81252][#steps: 19200]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 3.233e-05
[Epoch: 15][#exampl

[Epoch: 16][#examples: 72320/81252][#steps: 21450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.909e-05
[Epoch: 16][#examples: 75520/81252][#steps: 21500]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.909e-05
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 345/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 739/1000

---------------------------------------
[Epoch: 16][Validatiing...]
	 Early Stopping Patience: 953/1000
	 Val. Loss: 1.466 | Val. Acc: 0.673 | Val. PPL:   4.331
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 16][#examples: 78720/81252][#steps: 21550]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.718e-05
[VAL]: The number of correct predictions (aux-task (single)): 345/513

---------------------------------------
[Epoch: 16][Validatiing...]
	 Early Stopping Patience: 952/1000
	 Val. Loss: 1

[Epoch: 18][#examples: 50560/81252][#steps: 23650]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.130e-06
[Epoch: 18][#examples: 53760/81252][#steps: 23700]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.130e-06
[Epoch: 18][#examples: 56960/81252][#steps: 23750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.130e-06
[Epoch: 18][#examples: 60160/81252][#steps: 23800]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.130e-06
[Epoch: 18][#examples: 63360/81252][#steps: 23850]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.130e-06
[Epoch: 18][#examples: 66560/81252][#steps: 23900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.130e-06
[Epoch: 18][#examples: 69760/81252][#steps: 23950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.130e-06
[Epoch: 18][#examples: 72960/81252][#steps: 24000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.130e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 344/513
-----Tst------
[TEST]: The number of correct predictions (a

[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 738/1000

---------------------------------------
[Epoch: 20][Validatiing...]
	 Early Stopping Patience: 940/1000
	 Val. Loss: 1.469 | Val. Acc: 0.674 | Val. PPL:   4.347
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 20][#examples: 41600/81252][#steps: 26050]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 4.367e-06
[Epoch: 20][#examples: 44800/81252][#steps: 26100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 4.367e-06
[Epoch: 20][#examples: 48000/81252][#steps: 26150]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 4.367e-06
[Epoch: 20][#examples: 51200/81252][#steps: 26200]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 4.367e-06
[Epoch: 20][#examples: 54400/81252][#steps: 26250]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 

[Epoch: 22][#examples: 19840/81252][#steps: 28250]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 2.321e-06
[Epoch: 22][#examples: 23040/81252][#steps: 28300]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 2.321e-06
[Epoch: 22][#examples: 26240/81252][#steps: 28350]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 2.321e-06
[Epoch: 22][#examples: 29440/81252][#steps: 28400]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 2.321e-06
[Epoch: 22][#examples: 32640/81252][#steps: 28450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 2.321e-06
[Epoch: 22][#examples: 35840/81252][#steps: 28500]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 2.321e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 738/1000

---------------------------------------
[Epoch: 22][Validatiing...]
	 Early Stopping Patience: 933/1000
	 Val. Loss: 1.472 | Val. Acc: 0.674 | Val. PPL:   4.359
	 BEST. Val. Loss: 0.912

[Epoch: 24][#examples: 4480/81252][#steps: 30550]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.110e-06
[Epoch: 24][#examples: 7680/81252][#steps: 30600]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.110e-06
[Epoch: 24][#examples: 10880/81252][#steps: 30650]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.110e-06
[Epoch: 24][#examples: 14080/81252][#steps: 30700]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.110e-06
[Epoch: 24][#examples: 17280/81252][#steps: 30750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.110e-06
[Epoch: 24][#examples: 20480/81252][#steps: 30800]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.110e-06
[Epoch: 24][#examples: 23680/81252][#steps: 30850]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.110e-06
[Epoch: 24][#examples: 26880/81252][#steps: 30900]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.110e-06
[Epoch: 24][#examples: 30080/81252][#steps: 30950]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.110e-06
[Epoch: 24][#examples: 33280/81252][#st

[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 920/1000
	 Val. Loss: 1.473 | Val. Acc: 0.674 | Val. PPL:   4.364
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

Epoch: 26 | Time: 1m 14s
	Train Loss: 0.018 | Train PPL:   1.019
	 Val. Loss: 1.473 | Val. Acc: 0.674 | Val. PPL:   4.364
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 26][#examples: 1920/81252][#steps: 33050]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 5.309e-07
[Epoch: 26][#examples: 5120/81252][#steps: 33100]
	Train Loss: 0.017 | Train PPL:   1.018 | lr: 5.309e-07
[Epoch: 26][#examples: 8320/81252][#steps: 33150]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 5.309e-07
[Epoch: 26][#examples: 11520/81252][#steps: 33200]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 5.309e-07
[Epoch: 26][#exampl

[Epoch: 27][#examples: 74240/81252][#steps: 35450]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 3.135e-07
[Epoch: 27][#examples: 77440/81252][#steps: 35500]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 3.135e-07
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 27][Validatiing...]
	 Early Stopping Patience: 914/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 27][#examples: 80640/81252][#steps: 35550]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.821e-07
[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 27][Validatiing...]
	 Early Stopping Patience: 913/1000
	 Val. Loss: 1

[Epoch: 29][#examples: 52480/81252][#steps: 37650]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.499e-07
[Epoch: 29][#examples: 55680/81252][#steps: 37700]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.499e-07
[Epoch: 29][#examples: 58880/81252][#steps: 37750]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.499e-07
[Epoch: 29][#examples: 62080/81252][#steps: 37800]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.499e-07
[Epoch: 29][#examples: 65280/81252][#steps: 37850]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.499e-07
[Epoch: 29][#examples: 68480/81252][#steps: 37900]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.499e-07
[Epoch: 29][#examples: 71680/81252][#steps: 37950]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.499e-07
[Epoch: 29][#examples: 74880/81252][#steps: 38000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.499e-07
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (a

[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 901/1000
	 Val. Loss: 1.473 | Val. Acc: 0.674 | Val. PPL:   4.364
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 31][#examples: 43520/81252][#steps: 40050]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 31][#examples: 46720/81252][#steps: 40100]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 31][#examples: 49920/81252][#steps: 40150]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 31][#examples: 53120/81252][#steps: 40200]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 31][#examples: 56320/81252][#steps: 40250]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 

[Epoch: 33][#examples: 21760/81252][#steps: 42250]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 33][#examples: 24960/81252][#steps: 42300]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 33][#examples: 28160/81252][#steps: 42350]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 33][#examples: 31360/81252][#steps: 42400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 33][#examples: 34560/81252][#steps: 42450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 33][#examples: 37760/81252][#steps: 42500]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 33][Validatiing...]
	 Early Stopping Patience: 894/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912

[Epoch: 35][#examples: 6400/81252][#steps: 44550]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 35][#examples: 9600/81252][#steps: 44600]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 35][#examples: 12800/81252][#steps: 44650]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 35][#examples: 16000/81252][#steps: 44700]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 35][#examples: 19200/81252][#steps: 44750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 35][#examples: 22400/81252][#steps: 44800]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 35][#examples: 25600/81252][#steps: 44850]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 35][#examples: 28800/81252][#steps: 44900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 35][#examples: 32000/81252][#steps: 44950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 35][#examples: 35200/81252][#st

[Epoch: 37][#examples: 640/81252][#steps: 47000]
	Train Loss: 0.015 | Train PPL:   1.015 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 37][Validatiing...]
	 Early Stopping Patience: 881/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 37][#examples: 3840/81252][#steps: 47050]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 37][#examples: 7040/81252][#steps: 47100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 37][#examples: 10240/81252][#steps: 47150]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 37][#examples: 13440/81252][#steps: 47200]
	Train Loss: 0.019 | Train PPL:   1

[Epoch: 38][#examples: 76160/81252][#steps: 49450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 38][#examples: 79360/81252][#steps: 49500]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 875/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 874/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.

[Epoch: 40][#examples: 54400/81252][#steps: 51650]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 40][#examples: 57600/81252][#steps: 51700]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 40][#examples: 60800/81252][#steps: 51750]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 40][#examples: 64000/81252][#steps: 51800]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 40][#examples: 67200/81252][#steps: 51850]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 40][#examples: 70400/81252][#steps: 51900]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 40][#examples: 73600/81252][#steps: 51950]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 40][#examples: 76800/81252][#steps: 52000]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (a

[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 42][Validatiing...]
	 Early Stopping Patience: 862/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 42][#examples: 45440/81252][#steps: 54050]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 42][#examples: 48640/81252][#steps: 54100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 42][#examples: 51840/81252][#steps: 54150]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 42][#examples: 55040/81252][#steps: 54200]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 42][#examples: 58240/81252][#steps: 54250]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 

[Epoch: 44][#examples: 23680/81252][#steps: 56250]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 44][#examples: 26880/81252][#steps: 56300]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 44][#examples: 30080/81252][#steps: 56350]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 44][#examples: 33280/81252][#steps: 56400]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 44][#examples: 36480/81252][#steps: 56450]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 44][#examples: 39680/81252][#steps: 56500]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 44][Validatiing...]
	 Early Stopping Patience: 855/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912

[Epoch: 46][#examples: 8320/81252][#steps: 58550]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 46][#examples: 11520/81252][#steps: 58600]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 46][#examples: 14720/81252][#steps: 58650]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 46][#examples: 17920/81252][#steps: 58700]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 46][#examples: 21120/81252][#steps: 58750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 46][#examples: 24320/81252][#steps: 58800]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 46][#examples: 27520/81252][#steps: 58850]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 46][#examples: 30720/81252][#steps: 58900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 46][#examples: 33920/81252][#steps: 58950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 46][#examples: 37120/81252][#s

[Epoch: 48][#examples: 2560/81252][#steps: 61000]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 48][Validatiing...]
	 Early Stopping Patience: 842/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 48][#examples: 5760/81252][#steps: 61050]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 48][#examples: 8960/81252][#steps: 61100]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 48][#examples: 12160/81252][#steps: 61150]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 48][#examples: 15360/81252][#steps: 61200]
	Train Loss: 0.019 | Train PPL:   

[Epoch: 49][#examples: 78080/81252][#steps: 63450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 49][#examples: 81280/81252][#steps: 63500]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 49][Validatiing...]
	 Early Stopping Patience: 836/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 49][Validatiing...]
	 Early Stopping Patience: 835/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.365
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.

[Epoch: 51][#examples: 56320/81252][#steps: 65650]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 51][#examples: 59520/81252][#steps: 65700]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 51][#examples: 62720/81252][#steps: 65750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 51][#examples: 65920/81252][#steps: 65800]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 51][#examples: 69120/81252][#steps: 65850]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 51][#examples: 72320/81252][#steps: 65900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 51][#examples: 75520/81252][#steps: 65950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 51][#examples: 78720/81252][#steps: 66000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (a

[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 53][Validatiing...]
	 Early Stopping Patience: 823/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.366
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 53][#examples: 47360/81252][#steps: 68050]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 53][#examples: 50560/81252][#steps: 68100]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 53][#examples: 53760/81252][#steps: 68150]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 53][#examples: 56960/81252][#steps: 68200]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 53][#examples: 60160/81252][#steps: 68250]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 

[Epoch: 55][#examples: 25600/81252][#steps: 70250]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 55][#examples: 28800/81252][#steps: 70300]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 55][#examples: 32000/81252][#steps: 70350]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 55][#examples: 35200/81252][#steps: 70400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 55][#examples: 38400/81252][#steps: 70450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 55][#examples: 41600/81252][#steps: 70500]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 55][Validatiing...]
	 Early Stopping Patience: 816/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.366
	 BEST. Val. Loss: 0.912

[Epoch: 57][#examples: 10240/81252][#steps: 72550]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 57][#examples: 13440/81252][#steps: 72600]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 57][#examples: 16640/81252][#steps: 72650]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 57][#examples: 19840/81252][#steps: 72700]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 57][#examples: 23040/81252][#steps: 72750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 57][#examples: 26240/81252][#steps: 72800]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 57][#examples: 29440/81252][#steps: 72850]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 57][#examples: 32640/81252][#steps: 72900]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 57][#examples: 35840/81252][#steps: 72950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 57][#examples: 39040/81252][#

[Epoch: 59][#examples: 1280/81252][#steps: 74950]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.838e-08
[Epoch: 59][#examples: 4480/81252][#steps: 75000]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 59][Validatiing...]
	 Early Stopping Patience: 803/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 59][#examples: 7680/81252][#steps: 75050]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 59][#examples: 10880/81252][#steps: 75100]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 59][#examples: 14080/81252][#steps: 75150]
	Train Loss: 0.018 | Train PPL:   

[Epoch: 60][#examples: 76800/81252][#steps: 77400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 60][#examples: 80000/81252][#steps: 77450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 60][Validatiing...]
	 Early Stopping Patience: 797/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

Epoch: 61 | Time: 1m 13s
	Train Loss: 0.019 | Train PPL:   1.019
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 61][#examples: 1920/81252][#steps: 77500]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of co

[Epoch: 62][#examples: 55040/81252][#steps: 79600]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 62][#examples: 58240/81252][#steps: 79650]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 62][#examples: 61440/81252][#steps: 79700]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 62][#examples: 64640/81252][#steps: 79750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 62][#examples: 67840/81252][#steps: 79800]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 62][#examples: 71040/81252][#steps: 79850]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 62][#examples: 74240/81252][#steps: 79900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 62][#examples: 77440/81252][#steps: 79950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 62][#examples: 80640/81252][#steps: 80000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of c

[Epoch: 64][#examples: 46080/81252][#steps: 82000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 64][Validatiing...]
	 Early Stopping Patience: 784/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 64][#examples: 49280/81252][#steps: 82050]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 64][#examples: 52480/81252][#steps: 82100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 64][#examples: 55680/81252][#steps: 82150]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 64][#examples: 58880/81252][#steps: 82200]
	Train Loss: 0.019 | Train PPL:

[Epoch: 66][#examples: 24320/81252][#steps: 84200]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 66][#examples: 27520/81252][#steps: 84250]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 66][#examples: 30720/81252][#steps: 84300]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 66][#examples: 33920/81252][#steps: 84350]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 66][#examples: 37120/81252][#steps: 84400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 66][#examples: 40320/81252][#steps: 84450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 66][#examples: 43520/81252][#steps: 84500]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 66][Validatiing...]
	 Early St

[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 68][Validatiing...]
	 Early Stopping Patience: 771/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 68][#examples: 12160/81252][#steps: 86550]
	Train Loss: 0.017 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 68][#examples: 15360/81252][#steps: 86600]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 68][#examples: 18560/81252][#steps: 86650]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 68][#examples: 21760/81252][#steps: 86700]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 68][#examples: 24960/81252][#steps: 86750]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 68][#examples: 28160/81252][#steps: 86800]
	Train Loss: 0.018 | Tra

[Epoch: 70][#examples: 3200/81252][#steps: 88950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 70][#examples: 6400/81252][#steps: 89000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 70][Validatiing...]
	 Early Stopping Patience: 764/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 70][#examples: 9600/81252][#steps: 89050]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 70][#examples: 12800/81252][#steps: 89100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 70][#examples: 16000/81252][#steps: 89150]
	Train Loss: 0.019 | Train PPL:   

[Epoch: 71][#examples: 78720/81252][#steps: 91400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 71][Validatiing...]
	 Early Stopping Patience: 758/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

Epoch: 72 | Time: 1m 15s
	Train Loss: 0.019 | Train PPL:   1.019
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 72][#examples: 640/81252][#steps: 91450]
	Train Loss: 0.009 | Train PPL:   1.009 | lr: 9.838e-08
[Epoch: 72][#examples: 3840/81252][#steps: 91500]
	Train Loss: 0.016 | Train PPL:   1.016 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of corr

[Epoch: 73][#examples: 56960/81252][#steps: 93600]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 73][#examples: 60160/81252][#steps: 93650]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 73][#examples: 63360/81252][#steps: 93700]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 73][#examples: 66560/81252][#steps: 93750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 73][#examples: 69760/81252][#steps: 93800]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 73][#examples: 72960/81252][#steps: 93850]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 73][#examples: 76160/81252][#steps: 93900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 73][#examples: 79360/81252][#steps: 93950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 73][Validatiing...]
	 Ear

[Epoch: 75][#examples: 48000/81252][#steps: 96000]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 75][Validatiing...]
	 Early Stopping Patience: 745/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 75][#examples: 51200/81252][#steps: 96050]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 75][#examples: 54400/81252][#steps: 96100]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 75][#examples: 57600/81252][#steps: 96150]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 75][#examples: 60800/81252][#steps: 96200]
	Train Loss: 0.018 | Train PPL:

[Epoch: 77][#examples: 26240/81252][#steps: 98200]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 77][#examples: 29440/81252][#steps: 98250]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 77][#examples: 32640/81252][#steps: 98300]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 77][#examples: 35840/81252][#steps: 98350]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 77][#examples: 39040/81252][#steps: 98400]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 77][#examples: 42240/81252][#steps: 98450]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 77][#examples: 45440/81252][#steps: 98500]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 77][Validatiing...]
	 Early St

[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 79][Validatiing...]
	 Early Stopping Patience: 732/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 79][#examples: 14080/81252][#steps: 100550]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 79][#examples: 17280/81252][#steps: 100600]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 79][#examples: 20480/81252][#steps: 100650]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 79][#examples: 23680/81252][#steps: 100700]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 79][#examples: 26880/81252][#steps: 100750]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 79][#examples: 30080/81252][#steps: 100800]
	Train Loss: 0.018

[Epoch: 81][#examples: 1920/81252][#steps: 102900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 81][#examples: 5120/81252][#steps: 102950]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 81][#examples: 8320/81252][#steps: 103000]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 81][Validatiing...]
	 Early Stopping Patience: 725/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 81][#examples: 11520/81252][#steps: 103050]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 81][#examples: 14720/81252][#steps: 103100]
	Train Loss: 0.019 | Train PP

[Epoch: 82][#examples: 77440/81252][#steps: 105350]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 82][#examples: 80640/81252][#steps: 105400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 82][Validatiing...]
	 Early Stopping Patience: 719/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

Epoch: 83 | Time: 1m 13s
	Train Loss: 0.019 | Train PPL:   1.019
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 83][#examples: 2560/81252][#steps: 105450]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 83][#examples: 5760/81252][#steps: 105500]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
-----Val------

[Epoch: 84][#examples: 55680/81252][#steps: 107550]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 84][#examples: 58880/81252][#steps: 107600]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 84][#examples: 62080/81252][#steps: 107650]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 84][#examples: 65280/81252][#steps: 107700]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 84][#examples: 68480/81252][#steps: 107750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 84][#examples: 71680/81252][#steps: 107800]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 84][#examples: 74880/81252][#steps: 107850]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 84][#examples: 78080/81252][#steps: 107900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 84][#examples: 81280/81252][#steps: 107950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[VAL]: The number of correct

[Epoch: 86][#examples: 46720/81252][#steps: 109950]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 86][#examples: 49920/81252][#steps: 110000]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 86][Validatiing...]
	 Early Stopping Patience: 706/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 86][#examples: 53120/81252][#steps: 110050]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 86][#examples: 56320/81252][#steps: 110100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 86][#examples: 59520/81252][#steps: 110150]
	Train Loss: 0.019 | Train

[Epoch: 88][#examples: 21760/81252][#steps: 112100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 88][#examples: 24960/81252][#steps: 112150]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 88][#examples: 28160/81252][#steps: 112200]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 88][#examples: 31360/81252][#steps: 112250]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 88][#examples: 34560/81252][#steps: 112300]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 88][#examples: 37760/81252][#steps: 112350]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 88][#examples: 40960/81252][#steps: 112400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 88][#examples: 44160/81252][#steps: 112450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 88][#examples: 47360/81252][#steps: 112500]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The nu

[Epoch: 90][#examples: 12800/81252][#steps: 114500]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 90][Validatiing...]
	 Early Stopping Patience: 693/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 90][#examples: 16000/81252][#steps: 114550]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 90][#examples: 19200/81252][#steps: 114600]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 90][#examples: 22400/81252][#steps: 114650]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 90][#examples: 25600/81252][#steps: 114700]
	Train Loss: 0.019 | Train

[Epoch: 92][#examples: 640/81252][#steps: 116850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 92][#examples: 3840/81252][#steps: 116900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 92][#examples: 7040/81252][#steps: 116950]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 92][#examples: 10240/81252][#steps: 117000]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 92][Validatiing...]
	 Early Stopping Patience: 686/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 92][#examples: 13440/81252][#steps: 117050]
	Train Loss: 0.020 | Train PPL

[Epoch: 93][#examples: 76160/81252][#steps: 119300]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 93][#examples: 79360/81252][#steps: 119350]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 93][Validatiing...]
	 Early Stopping Patience: 680/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

Epoch: 94 | Time: 1m 14s
	Train Loss: 0.018 | Train PPL:   1.018
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 94][#examples: 1280/81252][#steps: 119400]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 94][#examples: 4480/81252][#steps: 119450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 94][#e

[Epoch: 95][#examples: 57600/81252][#steps: 121550]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 95][#examples: 60800/81252][#steps: 121600]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 95][#examples: 64000/81252][#steps: 121650]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 95][#examples: 67200/81252][#steps: 121700]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 95][#examples: 70400/81252][#steps: 121750]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 95][#examples: 73600/81252][#steps: 121800]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 95][#examples: 76800/81252][#steps: 121850]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 95][#examples: 80000/81252][#steps: 121900]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 346/513

---------------------------------------
[Epoch: 95][Validatiing..

[Epoch: 97][#examples: 48640/81252][#steps: 123950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 97][#examples: 51840/81252][#steps: 124000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 346/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 740/1000

---------------------------------------
[Epoch: 97][Validatiing...]
	 Early Stopping Patience: 667/1000
	 Val. Loss: 1.474 | Val. Acc: 0.674 | Val. PPL:   4.367
	 BEST. Val. Loss: 0.912 | BEST. Val. Acc: 0.696 | Val. Loss: 1.421 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11000
---------------------------------------

[Epoch: 97][#examples: 55040/81252][#steps: 124050]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 97][#examples: 58240/81252][#steps: 124100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 97][#examples: 61440/81252][#steps: 124150]
	Train Loss: 0.019 | Train

[Epoch: 99][#examples: 23680/81252][#steps: 126100]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 99][#examples: 26880/81252][#steps: 126150]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 99][#examples: 30080/81252][#steps: 126200]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 99][#examples: 33280/81252][#steps: 126250]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 99][#examples: 36480/81252][#steps: 126300]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 99][#examples: 39680/81252][#steps: 126350]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 99][#examples: 42880/81252][#steps: 126400]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 99][#examples: 46080/81252][#steps: 126450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 99][#examples: 49280/81252][#steps: 126500]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The nu

[Epoch: 0][#examples: 67200/81252][#steps: 1050]
	Train Loss: 1.564 | Train PPL:   4.779 | lr: 3.000e-03
[Epoch: 0][#examples: 70400/81252][#steps: 1100]
	Train Loss: 1.531 | Train PPL:   4.625 | lr: 3.000e-03
[Epoch: 0][#examples: 73600/81252][#steps: 1150]
	Train Loss: 1.502 | Train PPL:   4.490 | lr: 3.000e-03
[Epoch: 0][#examples: 76800/81252][#steps: 1200]
	Train Loss: 1.473 | Train PPL:   4.363 | lr: 3.000e-03
[Epoch: 0][#examples: 80000/81252][#steps: 1250]
	Train Loss: 1.447 | Train PPL:   4.250 | lr: 3.000e-03
[VAL]: The number of correct predictions (aux-task (single)): 94/513

---------------------------------------
[Epoch: 0][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 1000/1000
	 Val. Loss: 1.992 | Val. Acc: 0.183 | Val. PPL:   7.330
	 BEST. Val. Loss: 1.992 | BEST. Val. Acc: 0.183 | Val. Loss: 1.992 | BEST. Val. Epoch: 0 | BEST. Val. Step: 1270
---------------------------------------

Epoch: 01 | 

[Epoch: 2][#examples: 51840/81252][#steps: 3350]
	Train Loss: 0.414 | Train PPL:   1.513 | lr: 2.700e-03
[Epoch: 2][#examples: 55040/81252][#steps: 3400]
	Train Loss: 0.414 | Train PPL:   1.512 | lr: 2.700e-03
[Epoch: 2][#examples: 58240/81252][#steps: 3450]
	Train Loss: 0.410 | Train PPL:   1.507 | lr: 2.700e-03
[Epoch: 2][#examples: 61440/81252][#steps: 3500]
	Train Loss: 0.408 | Train PPL:   1.503 | lr: 2.700e-03
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 259/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 527/1000

---------------------------------------
[Epoch: 2][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 1000/1000
	 Val. Loss: 1.372 | Val. Acc: 0.505 | Val. PPL:   3.943
	 BEST. Val. Loss: 1.372 | BEST. Val. Acc: 0.505 | Val. Loss: 1.372 | BEST. Val. Epoch: 2 | BEST. Val. Step: 3500
---------------------------------------

[Epoch: 2][#ex

[Epoch: 4][#examples: 30080/81252][#steps: 5550]
	Train Loss: 0.244 | Train PPL:   1.277 | lr: 1.968e-03
[Epoch: 4][#examples: 33280/81252][#steps: 5600]
	Train Loss: 0.246 | Train PPL:   1.279 | lr: 1.968e-03
[Epoch: 4][#examples: 36480/81252][#steps: 5650]
	Train Loss: 0.245 | Train PPL:   1.277 | lr: 1.968e-03
[Epoch: 4][#examples: 39680/81252][#steps: 5700]
	Train Loss: 0.245 | Train PPL:   1.277 | lr: 1.968e-03
[Epoch: 4][#examples: 42880/81252][#steps: 5750]
	Train Loss: 0.244 | Train PPL:   1.276 | lr: 1.968e-03
[Epoch: 4][#examples: 46080/81252][#steps: 5800]
	Train Loss: 0.244 | Train PPL:   1.276 | lr: 1.968e-03
[Epoch: 4][#examples: 49280/81252][#steps: 5850]
	Train Loss: 0.244 | Train PPL:   1.276 | lr: 1.968e-03
[Epoch: 4][#examples: 52480/81252][#steps: 5900]
	Train Loss: 0.243 | Train PPL:   1.274 | lr: 1.968e-03
[Epoch: 4][#examples: 55680/81252][#steps: 5950]
	Train Loss: 0.242 | Train PPL:   1.273 | lr: 1.968e-03
[Epoch: 4][#examples: 58880/81252][#steps: 6000]
	Train

[Epoch: 6][#examples: 21120/81252][#steps: 7950]
	Train Loss: 0.145 | Train PPL:   1.156 | lr: 1.291e-03
[Epoch: 6][#examples: 24320/81252][#steps: 8000]
	Train Loss: 0.145 | Train PPL:   1.156 | lr: 1.291e-03
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 295/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 635/1000

---------------------------------------
[Epoch: 6][Validatiing...]
	 Early Stopping Patience: 996/1000
	 Val. Loss: 1.363 | Val. Acc: 0.575 | Val. PPL:   3.909
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.585 | Val. Loss: 1.108 | BEST. Val. Epoch: 5 | BEST. Val. Step: 6500
---------------------------------------

[Epoch: 6][#examples: 27520/81252][#steps: 8050]
	Train Loss: 0.143 | Train PPL:   1.154 | lr: 1.162e-03
[Epoch: 6][#examples: 30720/81252][#steps: 8100]
	Train Loss: 0.143 | Train PPL:   1.153 | lr: 1.162e-03
[Epoch: 6][#examples: 33920/81252][#steps: 8150]
	Train Loss: 0.143 | Train PPL:   1.154 | l

[Epoch: 8][#examples: 2560/81252][#steps: 10200]
	Train Loss: 0.086 | Train PPL:   1.090 | lr: 7.626e-04
[Epoch: 8][#examples: 5760/81252][#steps: 10250]
	Train Loss: 0.085 | Train PPL:   1.088 | lr: 7.626e-04
[Epoch: 8][#examples: 8960/81252][#steps: 10300]
	Train Loss: 0.077 | Train PPL:   1.080 | lr: 7.626e-04
[Epoch: 8][#examples: 12160/81252][#steps: 10350]
	Train Loss: 0.077 | Train PPL:   1.080 | lr: 7.626e-04
[Epoch: 8][#examples: 15360/81252][#steps: 10400]
	Train Loss: 0.080 | Train PPL:   1.084 | lr: 7.626e-04
[Epoch: 8][#examples: 18560/81252][#steps: 10450]
	Train Loss: 0.080 | Train PPL:   1.083 | lr: 7.626e-04
[Epoch: 8][#examples: 21760/81252][#steps: 10500]
	Train Loss: 0.079 | Train PPL:   1.083 | lr: 7.626e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 311/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 660/1000

---------------------------------------
[Epoch: 8][Validatiing...]
		 Better Valid Acc! 

[Epoch: 9][#examples: 78080/81252][#steps: 12650]
	Train Loss: 0.070 | Train PPL:   1.072 | lr: 5.003e-04
[Epoch: 9][#examples: 81280/81252][#steps: 12700]
	Train Loss: 0.070 | Train PPL:   1.072 | lr: 5.003e-04
[VAL]: The number of correct predictions (aux-task (single)): 306/513

---------------------------------------
[Epoch: 9][Validatiing...]
	 Early Stopping Patience: 983/1000
	 Val. Loss: 1.421 | Val. Acc: 0.596 | Val. PPL:   4.141
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.608 | Val. Loss: 1.414 | BEST. Val. Epoch: 9 | BEST. Val. Step: 12500
---------------------------------------

Epoch: 10 | Time: 2m 3s
	Train Loss: 0.070 | Train PPL:   1.072
	 Val. Loss: 1.421 | Val. Acc: 0.596 | Val. PPL:   4.141
[Train]: Current Teacher Forcing Ratio: 0.500
[Epoch: 10][#examples: 3200/81252][#steps: 12750]
	Train Loss: 0.054 | Train PPL:   1.055 | lr: 4.503e-04
[Epoch: 10][#examples: 6400/81252][#steps: 12800]
	Train Loss: 0.052 | Train PPL:   1.054 | lr: 4.503e-04
[Epoch: 10][#examples:

[VAL]: The number of correct predictions (aux-task (single)): 312/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 676/1000

---------------------------------------
[Epoch: 11][Validatiing...]
	 Early Stopping Patience: 977/1000
	 Val. Loss: 1.496 | Val. Acc: 0.608 | Val. PPL:   4.464
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.620 | Val. Loss: 1.522 | BEST. Val. Epoch: 11 | BEST. Val. Step: 14500
---------------------------------------

[Epoch: 11][#examples: 69120/81252][#steps: 15050]
	Train Loss: 0.054 | Train PPL:   1.056 | lr: 2.954e-04
[Epoch: 11][#examples: 72320/81252][#steps: 15100]
	Train Loss: 0.054 | Train PPL:   1.056 | lr: 2.954e-04
[Epoch: 11][#examples: 75520/81252][#steps: 15150]
	Train Loss: 0.054 | Train PPL:   1.055 | lr: 2.954e-04
[Epoch: 11][#examples: 78720/81252][#steps: 15200]
	Train Loss: 0.054 | Train PPL:   1.056 | lr: 2.954e-04
[VAL]: The number of correct predictions (aux-task (single)): 316/513

-------------------------

[Epoch: 13][#examples: 47360/81252][#steps: 17250]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 1.570e-04
[Epoch: 13][#examples: 50560/81252][#steps: 17300]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 1.570e-04
[Epoch: 13][#examples: 53760/81252][#steps: 17350]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 1.570e-04
[Epoch: 13][#examples: 56960/81252][#steps: 17400]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 1.570e-04
[Epoch: 13][#examples: 60160/81252][#steps: 17450]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 1.570e-04
[Epoch: 13][#examples: 63360/81252][#steps: 17500]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 1.570e-04
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 319/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 676/1000

---------------------------------------
[Epoch: 13][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 970/1000
	 Val. Loss: 1.520 | Val. Acc: 0.622 | Val.

[Epoch: 15][#examples: 32000/81252][#steps: 19550]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 8.344e-05
[Epoch: 15][#examples: 35200/81252][#steps: 19600]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 8.344e-05
[Epoch: 15][#examples: 38400/81252][#steps: 19650]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 8.344e-05
[Epoch: 15][#examples: 41600/81252][#steps: 19700]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 8.344e-05
[Epoch: 15][#examples: 44800/81252][#steps: 19750]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 8.344e-05
[Epoch: 15][#examples: 48000/81252][#steps: 19800]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 8.344e-05
[Epoch: 15][#examples: 51200/81252][#steps: 19850]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 8.344e-05
[Epoch: 15][#examples: 54400/81252][#steps: 19900]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 8.344e-05
[Epoch: 15][#examples: 57600/81252][#steps: 19950]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 8.344e-05
[Epoch: 15][#examples: 60800/81252][#

[Epoch: 17][#examples: 23040/81252][#steps: 21950]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 5.474e-05
[Epoch: 17][#examples: 26240/81252][#steps: 22000]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 5.474e-05
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 318/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 684/1000

---------------------------------------
[Epoch: 17][Validatiing...]
	 Early Stopping Patience: 957/1000
	 Val. Loss: 1.539 | Val. Acc: 0.620 | Val. PPL:   4.659
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 17][#examples: 29440/81252][#steps: 22050]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 4.927e-05
[Epoch: 17][#examples: 32640/81252][#steps: 22100]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 4.927e-05
[Epoch: 17][#examples: 35840/81252][#steps: 22150]
	Train Loss: 0.045 | Train PPL

[Epoch: 19][#examples: 1280/81252][#steps: 24150]
	Train Loss: 0.060 | Train PPL:   1.062 | lr: 2.618e-05
[Epoch: 19][#examples: 4480/81252][#steps: 24200]
	Train Loss: 0.054 | Train PPL:   1.056 | lr: 2.618e-05
[Epoch: 19][#examples: 7680/81252][#steps: 24250]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 2.618e-05
[Epoch: 19][#examples: 10880/81252][#steps: 24300]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 2.618e-05
[Epoch: 19][#examples: 14080/81252][#steps: 24350]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 2.618e-05
[Epoch: 19][#examples: 17280/81252][#steps: 24400]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 2.618e-05
[Epoch: 19][#examples: 20480/81252][#steps: 24450]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 2.618e-05
[Epoch: 19][#examples: 23680/81252][#steps: 24500]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 2.618e-05
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 319/513
-----Tst------
[TEST]: The number of correct predictions (aux-

[Epoch: 20][#examples: 76800/81252][#steps: 26600]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.392e-05
[Epoch: 20][#examples: 80000/81252][#steps: 26650]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.392e-05
[VAL]: The number of correct predictions (aux-task (single)): 320/513

---------------------------------------
[Epoch: 20][Validatiing...]
	 Early Stopping Patience: 944/1000
	 Val. Loss: 1.559 | Val. Acc: 0.624 | Val. PPL:   4.754
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

Epoch: 21 | Time: 2m 5s
	Train Loss: 0.049 | Train PPL:   1.050
	 Val. Loss: 1.559 | Val. Acc: 0.624 | Val. PPL:   4.754
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 21][#examples: 1920/81252][#steps: 26700]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 1.252e-05
[Epoch: 21][#examples: 5120/81252][#steps: 26750]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.252e-05
[Epoch: 21][#examp

[Epoch: 22][#examples: 67840/81252][#steps: 29000]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 7.395e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 319/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 677/1000

---------------------------------------
[Epoch: 22][Validatiing...]
	 Early Stopping Patience: 938/1000
	 Val. Loss: 1.560 | Val. Acc: 0.622 | Val. PPL:   4.759
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 22][#examples: 71040/81252][#steps: 29050]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 6.656e-06
[Epoch: 22][#examples: 74240/81252][#steps: 29100]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 6.656e-06
[Epoch: 22][#examples: 77440/81252][#steps: 29150]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 6.656e-06
[Epoch: 22][#examples: 80640/81252][#steps: 29200]
	Train Loss: 0.047 | Train PPL

[Epoch: 24][#examples: 46080/81252][#steps: 31200]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 3.537e-06
[Epoch: 24][#examples: 49280/81252][#steps: 31250]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 3.537e-06
[Epoch: 24][#examples: 52480/81252][#steps: 31300]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 3.537e-06
[Epoch: 24][#examples: 55680/81252][#steps: 31350]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 3.537e-06
[Epoch: 24][#examples: 58880/81252][#steps: 31400]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 3.537e-06
[Epoch: 24][#examples: 62080/81252][#steps: 31450]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 3.537e-06
[Epoch: 24][#examples: 65280/81252][#steps: 31500]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 3.537e-06
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 318/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 678/1000

---------------------------------------
[Epoch: 24][Validatiing...]
	 Early St

[TEST]: The number of correct predictions (aux-task (single)): 680/1000

---------------------------------------
[Epoch: 26][Validatiing...]
	 Early Stopping Patience: 925/1000
	 Val. Loss: 1.566 | Val. Acc: 0.618 | Val. PPL:   4.788
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 26][#examples: 33920/81252][#steps: 33550]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.692e-06
[Epoch: 26][#examples: 37120/81252][#steps: 33600]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.692e-06
[Epoch: 26][#examples: 40320/81252][#steps: 33650]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 1.692e-06
[Epoch: 26][#examples: 43520/81252][#steps: 33700]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 1.692e-06
[Epoch: 26][#examples: 46720/81252][#steps: 33750]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 1.692e-06
[Epoch: 26][#examples: 49920/81252][#steps: 33800]
	Train Loss: 0.048 | Tr

[Epoch: 28][#examples: 15360/81252][#steps: 35800]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 8.991e-07
[Epoch: 28][#examples: 18560/81252][#steps: 35850]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 8.991e-07
[Epoch: 28][#examples: 21760/81252][#steps: 35900]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 8.991e-07
[Epoch: 28][#examples: 24960/81252][#steps: 35950]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 8.991e-07
[Epoch: 28][#examples: 28160/81252][#steps: 36000]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 8.991e-07
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 680/1000

---------------------------------------
[Epoch: 28][Validatiing...]
	 Early Stopping Patience: 918/1000
	 Val. Loss: 1.560 | Val. Acc: 0.618 | Val. PPL:   4.758
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------

[Epoch: 30][#examples: 3200/81252][#steps: 38150]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 4.300e-07
[Epoch: 30][#examples: 6400/81252][#steps: 38200]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 4.300e-07
[Epoch: 30][#examples: 9600/81252][#steps: 38250]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 4.300e-07
[Epoch: 30][#examples: 12800/81252][#steps: 38300]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 4.300e-07
[Epoch: 30][#examples: 16000/81252][#steps: 38350]
	Train Loss: 0.043 | Train PPL:   1.044 | lr: 4.300e-07
[Epoch: 30][#examples: 19200/81252][#steps: 38400]
	Train Loss: 0.044 | Train PPL:   1.046 | lr: 4.300e-07
[Epoch: 30][#examples: 22400/81252][#steps: 38450]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 4.300e-07
[Epoch: 30][#examples: 25600/81252][#steps: 38500]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 4.300e-07
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-

[Epoch: 31][#examples: 78720/81252][#steps: 40600]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 2.285e-07
[VAL]: The number of correct predictions (aux-task (single)): 317/513

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 905/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

Epoch: 32 | Time: 2m 2s
	Train Loss: 0.047 | Train PPL:   1.048
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 32][#examples: 640/81252][#steps: 40650]
	Train Loss: 0.032 | Train PPL:   1.033 | lr: 2.057e-07
[Epoch: 32][#examples: 3840/81252][#steps: 40700]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 2.057e-07
[Epoch: 32][#examples: 7040/81252][#steps: 40750]
	Train Loss: 0.052 | Train PPL:   1.054 | lr: 2.057e-07
[Epoch: 32][#example

[Epoch: 33][#examples: 69760/81252][#steps: 43000]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 1.215e-07
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 33][Validatiing...]
	 Early Stopping Patience: 899/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 33][#examples: 72960/81252][#steps: 43050]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 1.093e-07
[Epoch: 33][#examples: 76160/81252][#steps: 43100]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 1.093e-07
[Epoch: 33][#examples: 79360/81252][#steps: 43150]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 1.093e-07
[VAL]: The number of correct predictions (aux-task (single)): 317/513

----------

[Epoch: 35][#examples: 48000/81252][#steps: 45200]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 35][#examples: 51200/81252][#steps: 45250]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 35][#examples: 54400/81252][#steps: 45300]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 35][#examples: 57600/81252][#steps: 45350]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 35][#examples: 60800/81252][#steps: 45400]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 35][#examples: 64000/81252][#steps: 45450]
	Train Loss: 0.045 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 35][#examples: 67200/81252][#steps: 45500]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 35][Validatiing...]
	 Early St

[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 37][Validatiing...]
	 Early Stopping Patience: 886/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.795
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 37][#examples: 35840/81252][#steps: 47550]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 37][#examples: 39040/81252][#steps: 47600]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 37][#examples: 42240/81252][#steps: 47650]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 37][#examples: 45440/81252][#steps: 47700]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 37][#examples: 48640/81252][#steps: 47750]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 37][#examples: 51840/81252][#steps: 47800]
	Train Loss: 0.048 | Tr

[Epoch: 39][#examples: 17280/81252][#steps: 49800]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 39][#examples: 20480/81252][#steps: 49850]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 39][#examples: 23680/81252][#steps: 49900]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 39][#examples: 26880/81252][#steps: 49950]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 39][#examples: 30080/81252][#steps: 50000]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 39][Validatiing...]
	 Early Stopping Patience: 879/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------

[Epoch: 41][#examples: 1920/81252][#steps: 52100]
	Train Loss: 0.055 | Train PPL:   1.056 | lr: 9.838e-08
[Epoch: 41][#examples: 5120/81252][#steps: 52150]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.838e-08
[Epoch: 41][#examples: 8320/81252][#steps: 52200]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 41][#examples: 11520/81252][#steps: 52250]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 41][#examples: 14720/81252][#steps: 52300]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.838e-08
[Epoch: 41][#examples: 17920/81252][#steps: 52350]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.838e-08
[Epoch: 41][#examples: 21120/81252][#steps: 52400]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.838e-08
[Epoch: 41][#examples: 24320/81252][#steps: 52450]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 41][#examples: 27520/81252][#steps: 52500]
	Train Loss: 0.049 | Train PPL:   1.051 | lr: 9.838e-08
-----Val------
[VAL]: The number of corr

[Epoch: 42][#examples: 77440/81252][#steps: 54550]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 42][#examples: 80640/81252][#steps: 54600]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 317/513

---------------------------------------
[Epoch: 42][Validatiing...]
	 Early Stopping Patience: 866/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

Epoch: 43 | Time: 2m 3s
	Train Loss: 0.048 | Train PPL:   1.049
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 43][#examples: 2560/81252][#steps: 54650]
	Train Loss: 0.041 | Train PPL:   1.042 | lr: 9.838e-08
[Epoch: 43][#examples: 5760/81252][#steps: 54700]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 43][#examp

[Epoch: 44][#examples: 68480/81252][#steps: 56950]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 44][#examples: 71680/81252][#steps: 57000]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 44][Validatiing...]
	 Early Stopping Patience: 860/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 44][#examples: 74880/81252][#steps: 57050]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 44][#examples: 78080/81252][#steps: 57100]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 44][#examples: 81280/81252][#steps: 57150]
	Train Loss: 0.047 | Train PPL

[Epoch: 46][#examples: 46720/81252][#steps: 59150]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 46][#examples: 49920/81252][#steps: 59200]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 46][#examples: 53120/81252][#steps: 59250]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 46][#examples: 56320/81252][#steps: 59300]
	Train Loss: 0.045 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 46][#examples: 59520/81252][#steps: 59350]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 46][#examples: 62720/81252][#steps: 59400]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 46][#examples: 65920/81252][#steps: 59450]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 46][#examples: 69120/81252][#steps: 59500]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (a

[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 48][Validatiing...]
	 Early Stopping Patience: 847/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 48][#examples: 37760/81252][#steps: 61550]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 48][#examples: 40960/81252][#steps: 61600]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 48][#examples: 44160/81252][#steps: 61650]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 48][#examples: 47360/81252][#steps: 61700]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 48][#examples: 50560/81252][#steps: 61750]
	Train Loss: 0.048 | Train PPL:   1.049 | lr:

[Epoch: 50][#examples: 16000/81252][#steps: 63750]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 9.838e-08
[Epoch: 50][#examples: 19200/81252][#steps: 63800]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 9.838e-08
[Epoch: 50][#examples: 22400/81252][#steps: 63850]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 50][#examples: 25600/81252][#steps: 63900]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 50][#examples: 28800/81252][#steps: 63950]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 50][#examples: 32000/81252][#steps: 64000]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 50][Validatiing...]
	 Early Stopping Patience: 840/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108

[Epoch: 52][#examples: 640/81252][#steps: 66050]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
[Epoch: 52][#examples: 3840/81252][#steps: 66100]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 52][#examples: 7040/81252][#steps: 66150]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 52][#examples: 10240/81252][#steps: 66200]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 52][#examples: 13440/81252][#steps: 66250]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 52][#examples: 16640/81252][#steps: 66300]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 52][#examples: 19840/81252][#steps: 66350]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 52][#examples: 23040/81252][#steps: 66400]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 52][#examples: 26240/81252][#steps: 66450]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 52][#examples: 29440/81252][#step

[Epoch: 53][#examples: 79360/81252][#steps: 68550]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 317/513

---------------------------------------
[Epoch: 53][Validatiing...]
	 Early Stopping Patience: 827/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

Epoch: 54 | Time: 2m 3s
	Train Loss: 0.047 | Train PPL:   1.048
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 54][#examples: 1280/81252][#steps: 68600]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 54][#examples: 4480/81252][#steps: 68650]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 54][#examples: 7680/81252][#steps: 68700]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 54][#exampl

[Epoch: 55][#examples: 70400/81252][#steps: 70950]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 55][#examples: 73600/81252][#steps: 71000]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 55][Validatiing...]
	 Early Stopping Patience: 821/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 55][#examples: 76800/81252][#steps: 71050]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 55][#examples: 80000/81252][#steps: 71100]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 317/513

----------

[Epoch: 57][#examples: 48640/81252][#steps: 73150]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 57][#examples: 51840/81252][#steps: 73200]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 57][#examples: 55040/81252][#steps: 73250]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 57][#examples: 58240/81252][#steps: 73300]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 57][#examples: 61440/81252][#steps: 73350]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 57][#examples: 64640/81252][#steps: 73400]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 57][#examples: 67840/81252][#steps: 73450]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 57][#examples: 71040/81252][#steps: 73500]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (a

[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 59][Validatiing...]
	 Early Stopping Patience: 808/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 59][#examples: 39680/81252][#steps: 75550]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 59][#examples: 42880/81252][#steps: 75600]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 59][#examples: 46080/81252][#steps: 75650]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 59][#examples: 49280/81252][#steps: 75700]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 59][#examples: 52480/81252][#steps: 75750]
	Train Loss: 0.045 | Train PPL:   1.046 | lr:

[Epoch: 61][#examples: 17920/81252][#steps: 77750]
	Train Loss: 0.051 | Train PPL:   1.053 | lr: 9.838e-08
[Epoch: 61][#examples: 21120/81252][#steps: 77800]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.838e-08
[Epoch: 61][#examples: 24320/81252][#steps: 77850]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 61][#examples: 27520/81252][#steps: 77900]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 61][#examples: 30720/81252][#steps: 77950]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 61][#examples: 33920/81252][#steps: 78000]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 61][Validatiing...]
	 Early Stopping Patience: 801/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108

[Epoch: 63][#examples: 2560/81252][#steps: 80050]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 63][#examples: 5760/81252][#steps: 80100]
	Train Loss: 0.042 | Train PPL:   1.042 | lr: 9.838e-08
[Epoch: 63][#examples: 8960/81252][#steps: 80150]
	Train Loss: 0.041 | Train PPL:   1.041 | lr: 9.838e-08
[Epoch: 63][#examples: 12160/81252][#steps: 80200]
	Train Loss: 0.041 | Train PPL:   1.042 | lr: 9.838e-08
[Epoch: 63][#examples: 15360/81252][#steps: 80250]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 9.838e-08
[Epoch: 63][#examples: 18560/81252][#steps: 80300]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 9.838e-08
[Epoch: 63][#examples: 21760/81252][#steps: 80350]
	Train Loss: 0.041 | Train PPL:   1.042 | lr: 9.838e-08
[Epoch: 63][#examples: 24960/81252][#steps: 80400]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 9.838e-08
[Epoch: 63][#examples: 28160/81252][#steps: 80450]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 9.838e-08
[Epoch: 63][#examples: 31360/81252][#ste

[Epoch: 64][#examples: 81280/81252][#steps: 82550]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 317/513

---------------------------------------
[Epoch: 64][Validatiing...]
	 Early Stopping Patience: 788/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

Epoch: 65 | Time: 2m 3s
	Train Loss: 0.047 | Train PPL:   1.048
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 65][#examples: 3200/81252][#steps: 82600]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 65][#examples: 6400/81252][#steps: 82650]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 65][#examples: 9600/81252][#steps: 82700]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.838e-08
[Epoch: 65][#exampl

[Epoch: 66][#examples: 72320/81252][#steps: 84950]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 66][#examples: 75520/81252][#steps: 85000]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 66][Validatiing...]
	 Early Stopping Patience: 782/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 66][#examples: 78720/81252][#steps: 85050]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 317/513

---------------------------------------
[Epoch: 66][Validatiing...]
	 Early Stopping Patience: 781/1000
	 Val. Loss: 

[Epoch: 68][#examples: 50560/81252][#steps: 87150]
	Train Loss: 0.046 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 68][#examples: 53760/81252][#steps: 87200]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 68][#examples: 56960/81252][#steps: 87250]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 68][#examples: 60160/81252][#steps: 87300]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 68][#examples: 63360/81252][#steps: 87350]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 68][#examples: 66560/81252][#steps: 87400]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 68][#examples: 69760/81252][#steps: 87450]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 68][#examples: 72960/81252][#steps: 87500]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (a

[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 70][Validatiing...]
	 Early Stopping Patience: 769/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 70][#examples: 41600/81252][#steps: 89550]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 70][#examples: 44800/81252][#steps: 89600]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 70][#examples: 48000/81252][#steps: 89650]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 70][#examples: 51200/81252][#steps: 89700]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 70][#examples: 54400/81252][#steps: 89750]
	Train Loss: 0.047 | Train PPL:   1.049 | lr:

[Epoch: 72][#examples: 19840/81252][#steps: 91750]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 72][#examples: 23040/81252][#steps: 91800]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 72][#examples: 26240/81252][#steps: 91850]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 72][#examples: 29440/81252][#steps: 91900]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 9.838e-08
[Epoch: 72][#examples: 32640/81252][#steps: 91950]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 9.838e-08
[Epoch: 72][#examples: 35840/81252][#steps: 92000]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 72][Validatiing...]
	 Early Stopping Patience: 762/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108

[Epoch: 74][#examples: 4480/81252][#steps: 94050]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 74][#examples: 7680/81252][#steps: 94100]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 74][#examples: 10880/81252][#steps: 94150]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 74][#examples: 14080/81252][#steps: 94200]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 9.838e-08
[Epoch: 74][#examples: 17280/81252][#steps: 94250]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 74][#examples: 20480/81252][#steps: 94300]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 74][#examples: 23680/81252][#steps: 94350]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 74][#examples: 26880/81252][#steps: 94400]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 74][#examples: 30080/81252][#steps: 94450]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 74][#examples: 33280/81252][#st

[VAL]: The number of correct predictions (aux-task (single)): 317/513

---------------------------------------
[Epoch: 75][Validatiing...]
	 Early Stopping Patience: 749/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.793
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

Epoch: 76 | Time: 2m 1s
	Train Loss: 0.049 | Train PPL:   1.050
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.793
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 76][#examples: 1920/81252][#steps: 96550]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 76][#examples: 5120/81252][#steps: 96600]
	Train Loss: 0.038 | Train PPL:   1.038 | lr: 9.838e-08
[Epoch: 76][#examples: 8320/81252][#steps: 96650]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 76][#examples: 11520/81252][#steps: 96700]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 76][#exampl

[Epoch: 77][#examples: 74240/81252][#steps: 98950]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 77][#examples: 77440/81252][#steps: 99000]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 77][Validatiing...]
	 Early Stopping Patience: 743/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 77][#examples: 80640/81252][#steps: 99050]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[VAL]: The number of correct predictions (aux-task (single)): 317/513

---------------------------------------
[Epoch: 77][Validatiing...]
	 Early Stopping Patience: 742/1000
	 Val. Loss: 

[Epoch: 79][#examples: 52480/81252][#steps: 101150]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 79][#examples: 55680/81252][#steps: 101200]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 79][#examples: 58880/81252][#steps: 101250]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 79][#examples: 62080/81252][#steps: 101300]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 79][#examples: 65280/81252][#steps: 101350]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 79][#examples: 68480/81252][#steps: 101400]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 79][#examples: 71680/81252][#steps: 101450]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 79][#examples: 74880/81252][#steps: 101500]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predic

[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 81][Validatiing...]
	 Early Stopping Patience: 730/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 81][#examples: 43520/81252][#steps: 103550]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 81][#examples: 46720/81252][#steps: 103600]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 81][#examples: 49920/81252][#steps: 103650]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 81][#examples: 53120/81252][#steps: 103700]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 81][#examples: 56320/81252][#steps: 103750]
	Train Loss: 0.048 | Train PPL:   1.049 

[Epoch: 83][#examples: 21760/81252][#steps: 105750]
	Train Loss: 0.043 | Train PPL:   1.044 | lr: 9.838e-08
[Epoch: 83][#examples: 24960/81252][#steps: 105800]
	Train Loss: 0.043 | Train PPL:   1.044 | lr: 9.838e-08
[Epoch: 83][#examples: 28160/81252][#steps: 105850]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 83][#examples: 31360/81252][#steps: 105900]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 83][#examples: 34560/81252][#steps: 105950]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 83][#examples: 37760/81252][#steps: 106000]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 83][Validatiing...]
	 Early Stopping Patience: 723/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss:

[Epoch: 85][#examples: 6400/81252][#steps: 108050]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.838e-08
[Epoch: 85][#examples: 9600/81252][#steps: 108100]
	Train Loss: 0.052 | Train PPL:   1.054 | lr: 9.838e-08
[Epoch: 85][#examples: 12800/81252][#steps: 108150]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 85][#examples: 16000/81252][#steps: 108200]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 85][#examples: 19200/81252][#steps: 108250]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 85][#examples: 22400/81252][#steps: 108300]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 85][#examples: 25600/81252][#steps: 108350]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 85][#examples: 28800/81252][#steps: 108400]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 85][#examples: 32000/81252][#steps: 108450]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 85][#examples: 35200/8

[Epoch: 87][#examples: 640/81252][#steps: 110500]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 87][Validatiing...]
	 Early Stopping Patience: 710/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 87][#examples: 3840/81252][#steps: 110550]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 87][#examples: 7040/81252][#steps: 110600]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
[Epoch: 87][#examples: 10240/81252][#steps: 110650]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 87][#examples: 13440/81252][#steps: 110700]
	Train Loss: 0.047 | Train PP

[Epoch: 88][#examples: 76160/81252][#steps: 112950]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 88][#examples: 79360/81252][#steps: 113000]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 88][Validatiing...]
	 Early Stopping Patience: 704/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.795
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[VAL]: The number of correct predictions (aux-task (single)): 317/513

---------------------------------------
[Epoch: 88][Validatiing...]
	 Early Stopping Patience: 703/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.795
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss:

[Epoch: 90][#examples: 51200/81252][#steps: 115100]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 90][#examples: 54400/81252][#steps: 115150]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 90][#examples: 57600/81252][#steps: 115200]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 90][#examples: 60800/81252][#steps: 115250]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 90][#examples: 64000/81252][#steps: 115300]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 90][#examples: 67200/81252][#steps: 115350]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 90][#examples: 70400/81252][#steps: 115400]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 90][#examples: 73600/81252][#steps: 115450]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 90][#examples: 76800/81252][#steps: 115500]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 9.838e-08
-----Val------
[VAL]: The nu

[Epoch: 92][#examples: 42240/81252][#steps: 117500]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 92][Validatiing...]
	 Early Stopping Patience: 691/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.794
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 92][#examples: 45440/81252][#steps: 117550]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 92][#examples: 48640/81252][#steps: 117600]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 92][#examples: 51840/81252][#steps: 117650]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 92][#examples: 55040/81252][#steps: 117700]
	Train Loss: 0.047 | Trai

[Epoch: 94][#examples: 17280/81252][#steps: 119650]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 94][#examples: 20480/81252][#steps: 119700]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.838e-08
[Epoch: 94][#examples: 23680/81252][#steps: 119750]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 94][#examples: 26880/81252][#steps: 119800]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.838e-08
[Epoch: 94][#examples: 30080/81252][#steps: 119850]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 94][#examples: 33280/81252][#steps: 119900]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 94][#examples: 36480/81252][#steps: 119950]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 94][#examples: 39680/81252][#steps: 120000]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predic

[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 96][Validatiing...]
	 Early Stopping Patience: 678/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.793
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

[Epoch: 96][#examples: 8320/81252][#steps: 122050]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 9.838e-08
[Epoch: 96][#examples: 11520/81252][#steps: 122100]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 96][#examples: 14720/81252][#steps: 122150]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 96][#examples: 17920/81252][#steps: 122200]
	Train Loss: 0.048 | Train PPL:   1.050 | lr: 9.838e-08
[Epoch: 96][#examples: 21120/81252][#steps: 122250]
	Train Loss: 0.049 | Train PPL:   1.050 |

[VAL]: The number of correct predictions (aux-task (single)): 317/513

---------------------------------------
[Epoch: 97][Validatiing...]
	 Early Stopping Patience: 672/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.793
	 BEST. Val. Loss: 1.108 | BEST. Val. Acc: 0.626 | Val. Loss: 1.539 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

Epoch: 98 | Time: 2m 2s
	Train Loss: 0.046 | Train PPL:   1.047
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.793
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 98][#examples: 2560/81252][#steps: 124500]
	Train Loss: 0.036 | Train PPL:   1.037 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predictions (aux-task (single)): 679/1000

---------------------------------------
[Epoch: 98][Validatiing...]
	 Early Stopping Patience: 671/1000
	 Val. Loss: 1.567 | Val. Acc: 0.618 | Val. PPL:   4.793
	 B

[Epoch: 99][#examples: 58880/81252][#steps: 126650]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 99][#examples: 62080/81252][#steps: 126700]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 99][#examples: 65280/81252][#steps: 126750]
	Train Loss: 0.046 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 99][#examples: 68480/81252][#steps: 126800]
	Train Loss: 0.046 | Train PPL:   1.047 | lr: 9.838e-08
[Epoch: 99][#examples: 71680/81252][#steps: 126850]
	Train Loss: 0.046 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 99][#examples: 74880/81252][#steps: 126900]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 99][#examples: 78080/81252][#steps: 126950]
	Train Loss: 0.046 | Train PPL:   1.048 | lr: 9.838e-08
[Epoch: 99][#examples: 81280/81252][#steps: 127000]
	Train Loss: 0.046 | Train PPL:   1.048 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (aux-task (single)): 317/513
-----Tst------
[TEST]: The number of correct predic