In [1]:
from mnmt.encoder import BasicEncoder
from mnmt.decoder import BasicFeedForwardDecoder
from mnmt.decoder import GreedyDecoder
from mnmt.attention import AdditiveAttention
from mnmt.decoder import BridgeLayer
from mnmt.model import Seq2Seq, Seq2MultiSeq
from mnmt.datasets import *
from mnmt.inputter import ArgsFeeder
from mnmt.inputter import ModuleArgsFeeder
from mnmt.trainer.utils import *
from mnmt.trainer import Trainer
import torch.nn as nn
import sys

Loading DICT dataset...
Loading the Dataset into the container...
Field names: ['en', 'ch', 'pinyin_str', 'pinyin_char']
Data sizes: [(Train, 46620), (Valid, 5828), (Test, 5828)]
The first example of the training data is:
en :  ['k', 'a', 'r', 't', 'm', 'a', 'n']
ch :  ['卡', '特', '曼']
pinyin_str :  ['ka', 'te', 'man']
pinyin_char :  ['k', 'a', 't', 'e', 'm', 'a', 'n']
Loading NEWS dataset...
Loading the Dataset into the container...
Field names: ['en', 'ch', 'pinyin_str', 'pinyin_char']
Data sizes: [(Train, 81252), (Valid, 513), (Test, 1000)]
The first example of the training data is:
en :  ['k', 'u', 's', 'i', 'c', 'k']
ch :  ['库', '西', '克']
pinyin_str :  ['ku', 'xi', 'ke']
pinyin_char :  ['k', 'u', 'x', 'i', 'k', 'e']


In [5]:
def set_up_args(data_container, exp_num):
    """Build the vocabularies and assemble the configuration for one experiment.

    Args:
        data_container: dataset container whose ``fields`` attribute iterates as
            ``(name, field)`` pairs. It must contain the ``'en'`` (source) and
            ``'ch'`` (target) fields; their vocabularies are built here.
        exp_num: experiment number, used for ``exp_num`` on the returned feeder
            and for the ``experiments/exp{exp_num}/`` output paths.

    Returns:
        An ``ArgsFeeder`` holding one encoder configuration, two decoder
        configurations (main task first, auxiliary task second) and the
        training hyper-parameters.

    Raises:
        ValueError: if ``data_container`` has no ``'en'`` or no ``'ch'`` field.
    """
    build_vocabs(data_container, dict_min_freqs={'en': 1, 'ch': 1, 'pinyin_str': 1, 'pinyin_char': 1})

    # Index the fields by name; should a name repeat, the last occurrence wins.
    fields_by_name = {name: field for name, field in data_container.fields}
    missing = [name for name in ('en', 'ch') if name not in fields_by_name]
    if missing:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(f"data_container is missing required field(s): {missing}")

    src_field = fields_by_name['en']
    trg_field = fields_by_name['ch']
    input_dim = len(src_field.vocab)
    output_dim = len(trg_field.vocab)
    # Both padding indices are read the same way, through the vocab's stoi mapping.
    src_pad_idx = src_field.vocab.stoi[src_field.pad_token]
    trg_pad_idx = trg_field.vocab.stoi[trg_field.pad_token]

    enc_args_feeder = ModuleArgsFeeder(input_dim=input_dim, embedding_dim=256, hidden_dim=512,
                                       embedding_dropout=0.1, rnn_type='LSTM',
                                       num_layers=2, rnn_dropout=0.2)
    dec_args_feeder = ModuleArgsFeeder(input_dim=output_dim, embedding_dim=256, hidden_dim=512,
                                       embedding_dropout=0.1, rnn_type='LSTM',
                                       num_layers=2, rnn_dropout=0.2)
    # NOTE(review): the auxiliary decoder is sized with the 'ch' vocabulary although
    # auxiliary_name points at a pinyin field -- confirm this is intended.
    dec_args_feeder_aux = ModuleArgsFeeder(input_dim=output_dim, embedding_dim=128, hidden_dim=256,
                                           embedding_dropout=0.1, rnn_type='LSTM',
                                           num_layers=2, rnn_dropout=0.1)

    # NOTE(review): multi_task_ratio=66 looks like a placeholder; the sweep in this
    # notebook overwrites multi_task_ratio and auxiliary_name on the returned feeder.
    return ArgsFeeder(enc_args_feeder, [dec_args_feeder, dec_args_feeder_aux],
                      batch_size=64, src_pad_idx=src_pad_idx, trg_pad_idx=trg_pad_idx,
                      optim_choice='Adam', learning_rate=0.003, decay_patience=0,
                      lr_decay_factor=0.9, valid_criterion='ACC', early_stopping_patience=100,
                      total_epochs=100, report_interval=50, exp_num=exp_num, multi_task_ratio=66,
                      data_container=data_container,
                      src_lang='en', trg_lang='ch', auxiliary_name='pinyin_str', quiet_translate=True,
                      valid_out_path=f"experiments/exp{exp_num}/valid.out",
                      test_out_path=f"experiments/exp{exp_num}/test.out",
                      beam_size=1)


def test_seq2seq(args_feeder):
    
    decoder_args_feeder = args_feeder.decoder_args_feeders[0]
    decoder_args_feeder_aux = args_feeder.decoder_args_feeders[1]
    
    encoder = BasicEncoder(args_feeder)
    
    feed_forward_decoder = \
        BasicFeedForwardDecoder(args_feeder,
                                AdditiveAttention(encoder_hidden_dim=args_feeder.encoder_args_feeder.hidden_dim,
                                                  decoder_hidden_dim=decoder_args_feeder.hidden_dim), 
                                                  decoder_index=0)
    feed_forward_decoder_aux = \
            BasicFeedForwardDecoder(args_feeder,
                                    AdditiveAttention(encoder_hidden_dim=args_feeder.encoder_args_feeder.hidden_dim,
                                                      decoder_hidden_dim=decoder_args_feeder_aux.hidden_dim), 
                                                      decoder_index=1)
                                
    bridge_layer = BridgeLayer(encoder_hidden_dim=args_feeder.encoder_args_feeder.hidden_dim,
                               decoder_hidden_dim=decoder_args_feeder.hidden_dim,
                               num_of_states=2)
    bridge_layer_aux = BridgeLayer(encoder_hidden_dim=args_feeder.encoder_args_feeder.hidden_dim,
                                   decoder_hidden_dim=decoder_args_feeder_aux.hidden_dim,
                                   num_of_states=2)
                                
    decoder = GreedyDecoder(feed_forward_decoder, bridge_layer, device=args_feeder.device)
    decoder_aux = GreedyDecoder(feed_forward_decoder_aux, bridge_layer_aux, device=args_feeder.device)
                                
    model = Seq2MultiSeq(args_feeder, encoder, 
                         nn.ModuleList([decoder, decoder_aux]), 
                         teacher_forcing_ratio=0.8).to(args_feeder.device)
    return model


if __name__ == '__main__':
    # The seed is fixed once, before any experiment; it is not reset between runs.
    set_reproducibility(seed=1234)
    aux_lang = "pinyin_char"  # pinyin_str, pinyin_char
    # Multi-task ratios in ascending order; 1-based experiment index i uses mtrs[i - 1].
    mtrs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    try:
        # DICT Multi (experiments 20 + i)
        dict_dataset = DICT['data_container']
        # NOTE(review): this sweep skips the first six ratios and starts at i = 7;
        # it looks like the resumption of an earlier, interrupted sweep -- confirm.
        for i, ratio in enumerate(mtrs[6:], start=7):
            seq2seq_args_feeder = set_up_args(dict_dataset, exp_num=20 + i)
            # Replace the values that set_up_args filled in for these two settings.
            seq2seq_args_feeder.multi_task_ratio = ratio
            seq2seq_args_feeder.auxiliary_name = aux_lang
            test_model = test_seq2seq(seq2seq_args_feeder)
            test_trainer = Trainer(seq2seq_args_feeder, test_model)
            test_trainer.run(burn_in_epoch=0)
            test_trainer.best_model_output()
        # NEWS Multi (experiments 30 + i), covering every ratio
        news_dataset = NEWS['data_container']
        for i, ratio in enumerate(mtrs, start=1):
            seq2seq_args_feeder = set_up_args(news_dataset, exp_num=30 + i)
            seq2seq_args_feeder.multi_task_ratio = ratio
            seq2seq_args_feeder.auxiliary_name = aux_lang
            test_model = test_seq2seq(seq2seq_args_feeder)
            test_trainer = Trainer(seq2seq_args_feeder, test_model)
            # Unlike the DICT sweep, NEWS runs with burn_in_epoch=15 and passes a
            # test reference dictionary when writing the best model's output.
            test_trainer.run(burn_in_epoch=15)
            test_trainer.best_model_output(test_ref_dict=NEWS['test-set-dict'])
    except KeyboardInterrupt:
        # A manual interrupt ends the whole sweep without a traceback.
        print("Exiting loop")

The current device for PyTorch is cuda
Seq2MultiSeq(
  (encoder): BasicEncoder(
    (embedding): Sequential(
      (0): Embedding(30, 256)
      (1): Dropout(p=0.1, inplace=False)
    )
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.2, bidirectional=True)
  )
  (decoder_list): ModuleList(
    (0): GreedyDecoder(
      (feed_forward_decoder): BasicFeedForwardDecoder(
        (attention): AdditiveAttention(
          (additive_mapping): Linear(in_features=1536, out_features=512, bias=True)
          (v): Linear(in_features=512, out_features=1, bias=False)
        )
        (embedding): Sequential(
          (0): Embedding(437, 256)
          (1): Dropout(p=0.1, inplace=False)
        )
        (rnn): LSTM(1280, 512, num_layers=2, dropout=0.2)
        (prediction): Sequential(
          (0): Linear(in_features=1792, out_features=437, bias=True)
          (1): LogSoftmax()
        )
      )
      (bridge_layer): BridgeLayer(
        (bridge_layer): ModuleList(
          (0): Linear(in_f

[VAL]: The number of correct predictions (main-task (multi)): 3814/5828
[VAL]: The number of correct predictions (aux-task (multi)): 3782/5828

---------------------------------------
[Epoch: 2][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 0.590 | Val. Acc: 0.654 | Val. PPL:   1.804
	 BEST. Val. Loss: 0.590 | BEST. Val. Acc: 0.654 | Val. Loss: 0.590 | BEST. Val. Epoch: 2 | BEST. Val. Step: 1500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.649
---------------------------------------

[Epoch: 2][#examples: 5888/46620][#steps: 1550]
	Train Loss: 0.298 | Train PPL:   1.347 | lr: 3.000e-03
[Epoch: 2][#examples: 9088/46620][#steps: 1600]
	Train Loss: 0.297 | Train PPL:   1.346 | lr: 3.000e-03
[Epoch: 2][#examples: 12288/46620][#steps: 1650]
	Train Loss: 0.301 | Train PPL:   1.351 | lr: 3.000e-03
[Epoch: 2][#examples: 15488/

[Epoch: 4][#examples: 37376/46620][#steps: 3500]
	Train Loss: 0.221 | Train PPL:   1.247 | lr: 2.700e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 3979/5828
[VAL]: The number of correct predictions (aux-task (multi)): 3964/5828

---------------------------------------
[Epoch: 4][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 99/100
	 Val. Loss: 0.563 | Val. Acc: 0.683 | Val. PPL:   1.755
	 BEST. Val. Loss: 0.552 | BEST. Val. Acc: 0.683 | Val. Loss: 0.563 | BEST. Val. Epoch: 4 | BEST. Val. Step: 3500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 4][#examples: 40576/46620][#steps: 3550]
	Train Loss: 0.222 | Train PPL:   1.249 | lr: 2.700e-03
[Epoch: 4][#examples: 43776/46620][#steps: 3600]
	Train Loss: 0.223 | Train PPL:   1.250 | lr: 2.700e-03
[VAL]: The number of correct predictions (main-task

[Epoch: 7][#examples: 6208/46620][#steps: 5200]
	Train Loss: 0.093 | Train PPL:   1.098 | lr: 1.771e-03
[Epoch: 7][#examples: 9408/46620][#steps: 5250]
	Train Loss: 0.092 | Train PPL:   1.097 | lr: 1.771e-03
[Epoch: 7][#examples: 12608/46620][#steps: 5300]
	Train Loss: 0.097 | Train PPL:   1.102 | lr: 1.771e-03
[Epoch: 7][#examples: 15808/46620][#steps: 5350]
	Train Loss: 0.097 | Train PPL:   1.102 | lr: 1.771e-03
[Epoch: 7][#examples: 19008/46620][#steps: 5400]
	Train Loss: 0.097 | Train PPL:   1.102 | lr: 1.771e-03
[Epoch: 7][#examples: 22208/46620][#steps: 5450]
	Train Loss: 0.097 | Train PPL:   1.102 | lr: 1.771e-03
[Epoch: 7][#examples: 25408/46620][#steps: 5500]
	Train Loss: 0.098 | Train PPL:   1.103 | lr: 1.771e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4085/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4120/5828

---------------------------------------
[Epoch: 7][Validatiing...]
		 Better Valid Acc! (at least equal)
	 E

[VAL]: The number of correct predictions (main-task (multi)): 4126/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4147/5828

---------------------------------------
[Epoch: 9][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 91/100
	 Val. Loss: 0.591 | Val. Acc: 0.708 | Val. PPL:   1.805
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.708 | Val. Loss: 0.591 | BEST. Val. Epoch: 9 | BEST. Val. Step: 7290
---------------------------------------

	BEST. Val. Acc Aux: 0.716
---------------------------------------

Epoch: 10 | Time: 1m 27s
	Train Loss: 0.059 | Train PPL:   1.061
	 Val. Loss: 0.591 | Val. Acc: 0.708 | Val. PPL:   1.805
[Train]: Current Teacher Forcing Ratio: 0.500
[Epoch: 10][#examples: 640/46620][#steps: 7300]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 1.291e-03
[Epoch: 10][#examples: 3840/46620][#steps: 7350]
	Train Loss: 0.042 | Train PPL:   1.042 | lr: 1.291e-03
[Epoch: 10][#examples: 7040/46620][#steps: 7400]
	Train Loss

[Epoch: 12][#examples: 22528/46620][#steps: 9100]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 8.473e-04
[Epoch: 12][#examples: 25728/46620][#steps: 9150]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 8.473e-04
[Epoch: 12][#examples: 28928/46620][#steps: 9200]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 8.473e-04
[Epoch: 12][#examples: 32128/46620][#steps: 9250]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 8.473e-04
[Epoch: 12][#examples: 35328/46620][#steps: 9300]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 8.473e-04
[Epoch: 12][#examples: 38528/46620][#steps: 9350]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 8.473e-04
[Epoch: 12][#examples: 41728/46620][#steps: 9400]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 8.473e-04
[Epoch: 12][#examples: 44928/46620][#steps: 9450]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 8.473e-04
[VAL]: The number of correct predictions (main-task (multi)): 4171/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4218/5828

--------

	 Early Stopping Patience: 78/100
	 Val. Loss: 0.707 | Val. Acc: 0.719 | Val. PPL:   2.028
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.719 | Val. Loss: 0.707 | BEST. Val. Epoch: 15 | BEST. Val. Step: 11000
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.729
---------------------------------------

[Epoch: 15][#examples: 7360/46620][#steps: 11050]
	Train Loss: 0.010 | Train PPL:   1.010 | lr: 6.177e-04
[Epoch: 15][#examples: 10560/46620][#steps: 11100]
	Train Loss: 0.009 | Train PPL:   1.009 | lr: 6.177e-04
[Epoch: 15][#examples: 13760/46620][#steps: 11150]
	Train Loss: 0.010 | Train PPL:   1.010 | lr: 6.177e-04
[Epoch: 15][#examples: 16960/46620][#steps: 11200]
	Train Loss: 0.010 | Train PPL:   1.010 | lr: 6.177e-04
[Epoch: 15][#examples: 20160/46620][#steps: 11250]
	Train Loss: 0.010 | Train PPL:   1.010 | lr: 6.177e-04
[Epoch: 15][#examples: 23360/46620][#steps: 11300]
	Train Loss: 0.010 | Train PPL:   1.010 | l

[Epoch: 17][#examples: 42048/46620][#steps: 13050]
	Train Loss: 0.007 | Train PPL:   1.007 | lr: 4.053e-04
[Epoch: 17][#examples: 45248/46620][#steps: 13100]
	Train Loss: 0.007 | Train PPL:   1.007 | lr: 4.053e-04
[VAL]: The number of correct predictions (main-task (multi)): 4193/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4263/5828

---------------------------------------
[Epoch: 17][Validatiing...]
	 Early Stopping Patience: 71/100
	 Val. Loss: 0.754 | Val. Acc: 0.719 | Val. PPL:   2.125
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.725 | Val. Loss: 0.745 | BEST. Val. Epoch: 17 | BEST. Val. Step: 12500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.731
---------------------------------------

Epoch: 18 | Time: 1m 32s
	Train Loss: 0.007 | Train PPL:   1.007
	 Val. Loss: 0.754 | Val. Acc: 0.719 | Val. PPL:   2.125
[Train]: Current Teacher Forcing Ratio: 0.260
[Epoch: 18][#examples: 1792/46620]

[Epoch: 20][#examples: 30080/46620][#steps: 15050]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.938e-04
[Epoch: 20][#examples: 33280/46620][#steps: 15100]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.938e-04
[Epoch: 20][#examples: 36480/46620][#steps: 15150]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.938e-04
[Epoch: 20][#examples: 39680/46620][#steps: 15200]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.938e-04
[Epoch: 20][#examples: 42880/46620][#steps: 15250]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.938e-04
[Epoch: 20][#examples: 46080/46620][#steps: 15300]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 1.938e-04
[VAL]: The number of correct predictions (main-task (multi)): 4202/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4277/5828

---------------------------------------
[Epoch: 20][Validatiing...]
	 Early Stopping Patience: 64/100
	 Val. Loss: 0.795 | Val. Acc: 0.721 | Val. PPL:   2.214
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.725 | Val.

[Epoch: 23][#examples: 14912/46620][#steps: 17000]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.030e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4204/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4302/5828

---------------------------------------
[Epoch: 23][Validatiing...]
	 Early Stopping Patience: 58/100
	 Val. Loss: 0.811 | Val. Acc: 0.721 | Val. PPL:   2.251
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.725 | Val. Loss: 0.745 | BEST. Val. Epoch: 17 | BEST. Val. Step: 12500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.738
---------------------------------------

[Epoch: 23][#examples: 18112/46620][#steps: 17050]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 9.271e-05
[Epoch: 23][#examples: 21312/46620][#steps: 17100]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 9.271e-05
[Epoch: 23][#examples: 24512/46620][#steps: 17150]
	Train Loss: 0.002 | Train PP

[Epoch: 26][#examples: 2944/46620][#steps: 19000]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 4.927e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4200/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4299/5828

---------------------------------------
[Epoch: 26][Validatiing...]
	 Early Stopping Patience: 51/100
	 Val. Loss: 0.826 | Val. Acc: 0.721 | Val. PPL:   2.284
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.725 | Val. Loss: 0.745 | BEST. Val. Epoch: 17 | BEST. Val. Step: 12500
---------------------------------------

	BEST. Val. Acc Aux: 0.738
---------------------------------------

[Epoch: 26][#examples: 6144/46620][#steps: 19050]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 4.434e-05
[Epoch: 26][#examples: 9344/46620][#steps: 19100]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 4.434e-05
[Epoch: 26][#examples: 12544/46620][#steps: 19150]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 4.434e-05
[Epoch: 26][#examples: 15744/466

[Epoch: 28][#examples: 40832/46620][#steps: 21050]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 2.357e-05
[Epoch: 28][#examples: 44032/46620][#steps: 21100]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 2.357e-05
[VAL]: The number of correct predictions (main-task (multi)): 4197/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4294/5828

---------------------------------------
[Epoch: 28][Validatiing...]
	 Early Stopping Patience: 44/100
	 Val. Loss: 0.839 | Val. Acc: 0.720 | Val. PPL:   2.314
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.725 | Val. Loss: 0.745 | BEST. Val. Epoch: 17 | BEST. Val. Step: 12500
---------------------------------------

	BEST. Val. Acc Aux: 0.738
---------------------------------------

Epoch: 29 | Time: 1m 31s
	Train Loss: 0.002 | Train PPL:   1.002
	 Val. Loss: 0.839 | Val. Acc: 0.720 | Val. PPL:   2.314
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 29][#examples: 576/46620][#steps: 21150]
	Train Loss: 0.005 | Train PPL:   1.005 |

[Epoch: 31][#examples: 28864/46620][#steps: 23050]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.127e-05
[Epoch: 31][#examples: 32064/46620][#steps: 23100]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.127e-05
[Epoch: 31][#examples: 35264/46620][#steps: 23150]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.127e-05
[Epoch: 31][#examples: 38464/46620][#steps: 23200]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.127e-05
[Epoch: 31][#examples: 41664/46620][#steps: 23250]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.127e-05
[Epoch: 31][#examples: 44864/46620][#steps: 23300]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.127e-05
[VAL]: The number of correct predictions (main-task (multi)): 4200/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4291/5828

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 37/100
	 Val. Loss: 0.843 | Val. Acc: 0.721 | Val. PPL:   2.323
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.725 | Val.

[Epoch: 34][#examples: 16896/46620][#steps: 25050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 20096/46620][#steps: 25100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 23296/46620][#steps: 25150]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 5.391e-06
[Epoch: 34][#examples: 26496/46620][#steps: 25200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 29696/46620][#steps: 25250]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 5.391e-06
[Epoch: 34][#examples: 32896/46620][#steps: 25300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.391e-06
[Epoch: 34][#examples: 36096/46620][#steps: 25350]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 5.391e-06
[Epoch: 34][#examples: 39296/46620][#steps: 25400]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 5.391e-06
[Epoch: 34][#examples: 42496/46620][#steps: 25450]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 5.391e-06
[Epoch: 34][#examples: 45696/46620][#

[Epoch: 37][#examples: 4928/46620][#steps: 27050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 8128/46620][#steps: 27100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 11328/46620][#steps: 27150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 14528/46620][#steps: 27200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 17728/46620][#steps: 27250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 20928/46620][#steps: 27300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 24128/46620][#steps: 27350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 27328/46620][#steps: 27400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 30528/46620][#steps: 27450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.579e-06
[Epoch: 37][#examples: 33728/46620][#st

[Epoch: 40][#examples: 2560/46620][#steps: 29200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.233e-06
[Epoch: 40][#examples: 5760/46620][#steps: 29250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.233e-06
[Epoch: 40][#examples: 8960/46620][#steps: 29300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.233e-06
[Epoch: 40][#examples: 12160/46620][#steps: 29350]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.233e-06
[Epoch: 40][#examples: 15360/46620][#steps: 29400]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.233e-06
[Epoch: 40][#examples: 18560/46620][#steps: 29450]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.233e-06
[Epoch: 40][#examples: 21760/46620][#steps: 29500]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.233e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4202/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4292/5828

---------------------------------------
[Epoch: 40][Validatiing...]
	 Early Stopping Patience: 

[Epoch: 43][#examples: 192/46620][#steps: 31350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.899e-07
[Epoch: 43][#examples: 3392/46620][#steps: 31400]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 5.899e-07
[Epoch: 43][#examples: 6592/46620][#steps: 31450]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 5.899e-07
[Epoch: 43][#examples: 9792/46620][#steps: 31500]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 5.899e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4201/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4293/5828

---------------------------------------
[Epoch: 43][Validatiing...]
	 Early Stopping Patience: 9/100
	 Val. Loss: 0.848 | Val. Acc: 0.721 | Val. PPL:   2.336
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.725 | Val. Loss: 0.745 | BEST. Val. Epoch: 17 | BEST. Val. Step: 12500
---------------------------------------

	BEST. Val. Acc Aux: 0.739
---------------------------------------

[Epoch: 43][#examples: 12992/46620]

[VAL]: The number of correct predictions (main-task (multi)): 4202/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4294/5828

---------------------------------------
[Epoch: 45][Validatiing...]
	 Early Stopping Patience: 3/100
	 Val. Loss: 0.848 | Val. Acc: 0.721 | Val. PPL:   2.335
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.725 | Val. Loss: 0.745 | BEST. Val. Epoch: 17 | BEST. Val. Step: 12500
---------------------------------------

	BEST. Val. Acc Aux: 0.739
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 4202/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4294/5828

---------------------------------------
[Epoch: 45][Validatiing...]
	 Early Stopping Patience: 2/100
	 Val. Loss: 0.848 | Val. Acc: 0.721 | Val. PPL:   2.335
	 BEST. Val. Loss: 0.541 | BEST. Val. Acc: 0.725 | Val. Loss: 0.745 | BEST. Val. Epoch: 17 | BEST. Val. Step: 12500
---------------------------------------

	BEST. Val. Ac

[Epoch: 0][#examples: 35200/46620][#steps: 550]
	Train Loss: 1.676 | Train PPL:   5.342 | lr: 3.000e-03
[Epoch: 0][#examples: 38400/46620][#steps: 600]
	Train Loss: 1.579 | Train PPL:   4.852 | lr: 3.000e-03
[Epoch: 0][#examples: 41600/46620][#steps: 650]
	Train Loss: 1.497 | Train PPL:   4.468 | lr: 3.000e-03
[Epoch: 0][#examples: 44800/46620][#steps: 700]
	Train Loss: 1.425 | Train PPL:   4.157 | lr: 3.000e-03
[VAL]: The number of correct predictions (main-task (multi)): 3624/5828
[VAL]: The number of correct predictions (aux-task (multi)): 2955/5828

---------------------------------------
[Epoch: 0][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 0.661 | Val. Acc: 0.622 | Val. PPL:   1.938
	 BEST. Val. Loss: 0.661 | BEST. Val. Acc: 0.622 | Val. Loss: 0.661 | BEST. Val. Epoch: 0 | BEST. Val. Step: 729
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least e

[Epoch: 3][#examples: 13632/46620][#steps: 2400]
	Train Loss: 0.221 | Train PPL:   1.247 | lr: 2.700e-03
[Epoch: 3][#examples: 16832/46620][#steps: 2450]
	Train Loss: 0.223 | Train PPL:   1.250 | lr: 2.700e-03
[Epoch: 3][#examples: 20032/46620][#steps: 2500]
	Train Loss: 0.225 | Train PPL:   1.252 | lr: 2.700e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 3926/5828
[VAL]: The number of correct predictions (aux-task (multi)): 3754/5828

---------------------------------------
[Epoch: 3][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 0.552 | Val. Acc: 0.674 | Val. PPL:   1.737
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.677 | Val. Loss: 0.520 | BEST. Val. Epoch: 2 | BEST. Val. Step: 2187
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.644
---------------------------------------

[Epoch: 3][#examples: 23232/46620][#steps: 2550]
	Train Loss: 0.227 | Train PPL:   1.255 

[Epoch: 6][#examples: 1664/46620][#steps: 4400]
	Train Loss: 0.093 | Train PPL:   1.097 | lr: 1.771e-03
[Epoch: 6][#examples: 4864/46620][#steps: 4450]
	Train Loss: 0.086 | Train PPL:   1.089 | lr: 1.771e-03
[Epoch: 6][#examples: 8064/46620][#steps: 4500]
	Train Loss: 0.088 | Train PPL:   1.093 | lr: 1.771e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4091/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4061/5828

---------------------------------------
[Epoch: 6][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 92/100
	 Val. Loss: 0.537 | Val. Acc: 0.702 | Val. PPL:   1.710
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.702 | Val. Loss: 0.537 | BEST. Val. Epoch: 6 | BEST. Val. Step: 4500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.697
---------------------------------------

[Epoch: 6][#examples: 11264/46620][#steps: 4550]
	Trai

[Epoch: 8][#examples: 42752/46620][#steps: 6500]
	Train Loss: 0.047 | Train PPL:   1.048 | lr: 1.435e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4149/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4141/5828

---------------------------------------
[Epoch: 8][Validatiing...]
	 Early Stopping Patience: 86/100
	 Val. Loss: 0.585 | Val. Acc: 0.712 | Val. PPL:   1.796
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.716 | Val. Loss: 0.565 | BEST. Val. Epoch: 8 | BEST. Val. Step: 6000
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.711
---------------------------------------

[Epoch: 8][#examples: 45952/46620][#steps: 6550]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 1.291e-03
[VAL]: The number of correct predictions (main-task (multi)): 4165/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4136/5828

---------------------------------------
[Epoch: 8]

[Epoch: 11][#examples: 11584/46620][#steps: 8200]
	Train Loss: 0.015 | Train PPL:   1.015 | lr: 8.473e-04
[Epoch: 11][#examples: 14784/46620][#steps: 8250]
	Train Loss: 0.014 | Train PPL:   1.015 | lr: 8.473e-04
[Epoch: 11][#examples: 17984/46620][#steps: 8300]
	Train Loss: 0.014 | Train PPL:   1.015 | lr: 8.473e-04
[Epoch: 11][#examples: 21184/46620][#steps: 8350]
	Train Loss: 0.014 | Train PPL:   1.015 | lr: 8.473e-04
[Epoch: 11][#examples: 24384/46620][#steps: 8400]
	Train Loss: 0.015 | Train PPL:   1.015 | lr: 8.473e-04
[Epoch: 11][#examples: 27584/46620][#steps: 8450]
	Train Loss: 0.015 | Train PPL:   1.015 | lr: 8.473e-04
[Epoch: 11][#examples: 30784/46620][#steps: 8500]
	Train Loss: 0.015 | Train PPL:   1.015 | lr: 8.473e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4178/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4196/5828

---------------------------------------
[Epoch: 11][Validatiing...]
	 Early Stopping Patience: 79/1

[VAL]: The number of correct predictions (main-task (multi)): 4227/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4245/5828

---------------------------------------
[Epoch: 13][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 73/100
	 Val. Loss: 0.692 | Val. Acc: 0.725 | Val. PPL:   1.998
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.725 | Val. Loss: 0.692 | BEST. Val. Epoch: 13 | BEST. Val. Step: 10206
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.728
---------------------------------------

Epoch: 14 | Time: 1m 32s
	Train Loss: 0.008 | Train PPL:   1.008
	 Val. Loss: 0.692 | Val. Acc: 0.725 | Val. PPL:   1.998
[Train]: Current Teacher Forcing Ratio: 0.380
[Epoch: 14][#examples: 2816/46620][#steps: 10250]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 5.559e-04
[Epoch: 14][#examples: 6016/46620][#steps: 10300]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 5.559e-0

[Epoch: 16][#examples: 24704/46620][#steps: 12050]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 3.647e-04
[Epoch: 16][#examples: 27904/46620][#steps: 12100]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 3.647e-04
[Epoch: 16][#examples: 31104/46620][#steps: 12150]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 3.647e-04
[Epoch: 16][#examples: 34304/46620][#steps: 12200]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 3.647e-04
[Epoch: 16][#examples: 37504/46620][#steps: 12250]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 3.647e-04
[Epoch: 16][#examples: 40704/46620][#steps: 12300]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 3.647e-04
[Epoch: 16][#examples: 43904/46620][#steps: 12350]
	Train Loss: 0.004 | Train PPL:   1.004 | lr: 3.647e-04
[VAL]: The number of correct predictions (main-task (multi)): 4230/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4265/5828

---------------------------------------
[Epoch: 16][Validatiing...]
	 Early Stopping Patience: 66/100
	 Val

[VAL]: The number of correct predictions (main-task (multi)): 4240/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4286/5828

---------------------------------------
[Epoch: 19][Validatiing...]
	 Early Stopping Patience: 60/100
	 Val. Loss: 0.750 | Val. Acc: 0.728 | Val. PPL:   2.118
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.728 | Val. Loss: 0.753 | BEST. Val. Epoch: 18 | BEST. Val. Step: 13851
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.735
---------------------------------------

[Epoch: 19][#examples: 12736/46620][#steps: 14050]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.938e-04
[Epoch: 19][#examples: 15936/46620][#steps: 14100]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.938e-04
[Epoch: 19][#examples: 19136/46620][#steps: 14150]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.938e-04
[Epoch: 19][#examples: 22336/46620][#steps: 14200]
	Train Loss: 0.002 | Train PPL:   1.002 | lr

[VAL]: The number of correct predictions (main-task (multi)): 4248/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4269/5828

---------------------------------------
[Epoch: 21][Validatiing...]
	 Early Stopping Patience: 53/100
	 Val. Loss: 0.788 | Val. Acc: 0.729 | Val. PPL:   2.198
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.730 | Val. Loss: 0.777 | BEST. Val. Epoch: 20 | BEST. Val. Step: 15309
---------------------------------------

	BEST. Val. Acc Aux: 0.735
---------------------------------------

Epoch: 22 | Time: 1m 31s
	Train Loss: 0.001 | Train PPL:   1.001
	 Val. Loss: 0.788 | Val. Acc: 0.729 | Val. PPL:   2.198
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 22][#examples: 768/46620][#steps: 16050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.030e-04
[Epoch: 22][#examples: 3968/46620][#steps: 16100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.030e-04
[Epoch: 22][#examples: 7168/46620][#steps: 16150]
	Train Loss: 0.001 | Train PPL:   1.001 | l

[Epoch: 24][#examples: 35456/46620][#steps: 18050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.474e-05
[Epoch: 24][#examples: 38656/46620][#steps: 18100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.474e-05
[Epoch: 24][#examples: 41856/46620][#steps: 18150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.474e-05
[Epoch: 24][#examples: 45056/46620][#steps: 18200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.474e-05
[VAL]: The number of correct predictions (main-task (multi)): 4241/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4268/5828

---------------------------------------
[Epoch: 24][Validatiing...]
	 Early Stopping Patience: 46/100
	 Val. Loss: 0.806 | Val. Acc: 0.728 | Val. PPL:   2.239
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.730 | Val. Loss: 0.777 | BEST. Val. Epoch: 20 | BEST. Val. Step: 15309
---------------------------------------

	BEST. Val. Acc Aux: 0.735
---------------------------------------

Epoch: 25 | Time: 1m 37s
	Train Loss: 0.001 

[Epoch: 27][#examples: 23488/46620][#steps: 20050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.618e-05
[Epoch: 27][#examples: 26688/46620][#steps: 20100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.618e-05
[Epoch: 27][#examples: 29888/46620][#steps: 20150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.618e-05
[Epoch: 27][#examples: 33088/46620][#steps: 20200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.618e-05
[Epoch: 27][#examples: 36288/46620][#steps: 20250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.618e-05
[Epoch: 27][#examples: 39488/46620][#steps: 20300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.618e-05
[Epoch: 27][#examples: 42688/46620][#steps: 20350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.618e-05
[Epoch: 27][#examples: 45888/46620][#steps: 20400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.618e-05
[VAL]: The number of correct predictions (main-task (multi)): 4247/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4265/5828



[Epoch: 30][#examples: 11520/46620][#steps: 22050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.718e-05
[Epoch: 30][#examples: 14720/46620][#steps: 22100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.718e-05
[Epoch: 30][#examples: 17920/46620][#steps: 22150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.718e-05
[Epoch: 30][#examples: 21120/46620][#steps: 22200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.718e-05
[Epoch: 30][#examples: 24320/46620][#steps: 22250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.718e-05
[Epoch: 30][#examples: 27520/46620][#steps: 22300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.718e-05
[Epoch: 30][#examples: 30720/46620][#steps: 22350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.718e-05
[Epoch: 30][#examples: 33920/46620][#steps: 22400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.718e-05
[Epoch: 30][#examples: 37120/46620][#steps: 22450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.718e-05
[Epoch: 30][#examples: 40320/46620][#

[Epoch: 33][#examples: 2752/46620][#steps: 24100]
	Train Loss: 0.000 | Train PPL:   1.000 | lr: 9.130e-06
[Epoch: 33][#examples: 5952/46620][#steps: 24150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.130e-06
[Epoch: 33][#examples: 9152/46620][#steps: 24200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.130e-06
[Epoch: 33][#examples: 12352/46620][#steps: 24250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.130e-06
[Epoch: 33][#examples: 15552/46620][#steps: 24300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.130e-06
[Epoch: 33][#examples: 18752/46620][#steps: 24350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.130e-06
[Epoch: 33][#examples: 21952/46620][#steps: 24400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.130e-06
[Epoch: 33][#examples: 25152/46620][#steps: 24450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.130e-06
[Epoch: 33][#examples: 28352/46620][#steps: 24500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 9.130e-06
-----Val------
[VAL]: The number of corr

[Epoch: 36][#examples: 384/46620][#steps: 26250]
	Train Loss: 0.000 | Train PPL:   1.000 | lr: 4.367e-06
[Epoch: 36][#examples: 3584/46620][#steps: 26300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.367e-06
[Epoch: 36][#examples: 6784/46620][#steps: 26350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.367e-06
[Epoch: 36][#examples: 9984/46620][#steps: 26400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.367e-06
[Epoch: 36][#examples: 13184/46620][#steps: 26450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.367e-06
[Epoch: 36][#examples: 16384/46620][#steps: 26500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.367e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4254/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4276/5828

---------------------------------------
[Epoch: 36][Validatiing...]
	 Early Stopping Patience: 18/100
	 Val. Loss: 0.834 | Val. Acc: 0.730 | Val. PPL:   2.304
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.

[VAL]: The number of correct predictions (main-task (multi)): 4255/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4276/5828

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 12/100
	 Val. Loss: 0.836 | Val. Acc: 0.730 | Val. PPL:   2.307
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.731 | Val. Loss: 0.829 | BEST. Val. Epoch: 30 | BEST. Val. Step: 22599
---------------------------------------

	BEST. Val. Acc Aux: 0.735
---------------------------------------

Epoch: 39 | Time: 1m 26s
	Train Loss: 0.001 | Train PPL:   1.001
	 Val. Loss: 0.836 | Val. Acc: 0.730 | Val. PPL:   2.307
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 39][#examples: 1216/46620][#steps: 28450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.089e-06
[Epoch: 39][#examples: 4416/46620][#steps: 28500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.089e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4255/5828
[VAL]

[Epoch: 41][#examples: 26304/46620][#steps: 30300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 41][#examples: 29504/46620][#steps: 30350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 41][#examples: 32704/46620][#steps: 30400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 41][#examples: 35904/46620][#steps: 30450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 41][#examples: 39104/46620][#steps: 30500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4255/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4273/5828

---------------------------------------
[Epoch: 41][Validatiing...]
	 Early Stopping Patience: 5/100
	 Val. Loss: 0.837 | Val. Acc: 0.730 | Val. PPL:   2.309
	 BEST. Val. Loss: 0.520 | BEST. Val. Acc: 0.731 | Val. Loss: 0.829 | BEST. Val. Epoch: 30 | BEST. Val. Step: 22599
--------------------------------

[TEST]: The number of correct predictions (main-task (multi)): 4219/5828
[TEST]: The number of correct predictions (aux-task (multi)): 4282/5828
           Loss       ACC   ACC-aux   ACC-ACT  Replaced
Valid  0.829236  0.730782  0.733528  0.752917  129/1489
Test   0.829267  0.723919  0.734729  0.748456  143/1535
The current device for PyTorch is cuda
Seq2MultiSeq(
  (encoder): BasicEncoder(
    (embedding): Sequential(
      (0): Embedding(30, 256)
      (1): Dropout(p=0.1, inplace=False)
    )
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.2, bidirectional=True)
  )
  (decoder_list): ModuleList(
    (0): GreedyDecoder(
      (feed_forward_decoder): BasicFeedForwardDecoder(
        (attention): AdditiveAttention(
          (additive_mapping): Linear(in_features=1536, out_features=512, bias=True)
          (v): Linear(in_features=512, out_features=1, bias=False)
        )
        (embedding): Sequential(
          (0): Embedding(437, 256)
          (1): Dropout(p=0.1, inplace=False)
 

[Epoch: 2][#examples: 2688/46620][#steps: 1500]
	Train Loss: 0.303 | Train PPL:   1.354 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 3780/5828
[VAL]: The number of correct predictions (aux-task (multi)): 3558/5828

---------------------------------------
[Epoch: 2][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 0.554 | Val. Acc: 0.649 | Val. PPL:   1.740
	 BEST. Val. Loss: 0.554 | BEST. Val. Acc: 0.649 | Val. Loss: 0.554 | BEST. Val. Epoch: 2 | BEST. Val. Step: 1500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.611
---------------------------------------

[Epoch: 2][#examples: 5888/46620][#steps: 1550]
	Train Loss: 0.305 | Train PPL:   1.357 | lr: 3.000e-03
[Epoch: 2][#examples: 9088/46620][#steps: 1600]
	Train Loss: 0.306 | Train PPL:   1.358 | lr: 3.000e-03
[Epoch: 2][#ex

[Epoch: 4][#examples: 30976/46620][#steps: 3400]
	Train Loss: 0.209 | Train PPL:   1.232 | lr: 2.700e-03
[Epoch: 4][#examples: 34176/46620][#steps: 3450]
	Train Loss: 0.211 | Train PPL:   1.235 | lr: 2.700e-03
[Epoch: 4][#examples: 37376/46620][#steps: 3500]
	Train Loss: 0.212 | Train PPL:   1.236 | lr: 2.700e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 3907/5828
[VAL]: The number of correct predictions (aux-task (multi)): 3898/5828

---------------------------------------
[Epoch: 4][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 0.537 | Val. Acc: 0.670 | Val. PPL:   1.711
	 BEST. Val. Loss: 0.518 | BEST. Val. Acc: 0.688 | Val. Loss: 0.518 | BEST. Val. Epoch: 4 | BEST. Val. Step: 3000
---------------------------------------

	BEST. Val. Acc Aux: 0.669
---------------------------------------

[Epoch: 4][#examples: 40576/46620][#steps: 3550]
	Train Loss: 0.214 | Train PPL:   1.239 | lr: 2.430e-03
[Epoch: 4][#examples: 43776/46620][#step

[Epoch: 7][#examples: 3008/46620][#steps: 5150]
	Train Loss: 0.076 | Train PPL:   1.079 | lr: 1.594e-03
[Epoch: 7][#examples: 6208/46620][#steps: 5200]
	Train Loss: 0.074 | Train PPL:   1.077 | lr: 1.594e-03
[Epoch: 7][#examples: 9408/46620][#steps: 5250]
	Train Loss: 0.072 | Train PPL:   1.074 | lr: 1.594e-03
[Epoch: 7][#examples: 12608/46620][#steps: 5300]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.594e-03
[Epoch: 7][#examples: 15808/46620][#steps: 5350]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.594e-03
[Epoch: 7][#examples: 19008/46620][#steps: 5400]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.594e-03
[Epoch: 7][#examples: 22208/46620][#steps: 5450]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.594e-03
[Epoch: 7][#examples: 25408/46620][#steps: 5500]
	Train Loss: 0.074 | Train PPL:   1.077 | lr: 1.594e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4102/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4106/5828

----

[Epoch: 9][#examples: 40896/46620][#steps: 7200]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 1.046e-03
[Epoch: 9][#examples: 44096/46620][#steps: 7250]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 1.046e-03
[VAL]: The number of correct predictions (main-task (multi)): 4130/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4124/5828

---------------------------------------
[Epoch: 9][Validatiing...]
	 Early Stopping Patience: 88/100
	 Val. Loss: 0.589 | Val. Acc: 0.709 | Val. PPL:   1.803
	 BEST. Val. Loss: 0.515 | BEST. Val. Acc: 0.713 | Val. Loss: 0.577 | BEST. Val. Epoch: 9 | BEST. Val. Step: 7000
---------------------------------------

	BEST. Val. Acc Aux: 0.719
---------------------------------------

Epoch: 10 | Time: 1m 26s
	Train Loss: 0.031 | Train PPL:   1.031
	 Val. Loss: 0.589 | Val. Acc: 0.709 | Val. PPL:   1.803
[Train]: Current Teacher Forcing Ratio: 0.500
[Epoch: 10][#examples: 640/46620][#steps: 7300]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.4

[Epoch: 12][#examples: 19328/46620][#steps: 9050]
	Train Loss: 0.013 | Train PPL:   1.013 | lr: 6.863e-04
[Epoch: 12][#examples: 22528/46620][#steps: 9100]
	Train Loss: 0.013 | Train PPL:   1.013 | lr: 6.863e-04
[Epoch: 12][#examples: 25728/46620][#steps: 9150]
	Train Loss: 0.013 | Train PPL:   1.013 | lr: 6.863e-04
[Epoch: 12][#examples: 28928/46620][#steps: 9200]
	Train Loss: 0.013 | Train PPL:   1.013 | lr: 6.863e-04
[Epoch: 12][#examples: 32128/46620][#steps: 9250]
	Train Loss: 0.013 | Train PPL:   1.013 | lr: 6.863e-04
[Epoch: 12][#examples: 35328/46620][#steps: 9300]
	Train Loss: 0.013 | Train PPL:   1.013 | lr: 6.863e-04
[Epoch: 12][#examples: 38528/46620][#steps: 9350]
	Train Loss: 0.013 | Train PPL:   1.013 | lr: 6.863e-04
[Epoch: 12][#examples: 41728/46620][#steps: 9400]
	Train Loss: 0.014 | Train PPL:   1.014 | lr: 6.863e-04
[Epoch: 12][#examples: 44928/46620][#steps: 9450]
	Train Loss: 0.014 | Train PPL:   1.014 | lr: 6.863e-04
[VAL]: The number of correct predictions (main

[Epoch: 15][#examples: 7360/46620][#steps: 11050]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 3.647e-04
[Epoch: 15][#examples: 10560/46620][#steps: 11100]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 3.647e-04
[Epoch: 15][#examples: 13760/46620][#steps: 11150]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 3.647e-04
[Epoch: 15][#examples: 16960/46620][#steps: 11200]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 3.647e-04
[Epoch: 15][#examples: 20160/46620][#steps: 11250]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 3.647e-04
[Epoch: 15][#examples: 23360/46620][#steps: 11300]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 3.647e-04
[Epoch: 15][#examples: 26560/46620][#steps: 11350]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 3.647e-04
[Epoch: 15][#examples: 29760/46620][#steps: 11400]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 3.647e-04
[Epoch: 15][#examples: 32960/46620][#steps: 11450]
	Train Loss: 0.005 | Train PPL:   1.005 | lr: 3.647e-04
[Epoch: 15][#examples: 36160/46620][#s

[Epoch: 18][#examples: 1792/46620][#steps: 13150]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.154e-04
[Epoch: 18][#examples: 4992/46620][#steps: 13200]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.154e-04
[Epoch: 18][#examples: 8192/46620][#steps: 13250]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.154e-04
[Epoch: 18][#examples: 11392/46620][#steps: 13300]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.154e-04
[Epoch: 18][#examples: 14592/46620][#steps: 13350]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.154e-04
[Epoch: 18][#examples: 17792/46620][#steps: 13400]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.154e-04
[Epoch: 18][#examples: 20992/46620][#steps: 13450]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.154e-04
[Epoch: 18][#examples: 24192/46620][#steps: 13500]
	Train Loss: 0.003 | Train PPL:   1.003 | lr: 2.154e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4177/5828
[VAL]: The number of correct predictions (aux-task (multi)):

[Epoch: 20][#examples: 46080/46620][#steps: 15300]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.413e-04
[VAL]: The number of correct predictions (main-task (multi)): 4195/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4238/5828

---------------------------------------
[Epoch: 20][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 61/100
	 Val. Loss: 0.745 | Val. Acc: 0.720 | Val. PPL:   2.107
	 BEST. Val. Loss: 0.515 | BEST. Val. Acc: 0.720 | Val. Loss: 0.745 | BEST. Val. Epoch: 20 | BEST. Val. Step: 15309
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.727
---------------------------------------

Epoch: 21 | Time: 1m 27s
	Train Loss: 0.002 | Train PPL:   1.002
	 Val. Loss: 0.745 | Val. Acc: 0.720 | Val. PPL:   2.107
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 21][#examples: 2624/46620][#steps: 15350]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 1.272e-

[Epoch: 23][#examples: 18112/46620][#steps: 17050]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 8.344e-05
[Epoch: 23][#examples: 21312/46620][#steps: 17100]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 8.344e-05
[Epoch: 23][#examples: 24512/46620][#steps: 17150]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 8.344e-05
[Epoch: 23][#examples: 27712/46620][#steps: 17200]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 8.344e-05
[Epoch: 23][#examples: 30912/46620][#steps: 17250]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 8.344e-05
[Epoch: 23][#examples: 34112/46620][#steps: 17300]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 8.344e-05
[Epoch: 23][#examples: 37312/46620][#steps: 17350]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 8.344e-05
[Epoch: 23][#examples: 40512/46620][#steps: 17400]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 8.344e-05
[Epoch: 23][#examples: 43712/46620][#steps: 17450]
	Train Loss: 0.002 | Train PPL:   1.002 | lr: 8.344e-05
[VAL]: The number of correct predicti

[VAL]: The number of correct predictions (main-task (multi)): 4185/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4241/5828

---------------------------------------
[Epoch: 26][Validatiing...]
	 Early Stopping Patience: 48/100
	 Val. Loss: 0.773 | Val. Acc: 0.718 | Val. PPL:   2.167
	 BEST. Val. Loss: 0.515 | BEST. Val. Acc: 0.721 | Val. Loss: 0.760 | BEST. Val. Epoch: 23 | BEST. Val. Step: 17000
---------------------------------------

	BEST. Val. Acc Aux: 0.730
---------------------------------------

[Epoch: 26][#examples: 6144/46620][#steps: 19050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 3.991e-05
[Epoch: 26][#examples: 9344/46620][#steps: 19100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 3.991e-05
[Epoch: 26][#examples: 12544/46620][#steps: 19150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 3.991e-05
[Epoch: 26][#examples: 15744/46620][#steps: 19200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 3.991e-05
[Epoch: 26][#examples: 18944/46620][#steps: 19

[Epoch: 28][#examples: 40832/46620][#steps: 21050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.121e-05
[Epoch: 28][#examples: 44032/46620][#steps: 21100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.121e-05
[VAL]: The number of correct predictions (main-task (multi)): 4195/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4247/5828

---------------------------------------
[Epoch: 28][Validatiing...]
	 Early Stopping Patience: 41/100
	 Val. Loss: 0.782 | Val. Acc: 0.720 | Val. PPL:   2.186
	 BEST. Val. Loss: 0.515 | BEST. Val. Acc: 0.721 | Val. Loss: 0.760 | BEST. Val. Epoch: 23 | BEST. Val. Step: 17000
---------------------------------------

	BEST. Val. Acc Aux: 0.730
---------------------------------------

Epoch: 29 | Time: 1m 32s
	Train Loss: 0.001 | Train PPL:   1.001
	 Val. Loss: 0.782 | Val. Acc: 0.720 | Val. PPL:   2.186
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 29][#examples: 576/46620][#steps: 21150]
	Train Loss: 0.001 | Train PPL:   1.001 |

[Epoch: 31][#examples: 28864/46620][#steps: 23050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.014e-05
[Epoch: 31][#examples: 32064/46620][#steps: 23100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.014e-05
[Epoch: 31][#examples: 35264/46620][#steps: 23150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.014e-05
[Epoch: 31][#examples: 38464/46620][#steps: 23200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.014e-05
[Epoch: 31][#examples: 41664/46620][#steps: 23250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.014e-05
[Epoch: 31][#examples: 44864/46620][#steps: 23300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.014e-05
[VAL]: The number of correct predictions (main-task (multi)): 4198/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4246/5828

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 34/100
	 Val. Loss: 0.785 | Val. Acc: 0.720 | Val. PPL:   2.192
	 BEST. Val. Loss: 0.515 | BEST. Val. Acc: 0.721 | Val.

[Epoch: 34][#examples: 16896/46620][#steps: 25050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.852e-06
[Epoch: 34][#examples: 20096/46620][#steps: 25100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.852e-06
[Epoch: 34][#examples: 23296/46620][#steps: 25150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.852e-06
[Epoch: 34][#examples: 26496/46620][#steps: 25200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.852e-06
[Epoch: 34][#examples: 29696/46620][#steps: 25250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.852e-06
[Epoch: 34][#examples: 32896/46620][#steps: 25300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.852e-06
[Epoch: 34][#examples: 36096/46620][#steps: 25350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.852e-06
[Epoch: 34][#examples: 39296/46620][#steps: 25400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.852e-06
[Epoch: 34][#examples: 42496/46620][#steps: 25450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 4.852e-06
[Epoch: 34][#examples: 45696/46620][#

[Epoch: 37][#examples: 4928/46620][#steps: 27050]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.321e-06
[Epoch: 37][#examples: 8128/46620][#steps: 27100]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.321e-06
[Epoch: 37][#examples: 11328/46620][#steps: 27150]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.321e-06
[Epoch: 37][#examples: 14528/46620][#steps: 27200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.321e-06
[Epoch: 37][#examples: 17728/46620][#steps: 27250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.321e-06
[Epoch: 37][#examples: 20928/46620][#steps: 27300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.321e-06
[Epoch: 37][#examples: 24128/46620][#steps: 27350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.321e-06
[Epoch: 37][#examples: 27328/46620][#steps: 27400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.321e-06
[Epoch: 37][#examples: 30528/46620][#steps: 27450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 2.321e-06
[Epoch: 37][#examples: 33728/46620][#st

[Epoch: 40][#examples: 2560/46620][#steps: 29200]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 40][#examples: 5760/46620][#steps: 29250]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 40][#examples: 8960/46620][#steps: 29300]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 40][#examples: 12160/46620][#steps: 29350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 40][#examples: 15360/46620][#steps: 29400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 40][#examples: 18560/46620][#steps: 29450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
[Epoch: 40][#examples: 21760/46620][#steps: 29500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 1.110e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4200/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4244/5828

---------------------------------------
[Epoch: 40][Validatiing...]
	 Early Stopping Patience: 

[Epoch: 43][#examples: 192/46620][#steps: 31350]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 3392/46620][#steps: 31400]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 6592/46620][#steps: 31450]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
[Epoch: 43][#examples: 9792/46620][#steps: 31500]
	Train Loss: 0.001 | Train PPL:   1.001 | lr: 5.309e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 4201/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4245/5828

---------------------------------------
[Epoch: 43][Validatiing...]
	 Early Stopping Patience: 6/100
	 Val. Loss: 0.786 | Val. Acc: 0.721 | Val. PPL:   2.195
	 BEST. Val. Loss: 0.515 | BEST. Val. Acc: 0.721 | Val. Loss: 0.760 | BEST. Val. Epoch: 23 | BEST. Val. Step: 17000
---------------------------------------

	BEST. Val. Acc Aux: 0.730
---------------------------------------

[Epoch: 43][#examples: 12992/46620]

[VAL]: The number of correct predictions (main-task (multi)): 4201/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4245/5828

---------------------------------------
[Epoch: 45][Validatiing...]
	 Early Stopping Patience: 0/100
	 Val. Loss: 0.786 | Val. Acc: 0.721 | Val. PPL:   2.195
	 BEST. Val. Loss: 0.515 | BEST. Val. Acc: 0.721 | Val. Loss: 0.760 | BEST. Val. Epoch: 23 | BEST. Val. Step: 17000
---------------------------------------

	BEST. Val. Acc Aux: 0.730
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 4201/5828
[VAL]: The number of correct predictions (aux-task (multi)): 4245/5828

---------------------------------------
[Epoch: 45][Validatiing...]
	 Early Stopping Patience: 0/100
	 Val. Loss: 0.786 | Val. Acc: 0.721 | Val. PPL:   2.195
	 BEST. Val. Loss: 0.515 | BEST. Val. Acc: 0.721 | Val. Loss: 0.760 | BEST. Val. Epoch: 23 | BEST. Val. Step: 17000
---------------------------------------

	BEST. Val. Ac

[Epoch: 1][#examples: 1920/81252][#steps: 1300]
	Train Loss: 0.365 | Train PPL:   1.441 | lr: 3.000e-03
[Epoch: 1][#examples: 5120/81252][#steps: 1350]
	Train Loss: 0.349 | Train PPL:   1.417 | lr: 3.000e-03
[Epoch: 1][#examples: 8320/81252][#steps: 1400]
	Train Loss: 0.339 | Train PPL:   1.404 | lr: 3.000e-03
[Epoch: 1][#examples: 11520/81252][#steps: 1450]
	Train Loss: 0.343 | Train PPL:   1.409 | lr: 3.000e-03
[Epoch: 1][#examples: 14720/81252][#steps: 1500]
	Train Loss: 0.337 | Train PPL:   1.401 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 282/513
[VAL]: The number of correct predictions (aux-task (multi)): 280/513

---------------------------------------
[Epoch: 1][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.490 | Val. Acc: 0.550 | Val. PPL:   4.438
	 BEST. Val. Loss: 1.490 | BEST. Val. Acc: 0.550 | Val. Loss: 1.490 | BEST. Val. Epoch:

	 Early Stopping Patience: 99/100
	 Val. Loss: 1.380 | Val. Acc: 0.618 | Val. PPL:   3.974
	 BEST. Val. Loss: 1.196 | BEST. Val. Acc: 0.618 | Val. Loss: 1.380 | BEST. Val. Epoch: 2 | BEST. Val. Step: 3500
---------------------------------------

	BEST. Val. Acc Aux: 0.630
---------------------------------------

[Epoch: 2][#examples: 64640/81252][#steps: 3550]
	Train Loss: 0.242 | Train PPL:   1.274 | lr: 2.430e-03
[Epoch: 2][#examples: 67840/81252][#steps: 3600]
	Train Loss: 0.241 | Train PPL:   1.273 | lr: 2.430e-03
[Epoch: 2][#examples: 71040/81252][#steps: 3650]
	Train Loss: 0.241 | Train PPL:   1.272 | lr: 2.430e-03
[Epoch: 2][#examples: 74240/81252][#steps: 3700]
	Train Loss: 0.240 | Train PPL:   1.272 | lr: 2.430e-03
[Epoch: 2][#examples: 77440/81252][#steps: 3750]
	Train Loss: 0.239 | Train PPL:   1.270 | lr: 2.430e-03
[Epoch: 2][#examples: 80640/81252][#steps: 3800]
	Train Loss: 0.238 | Train PPL:   1.269 | lr: 2.430e-03
[VAL]: The number of correct predictions (main-task (mul

	 Early Stopping Patience: 100/100
	 Val. Loss: 1.294 | Val. Acc: 0.624 | Val. PPL:   3.648
	 BEST. Val. Loss: 1.294 | BEST. Val. Acc: 0.624 | Val. Loss: 1.294 | BEST. Val. Epoch: 4 | BEST. Val. Step: 5500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.657
---------------------------------------

[Epoch: 4][#examples: 30080/81252][#steps: 5550]
	Train Loss: 0.137 | Train PPL:   1.147 | lr: 1.594e-03
[Epoch: 4][#examples: 33280/81252][#steps: 5600]
	Train Loss: 0.137 | Train PPL:   1.147 | lr: 1.594e-03
[Epoch: 4][#examples: 36480/81252][#steps: 5650]
	Train Loss: 0.136 | Train PPL:   1.145 | lr: 1.594e-03
[Epoch: 4][#examples: 39680/81252][#steps: 5700]
	Train Loss: 0.136 | Train PPL:   1.146 | lr: 1.594e-03
[Epoch: 4][#examples: 42880/81252][#steps: 5750]
	Train Loss: 0.136 | Train PPL:   1.145 | lr: 1.594e-03
[Epoch: 4][#examples: 46080/81252][#steps: 5800]
	Train Loss: 0.136 | Train PPL:   1.145 | lr: 1.594e-03

	 Early Stopping Patience: 97/100
	 Val. Loss: 1.300 | Val. Acc: 0.651 | Val. PPL:   3.669
	 BEST. Val. Loss: 1.205 | BEST. Val. Acc: 0.651 | Val. Loss: 1.300 | BEST. Val. Epoch: 5 | BEST. Val. Step: 7620
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

Epoch: 06 | Time: 2m 26s
	Train Loss: 0.095 | Train PPL:   1.099
	 Val. Loss: 1.300 | Val. Acc: 0.651 | Val. PPL:   3.669
Renew Evaluation Records in the Burning Phase...
[Train]: Current Teacher Forcing Ratio: 0.620
[Epoch: 6][#examples: 1920/81252][#steps: 7650]
	Train Loss: 0.075 | Train PPL:   1.078 | lr: 9.414e-04
[Epoch: 6][#examples: 5120/81252][#steps: 7700]
	Train Loss: 0.064 | Train PPL:   1.066 | lr: 9.414e-04
[Epoch: 6][#examples: 8320/81252][#steps: 7750]
	Train Loss: 0.065 | Train PPL:   1.067 | lr: 9.414e-04
[Epoch: 6][#examples: 11520/81252][#steps: 7800]
	Train Loss: 0.066 | Train PPL:   1.069 | lr: 9.414e-04
[Epoch: 6][#examples: 14720/81252][#steps: 7850]
	Tr

[Epoch: 7][#examples: 55040/81252][#steps: 9750]
	Train Loss: 0.054 | Train PPL:   1.055 | lr: 7.626e-04
[Epoch: 7][#examples: 58240/81252][#steps: 9800]
	Train Loss: 0.054 | Train PPL:   1.055 | lr: 7.626e-04
[Epoch: 7][#examples: 61440/81252][#steps: 9850]
	Train Loss: 0.054 | Train PPL:   1.055 | lr: 7.626e-04
[Epoch: 7][#examples: 64640/81252][#steps: 9900]
	Train Loss: 0.054 | Train PPL:   1.056 | lr: 7.626e-04
[Epoch: 7][#examples: 67840/81252][#steps: 9950]
	Train Loss: 0.054 | Train PPL:   1.056 | lr: 7.626e-04
[Epoch: 7][#examples: 71040/81252][#steps: 10000]
	Train Loss: 0.054 | Train PPL:   1.056 | lr: 7.626e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 332/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 7][Validatiing...]
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.431 | Val. Acc: 0.647 | Val. PPL:   4.183
	 BEST. Val. Loss: 1.391 | BEST. Val. Acc: 0.667 | Val. 

[Epoch: 9][#examples: 26880/81252][#steps: 11850]
	Train Loss: 0.033 | Train PPL:   1.034 | lr: 4.503e-04
[Epoch: 9][#examples: 30080/81252][#steps: 11900]
	Train Loss: 0.033 | Train PPL:   1.034 | lr: 4.503e-04
[Epoch: 9][#examples: 33280/81252][#steps: 11950]
	Train Loss: 0.033 | Train PPL:   1.033 | lr: 4.503e-04
[Epoch: 9][#examples: 36480/81252][#steps: 12000]
	Train Loss: 0.033 | Train PPL:   1.034 | lr: 4.503e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 9][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.519 | Val. Acc: 0.665 | Val. PPL:   4.566
	 BEST. Val. Loss: 1.519 | BEST. Val. Acc: 0.665 | Val. Loss: 1.519 | BEST. Val. Epoch: 9 | BEST. Val. Step: 12000
---------------------------------------

	BEST. Val. Acc Aux: 0.706
--

[Epoch: 11][#examples: 1920/81252][#steps: 14000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.393e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 339/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 11][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.520 | Val. Acc: 0.661 | Val. PPL:   4.570
	 BEST. Val. Loss: 1.520 | BEST. Val. Acc: 0.661 | Val. Loss: 1.520 | BEST. Val. Epoch: 11 | BEST. Val. Step: 14000
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 11][#examples: 5120/81252][#steps: 14050]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.154e-04
[Epoch: 11][#examples: 8320/81252][#steps: 14100]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 2.154e-04
[Epoch: 11][#examples: 11520/81252][#steps: 14150]
	Train Loss: 0

[Epoch: 12][#examples: 55040/81252][#steps: 16100]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.570e-04
[Epoch: 12][#examples: 58240/81252][#steps: 16150]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.570e-04
[Epoch: 12][#examples: 61440/81252][#steps: 16200]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.570e-04
[Epoch: 12][#examples: 64640/81252][#steps: 16250]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.570e-04
[Epoch: 12][#examples: 67840/81252][#steps: 16300]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.570e-04
[Epoch: 12][#examples: 71040/81252][#steps: 16350]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.570e-04
[Epoch: 12][#examples: 74240/81252][#steps: 16400]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.570e-04
[Epoch: 12][#examples: 77440/81252][#steps: 16450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.570e-04
[Epoch: 12][#examples: 80640/81252][#steps: 16500]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 1.570e-04
-----Val------
[VAL]: The number of c

[Epoch: 14][#examples: 23680/81252][#steps: 18150]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 8.344e-05
[Epoch: 14][#examples: 26880/81252][#steps: 18200]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 8.344e-05
[Epoch: 14][#examples: 30080/81252][#steps: 18250]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 8.344e-05
[Epoch: 14][#examples: 33280/81252][#steps: 18300]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 8.344e-05
[Epoch: 14][#examples: 36480/81252][#steps: 18350]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 8.344e-05
[Epoch: 14][#examples: 39680/81252][#steps: 18400]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 8.344e-05
[Epoch: 14][#examples: 42880/81252][#steps: 18450]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 8.344e-05
[Epoch: 14][#examples: 46080/81252][#steps: 18500]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 8.344e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi))

[Epoch: 16][#examples: 1920/81252][#steps: 20350]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 4.434e-05
[Epoch: 16][#examples: 5120/81252][#steps: 20400]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 4.434e-05
[Epoch: 16][#examples: 8320/81252][#steps: 20450]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 4.434e-05
[Epoch: 16][#examples: 11520/81252][#steps: 20500]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 4.434e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 16][Validatiing...]
	 Early Stopping Patience: 97/100
	 Val. Loss: 1.573 | Val. Acc: 0.665 | Val. PPL:   4.823
	 BEST. Val. Loss: 1.507 | BEST. Val. Acc: 0.667 | Val. Loss: 1.575 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20320
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 16][#examples: 14720/81252][

[Epoch: 17][#examples: 61440/81252][#steps: 22550]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.357e-05
[Epoch: 17][#examples: 64640/81252][#steps: 22600]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.357e-05
[Epoch: 17][#examples: 67840/81252][#steps: 22650]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 2.357e-05
[Epoch: 17][#examples: 71040/81252][#steps: 22700]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.357e-05
[Epoch: 17][#examples: 74240/81252][#steps: 22750]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.357e-05
[Epoch: 17][#examples: 77440/81252][#steps: 22800]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.357e-05
[Epoch: 17][#examples: 80640/81252][#steps: 22850]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.357e-05
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 355/513

---------------------------------------
[Epoch: 17][Validatiing...]
	 Early Stopping Patience: 91/100
	 Val. Lo

[Epoch: 19][#examples: 39680/81252][#steps: 24750]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 1.252e-05
[Epoch: 19][#examples: 42880/81252][#steps: 24800]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 1.252e-05
[Epoch: 19][#examples: 46080/81252][#steps: 24850]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.252e-05
[Epoch: 19][#examples: 49280/81252][#steps: 24900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.252e-05
[Epoch: 19][#examples: 52480/81252][#steps: 24950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.252e-05
[Epoch: 19][#examples: 55680/81252][#steps: 25000]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.252e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 355/513

---------------------------------------
[Epoch: 19][Validatiing...]
	 Early Stopping Patience: 85/100
	 Val. Loss: 1.514 | Val. Acc: 0.663 | Val. PPL:   4.547
	 BEST. Val. Loss: 1.507 | BEST. Val. Acc: 0

[Epoch: 21][#examples: 14720/81252][#steps: 26900]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 6.656e-06
[Epoch: 21][#examples: 17920/81252][#steps: 26950]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 6.656e-06
[Epoch: 21][#examples: 21120/81252][#steps: 27000]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 6.656e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 21][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.494 | Val. Acc: 0.665 | Val. PPL:   4.456
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 21][#examples: 24320/81252][#steps: 27050]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 5.9

[Epoch: 22][#examples: 77440/81252][#steps: 29150]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 3.537e-06
[Epoch: 22][#examples: 80640/81252][#steps: 29200]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 3.537e-06
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 22][Validatiing...]
	 Early Stopping Patience: 94/100
	 Val. Loss: 1.504 | Val. Acc: 0.665 | Val. PPL:   4.498
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

Epoch: 23 | Time: 2m 23s
	Train Loss: 0.028 | Train PPL:   1.028
	 Val. Loss: 1.504 | Val. Acc: 0.665 | Val. PPL:   4.498
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 23][#examples: 2560/81252][#steps: 29250]
	Train Loss: 0.022 | Train PPL:   1.023 | lr

[Epoch: 24][#examples: 55680/81252][#steps: 31350]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.880e-06
[Epoch: 24][#examples: 58880/81252][#steps: 31400]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.880e-06
[Epoch: 24][#examples: 62080/81252][#steps: 31450]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.880e-06
[Epoch: 24][#examples: 65280/81252][#steps: 31500]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.880e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 24][Validatiing...]
	 Early Stopping Patience: 88/100
	 Val. Loss: 1.503 | Val. Acc: 0.665 | Val. PPL:   4.494
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 24][#examples: 68480/8125

[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 26][Validatiing...]
	 Early Stopping Patience: 82/100
	 Val. Loss: 1.502 | Val. Acc: 0.665 | Val. PPL:   4.489
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 26][#examples: 33920/81252][#steps: 33550]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 8.991e-07
[Epoch: 26][#examples: 37120/81252][#steps: 33600]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 8.991e-07
[Epoch: 26][#examples: 40320/81252][#steps: 33650]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 8.991e-07
[Epoch: 26][#examples: 43520/81252][#steps: 33700]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 8.991e-07
[Epoch: 26][#examples: 46720/81252][#steps: 3375

[Epoch: 28][#examples: 2560/81252][#steps: 35600]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 4.778e-07
[Epoch: 28][#examples: 5760/81252][#steps: 35650]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 4.778e-07
[Epoch: 28][#examples: 8960/81252][#steps: 35700]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 4.778e-07
[Epoch: 28][#examples: 12160/81252][#steps: 35750]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 4.778e-07
[Epoch: 28][#examples: 15360/81252][#steps: 35800]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 4.778e-07
[Epoch: 28][#examples: 18560/81252][#steps: 35850]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 4.778e-07
[Epoch: 28][#examples: 21760/81252][#steps: 35900]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 4.778e-07
[Epoch: 28][#examples: 24960/81252][#steps: 35950]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 4.778e-07
[Epoch: 28][#examples: 28160/81252][#steps: 36000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 4.778e-07
-----Val------
[VAL]: The number of corr

[Epoch: 29][#examples: 78080/81252][#steps: 38050]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 2.539e-07
[Epoch: 29][#examples: 81280/81252][#steps: 38100]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 2.539e-07
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 29][Validatiing...]
	 Early Stopping Patience: 69/100
	 Val. Loss: 1.502 | Val. Acc: 0.665 | Val. PPL:   4.488
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

Epoch: 30 | Time: 2m 24s
	Train Loss: 0.028 | Train PPL:   1.028
	 Val. Loss: 1.502 | Val. Acc: 0.665 | Val. PPL:   4.488
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 30][#examples: 3200/81252][#steps: 38150]
	Train Loss: 0.023 | Train PPL:   1.023 | lr

[Epoch: 31][#examples: 56320/81252][#steps: 40250]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.349e-07
[Epoch: 31][#examples: 59520/81252][#steps: 40300]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 1.349e-07
[Epoch: 31][#examples: 62720/81252][#steps: 40350]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.349e-07
[Epoch: 31][#examples: 65920/81252][#steps: 40400]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.349e-07
[Epoch: 31][#examples: 69120/81252][#steps: 40450]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.349e-07
[Epoch: 31][#examples: 72320/81252][#steps: 40500]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.349e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 63/100
	 Val. Loss: 1.501 | Val. Acc: 0.665 | Val. PPL:   4.488
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0

[Epoch: 33][#examples: 34560/81252][#steps: 42450]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 33][#examples: 37760/81252][#steps: 42500]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 33][Validatiing...]
	 Early Stopping Patience: 57/100
	 Val. Loss: 1.501 | Val. Acc: 0.665 | Val. PPL:   4.488
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 33][#examples: 40960/81252][#steps: 42550]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 33][#examples: 44160/81252][#steps: 42600]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 33][#examples: 47360/8125

[Epoch: 35][#examples: 6400/81252][#steps: 44550]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 35][#examples: 9600/81252][#steps: 44600]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 35][#examples: 12800/81252][#steps: 44650]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 35][#examples: 16000/81252][#steps: 44700]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 35][#examples: 19200/81252][#steps: 44750]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 35][#examples: 22400/81252][#steps: 44800]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 35][#examples: 25600/81252][#steps: 44850]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 35][#examples: 28800/81252][#steps: 44900]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 35][#examples: 32000/81252][#steps: 44950]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 35][#examples: 35200/81252][#st

[Epoch: 37][#examples: 640/81252][#steps: 47000]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 37][Validatiing...]
	 Early Stopping Patience: 44/100
	 Val. Loss: 1.501 | Val. Acc: 0.665 | Val. PPL:   4.487
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 37][#examples: 3840/81252][#steps: 47050]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 37][#examples: 7040/81252][#steps: 47100]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 37][#examples: 10240/81252][#steps: 47150]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 37][#examples: 13440/81252][#

[Epoch: 38][#examples: 63360/81252][#steps: 49250]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 38][#examples: 66560/81252][#steps: 49300]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 38][#examples: 69760/81252][#steps: 49350]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 38][#examples: 72960/81252][#steps: 49400]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 38][#examples: 76160/81252][#steps: 49450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 38][#examples: 79360/81252][#steps: 49500]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 38/100
	 Val. Loss: 1.501 | Val. Acc: 0.665 | Val. PPL:   4.487
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0

[Epoch: 40][#examples: 41600/81252][#steps: 51450]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 40][#examples: 44800/81252][#steps: 51500]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 40][Validatiing...]
	 Early Stopping Patience: 32/100
	 Val. Loss: 1.501 | Val. Acc: 0.665 | Val. PPL:   4.487
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 40][#examples: 48000/81252][#steps: 51550]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 40][#examples: 51200/81252][#steps: 51600]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 40][#examples: 54400/8125

[Epoch: 42][#examples: 13440/81252][#steps: 53550]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 42][#examples: 16640/81252][#steps: 53600]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 42][#examples: 19840/81252][#steps: 53650]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 42][#examples: 23040/81252][#steps: 53700]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 42][#examples: 26240/81252][#steps: 53750]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 42][#examples: 29440/81252][#steps: 53800]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 42][#examples: 32640/81252][#steps: 53850]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 42][#examples: 35840/81252][#steps: 53900]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 42][#examples: 39040/81252][#steps: 53950]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 42][#examples: 42240/81252][#

[Epoch: 44][#examples: 1280/81252][#steps: 55900]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 44][#examples: 4480/81252][#steps: 55950]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 44][#examples: 7680/81252][#steps: 56000]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 44][Validatiing...]
	 Early Stopping Patience: 19/100
	 Val. Loss: 1.501 | Val. Acc: 0.665 | Val. PPL:   4.486
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 44][#examples: 10880/81252][#steps: 56050]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 44][#examples: 14080/81252][

[Epoch: 45][#examples: 64000/81252][#steps: 58150]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 45][#examples: 67200/81252][#steps: 58200]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 45][#examples: 70400/81252][#steps: 58250]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 45][#examples: 73600/81252][#steps: 58300]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 45][#examples: 76800/81252][#steps: 58350]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 45][#examples: 80000/81252][#steps: 58400]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 45][Validatiing...]
	 Early Stopping Patience: 13/100
	 Val. Loss: 1.501 | Val. Acc: 0.665 | Val. PPL:   4.486
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Los

[Epoch: 47][#examples: 42240/81252][#steps: 60350]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 47][#examples: 45440/81252][#steps: 60400]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 47][#examples: 48640/81252][#steps: 60450]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 47][#examples: 51840/81252][#steps: 60500]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 47][Validatiing...]
	 Early Stopping Patience: 7/100
	 Val. Loss: 1.501 | Val. Acc: 0.665 | Val. PPL:   4.486
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 47][#examples: 55040/81252

[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 49][Validatiing...]
	 Early Stopping Patience: 1/100
	 Val. Loss: 1.501 | Val. Acc: 0.665 | Val. PPL:   4.486
	 BEST. Val. Loss: 1.494 | BEST. Val. Acc: 0.671 | Val. Loss: 1.520 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21590
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 49][#examples: 20480/81252][#steps: 62550]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 49][#examples: 23680/81252][#steps: 62600]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 49][#examples: 26880/81252][#steps: 62650]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 49][#examples: 30080/81252][#steps: 62700]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 49][#examples: 33280/81252][#steps: 62750

	 Early Stopping Patience: 100/100
	 Val. Loss: 2.007 | Val. Acc: 0.255 | Val. PPL:   7.439
	 BEST. Val. Loss: 2.007 | BEST. Val. Acc: 0.255 | Val. Loss: 2.007 | BEST. Val. Epoch: 0 | BEST. Val. Step: 500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.207
---------------------------------------

[Epoch: 0][#examples: 35200/81252][#steps: 550]
	Train Loss: 2.010 | Train PPL:   7.461 | lr: 3.000e-03
[Epoch: 0][#examples: 38400/81252][#steps: 600]
	Train Loss: 1.900 | Train PPL:   6.688 | lr: 3.000e-03
[Epoch: 0][#examples: 41600/81252][#steps: 650]
	Train Loss: 1.798 | Train PPL:   6.040 | lr: 3.000e-03
[Epoch: 0][#examples: 44800/81252][#steps: 700]
	Train Loss: 1.713 | Train PPL:   5.547 | lr: 3.000e-03
[Epoch: 0][#examples: 48000/81252][#steps: 750]
	Train Loss: 1.634 | Train PPL:   5.124 | lr: 3.000e-03
[Epoch: 0][#examples: 51200/81252][#steps: 800]
	Train Loss: 1.562 | Train PPL:   4.771 | lr: 3.000e-03
[Epoch

[Epoch: 2][#examples: 640/81252][#steps: 2550]
	Train Loss: 0.244 | Train PPL:   1.277 | lr: 2.187e-03
[Epoch: 2][#examples: 3840/81252][#steps: 2600]
	Train Loss: 0.242 | Train PPL:   1.274 | lr: 2.187e-03
[Epoch: 2][#examples: 7040/81252][#steps: 2650]
	Train Loss: 0.241 | Train PPL:   1.273 | lr: 2.187e-03
[Epoch: 2][#examples: 10240/81252][#steps: 2700]
	Train Loss: 0.241 | Train PPL:   1.273 | lr: 2.187e-03
[Epoch: 2][#examples: 13440/81252][#steps: 2750]
	Train Loss: 0.244 | Train PPL:   1.277 | lr: 2.187e-03
[Epoch: 2][#examples: 16640/81252][#steps: 2800]
	Train Loss: 0.245 | Train PPL:   1.278 | lr: 2.187e-03
[Epoch: 2][#examples: 19840/81252][#steps: 2850]
	Train Loss: 0.243 | Train PPL:   1.276 | lr: 2.187e-03
[Epoch: 2][#examples: 23040/81252][#steps: 2900]
	Train Loss: 0.244 | Train PPL:   1.276 | lr: 2.187e-03
[Epoch: 2][#examples: 26240/81252][#steps: 2950]
	Train Loss: 0.242 | Train PPL:   1.274 | lr: 2.187e-03
[Epoch: 2][#examples: 29440/81252][#steps: 3000]
	Train Los

[Epoch: 3][#examples: 69760/81252][#steps: 4900]
	Train Loss: 0.181 | Train PPL:   1.199 | lr: 1.594e-03
[Epoch: 3][#examples: 72960/81252][#steps: 4950]
	Train Loss: 0.181 | Train PPL:   1.199 | lr: 1.594e-03
[Epoch: 3][#examples: 76160/81252][#steps: 5000]
	Train Loss: 0.181 | Train PPL:   1.198 | lr: 1.594e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 336/513
[VAL]: The number of correct predictions (aux-task (multi)): 339/513

---------------------------------------
[Epoch: 3][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.248 | Val. Acc: 0.655 | Val. PPL:   3.483
	 BEST. Val. Loss: 1.030 | BEST. Val. Acc: 0.655 | Val. Loss: 1.248 | BEST. Val. Epoch: 3 | BEST. Val. Step: 5000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 3][#examples: 79360/81252][#steps: 5050]
	Train Loss: 0.181 | Train PPL:   1.199 | lr: 1.594e-03
[VAL]:

[Epoch: 5][#examples: 35200/81252][#steps: 6900]
	Train Loss: 0.108 | Train PPL:   1.114 | lr: 1.046e-03
[Epoch: 5][#examples: 38400/81252][#steps: 6950]
	Train Loss: 0.109 | Train PPL:   1.115 | lr: 1.046e-03
[Epoch: 5][#examples: 41600/81252][#steps: 7000]
	Train Loss: 0.108 | Train PPL:   1.114 | lr: 1.046e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 331/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 5][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.211 | Val. Acc: 0.645 | Val. PPL:   3.357
	 BEST. Val. Loss: 1.211 | BEST. Val. Acc: 0.657 | Val. Loss: 1.328 | BEST. Val. Epoch: 5 | BEST. Val. Step: 6500
---------------------------------------

	BEST. Val. Acc Aux: 0.694
---------------------------------------

[Epoch: 5][#examples: 44800/81252][#steps: 7050]
	Train Loss: 0.108 | Train PPL:   1.114 | lr: 9.414e-04
[Epo

[Epoch: 7][#examples: 3840/81252][#steps: 8950]
	Train Loss: 0.064 | Train PPL:   1.066 | lr: 6.177e-04
[Epoch: 7][#examples: 7040/81252][#steps: 9000]
	Train Loss: 0.064 | Train PPL:   1.066 | lr: 6.177e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 336/513
[VAL]: The number of correct predictions (aux-task (multi)): 355/513

---------------------------------------
[Epoch: 7][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.367 | Val. Acc: 0.655 | Val. PPL:   3.925
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.655 | Val. Loss: 1.367 | BEST. Val. Epoch: 7 | BEST. Val. Step: 9000
---------------------------------------

	BEST. Val. Acc Aux: 0.698
---------------------------------------

[Epoch: 7][#examples: 10240/81252][#steps: 9050]
	Train Loss: 0.063 | Train PPL:   1.065 | lr: 5.559e-04
[Epoch: 7][#examples: 13440/81252][#steps: 9100]
	Train Loss: 0.061 | Tra

[Epoch: 8][#examples: 56960/81252][#steps: 11050]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 3.283e-04
[Epoch: 8][#examples: 60160/81252][#steps: 11100]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 3.283e-04
[Epoch: 8][#examples: 63360/81252][#steps: 11150]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 3.283e-04
[Epoch: 8][#examples: 66560/81252][#steps: 11200]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 3.283e-04
[Epoch: 8][#examples: 69760/81252][#steps: 11250]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 3.283e-04
[Epoch: 8][#examples: 72960/81252][#steps: 11300]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 3.283e-04
[Epoch: 8][#examples: 76160/81252][#steps: 11350]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 3.283e-04
[Epoch: 8][#examples: 79360/81252][#steps: 11400]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 3.283e-04
[VAL]: The number of correct predictions (main-task (multi)): 336/513
[VAL]: The number of correct predictions (aux-task (multi)): 365/513

------------

[Epoch: 10][#examples: 22400/81252][#steps: 13050]
	Train Loss: 0.036 | Train PPL:   1.036 | lr: 1.744e-04
[Epoch: 10][#examples: 25600/81252][#steps: 13100]
	Train Loss: 0.035 | Train PPL:   1.036 | lr: 1.744e-04
[Epoch: 10][#examples: 28800/81252][#steps: 13150]
	Train Loss: 0.036 | Train PPL:   1.036 | lr: 1.744e-04
[Epoch: 10][#examples: 32000/81252][#steps: 13200]
	Train Loss: 0.035 | Train PPL:   1.036 | lr: 1.744e-04
[Epoch: 10][#examples: 35200/81252][#steps: 13250]
	Train Loss: 0.035 | Train PPL:   1.036 | lr: 1.744e-04
[Epoch: 10][#examples: 38400/81252][#steps: 13300]
	Train Loss: 0.036 | Train PPL:   1.036 | lr: 1.744e-04
[Epoch: 10][#examples: 41600/81252][#steps: 13350]
	Train Loss: 0.036 | Train PPL:   1.037 | lr: 1.744e-04
[Epoch: 10][#examples: 44800/81252][#steps: 13400]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 1.744e-04
[Epoch: 10][#examples: 48000/81252][#steps: 13450]
	Train Loss: 0.037 | Train PPL:   1.037 | lr: 1.744e-04
[Epoch: 10][#examples: 51200/81252][#

[Epoch: 12][#examples: 640/81252][#steps: 15250]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.271e-05
[Epoch: 12][#examples: 3840/81252][#steps: 15300]
	Train Loss: 0.033 | Train PPL:   1.033 | lr: 9.271e-05
[Epoch: 12][#examples: 7040/81252][#steps: 15350]
	Train Loss: 0.036 | Train PPL:   1.036 | lr: 9.271e-05
[Epoch: 12][#examples: 10240/81252][#steps: 15400]
	Train Loss: 0.035 | Train PPL:   1.035 | lr: 9.271e-05
[Epoch: 12][#examples: 13440/81252][#steps: 15450]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 9.271e-05
[Epoch: 12][#examples: 16640/81252][#steps: 15500]
	Train Loss: 0.035 | Train PPL:   1.035 | lr: 9.271e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 337/513
[VAL]: The number of correct predictions (aux-task (multi)): 354/513

---------------------------------------
[Epoch: 12][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.451 | Val. A

[Epoch: 13][#examples: 66560/81252][#steps: 17550]
	Train Loss: 0.032 | Train PPL:   1.033 | lr: 4.927e-05
[Epoch: 13][#examples: 69760/81252][#steps: 17600]
	Train Loss: 0.032 | Train PPL:   1.033 | lr: 4.927e-05
[Epoch: 13][#examples: 72960/81252][#steps: 17650]
	Train Loss: 0.032 | Train PPL:   1.033 | lr: 4.927e-05
[Epoch: 13][#examples: 76160/81252][#steps: 17700]
	Train Loss: 0.032 | Train PPL:   1.033 | lr: 4.927e-05
[Epoch: 13][#examples: 79360/81252][#steps: 17750]
	Train Loss: 0.033 | Train PPL:   1.033 | lr: 4.927e-05
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 13][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.400 | Val. Acc: 0.651 | Val. PPL:   4.057
	 BEST. Val. Loss: 1.393 | BEST. Val. Acc: 0.657 | Val. Loss: 1.421 | BEST. Val. Epoch: 13 | BEST. Val. Step: 17000
---------------------------------------

	BEST. Va

[Epoch: 15][#examples: 32000/81252][#steps: 19550]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 2.618e-05
[Epoch: 15][#examples: 35200/81252][#steps: 19600]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 2.618e-05
[Epoch: 15][#examples: 38400/81252][#steps: 19650]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 2.618e-05
[Epoch: 15][#examples: 41600/81252][#steps: 19700]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 2.618e-05
[Epoch: 15][#examples: 44800/81252][#steps: 19750]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 2.618e-05
[Epoch: 15][#examples: 48000/81252][#steps: 19800]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 2.618e-05
[Epoch: 15][#examples: 51200/81252][#steps: 19850]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 2.618e-05
[Epoch: 15][#examples: 54400/81252][#steps: 19900]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 2.618e-05
[Epoch: 15][#examples: 57600/81252][#steps: 19950]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 2.618e-05
[Epoch: 15][#examples: 60800/81252][#

[Epoch: 17][#examples: 7040/81252][#steps: 21700]
	Train Loss: 0.033 | Train PPL:   1.033 | lr: 1.392e-05
[Epoch: 17][#examples: 10240/81252][#steps: 21750]
	Train Loss: 0.033 | Train PPL:   1.033 | lr: 1.392e-05
[Epoch: 17][#examples: 13440/81252][#steps: 21800]
	Train Loss: 0.033 | Train PPL:   1.034 | lr: 1.392e-05
[Epoch: 17][#examples: 16640/81252][#steps: 21850]
	Train Loss: 0.035 | Train PPL:   1.035 | lr: 1.392e-05
[Epoch: 17][#examples: 19840/81252][#steps: 21900]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 1.392e-05
[Epoch: 17][#examples: 23040/81252][#steps: 21950]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 1.392e-05
[Epoch: 17][#examples: 26240/81252][#steps: 22000]
	Train Loss: 0.033 | Train PPL:   1.034 | lr: 1.392e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 17][Validatiing...]
		 Better Valid Acc! (at leas

[Epoch: 18][#examples: 76160/81252][#steps: 24050]
	Train Loss: 0.036 | Train PPL:   1.037 | lr: 7.395e-06
[Epoch: 18][#examples: 79360/81252][#steps: 24100]
	Train Loss: 0.037 | Train PPL:   1.037 | lr: 7.395e-06
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 18][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.003
	 BEST. Val. Loss: 1.387 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

Epoch: 19 | Time: 2m 22s
	Train Loss: 0.037 | Train PPL:   1.037
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.003
[Train]: Current Teacher Forcing Ratio: 0.230
[Epoch: 19][#examples: 1280/81252][#steps: 24150]
	Tra

[Epoch: 20][#examples: 51200/81252][#steps: 26200]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.930e-06
[Epoch: 20][#examples: 54400/81252][#steps: 26250]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.930e-06
[Epoch: 20][#examples: 57600/81252][#steps: 26300]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.930e-06
[Epoch: 20][#examples: 60800/81252][#steps: 26350]
	Train Loss: 0.040 | Train PPL:   1.040 | lr: 3.930e-06
[Epoch: 20][#examples: 64000/81252][#steps: 26400]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.930e-06
[Epoch: 20][#examples: 67200/81252][#steps: 26450]
	Train Loss: 0.040 | Train PPL:   1.040 | lr: 3.930e-06
[Epoch: 20][#examples: 70400/81252][#steps: 26500]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 3.930e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 332/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 20][Validatiing...]
	 Early Stopping Patience: 9

[Epoch: 22][#examples: 29440/81252][#steps: 28400]
	Train Loss: 0.041 | Train PPL:   1.042 | lr: 2.089e-06
[Epoch: 22][#examples: 32640/81252][#steps: 28450]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 2.089e-06
[Epoch: 22][#examples: 35840/81252][#steps: 28500]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 2.089e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 332/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 22][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.385 | Val. Acc: 0.647 | Val. PPL:   3.994
	 BEST. Val. Loss: 1.385 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 22][#examples: 39040/81252][#steps: 28550]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 1.8

[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 24][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.383 | Val. Acc: 0.649 | Val. PPL:   3.986
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 24][#examples: 4480/81252][#steps: 30550]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 9.990e-07
[Epoch: 24][#examples: 7680/81252][#steps: 30600]
	Train Loss: 0.038 | Train PPL:   1.038 | lr: 9.990e-07
[Epoch: 24][#examples: 10880/81252][#steps: 30650]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 9.990e-07
[Epoch: 24][#examples: 14080/81252][#steps: 30700]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.990e-07
[Epoch: 24][#examples: 17280/81252][#steps: 30750]

[Epoch: 25][#examples: 67200/81252][#steps: 32800]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 5.899e-07
[Epoch: 25][#examples: 70400/81252][#steps: 32850]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 5.899e-07
[Epoch: 25][#examples: 73600/81252][#steps: 32900]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 5.899e-07
[Epoch: 25][#examples: 76800/81252][#steps: 32950]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 5.899e-07
[Epoch: 25][#examples: 80000/81252][#steps: 33000]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 5.899e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 95/100
	 Val. Loss: 1.388 | Val. Acc: 0.649 | Val. PPL:   4.006
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
-----------------------------------

[Epoch: 27][#examples: 45440/81252][#steps: 35000]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 3.135e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 27][Validatiing...]
	 Early Stopping Patience: 89/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.004
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 27][#examples: 48640/81252][#steps: 35050]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 2.821e-07
[Epoch: 27][#examples: 51840/81252][#steps: 35100]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 2.821e-07
[Epoch: 27][#examples: 55040/81252][#steps: 35150]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 2.821e-07
[Epoch: 27][#examples: 58240/8125

[Epoch: 29][#examples: 14080/81252][#steps: 37050]
	Train Loss: 0.038 | Train PPL:   1.038 | lr: 1.499e-07
[Epoch: 29][#examples: 17280/81252][#steps: 37100]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 1.499e-07
[Epoch: 29][#examples: 20480/81252][#steps: 37150]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 1.499e-07
[Epoch: 29][#examples: 23680/81252][#steps: 37200]
	Train Loss: 0.037 | Train PPL:   1.037 | lr: 1.499e-07
[Epoch: 29][#examples: 26880/81252][#steps: 37250]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 1.499e-07
[Epoch: 29][#examples: 30080/81252][#steps: 37300]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 1.499e-07
[Epoch: 29][#examples: 33280/81252][#steps: 37350]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 1.499e-07
[Epoch: 29][#examples: 36480/81252][#steps: 37400]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 1.499e-07
[Epoch: 29][#examples: 39680/81252][#steps: 37450]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 1.499e-07
[Epoch: 29][#examples: 42880/81252][#

[Epoch: 31][#examples: 1920/81252][#steps: 39400]
	Train Loss: 0.041 | Train PPL:   1.041 | lr: 9.838e-08
[Epoch: 31][#examples: 5120/81252][#steps: 39450]
	Train Loss: 0.044 | Train PPL:   1.045 | lr: 9.838e-08
[Epoch: 31][#examples: 8320/81252][#steps: 39500]
	Train Loss: 0.045 | Train PPL:   1.046 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 76/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.003
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 31][#examples: 11520/81252][#steps: 39550]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 9.838e-08
[Epoch: 31][#examples: 14720/81252][

[Epoch: 32][#examples: 64640/81252][#steps: 41650]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 32][#examples: 67840/81252][#steps: 41700]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
[Epoch: 32][#examples: 71040/81252][#steps: 41750]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
[Epoch: 32][#examples: 74240/81252][#steps: 41800]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 32][#examples: 77440/81252][#steps: 41850]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 32][#examples: 80640/81252][#steps: 41900]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 32][Validatiing...]
	 Early Stopping Patience: 70/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.002
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Los

[Epoch: 34][#examples: 42880/81252][#steps: 43850]
	Train Loss: 0.036 | Train PPL:   1.037 | lr: 9.838e-08
[Epoch: 34][#examples: 46080/81252][#steps: 43900]
	Train Loss: 0.037 | Train PPL:   1.037 | lr: 9.838e-08
[Epoch: 34][#examples: 49280/81252][#steps: 43950]
	Train Loss: 0.037 | Train PPL:   1.037 | lr: 9.838e-08
[Epoch: 34][#examples: 52480/81252][#steps: 44000]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 34][Validatiing...]
	 Early Stopping Patience: 64/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.002
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 34][#examples: 55680/8125

[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 36][Validatiing...]
	 Early Stopping Patience: 58/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.002
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 36][#examples: 21120/81252][#steps: 46050]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 36][#examples: 24320/81252][#steps: 46100]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
[Epoch: 36][#examples: 27520/81252][#steps: 46150]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
[Epoch: 36][#examples: 30720/81252][#steps: 46200]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
[Epoch: 36][#examples: 33920/81252][#steps: 4625

[Epoch: 38][#examples: 2560/81252][#steps: 48300]
	Train Loss: 0.041 | Train PPL:   1.042 | lr: 9.838e-08
[Epoch: 38][#examples: 5760/81252][#steps: 48350]
	Train Loss: 0.043 | Train PPL:   1.044 | lr: 9.838e-08
[Epoch: 38][#examples: 8960/81252][#steps: 48400]
	Train Loss: 0.041 | Train PPL:   1.042 | lr: 9.838e-08
[Epoch: 38][#examples: 12160/81252][#steps: 48450]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 9.838e-08
[Epoch: 38][#examples: 15360/81252][#steps: 48500]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 51/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.002
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
--------------------------------------

[Epoch: 39][#examples: 65280/81252][#steps: 50550]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 9.838e-08
[Epoch: 39][#examples: 68480/81252][#steps: 50600]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 9.838e-08
[Epoch: 39][#examples: 71680/81252][#steps: 50650]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 9.838e-08
[Epoch: 39][#examples: 74880/81252][#steps: 50700]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 9.838e-08
[Epoch: 39][#examples: 78080/81252][#steps: 50750]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 9.838e-08
[Epoch: 39][#examples: 81280/81252][#steps: 50800]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 39][Validatiing...]
	 Early Stopping Patience: 45/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.001
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Los

[Epoch: 41][#examples: 43520/81252][#steps: 52750]
	Train Loss: 0.040 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 41][#examples: 46720/81252][#steps: 52800]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 41][#examples: 49920/81252][#steps: 52850]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 41][#examples: 53120/81252][#steps: 52900]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 41][#examples: 56320/81252][#steps: 52950]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
[Epoch: 41][#examples: 59520/81252][#steps: 53000]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 41][Validatiing...]
	 Early Stopping Patience: 39/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.001
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0

[Epoch: 43][#examples: 21760/81252][#steps: 54950]
	Train Loss: 0.036 | Train PPL:   1.037 | lr: 9.838e-08
[Epoch: 43][#examples: 24960/81252][#steps: 55000]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 43][Validatiing...]
	 Early Stopping Patience: 33/100
	 Val. Loss: 1.387 | Val. Acc: 0.649 | Val. PPL:   4.001
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 43][#examples: 28160/81252][#steps: 55050]
	Train Loss: 0.037 | Train PPL:   1.037 | lr: 9.838e-08
[Epoch: 43][#examples: 31360/81252][#steps: 55100]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
[Epoch: 43][#examples: 34560/8125

[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 44][Validatiing...]
	 Early Stopping Patience: 27/100
	 Val. Loss: 1.386 | Val. Acc: 0.649 | Val. PPL:   4.001
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

Epoch: 45 | Time: 2m 22s
	Train Loss: 0.039 | Train PPL:   1.040
	 Val. Loss: 1.386 | Val. Acc: 0.649 | Val. PPL:   4.001
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 45][#examples: 3200/81252][#steps: 57200]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
[Epoch: 45][#examples: 6400/81252][#steps: 57250]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
[Epoch: 45][#examples: 9600/81252][#steps: 57300]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 

[Epoch: 46][#examples: 62720/81252][#steps: 59400]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
[Epoch: 46][#examples: 65920/81252][#steps: 59450]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
[Epoch: 46][#examples: 69120/81252][#steps: 59500]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 46][Validatiing...]
	 Early Stopping Patience: 21/100
	 Val. Loss: 1.386 | Val. Acc: 0.649 | Val. PPL:   4.000
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 46][#examples: 72320/81252][#steps: 59550]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 9.838e-08
[Epoch: 46][#examples: 75520/8125

[Epoch: 48][#examples: 37760/81252][#steps: 61550]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 48][#examples: 40960/81252][#steps: 61600]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 48][#examples: 44160/81252][#steps: 61650]
	Train Loss: 0.040 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 48][#examples: 47360/81252][#steps: 61700]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 48][#examples: 50560/81252][#steps: 61750]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 48][#examples: 53760/81252][#steps: 61800]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 48][#examples: 56960/81252][#steps: 61850]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 48][#examples: 60160/81252][#steps: 61900]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 48][#examples: 63360/81252][#steps: 61950]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 48][#examples: 66560/81252][#

[Epoch: 50][#examples: 16000/81252][#steps: 63750]
	Train Loss: 0.037 | Train PPL:   1.037 | lr: 9.838e-08
[Epoch: 50][#examples: 19200/81252][#steps: 63800]
	Train Loss: 0.036 | Train PPL:   1.037 | lr: 9.838e-08
[Epoch: 50][#examples: 22400/81252][#steps: 63850]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
[Epoch: 50][#examples: 25600/81252][#steps: 63900]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
[Epoch: 50][#examples: 28800/81252][#steps: 63950]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
[Epoch: 50][#examples: 32000/81252][#steps: 64000]
	Train Loss: 0.037 | Train PPL:   1.038 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 50][Validatiing...]
	 Early Stopping Patience: 8/100
	 Val. Loss: 1.386 | Val. Acc: 0.649 | Val. PPL:   3.999
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.

[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 51][Validatiing...]
	 Early Stopping Patience: 2/100
	 Val. Loss: 1.386 | Val. Acc: 0.649 | Val. PPL:   3.999
	 BEST. Val. Loss: 1.383 | BEST. Val. Acc: 0.651 | Val. Loss: 1.399 | BEST. Val. Epoch: 18 | BEST. Val. Step: 23500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

Epoch: 52 | Time: 2m 23s
	Train Loss: 0.037 | Train PPL:   1.038
	 Val. Loss: 1.386 | Val. Acc: 0.649 | Val. PPL:   3.999
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 52][#examples: 640/81252][#steps: 66050]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 52][#examples: 3840/81252][#steps: 66100]
	Train Loss: 0.036 | Train PPL:   1.036 | lr: 9.838e-08
[Epoch: 52][#examples: 7040/81252][#steps: 66150]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 9.

[Epoch: 0][#examples: 16000/81252][#steps: 250]
	Train Loss: 2.646 | Train PPL:  14.098 | lr: 3.000e-03
[Epoch: 0][#examples: 19200/81252][#steps: 300]
	Train Loss: 2.542 | Train PPL:  12.711 | lr: 3.000e-03
[Epoch: 0][#examples: 22400/81252][#steps: 350]
	Train Loss: 2.429 | Train PPL:  11.352 | lr: 3.000e-03
[Epoch: 0][#examples: 25600/81252][#steps: 400]
	Train Loss: 2.301 | Train PPL:   9.983 | lr: 3.000e-03
[Epoch: 0][#examples: 28800/81252][#steps: 450]
	Train Loss: 2.171 | Train PPL:   8.768 | lr: 3.000e-03
[Epoch: 0][#examples: 32000/81252][#steps: 500]
	Train Loss: 2.050 | Train PPL:   7.770 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 128/513
[VAL]: The number of correct predictions (aux-task (multi)): 107/513

---------------------------------------
[Epoch: 0][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.918 | Val. Acc: 0.250 | Val

[Epoch: 1][#examples: 78720/81252][#steps: 2500]
	Train Loss: 0.321 | Train PPL:   1.378 | lr: 2.430e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 310/513
[VAL]: The number of correct predictions (aux-task (multi)): 314/513

---------------------------------------
[Epoch: 1][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.292 | Val. Acc: 0.604 | Val. PPL:   3.639
	 BEST. Val. Loss: 1.242 | BEST. Val. Acc: 0.604 | Val. Loss: 1.292 | BEST. Val. Epoch: 1 | BEST. Val. Step: 2500
---------------------------------------

	BEST. Val. Acc Aux: 0.632
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 309/513
[VAL]: The number of correct predictions (aux-task (multi)): 321/513

---------------------------------------
[Epoch: 1][Validatiing...]
	 Early Stopping Patience: 97/100
	 Val. Loss: 1.249 | Val. Acc: 0.602 | Val. PPL:   3.487
	 BEST. Val. Loss: 1.24

[Epoch: 3][#examples: 44160/81252][#steps: 4500]
	Train Loss: 0.182 | Train PPL:   1.200 | lr: 1.771e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 329/513
[VAL]: The number of correct predictions (aux-task (multi)): 336/513

---------------------------------------
[Epoch: 3][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.130 | Val. Acc: 0.641 | Val. PPL:   3.097
	 BEST. Val. Loss: 1.130 | BEST. Val. Acc: 0.641 | Val. Loss: 1.130 | BEST. Val. Epoch: 3 | BEST. Val. Step: 4500
---------------------------------------

	BEST. Val. Acc Aux: 0.667
---------------------------------------

[Epoch: 3][#examples: 47360/81252][#steps: 4550]
	Train Loss: 0.183 | Train PPL:   1.200 | lr: 1.594e-03
[Epoch: 3][#examples: 50560/81252][#steps: 4600]
	Train Loss: 0.182 | Train PPL:   1.199 | lr: 1.594e-03
[Epoch: 3][#examples: 53760/81252][#steps: 4650]
	Train Loss: 0.182 | T

[VAL]: The number of correct predictions (main-task (multi)): 331/513
[VAL]: The number of correct predictions (aux-task (multi)): 347/513

---------------------------------------
[Epoch: 5][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.152 | Val. Acc: 0.645 | Val. PPL:   3.166
	 BEST. Val. Loss: 1.152 | BEST. Val. Acc: 0.645 | Val. Loss: 1.152 | BEST. Val. Epoch: 5 | BEST. Val. Step: 6500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.676
---------------------------------------

[Epoch: 5][#examples: 12800/81252][#steps: 6550]
	Train Loss: 0.093 | Train PPL:   1.097 | lr: 9.414e-04
[Epoch: 5][#examples: 16000/81252][#steps: 6600]
	Train Loss: 0.094 | Train PPL:   1.099 | lr: 9.414e-04
[Epoch: 5][#examples: 19200/81252][#steps: 6650]
	Train Loss: 0.093 | Train PPL:   1.097 | lr: 9.414e-04
[Epoch: 5][#examples: 22400/81

[Epoch: 6][#examples: 62720/81252][#steps: 8600]
	Train Loss: 0.067 | Train PPL:   1.069 | lr: 6.863e-04
[Epoch: 6][#examples: 65920/81252][#steps: 8650]
	Train Loss: 0.067 | Train PPL:   1.069 | lr: 6.863e-04
[Epoch: 6][#examples: 69120/81252][#steps: 8700]
	Train Loss: 0.067 | Train PPL:   1.070 | lr: 6.863e-04
[Epoch: 6][#examples: 72320/81252][#steps: 8750]
	Train Loss: 0.067 | Train PPL:   1.070 | lr: 6.863e-04
[Epoch: 6][#examples: 75520/81252][#steps: 8800]
	Train Loss: 0.067 | Train PPL:   1.070 | lr: 6.863e-04
[Epoch: 6][#examples: 78720/81252][#steps: 8850]
	Train Loss: 0.067 | Train PPL:   1.070 | lr: 6.863e-04
[VAL]: The number of correct predictions (main-task (multi)): 338/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 6][Validatiing...]
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.369 | Val. Acc: 0.659 | Val. PPL:   3.930
	 BEST. Val. Loss: 1.363 | BEST. Val. Acc: 0.669 | Val. Loss: 1.449 | BE

[Epoch: 8][#examples: 24960/81252][#steps: 10550]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 3.647e-04
[Epoch: 8][#examples: 28160/81252][#steps: 10600]
	Train Loss: 0.040 | Train PPL:   1.040 | lr: 3.647e-04
[Epoch: 8][#examples: 31360/81252][#steps: 10650]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.647e-04
[Epoch: 8][#examples: 34560/81252][#steps: 10700]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.647e-04
[Epoch: 8][#examples: 37760/81252][#steps: 10750]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.647e-04
[Epoch: 8][#examples: 40960/81252][#steps: 10800]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.647e-04
[Epoch: 8][#examples: 44160/81252][#steps: 10850]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.647e-04
[Epoch: 8][#examples: 47360/81252][#steps: 10900]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.647e-04
[Epoch: 8][#examples: 50560/81252][#steps: 10950]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 3.647e-04
[Epoch: 8][#examples: 53760/81252][#steps: 110

[Epoch: 10][#examples: 3200/81252][#steps: 12750]
	Train Loss: 0.033 | Train PPL:   1.033 | lr: 2.393e-04
[Epoch: 10][#examples: 6400/81252][#steps: 12800]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 2.393e-04
[Epoch: 10][#examples: 9600/81252][#steps: 12850]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 2.393e-04
[Epoch: 10][#examples: 12800/81252][#steps: 12900]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 2.393e-04
[Epoch: 10][#examples: 16000/81252][#steps: 12950]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 2.393e-04
[Epoch: 10][#examples: 19200/81252][#steps: 13000]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 2.393e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 339/513
[VAL]: The number of correct predictions (aux-task (multi)): 347/513

---------------------------------------
[Epoch: 10][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.332 | Val. 

	 Early Stopping Patience: 99/100
	 Val. Loss: 1.397 | Val. Acc: 0.667 | Val. PPL:   4.043
	 BEST. Val. Loss: 1.390 | BEST. Val. Acc: 0.667 | Val. Loss: 1.397 | BEST. Val. Epoch: 11 | BEST. Val. Step: 15000
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.688
---------------------------------------

[Epoch: 11][#examples: 69120/81252][#steps: 15050]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.272e-04
[Epoch: 11][#examples: 72320/81252][#steps: 15100]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 1.272e-04
[Epoch: 11][#examples: 75520/81252][#steps: 15150]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 1.272e-04
[Epoch: 11][#examples: 78720/81252][#steps: 15200]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 1.272e-04
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 11][Validati

	 Early Stopping Patience: 100/100
	 Val. Loss: 1.465 | Val. Acc: 0.671 | Val. PPL:   4.326
	 BEST. Val. Loss: 1.465 | BEST. Val. Acc: 0.671 | Val. Loss: 1.465 | BEST. Val. Epoch: 13 | BEST. Val. Step: 17000
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

[Epoch: 13][#examples: 34560/81252][#steps: 17050]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 6.759e-05
[Epoch: 13][#examples: 37760/81252][#steps: 17100]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 6.759e-05
[Epoch: 13][#examples: 40960/81252][#steps: 17150]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 6.759e-05
[Epoch: 13][#examples: 44160/81252][#steps: 17200]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 6.759e-05
[Epoch: 13][#examples: 47360/81252][#steps: 17250]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 6.759e-05
[Epoch: 13][#examples: 50560/81252][#steps: 17300]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 6.759e-05
[Epoch: 13][#examples: 53760/81252][#step

[Epoch: 15][#examples: 3200/81252][#steps: 19100]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 3.592e-05
[Epoch: 15][#examples: 6400/81252][#steps: 19150]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 3.592e-05
[Epoch: 15][#examples: 9600/81252][#steps: 19200]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 3.592e-05
[Epoch: 15][#examples: 12800/81252][#steps: 19250]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 3.592e-05
[Epoch: 15][#examples: 16000/81252][#steps: 19300]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 3.592e-05
[Epoch: 15][#examples: 19200/81252][#steps: 19350]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 3.592e-05
[Epoch: 15][#examples: 22400/81252][#steps: 19400]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 3.592e-05
[Epoch: 15][#examples: 25600/81252][#steps: 19450]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 3.592e-05
[Epoch: 15][#examples: 28800/81252][#steps: 19500]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 3.592e-05
-----Val------
[VAL]: The number of corr

[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 16][Validatiing...]
	 Early Stopping Patience: 96/100
	 Val. Loss: 1.541 | Val. Acc: 0.665 | Val. PPL:   4.667
	 BEST. Val. Loss: 1.534 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.690
---------------------------------------

[Epoch: 16][#examples: 78720/81252][#steps: 21550]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.909e-05
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 16][Validatiing...]
	 Early Stopping Patience: 95/100
	 Val. Loss: 1.539 | Val. Acc: 0.665 | Val. PPL:   4.662
	 BEST. Val. Loss: 1.534 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | 

[Epoch: 18][#examples: 44160/81252][#steps: 23550]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 1.014e-05
[Epoch: 18][#examples: 47360/81252][#steps: 23600]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 1.014e-05
[Epoch: 18][#examples: 50560/81252][#steps: 23650]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 1.014e-05
[Epoch: 18][#examples: 53760/81252][#steps: 23700]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.014e-05
[Epoch: 18][#examples: 56960/81252][#steps: 23750]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.014e-05
[Epoch: 18][#examples: 60160/81252][#steps: 23800]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.014e-05
[Epoch: 18][#examples: 63360/81252][#steps: 23850]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.014e-05
[Epoch: 18][#examples: 66560/81252][#steps: 23900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.014e-05
[Epoch: 18][#examples: 69760/81252][#steps: 23950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.014e-05
[Epoch: 18][#examples: 72960/81252][#

[Epoch: 20][#examples: 19200/81252][#steps: 25700]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 5.391e-06
[Epoch: 20][#examples: 22400/81252][#steps: 25750]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 5.391e-06
[Epoch: 20][#examples: 25600/81252][#steps: 25800]
	Train Loss: 0.033 | Train PPL:   1.034 | lr: 5.391e-06
[Epoch: 20][#examples: 28800/81252][#steps: 25850]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 5.391e-06
[Epoch: 20][#examples: 32000/81252][#steps: 25900]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 5.391e-06
[Epoch: 20][#examples: 35200/81252][#steps: 25950]
	Train Loss: 0.032 | Train PPL:   1.033 | lr: 5.391e-06
[Epoch: 20][#examples: 38400/81252][#steps: 26000]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 5.391e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 20][Validatiing...]
		 Better Valid Loss! (at le

[Epoch: 22][#examples: 640/81252][#steps: 27950]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 2.865e-06
[Epoch: 22][#examples: 3840/81252][#steps: 28000]
	Train Loss: 0.035 | Train PPL:   1.035 | lr: 2.865e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 22][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.528 | Val. Acc: 0.667 | Val. PPL:   4.610
	 BEST. Val. Loss: 1.528 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 22][#examples: 7040/81252][#steps: 28050]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 2.579e-06
[Epoch: 22][#examples: 10240/81252][#steps: 28100]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 2.579e-

[Epoch: 23][#examples: 60160/81252][#steps: 30150]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 1.523e-06
[Epoch: 23][#examples: 63360/81252][#steps: 30200]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 1.523e-06
[Epoch: 23][#examples: 66560/81252][#steps: 30250]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 1.523e-06
[Epoch: 23][#examples: 69760/81252][#steps: 30300]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 1.523e-06
[Epoch: 23][#examples: 72960/81252][#steps: 30350]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 1.523e-06
[Epoch: 23][#examples: 76160/81252][#steps: 30400]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 1.523e-06
[Epoch: 23][#examples: 79360/81252][#steps: 30450]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 1.523e-06
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 23][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Ea

[Epoch: 25][#examples: 32000/81252][#steps: 32250]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 8.092e-07
[Epoch: 25][#examples: 35200/81252][#steps: 32300]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 8.092e-07
[Epoch: 25][#examples: 38400/81252][#steps: 32350]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 8.092e-07
[Epoch: 25][#examples: 41600/81252][#steps: 32400]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 8.092e-07
[Epoch: 25][#examples: 44800/81252][#steps: 32450]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 8.092e-07
[Epoch: 25][#examples: 48000/81252][#steps: 32500]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 8.092e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.525 | Val. Acc: 0.667 | Val. PPL:   4.594
	 BEST. Val. Loss: 1.525 | BEST. Val. Acc: 0

[Epoch: 27][#examples: 10240/81252][#steps: 34450]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 4.300e-07
[Epoch: 27][#examples: 13440/81252][#steps: 34500]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 4.300e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 27][Validatiing...]
	 Early Stopping Patience: 95/100
	 Val. Loss: 1.527 | Val. Acc: 0.667 | Val. PPL:   4.603
	 BEST. Val. Loss: 1.525 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 27][#examples: 16640/81252][#steps: 34550]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 3.870e-07
[Epoch: 27][#examples: 19840/81252][#steps: 34600]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 3.870e-07
[Epoch: 27][#examples: 23040/8125

[Epoch: 28][#examples: 72960/81252][#steps: 36700]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 2.285e-07
[Epoch: 28][#examples: 76160/81252][#steps: 36750]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 2.285e-07
[Epoch: 28][#examples: 79360/81252][#steps: 36800]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 2.285e-07
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 28][Validatiing...]
	 Early Stopping Patience: 89/100
	 Val. Loss: 1.526 | Val. Acc: 0.667 | Val. PPL:   4.602
	 BEST. Val. Loss: 1.525 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

Epoch: 29 | Time: 2m 22s
	Train Loss: 0.029 | Train PPL:   1.030
	 Val. Loss: 1.526 | Val. Acc: 0.667 | Val. PPL:   4.602
[Train]: Current Teacher Forcing 

[Epoch: 30][#examples: 51200/81252][#steps: 38900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.215e-07
[Epoch: 30][#examples: 54400/81252][#steps: 38950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.215e-07
[Epoch: 30][#examples: 57600/81252][#steps: 39000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.215e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 30][Validatiing...]
	 Early Stopping Patience: 83/100
	 Val. Loss: 1.526 | Val. Acc: 0.667 | Val. PPL:   4.601
	 BEST. Val. Loss: 1.525 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 30][#examples: 60800/81252][#steps: 39050]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 1.093e-07
[Epoch: 30][#examples: 64000/8125

[Epoch: 32][#examples: 26240/81252][#steps: 41050]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 32][#examples: 29440/81252][#steps: 41100]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 32][#examples: 32640/81252][#steps: 41150]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 32][#examples: 35840/81252][#steps: 41200]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 32][#examples: 39040/81252][#steps: 41250]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 32][#examples: 42240/81252][#steps: 41300]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 32][#examples: 45440/81252][#steps: 41350]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 32][#examples: 48640/81252][#steps: 41400]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 32][#examples: 51840/81252][#steps: 41450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 32][#examples: 55040/81252][#

[Epoch: 34][#examples: 4480/81252][#steps: 43250]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 34][#examples: 7680/81252][#steps: 43300]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 34][#examples: 10880/81252][#steps: 43350]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 34][#examples: 14080/81252][#steps: 43400]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 34][#examples: 17280/81252][#steps: 43450]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 34][#examples: 20480/81252][#steps: 43500]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 34][Validatiing...]
	 Early Stopping Patience: 94/100
	 Val. Loss: 1.526 | Val. Acc: 0.667 | Val. PPL:   4.601
	 BEST. Val. Loss: 1.524 | BEST. Val. Acc: 0.6

[Epoch: 35][#examples: 70400/81252][#steps: 45550]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 35][#examples: 73600/81252][#steps: 45600]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 35][#examples: 76800/81252][#steps: 45650]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 35][#examples: 80000/81252][#steps: 45700]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 35][Validatiing...]
	 Early Stopping Patience: 88/100
	 Val. Loss: 1.526 | Val. Acc: 0.667 | Val. PPL:   4.601
	 BEST. Val. Loss: 1.524 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

Epoch: 36 | Time: 2m 23s
	Train Loss: 0.030 | Tr

[Epoch: 37][#examples: 48640/81252][#steps: 47750]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 37][#examples: 51840/81252][#steps: 47800]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 37][#examples: 55040/81252][#steps: 47850]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 37][#examples: 58240/81252][#steps: 47900]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 37][#examples: 61440/81252][#steps: 47950]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 37][#examples: 64640/81252][#steps: 48000]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 37][Validatiing...]
	 Early Stopping Patience: 82/100
	 Val. Loss: 1.526 | Val. Acc: 0.667 | Val. PPL:   4.601
	 BEST. Val. Loss: 1.524 | BEST. Val. Acc: 0

[Epoch: 39][#examples: 26880/81252][#steps: 49950]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 39][#examples: 30080/81252][#steps: 50000]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 39][Validatiing...]
	 Early Stopping Patience: 76/100
	 Val. Loss: 1.528 | Val. Acc: 0.667 | Val. PPL:   4.609
	 BEST. Val. Loss: 1.524 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 39][#examples: 33280/81252][#steps: 50050]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 39][#examples: 36480/81252][#steps: 50100]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 39][#examples: 39680/8125

[Epoch: 41][#examples: 1920/81252][#steps: 52100]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 41][#examples: 5120/81252][#steps: 52150]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 41][#examples: 8320/81252][#steps: 52200]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 41][#examples: 11520/81252][#steps: 52250]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 41][#examples: 14720/81252][#steps: 52300]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 41][#examples: 17920/81252][#steps: 52350]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 41][#examples: 21120/81252][#steps: 52400]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 41][#examples: 24320/81252][#steps: 52450]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 41][#examples: 27520/81252][#steps: 52500]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of corr

[Epoch: 42][#examples: 77440/81252][#steps: 54550]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 42][#examples: 80640/81252][#steps: 54600]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 42][Validatiing...]
	 Early Stopping Patience: 63/100
	 Val. Loss: 1.528 | Val. Acc: 0.667 | Val. PPL:   4.609
	 BEST. Val. Loss: 1.524 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

Epoch: 43 | Time: 2m 22s
	Train Loss: 0.029 | Train PPL:   1.030
	 Val. Loss: 1.528 | Val. Acc: 0.667 | Val. PPL:   4.609
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 43][#examples: 2560/81252][#steps: 54650]
	Train Loss: 0.022 | Train PPL:   1.022 | lr

[Epoch: 44][#examples: 55680/81252][#steps: 56750]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 44][#examples: 58880/81252][#steps: 56800]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 44][#examples: 62080/81252][#steps: 56850]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 44][#examples: 65280/81252][#steps: 56900]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 44][#examples: 68480/81252][#steps: 56950]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 44][#examples: 71680/81252][#steps: 57000]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 44][Validatiing...]
	 Early Stopping Patience: 57/100
	 Val. Loss: 1.528 | Val. Acc: 0.667 | Val. PPL:   4.608
	 BEST. Val. Loss: 1.524 | BEST. Val. Acc: 0

[Epoch: 46][#examples: 33920/81252][#steps: 58950]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 46][#examples: 37120/81252][#steps: 59000]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 46][Validatiing...]
	 Early Stopping Patience: 51/100
	 Val. Loss: 1.525 | Val. Acc: 0.667 | Val. PPL:   4.597
	 BEST. Val. Loss: 1.524 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 46][#examples: 40320/81252][#steps: 59050]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 46][#examples: 43520/81252][#steps: 59100]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 46][#examples: 46720/8125

[Epoch: 48][#examples: 5760/81252][#steps: 61050]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 48][#examples: 8960/81252][#steps: 61100]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 48][#examples: 12160/81252][#steps: 61150]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 48][#examples: 15360/81252][#steps: 61200]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 48][#examples: 18560/81252][#steps: 61250]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 48][#examples: 21760/81252][#steps: 61300]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 48][#examples: 24960/81252][#steps: 61350]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 48][#examples: 28160/81252][#steps: 61400]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 48][#examples: 31360/81252][#steps: 61450]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 48][#examples: 34560/81252][#st

[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 49][Validatiing...]
	 Early Stopping Patience: 39/100
	 Val. Loss: 1.525 | Val. Acc: 0.667 | Val. PPL:   4.596
	 BEST. Val. Loss: 1.524 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 49][Validatiing...]
	 Early Stopping Patience: 38/100
	 Val. Loss: 1.525 | Val. Acc: 0.667 | Val. PPL:   4.596
	 BEST. Val. Loss: 1.524 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux:

[Epoch: 51][#examples: 49920/81252][#steps: 65550]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 51][#examples: 53120/81252][#steps: 65600]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 51][#examples: 56320/81252][#steps: 65650]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 51][#examples: 59520/81252][#steps: 65700]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 51][#examples: 62720/81252][#steps: 65750]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 51][#examples: 65920/81252][#steps: 65800]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 51][#examples: 69120/81252][#steps: 65850]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 51][#examples: 72320/81252][#steps: 65900]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 51][#examples: 75520/81252][#steps: 65950]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 51][#examples: 78720/81252][#

[Epoch: 53][#examples: 24960/81252][#steps: 67700]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 53][#examples: 28160/81252][#steps: 67750]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 53][#examples: 31360/81252][#steps: 67800]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 53][#examples: 34560/81252][#steps: 67850]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 53][#examples: 37760/81252][#steps: 67900]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 53][#examples: 40960/81252][#steps: 67950]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 53][#examples: 44160/81252][#steps: 68000]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 53][Validatiing...]
		 Better Valid Loss! (at le

[Epoch: 55][#examples: 3200/81252][#steps: 69900]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 55][#examples: 6400/81252][#steps: 69950]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 55][#examples: 9600/81252][#steps: 70000]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 55][Validatiing...]
	 Early Stopping Patience: 97/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.587
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 55][#examples: 12800/81252][#steps: 70050]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 55][#examples: 16000/81252][

[Epoch: 56][#examples: 65920/81252][#steps: 72150]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 56][#examples: 69120/81252][#steps: 72200]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 56][#examples: 72320/81252][#steps: 72250]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 56][#examples: 75520/81252][#steps: 72300]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 56][#examples: 78720/81252][#steps: 72350]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 56][Validatiing...]
	 Early Stopping Patience: 91/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.587
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Va

[Epoch: 58][#examples: 44160/81252][#steps: 74350]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 58][#examples: 47360/81252][#steps: 74400]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 58][#examples: 50560/81252][#steps: 74450]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 58][#examples: 53760/81252][#steps: 74500]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 58][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.587
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
----------------------------------

[Epoch: 60][#examples: 16000/81252][#steps: 76450]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 60][#examples: 19200/81252][#steps: 76500]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 60][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.587
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 60][#examples: 22400/81252][#steps: 76550]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 60][#examples: 25600/81252][#steps: 76600]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.8

[Epoch: 61][#examples: 72320/81252][#steps: 78600]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 61][#examples: 75520/81252][#steps: 78650]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 61][#examples: 78720/81252][#steps: 78700]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 61][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.587
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

Epoch: 62 | Time: 2m 24s
	Train Loss: 0.029 | Train PPL:   1.029
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:  

[Epoch: 63][#examples: 47360/81252][#steps: 80750]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 63][#examples: 50560/81252][#steps: 80800]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 63][#examples: 53760/81252][#steps: 80850]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 63][#examples: 56960/81252][#steps: 80900]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 63][#examples: 60160/81252][#steps: 80950]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 63][#examples: 63360/81252][#steps: 81000]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 63][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.587
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0

[Epoch: 65][#examples: 22400/81252][#steps: 82900]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 65][#examples: 25600/81252][#steps: 82950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 65][#examples: 28800/81252][#steps: 83000]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 65][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.586
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 65][#examples: 32000/81252][#steps: 83050]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 65][#examples: 35200/8125

[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 66][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.586
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

Epoch: 67 | Time: 2m 22s
	Train Loss: 0.030 | Train PPL:   1.030
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.586
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 67][#examples: 640/81252][#steps: 85100]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 67][#examples: 3840/81252][#steps: 85150]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 9.838e-08
[Epoch: 67][#examples: 7040/81252][#steps: 85200]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9

[Epoch: 68][#examples: 56960/81252][#steps: 87250]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 68][#examples: 60160/81252][#steps: 87300]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 68][#examples: 63360/81252][#steps: 87350]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 68][#examples: 66560/81252][#steps: 87400]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 68][#examples: 69760/81252][#steps: 87450]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 68][#examples: 72960/81252][#steps: 87500]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 68][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.586
	 BE

[Epoch: 70][#examples: 28800/81252][#steps: 89350]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 70][#examples: 32000/81252][#steps: 89400]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 70][#examples: 35200/81252][#steps: 89450]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 70][#examples: 38400/81252][#steps: 89500]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 70][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.586
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
----------------------------------

[Epoch: 72][#examples: 640/81252][#steps: 91450]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 72][#examples: 3840/81252][#steps: 91500]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 72][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.585
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 72][#examples: 7040/81252][#steps: 91550]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 72][#examples: 10240/81252][#steps: 91600]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 72][#examples: 13440/81252][#

[Epoch: 73][#examples: 63360/81252][#steps: 93700]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 73][#examples: 66560/81252][#steps: 93750]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 73][#examples: 69760/81252][#steps: 93800]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 73][#examples: 72960/81252][#steps: 93850]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 73][#examples: 76160/81252][#steps: 93900]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 73][#examples: 79360/81252][#steps: 93950]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 73][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.585
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Los

[Epoch: 75][#examples: 38400/81252][#steps: 95850]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 75][#examples: 41600/81252][#steps: 95900]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 75][#examples: 44800/81252][#steps: 95950]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 75][#examples: 48000/81252][#steps: 96000]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 75][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.585
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
----------------------------------

[Epoch: 77][#examples: 13440/81252][#steps: 98000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 77][Validatiing...]
	 Early Stopping Patience: 97/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.585
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 77][#examples: 16640/81252][#steps: 98050]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 77][#examples: 19840/81252][#steps: 98100]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 77][#examples: 23040/81252][#steps: 98150]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 77][#examples: 26240/8125

[Epoch: 78][#examples: 72960/81252][#steps: 100200]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 78][#examples: 76160/81252][#steps: 100250]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 78][#examples: 79360/81252][#steps: 100300]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 78][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.585
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

Epoch: 79 | Time: 2m 23s
	Train Loss: 0.030 | Train PPL:   1.030
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL

[Epoch: 80][#examples: 44800/81252][#steps: 102300]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 80][#examples: 48000/81252][#steps: 102350]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 80][#examples: 51200/81252][#steps: 102400]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 80][#examples: 54400/81252][#steps: 102450]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 80][#examples: 57600/81252][#steps: 102500]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 80][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.584
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. St

[Epoch: 82][#examples: 19840/81252][#steps: 104450]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 82][#examples: 23040/81252][#steps: 104500]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 82][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.584
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 82][#examples: 26240/81252][#steps: 104550]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 82][#examples: 29440/81252][#steps: 104600]
	Train Loss: 0.031 | Train PPL:   1.032 | lr:

[Epoch: 83][#examples: 76160/81252][#steps: 106600]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 83][#examples: 79360/81252][#steps: 106650]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 83][Validatiing...]
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.584
	 BEST. Val. Loss: 1.523 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

Epoch: 84 | Time: 2m 24s
	Train Loss: 0.029 | Train PPL:   1.030
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.584
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 84][#examples: 1280/81252][#steps: 106700]
	Train Loss: 0.024 | Train PPL:   1.025 |

[Epoch: 85][#examples: 51200/81252][#steps: 108750]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 85][#examples: 54400/81252][#steps: 108800]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 85][#examples: 57600/81252][#steps: 108850]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 85][#examples: 60800/81252][#steps: 108900]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 85][#examples: 64000/81252][#steps: 108950]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 85][#examples: 67200/81252][#steps: 109000]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 85][Validatiing...]
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.523 | Val. Acc: 0.667 | Val. PPL:   4.584
	 BEST. Val. Loss: 1.523 | BEST. Val. 

[Epoch: 87][#examples: 26240/81252][#steps: 110900]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 87][#examples: 29440/81252][#steps: 110950]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 87][#examples: 32640/81252][#steps: 111000]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 87][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.522 | Val. Acc: 0.667 | Val. PPL:   4.584
	 BEST. Val. Loss: 1.522 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 87][#examples: 35840/81252][#steps: 111050]
	Train Loss: 0.031 | Train PPL:   1.031 | lr:

[Epoch: 89][#examples: 1280/81252][#steps: 113050]
	Train Loss: 0.035 | Train PPL:   1.036 | lr: 9.838e-08
[Epoch: 89][#examples: 4480/81252][#steps: 113100]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 89][#examples: 7680/81252][#steps: 113150]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 89][#examples: 10880/81252][#steps: 113200]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 89][#examples: 14080/81252][#steps: 113250]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 89][#examples: 17280/81252][#steps: 113300]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 89][#examples: 20480/81252][#steps: 113350]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 89][#examples: 23680/81252][#steps: 113400]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 89][#examples: 26880/81252][#steps: 113450]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 89][#examples: 30080/81

[Epoch: 90][#examples: 76800/81252][#steps: 115500]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 90][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.522 | Val. Acc: 0.667 | Val. PPL:   4.583
	 BEST. Val. Loss: 1.522 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 90][#examples: 80000/81252][#steps: 115550]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 90][Validatiing..

[Epoch: 92][#examples: 45440/81252][#steps: 117550]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 92][#examples: 48640/81252][#steps: 117600]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 92][#examples: 51840/81252][#steps: 117650]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 92][#examples: 55040/81252][#steps: 117700]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 92][#examples: 58240/81252][#steps: 117750]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 92][#examples: 61440/81252][#steps: 117800]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 92][#examples: 64640/81252][#steps: 117850]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 92][#examples: 67840/81252][#steps: 117900]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 92][#examples: 71040/81252][#steps: 117950]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 92][#examples: 74240

[Epoch: 94][#examples: 17280/81252][#steps: 119650]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 94][#examples: 20480/81252][#steps: 119700]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 94][#examples: 23680/81252][#steps: 119750]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 94][#examples: 26880/81252][#steps: 119800]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 94][#examples: 30080/81252][#steps: 119850]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 94][#examples: 33280/81252][#steps: 119900]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 94][#examples: 36480/81252][#steps: 119950]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 94][#examples: 39680/81252][#steps: 120000]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task 

[Epoch: 96][#examples: 1920/81252][#steps: 121950]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 96][#examples: 5120/81252][#steps: 122000]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 96][Validatiing...]
	 Early Stopping Patience: 97/100
	 Val. Loss: 1.522 | Val. Acc: 0.667 | Val. PPL:   4.583
	 BEST. Val. Loss: 1.522 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 96][#examples: 8320/81252][#steps: 122050]
	Train Loss: 0.033 | Train PPL:   1.033 | lr: 9.838e-08
[Epoch: 96][#examples: 11520/81252][#steps: 122100]
	Train Loss: 0.033 | Train PPL:   1.034 | lr: 9.838e-08
[Epoch: 96][#examples: 14720/812

[Epoch: 97][#examples: 64640/81252][#steps: 124200]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 97][#examples: 67840/81252][#steps: 124250]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 97][#examples: 71040/81252][#steps: 124300]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 97][#examples: 74240/81252][#steps: 124350]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 97][#examples: 77440/81252][#steps: 124400]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 97][#examples: 80640/81252][#steps: 124450]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 97][Validatiing...]
	 Early Stopping Patience: 91/100
	 Val. Loss: 1.522 | Val. Acc: 0.667 | Val. PPL:   4.583
	 BEST. Val. Loss: 1.522 | BEST. Val. Acc: 0.673 | Va

[Epoch: 99][#examples: 39680/81252][#steps: 126350]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 99][#examples: 42880/81252][#steps: 126400]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 99][#examples: 46080/81252][#steps: 126450]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 99][#examples: 49280/81252][#steps: 126500]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 99][Validatiing...]
	 Early Stopping Patience: 96/100
	 Val. Loss: 1.522 | Val. Acc: 0.667 | Val. PPL:   4.583
	 BEST. Val. Loss: 1.522 | BEST. Val. Acc: 0.673 | Val. Loss: 1.534 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.692
---------------------------------------

[Epoch: 99][#examples: 52480/

[Epoch: 0][#examples: 54400/81252][#steps: 850]
	Train Loss: 1.398 | Train PPL:   4.045 | lr: 3.000e-03
[Epoch: 0][#examples: 57600/81252][#steps: 900]
	Train Loss: 1.344 | Train PPL:   3.835 | lr: 3.000e-03
[Epoch: 0][#examples: 60800/81252][#steps: 950]
	Train Loss: 1.298 | Train PPL:   3.660 | lr: 3.000e-03
[Epoch: 0][#examples: 64000/81252][#steps: 1000]
	Train Loss: 1.253 | Train PPL:   3.501 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 286/513
[VAL]: The number of correct predictions (aux-task (multi)): 286/513

---------------------------------------
[Epoch: 0][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.400 | Val. Acc: 0.558 | Val. PPL:   4.054
	 BEST. Val. Loss: 1.400 | BEST. Val. Acc: 0.558 | Val. Loss: 1.400 | BEST. Val. Epoch: 0 | BEST. Val. Step: 1000
---------------------------------------

		 Better Valid Acc on Auxiliary Task

[Epoch: 2][#examples: 13440/81252][#steps: 2750]
	Train Loss: 0.251 | Train PPL:   1.285 | lr: 3.000e-03
[Epoch: 2][#examples: 16640/81252][#steps: 2800]
	Train Loss: 0.255 | Train PPL:   1.290 | lr: 3.000e-03
[Epoch: 2][#examples: 19840/81252][#steps: 2850]
	Train Loss: 0.257 | Train PPL:   1.293 | lr: 3.000e-03
[Epoch: 2][#examples: 23040/81252][#steps: 2900]
	Train Loss: 0.258 | Train PPL:   1.294 | lr: 3.000e-03
[Epoch: 2][#examples: 26240/81252][#steps: 2950]
	Train Loss: 0.258 | Train PPL:   1.294 | lr: 3.000e-03
[Epoch: 2][#examples: 29440/81252][#steps: 3000]
	Train Loss: 0.259 | Train PPL:   1.296 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 322/513
[VAL]: The number of correct predictions (aux-task (multi)): 330/513

---------------------------------------
[Epoch: 2][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.347 | Val. Acc: 0.628

	 Early Stopping Patience: 99/100
	 Val. Loss: 1.403 | Val. Acc: 0.659 | Val. PPL:   4.068
	 BEST. Val. Loss: 1.269 | BEST. Val. Acc: 0.659 | Val. Loss: 1.403 | BEST. Val. Epoch: 3 | BEST. Val. Step: 5000
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.674
---------------------------------------

[Epoch: 3][#examples: 79360/81252][#steps: 5050]
	Train Loss: 0.188 | Train PPL:   1.207 | lr: 2.187e-03
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 3][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.195 | Val. Acc: 0.663 | Val. PPL:   3.303
	 BEST. Val. Loss: 1.195 | BEST. Val. Acc: 0.663 | Val. Loss: 1.195 | BEST. Val. Epoch: 3 | BEST. Val. Step: 5080
-----------------------------------

[Epoch: 5][#examples: 44800/81252][#steps: 7050]
	Train Loss: 0.106 | Train PPL:   1.112 | lr: 1.594e-03
[Epoch: 5][#examples: 48000/81252][#steps: 7100]
	Train Loss: 0.105 | Train PPL:   1.111 | lr: 1.594e-03
[Epoch: 5][#examples: 51200/81252][#steps: 7150]
	Train Loss: 0.106 | Train PPL:   1.111 | lr: 1.594e-03
[Epoch: 5][#examples: 54400/81252][#steps: 7200]
	Train Loss: 0.106 | Train PPL:   1.112 | lr: 1.594e-03
[Epoch: 5][#examples: 57600/81252][#steps: 7250]
	Train Loss: 0.106 | Train PPL:   1.112 | lr: 1.594e-03
[Epoch: 5][#examples: 60800/81252][#steps: 7300]
	Train Loss: 0.106 | Train PPL:   1.112 | lr: 1.594e-03
[Epoch: 5][#examples: 64000/81252][#steps: 7350]
	Train Loss: 0.106 | Train PPL:   1.112 | lr: 1.594e-03
[Epoch: 5][#examples: 67200/81252][#steps: 7400]
	Train Loss: 0.106 | Train PPL:   1.112 | lr: 1.594e-03
[Epoch: 5][#examples: 70400/81252][#steps: 7450]
	Train Loss: 0.106 | Train PPL:   1.112 | lr: 1.594e-03
[Epoch: 5][#examples: 73600/81252][#steps: 7500]
	Train

[Epoch: 7][#examples: 10240/81252][#steps: 9050]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.414e-04
[Epoch: 7][#examples: 13440/81252][#steps: 9100]
	Train Loss: 0.050 | Train PPL:   1.051 | lr: 9.414e-04
[Epoch: 7][#examples: 16640/81252][#steps: 9150]
	Train Loss: 0.051 | Train PPL:   1.052 | lr: 9.414e-04
[Epoch: 7][#examples: 19840/81252][#steps: 9200]
	Train Loss: 0.051 | Train PPL:   1.052 | lr: 9.414e-04
[Epoch: 7][#examples: 23040/81252][#steps: 9250]
	Train Loss: 0.051 | Train PPL:   1.053 | lr: 9.414e-04
[Epoch: 7][#examples: 26240/81252][#steps: 9300]
	Train Loss: 0.052 | Train PPL:   1.053 | lr: 9.414e-04
[Epoch: 7][#examples: 29440/81252][#steps: 9350]
	Train Loss: 0.052 | Train PPL:   1.053 | lr: 9.414e-04
[Epoch: 7][#examples: 32640/81252][#steps: 9400]
	Train Loss: 0.052 | Train PPL:   1.053 | lr: 9.414e-04
[Epoch: 7][#examples: 35840/81252][#steps: 9450]
	Train Loss: 0.052 | Train PPL:   1.053 | lr: 9.414e-04
[Epoch: 7][#examples: 39040/81252][#steps: 9500]
	Train

[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 362/513

---------------------------------------
[Epoch: 8][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.462 | Val. Acc: 0.669 | Val. PPL:   4.312
	 BEST. Val. Loss: 1.462 | BEST. Val. Acc: 0.669 | Val. Loss: 1.462 | BEST. Val. Epoch: 8 | BEST. Val. Step: 11430
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

Epoch: 09 | Time: 2m 27s
	Train Loss: 0.040 | Train PPL:   1.041
	 Val. Loss: 1.462 | Val. Acc: 0.669 | Val. PPL:   4.312
Renew Evaluation Records in the Burning Phase...
[Train]: Current Teacher Forcing Ratio: 0.530
[Epoch: 9][#examples: 1280/81252][#steps: 11450]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 5.003e-04
[Epoch: 9][#examples: 4480/81252][#steps: 11500]
	Train Loss: 0.034 | Train PP

[Epoch: 10][#examples: 51200/81252][#steps: 13500]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 2.954e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 10][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.565 | Val. Acc: 0.669 | Val. PPL:   4.784
	 BEST. Val. Loss: 1.498 | BEST. Val. Acc: 0.669 | Val. Loss: 1.565 | BEST. Val. Epoch: 10 | BEST. Val. Step: 13500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 10][#examples: 54400/81252][#steps: 13550]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 2.659e-04
[Epoch: 10][#examples: 57600/81252][#steps: 13600]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 2.659e-04
[Epoch: 10][#examples: 60800/81252][#steps: 13650]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 2.659

	 Early Stopping Patience: 100/100
	 Val. Loss: 1.492 | Val. Acc: 0.665 | Val. PPL:   4.447
	 BEST. Val. Loss: 1.492 | BEST. Val. Acc: 0.665 | Val. Loss: 1.492 | BEST. Val. Epoch: 12 | BEST. Val. Step: 15500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 12][#examples: 19840/81252][#steps: 15550]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.413e-04
[Epoch: 12][#examples: 23040/81252][#steps: 15600]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.413e-04
[Epoch: 12][#examples: 26240/81252][#steps: 15650]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 1.413e-04
[Epoch: 12][#examples: 29440/81252][#steps: 15700]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 1.413e-04
[Epoch: 12][#examples: 32640/81252][#steps: 15750]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.413e-04
[Epoch: 12][#examples: 35840/81252][#steps: 15800]
	Train Loss: 0.023 | Train PPL:   1.024 | lr: 1.413e-04
[Epoch: 12][#examples: 39040/81252][#step

[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 361/513

---------------------------------------
[Epoch: 13][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.555 | Val. Acc: 0.663 | Val. PPL:   4.733
	 BEST. Val. Loss: 1.516 | BEST. Val. Acc: 0.665 | Val. Loss: 1.541 | BEST. Val. Epoch: 13 | BEST. Val. Step: 17000
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

Epoch: 14 | Time: 2m 24s
	Train Loss: 0.023 | Train PPL:   1.023
	 Val. Loss: 1.555 | Val. Acc: 0.663 | Val. PPL:   4.733
Renew Evaluation Records in the Burning Phase...
[Train]: Current Teacher Forcing Ratio: 0.380
[Epoch: 14][#examples: 1280/81252][#steps: 17800]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 7.509e-05
[Epoch: 14][#examples: 4480/81252][#steps: 17850]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 7.509e-05
[Epoch: 14][#examples: 7680/81252][#steps: 1790

[Epoch: 15][#examples: 48000/81252][#steps: 19800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 4.434e-05
[Epoch: 15][#examples: 51200/81252][#steps: 19850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 4.434e-05
[Epoch: 15][#examples: 54400/81252][#steps: 19900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 4.434e-05
[Epoch: 15][#examples: 57600/81252][#steps: 19950]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 4.434e-05
[Epoch: 15][#examples: 60800/81252][#steps: 20000]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 4.434e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 15][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.566 | Val. Acc: 0.663 | Val. PPL:   4.788
	 BEST. Val. Loss: 1.558 | BEST. Val. Acc: 0.665 | Val. Loss: 1.558 | BEST. Val. Epoch: 15 | BEST. Val. Step: 19500
-----------------------------------

[Epoch: 17][#examples: 19840/81252][#steps: 21900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.357e-05
[Epoch: 17][#examples: 23040/81252][#steps: 21950]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 2.357e-05
[Epoch: 17][#examples: 26240/81252][#steps: 22000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.357e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 359/513

---------------------------------------
[Epoch: 17][Validatiing...]
	 Early Stopping Patience: 96/100
	 Val. Loss: 1.453 | Val. Acc: 0.665 | Val. PPL:   4.276
	 BEST. Val. Loss: 1.376 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 17][#examples: 29440/81252][#steps: 22050]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.121e-05
[Epoch: 17][#examples: 32640/8125

[Epoch: 18][#examples: 79360/81252][#steps: 24100]
	Train Loss: 0.023 | Train PPL:   1.024 | lr: 1.252e-05
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 18][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.372 | Val. Acc: 0.663 | Val. PPL:   3.943
	 BEST. Val. Loss: 1.371 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

Epoch: 19 | Time: 2m 26s
	Train Loss: 0.023 | Train PPL:   1.024
	 Val. Loss: 1.372 | Val. Acc: 0.663 | Val. PPL:   3.943
[Train]: Current Teacher Forcing Ratio: 0.230
[Epoch: 19][#examples: 1280/81252][#steps: 24150]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 1.127e-05
[Epoch: 19][#examples: 4480/81252][#steps: 24200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr:

[Epoch: 20][#examples: 57600/81252][#steps: 26300]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 6.656e-06
[Epoch: 20][#examples: 60800/81252][#steps: 26350]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 6.656e-06
[Epoch: 20][#examples: 64000/81252][#steps: 26400]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 6.656e-06
[Epoch: 20][#examples: 67200/81252][#steps: 26450]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 6.656e-06
[Epoch: 20][#examples: 70400/81252][#steps: 26500]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 6.656e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 360/513

---------------------------------------
[Epoch: 20][Validatiing...]
	 Early Stopping Patience: 95/100
	 Val. Loss: 1.368 | Val. Acc: 0.665 | Val. PPL:   3.927
	 BEST. Val. Loss: 1.368 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
-----------------------------------

[Epoch: 22][#examples: 32640/81252][#steps: 28450]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 3.537e-06
[Epoch: 22][#examples: 35840/81252][#steps: 28500]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 3.537e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 359/513

---------------------------------------
[Epoch: 22][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.368 | Val. Acc: 0.665 | Val. PPL:   3.929
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 22][#examples: 39040/81252][#steps: 28550]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 3.183e-06
[Epoch: 22][#examples: 42240/81252][#steps: 28600]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 3.183e-06
[Epoch: 22][#examples: 45440/8125

[Epoch: 24][#examples: 4480/81252][#steps: 30550]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 1.692e-06
[Epoch: 24][#examples: 7680/81252][#steps: 30600]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 1.692e-06
[Epoch: 24][#examples: 10880/81252][#steps: 30650]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 1.692e-06
[Epoch: 24][#examples: 14080/81252][#steps: 30700]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 1.692e-06
[Epoch: 24][#examples: 17280/81252][#steps: 30750]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 1.692e-06
[Epoch: 24][#examples: 20480/81252][#steps: 30800]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 1.692e-06
[Epoch: 24][#examples: 23680/81252][#steps: 30850]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 1.692e-06
[Epoch: 24][#examples: 26880/81252][#steps: 30900]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 1.692e-06
[Epoch: 24][#examples: 30080/81252][#steps: 30950]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 1.692e-06
[Epoch: 24][#examples: 33280/81252][#st

[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 87/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.937
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 86/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.937
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux:

[Epoch: 27][#examples: 48640/81252][#steps: 35050]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 4.778e-07
[Epoch: 27][#examples: 51840/81252][#steps: 35100]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 4.778e-07
[Epoch: 27][#examples: 55040/81252][#steps: 35150]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 4.778e-07
[Epoch: 27][#examples: 58240/81252][#steps: 35200]
	Train Loss: 0.023 | Train PPL:   1.024 | lr: 4.778e-07
[Epoch: 27][#examples: 61440/81252][#steps: 35250]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 4.778e-07
[Epoch: 27][#examples: 64640/81252][#steps: 35300]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 4.778e-07
[Epoch: 27][#examples: 67840/81252][#steps: 35350]
	Train Loss: 0.023 | Train PPL:   1.024 | lr: 4.778e-07
[Epoch: 27][#examples: 71040/81252][#steps: 35400]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 4.778e-07
[Epoch: 27][#examples: 74240/81252][#steps: 35450]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 4.778e-07
[Epoch: 27][#examples: 77440/81252][#

[Epoch: 29][#examples: 26880/81252][#steps: 37250]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 2.539e-07
[Epoch: 29][#examples: 30080/81252][#steps: 37300]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.539e-07
[Epoch: 29][#examples: 33280/81252][#steps: 37350]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.539e-07
[Epoch: 29][#examples: 36480/81252][#steps: 37400]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.539e-07
[Epoch: 29][#examples: 39680/81252][#steps: 37450]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 2.539e-07
[Epoch: 29][#examples: 42880/81252][#steps: 37500]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.539e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 29][Validatiing...]
	 Early Stopping Patience: 74/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.937
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0

[Epoch: 31][#examples: 5120/81252][#steps: 39450]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.349e-07
[Epoch: 31][#examples: 8320/81252][#steps: 39500]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.349e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 68/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.936
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 31][#examples: 11520/81252][#steps: 39550]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 1.215e-07
[Epoch: 31][#examples: 14720/81252][#steps: 39600]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.215e-07
[Epoch: 31][#examples: 17920/81252]

[Epoch: 32][#examples: 67840/81252][#steps: 41700]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 32][#examples: 71040/81252][#steps: 41750]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 32][#examples: 74240/81252][#steps: 41800]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 32][#examples: 77440/81252][#steps: 41850]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 32][#examples: 80640/81252][#steps: 41900]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 32][Validatiing...]
	 Early Stopping Patience: 62/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.936
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Va

[Epoch: 34][#examples: 46080/81252][#steps: 43900]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 34][#examples: 49280/81252][#steps: 43950]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 34][#examples: 52480/81252][#steps: 44000]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 34][Validatiing...]
	 Early Stopping Patience: 56/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.936
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 34][#examples: 55680/81252][#steps: 44050]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 34][#examples: 58880/8125

[Epoch: 36][#examples: 21120/81252][#steps: 46050]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 36][#examples: 24320/81252][#steps: 46100]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 36][#examples: 27520/81252][#steps: 46150]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 36][#examples: 30720/81252][#steps: 46200]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 36][#examples: 33920/81252][#steps: 46250]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 36][#examples: 37120/81252][#steps: 46300]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 36][#examples: 40320/81252][#steps: 46350]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 36][#examples: 43520/81252][#steps: 46400]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 36][#examples: 46720/81252][#steps: 46450]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 36][#examples: 49920/81252][#

[Epoch: 38][#examples: 2560/81252][#steps: 48300]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 38][#examples: 5760/81252][#steps: 48350]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 38][#examples: 8960/81252][#steps: 48400]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 38][#examples: 12160/81252][#steps: 48450]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 38][#examples: 15360/81252][#steps: 48500]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 43/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.936
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
--------------------------------------

[Epoch: 39][#examples: 65280/81252][#steps: 50550]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 39][#examples: 68480/81252][#steps: 50600]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 39][#examples: 71680/81252][#steps: 50650]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 39][#examples: 74880/81252][#steps: 50700]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 39][#examples: 78080/81252][#steps: 50750]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 39][#examples: 81280/81252][#steps: 50800]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 39][Validatiing...]
	 Early Stopping Patience: 37/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.936
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Los

[Epoch: 41][#examples: 43520/81252][#steps: 52750]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 41][#examples: 46720/81252][#steps: 52800]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 41][#examples: 49920/81252][#steps: 52850]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 41][#examples: 53120/81252][#steps: 52900]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 41][#examples: 56320/81252][#steps: 52950]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 41][#examples: 59520/81252][#steps: 53000]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 41][Validatiing...]
	 Early Stopping Patience: 31/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.936
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0

[Epoch: 43][#examples: 21760/81252][#steps: 54950]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 43][#examples: 24960/81252][#steps: 55000]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 43][Validatiing...]
	 Early Stopping Patience: 25/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.936
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 43][#examples: 28160/81252][#steps: 55050]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 43][#examples: 31360/81252][#steps: 55100]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 43][#examples: 34560/8125

[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 44][Validatiing...]
	 Early Stopping Patience: 19/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.935
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

Epoch: 45 | Time: 2m 21s
	Train Loss: 0.024 | Train PPL:   1.025
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.935
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 45][#examples: 3200/81252][#steps: 57200]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 45][#examples: 6400/81252][#steps: 57250]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 45][#examples: 9600/81252][#steps: 57300]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 

[Epoch: 46][#examples: 62720/81252][#steps: 59400]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 46][#examples: 65920/81252][#steps: 59450]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 46][#examples: 69120/81252][#steps: 59500]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 46][Validatiing...]
	 Early Stopping Patience: 13/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.935
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.669 | Val. Loss: 1.379 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21500
---------------------------------------

	BEST. Val. Acc Aux: 0.712
---------------------------------------

[Epoch: 46][#examples: 72320/81252][#steps: 59550]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 46][#examples: 75520/8125

[Epoch: 48][#examples: 37760/81252][#steps: 61550]
	Train Loss: 0.023 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 48][#examples: 40960/81252][#steps: 61600]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 48][#examples: 44160/81252][#steps: 61650]
	Train Loss: 0.023 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 48][#examples: 47360/81252][#steps: 61700]
	Train Loss: 0.023 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 48][#examples: 50560/81252][#steps: 61750]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 48][#examples: 53760/81252][#steps: 61800]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 48][#examples: 56960/81252][#steps: 61850]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 48][#examples: 60160/81252][#steps: 61900]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 48][#examples: 63360/81252][#steps: 61950]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 48][#examples: 66560/81252][#

[Epoch: 50][#examples: 16000/81252][#steps: 63750]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 50][#examples: 19200/81252][#steps: 63800]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 50][#examples: 22400/81252][#steps: 63850]
	Train Loss: 0.023 | Train PPL:   1.024 | lr: 9.838e-08
[Epoch: 50][#examples: 25600/81252][#steps: 63900]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 50][#examples: 28800/81252][#steps: 63950]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 50][#examples: 32000/81252][#steps: 64000]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 341/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 50][Validatiing...]
	 Early Stopping Patience: 0/100
	 Val. Loss: 1.370 | Val. Acc: 0.665 | Val. PPL:   3.935
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.

[Epoch: 0][#examples: 35200/81252][#steps: 550]
	Train Loss: 2.116 | Train PPL:   8.298 | lr: 3.000e-03
[Epoch: 0][#examples: 38400/81252][#steps: 600]
	Train Loss: 1.993 | Train PPL:   7.335 | lr: 3.000e-03
[Epoch: 0][#examples: 41600/81252][#steps: 650]
	Train Loss: 1.885 | Train PPL:   6.585 | lr: 3.000e-03
[Epoch: 0][#examples: 44800/81252][#steps: 700]
	Train Loss: 1.793 | Train PPL:   6.006 | lr: 3.000e-03
[Epoch: 0][#examples: 48000/81252][#steps: 750]
	Train Loss: 1.711 | Train PPL:   5.533 | lr: 3.000e-03
[Epoch: 0][#examples: 51200/81252][#steps: 800]
	Train Loss: 1.636 | Train PPL:   5.137 | lr: 3.000e-03
[Epoch: 0][#examples: 54400/81252][#steps: 850]
	Train Loss: 1.567 | Train PPL:   4.793 | lr: 3.000e-03
[Epoch: 0][#examples: 57600/81252][#steps: 900]
	Train Loss: 1.507 | Train PPL:   4.511 | lr: 3.000e-03
[Epoch: 0][#examples: 60800/81252][#steps: 950]
	Train Loss: 1.451 | Train PPL:   4.269 | lr: 3.000e-03
[Epoch: 0][#examples: 64000/81252][#steps: 1000]
	Train Loss: 1.

[Epoch: 2][#examples: 640/81252][#steps: 2550]
	Train Loss: 0.247 | Train PPL:   1.280 | lr: 2.430e-03
[Epoch: 2][#examples: 3840/81252][#steps: 2600]
	Train Loss: 0.251 | Train PPL:   1.286 | lr: 2.430e-03
[Epoch: 2][#examples: 7040/81252][#steps: 2650]
	Train Loss: 0.264 | Train PPL:   1.302 | lr: 2.430e-03
[Epoch: 2][#examples: 10240/81252][#steps: 2700]
	Train Loss: 0.261 | Train PPL:   1.298 | lr: 2.430e-03
[Epoch: 2][#examples: 13440/81252][#steps: 2750]
	Train Loss: 0.263 | Train PPL:   1.301 | lr: 2.430e-03
[Epoch: 2][#examples: 16640/81252][#steps: 2800]
	Train Loss: 0.263 | Train PPL:   1.301 | lr: 2.430e-03
[Epoch: 2][#examples: 19840/81252][#steps: 2850]
	Train Loss: 0.264 | Train PPL:   1.302 | lr: 2.430e-03
[Epoch: 2][#examples: 23040/81252][#steps: 2900]
	Train Loss: 0.264 | Train PPL:   1.302 | lr: 2.430e-03
[Epoch: 2][#examples: 26240/81252][#steps: 2950]
	Train Loss: 0.265 | Train PPL:   1.304 | lr: 2.430e-03
[Epoch: 2][#examples: 29440/81252][#steps: 3000]
	Train Los

[Epoch: 3][#examples: 72960/81252][#steps: 4950]
	Train Loss: 0.180 | Train PPL:   1.197 | lr: 1.594e-03
[Epoch: 3][#examples: 76160/81252][#steps: 5000]
	Train Loss: 0.179 | Train PPL:   1.196 | lr: 1.594e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 342/513

---------------------------------------
[Epoch: 3][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.102 | Val. Acc: 0.653 | Val. PPL:   3.010
	 BEST. Val. Loss: 1.102 | BEST. Val. Acc: 0.653 | Val. Loss: 1.102 | BEST. Val. Epoch: 3 | BEST. Val. Step: 5000
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.667
---------------------------------------

[Epoch: 3][#examples: 79360/81252][#steps: 5050]
	Train Loss: 0.179 | Train PPL:   1.196 | lr: 1.594e-03
[VAL]: The numb

[Epoch: 5][#examples: 38400/81252][#steps: 6950]
	Train Loss: 0.093 | Train PPL:   1.097 | lr: 1.162e-03
[Epoch: 5][#examples: 41600/81252][#steps: 7000]
	Train Loss: 0.093 | Train PPL:   1.098 | lr: 1.162e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 316/513
[VAL]: The number of correct predictions (aux-task (multi)): 337/513

---------------------------------------
[Epoch: 5][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.283 | Val. Acc: 0.616 | Val. PPL:   3.608
	 BEST. Val. Loss: 1.137 | BEST. Val. Acc: 0.676 | Val. Loss: 1.137 | BEST. Val. Epoch: 5 | BEST. Val. Step: 6500
---------------------------------------

	BEST. Val. Acc Aux: 0.682
---------------------------------------

[Epoch: 5][#examples: 44800/81252][#steps: 7050]
	Train Loss: 0.093 | Train PPL:   1.097 | lr: 1.046e-03
[Epoch: 5][#examples: 48000/81252][#steps: 7100]
	Train Loss: 0.093 | Train PPL:   1.097 | lr: 1.046e-03
[Epoch: 5][#examples: 51200/81252][#steps: 7

[Epoch: 7][#examples: 7040/81252][#steps: 9000]
	Train Loss: 0.049 | Train PPL:   1.050 | lr: 6.177e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 354/513

---------------------------------------
[Epoch: 7][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.298 | Val. Acc: 0.663 | Val. PPL:   3.661
	 BEST. Val. Loss: 1.298 | BEST. Val. Acc: 0.663 | Val. Loss: 1.298 | BEST. Val. Epoch: 7 | BEST. Val. Step: 9000
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.690
---------------------------------------

[Epoch: 7][#examples: 10240/81252][#steps: 9050]
	Train Loss: 0.048 | Train PPL:   1.049 | lr: 5.559e-04
[Epoch: 7][#examples: 13440/81252][#steps: 9100]
	Train Loss: 0.047 | Train PPL:   1.049 | lr: 5.559e-04
[Epoch: 7][#exam

[Epoch: 8][#examples: 63360/81252][#steps: 11150]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 3.283e-04
[Epoch: 8][#examples: 66560/81252][#steps: 11200]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 3.283e-04
[Epoch: 8][#examples: 69760/81252][#steps: 11250]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 3.283e-04
[Epoch: 8][#examples: 72960/81252][#steps: 11300]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 3.283e-04
[Epoch: 8][#examples: 76160/81252][#steps: 11350]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 3.283e-04
[Epoch: 8][#examples: 79360/81252][#steps: 11400]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 3.283e-04
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 350/513

---------------------------------------
[Epoch: 8][Validatiing...]
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.374 | Val. Acc: 0.653 | Val. PPL:   3.950
	 BEST. Val. Loss: 1.369 | BEST. Val. Acc: 0.669 | Val. Loss: 1.36

[Epoch: 10][#examples: 32000/81252][#steps: 13200]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.744e-04
[Epoch: 10][#examples: 35200/81252][#steps: 13250]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.744e-04
[Epoch: 10][#examples: 38400/81252][#steps: 13300]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.744e-04
[Epoch: 10][#examples: 41600/81252][#steps: 13350]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.744e-04
[Epoch: 10][#examples: 44800/81252][#steps: 13400]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 1.744e-04
[Epoch: 10][#examples: 48000/81252][#steps: 13450]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 1.744e-04
[Epoch: 10][#examples: 51200/81252][#steps: 13500]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 1.744e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 336/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 10][Validatiing...]
		 Better Valid Loss! (at le

[Epoch: 12][#examples: 640/81252][#steps: 15250]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.271e-05
[Epoch: 12][#examples: 3840/81252][#steps: 15300]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.271e-05
[Epoch: 12][#examples: 7040/81252][#steps: 15350]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 9.271e-05
[Epoch: 12][#examples: 10240/81252][#steps: 15400]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 9.271e-05
[Epoch: 12][#examples: 13440/81252][#steps: 15450]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.271e-05
[Epoch: 12][#examples: 16640/81252][#steps: 15500]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 9.271e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 337/513
[VAL]: The number of correct predictions (aux-task (multi)): 356/513

---------------------------------------
[Epoch: 12][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.415 | Val. A

[Epoch: 13][#examples: 66560/81252][#steps: 17550]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 4.927e-05
[Epoch: 13][#examples: 69760/81252][#steps: 17600]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 4.927e-05
[Epoch: 13][#examples: 72960/81252][#steps: 17650]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 4.927e-05
[Epoch: 13][#examples: 76160/81252][#steps: 17700]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 4.927e-05
[Epoch: 13][#examples: 79360/81252][#steps: 17750]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 4.927e-05
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 355/513

---------------------------------------
[Epoch: 13][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.419 | Val. Acc: 0.653 | Val. PPL:   4.135
	 BEST. Val. Loss: 1.419 | BEST. Val. Acc: 0.653 | Val. Loss: 1.419 | BEST. Val. Epoch: 

[Epoch: 15][#examples: 32000/81252][#steps: 19550]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.618e-05
[Epoch: 15][#examples: 35200/81252][#steps: 19600]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.618e-05
[Epoch: 15][#examples: 38400/81252][#steps: 19650]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.618e-05
[Epoch: 15][#examples: 41600/81252][#steps: 19700]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.618e-05
[Epoch: 15][#examples: 44800/81252][#steps: 19750]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.618e-05
[Epoch: 15][#examples: 48000/81252][#steps: 19800]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.618e-05
[Epoch: 15][#examples: 51200/81252][#steps: 19850]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 2.618e-05
[Epoch: 15][#examples: 54400/81252][#steps: 19900]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.618e-05
[Epoch: 15][#examples: 57600/81252][#steps: 19950]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.618e-05
[Epoch: 15][#examples: 60800/81252][#

[Epoch: 17][#examples: 640/81252][#steps: 21600]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 1.392e-05
[Epoch: 17][#examples: 3840/81252][#steps: 21650]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 1.392e-05
[Epoch: 17][#examples: 7040/81252][#steps: 21700]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 1.392e-05
[Epoch: 17][#examples: 10240/81252][#steps: 21750]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 1.392e-05
[Epoch: 17][#examples: 13440/81252][#steps: 21800]
	Train Loss: 0.024 | Train PPL:   1.024 | lr: 1.392e-05
[Epoch: 17][#examples: 16640/81252][#steps: 21850]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 1.392e-05
[Epoch: 17][#examples: 19840/81252][#steps: 21900]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 1.392e-05
[Epoch: 17][#examples: 23040/81252][#steps: 21950]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 1.392e-05
[Epoch: 17][#examples: 26240/81252][#steps: 22000]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 1.392e-05
-----Val------
[VAL]: The number of corre

[Epoch: 18][#examples: 72960/81252][#steps: 24000]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 8.217e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 359/513

---------------------------------------
[Epoch: 18][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 91/100
	 Val. Loss: 1.425 | Val. Acc: 0.651 | Val. PPL:   4.159
	 BEST. Val. Loss: 1.419 | BEST. Val. Acc: 0.651 | Val. Loss: 1.425 | BEST. Val. Epoch: 18 | BEST. Val. Step: 24000
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 18][#examples: 76160/81252][#steps: 24050]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 7.395e-06
[Epoch: 18][#examples: 79360/81252][#steps: 24100]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 7.395e-06
[VAL]: The number of correct predictions (main

[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 20][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 85/100
	 Val. Loss: 1.426 | Val. Acc: 0.653 | Val. PPL:   4.163
	 BEST. Val. Loss: 1.419 | BEST. Val. Acc: 0.653 | Val. Loss: 1.426 | BEST. Val. Epoch: 20 | BEST. Val. Step: 26000
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 20][#examples: 41600/81252][#steps: 26050]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 3.930e-06
[Epoch: 20][#examples: 44800/81252][#steps: 26100]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 3.930e-06
[Epoch: 20][#examples: 48000/81252][#steps: 26150]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 3.930e-06
[Epoch: 20][#examples: 51200/81252][#steps: 26200]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 3.930e-06
[Epoch: 20

	 Early Stopping Patience: 79/100
	 Val. Loss: 1.420 | Val. Acc: 0.653 | Val. PPL:   4.139
	 BEST. Val. Loss: 1.419 | BEST. Val. Acc: 0.653 | Val. Loss: 1.420 | BEST. Val. Epoch: 22 | BEST. Val. Step: 28000
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 22][#examples: 7040/81252][#steps: 28050]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 2.089e-06
[Epoch: 22][#examples: 10240/81252][#steps: 28100]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 2.089e-06
[Epoch: 22][#examples: 13440/81252][#steps: 28150]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 2.089e-06
[Epoch: 22][#examples: 16640/81252][#steps: 28200]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 2.089e-06
[Epoch: 22][#examples: 19840/81252][#steps: 28250]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 2.089e-06
[Epoch: 22][#examples: 23040/81252][#steps: 28300]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 2.089e-06
[Epoch: 22][#examples: 26240/81252][#steps:

[Epoch: 23][#examples: 72960/81252][#steps: 30350]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.233e-06
[Epoch: 23][#examples: 76160/81252][#steps: 30400]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 1.233e-06
[Epoch: 23][#examples: 79360/81252][#steps: 30450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.233e-06
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 23][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 73/100
	 Val. Loss: 1.420 | Val. Acc: 0.653 | Val. PPL:   4.136
	 BEST. Val. Loss: 1.419 | BEST. Val. Acc: 0.653 | Val. Loss: 1.420 | BEST. Val. Epoch: 23 | BEST. Val. Step: 30480
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

Epoch: 24 | Time: 2m 24s
	Train Loss: 0.028 | Train PPL:   1.028
	 Val. Loss: 1.420 | Val. Acc: 0.653 | Val. PPL:   4

[Epoch: 25][#examples: 41600/81252][#steps: 32400]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 6.554e-07
[Epoch: 25][#examples: 44800/81252][#steps: 32450]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 6.554e-07
[Epoch: 25][#examples: 48000/81252][#steps: 32500]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 6.554e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 25][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.419 | Val. Acc: 0.653 | Val. PPL:   4.134
	 BEST. Val. Loss: 1.419 | BEST. Val. Acc: 0.653 | Val. Loss: 1.419 | BEST. Val. Epoch: 25 | BEST. Val. Step: 32500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 25][#examples: 51200/81252][#steps: 32550]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 5.899

[Epoch: 27][#examples: 10240/81252][#steps: 34450]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 3.483e-07
[Epoch: 27][#examples: 13440/81252][#steps: 34500]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 3.483e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 27][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.419 | Val. Acc: 0.653 | Val. PPL:   4.132
	 BEST. Val. Loss: 1.419 | BEST. Val. Acc: 0.653 | Val. Loss: 1.419 | BEST. Val. Epoch: 27 | BEST. Val. Step: 34500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 27][#examples: 16640/81252][#steps: 34550]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 3.135e-07
[Epoch: 27][#examples: 19840/81252][#steps: 34600]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 3.135

[Epoch: 28][#examples: 63360/81252][#steps: 36550]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.851e-07
[Epoch: 28][#examples: 66560/81252][#steps: 36600]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.851e-07
[Epoch: 28][#examples: 69760/81252][#steps: 36650]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.851e-07
[Epoch: 28][#examples: 72960/81252][#steps: 36700]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 1.851e-07
[Epoch: 28][#examples: 76160/81252][#steps: 36750]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.851e-07
[Epoch: 28][#examples: 79360/81252][#steps: 36800]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 1.851e-07
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 28][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.419 | Val. Acc: 0.653 |

[Epoch: 30][#examples: 28800/81252][#steps: 38550]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 30][#examples: 32000/81252][#steps: 38600]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 30][#examples: 35200/81252][#steps: 38650]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 30][#examples: 38400/81252][#steps: 38700]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 30][#examples: 41600/81252][#steps: 38750]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 30][#examples: 44800/81252][#steps: 38800]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 30][#examples: 48000/81252][#steps: 38850]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 30][#examples: 51200/81252][#steps: 38900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 30][#examples: 54400/81252][#steps: 38950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 30][#examples: 57600/81252][#

[Epoch: 32][#examples: 640/81252][#steps: 40650]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.838e-08
[Epoch: 32][#examples: 3840/81252][#steps: 40700]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 32][#examples: 7040/81252][#steps: 40750]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 32][#examples: 10240/81252][#steps: 40800]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 32][#examples: 13440/81252][#steps: 40850]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 32][#examples: 16640/81252][#steps: 40900]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 32][#examples: 19840/81252][#steps: 40950]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 32][#examples: 23040/81252][#steps: 41000]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 35

[Epoch: 33][#examples: 69760/81252][#steps: 43000]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 33][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.419 | Val. Acc: 0.653 | Val. PPL:   4.131
	 BEST. Val. Loss: 1.419 | BEST. Val. Acc: 0.653 | Val. Loss: 1.419 | BEST. Val. Epoch: 33 | BEST. Val. Step: 43000
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 33][#examples: 72960/81252][#steps: 43050]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 33][#examples: 76160/81252][#steps: 43100]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 33][#examples: 79360/81252][#steps: 43150]
	Train Loss

[Epoch: 35][#examples: 35200/81252][#steps: 45000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 335/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 35][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.418 | Val. Acc: 0.653 | Val. PPL:   4.131
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45000
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 35][#examples: 38400/81252][#steps: 45050]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 35][#examples: 41600/81252][#steps: 45100]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 35][#examples: 44800/81252][#steps: 45150]
	Train Loss

[Epoch: 37][#examples: 3840/81252][#steps: 47050]
	Train Loss: 0.033 | Train PPL:   1.034 | lr: 9.838e-08
[Epoch: 37][#examples: 7040/81252][#steps: 47100]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 37][#examples: 10240/81252][#steps: 47150]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 37][#examples: 13440/81252][#steps: 47200]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 37][#examples: 16640/81252][#steps: 47250]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 37][#examples: 19840/81252][#steps: 47300]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 37][#examples: 23040/81252][#steps: 47350]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 37][#examples: 26240/81252][#steps: 47400]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 37][#examples: 29440/81252][#steps: 47450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 37][#examples: 32640/81252][#st

[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 89/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.131
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 88/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.131
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux:

[Epoch: 40][#examples: 48000/81252][#steps: 51550]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 40][#examples: 51200/81252][#steps: 51600]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 40][#examples: 54400/81252][#steps: 51650]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 40][#examples: 57600/81252][#steps: 51700]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 40][#examples: 60800/81252][#steps: 51750]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 40][#examples: 64000/81252][#steps: 51800]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 40][#examples: 67200/81252][#steps: 51850]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 40][#examples: 70400/81252][#steps: 51900]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 40][#examples: 73600/81252][#steps: 51950]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 40][#examples: 76800/81252][#

[Epoch: 42][#examples: 26240/81252][#steps: 53750]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 42][#examples: 29440/81252][#steps: 53800]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 42][#examples: 32640/81252][#steps: 53850]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 42][#examples: 35840/81252][#steps: 53900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 42][#examples: 39040/81252][#steps: 53950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 42][#examples: 42240/81252][#steps: 54000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 42][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.130
	 BE

[Epoch: 44][#examples: 1280/81252][#steps: 55900]
	Train Loss: 0.030 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 44][#examples: 4480/81252][#steps: 55950]
	Train Loss: 0.035 | Train PPL:   1.035 | lr: 9.838e-08
[Epoch: 44][#examples: 7680/81252][#steps: 56000]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 44][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.130
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 44][#examples: 10880/81252][#steps: 56050]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e

[Epoch: 45][#examples: 64000/81252][#steps: 58150]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 45][#examples: 67200/81252][#steps: 58200]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 45][#examples: 70400/81252][#steps: 58250]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 45][#examples: 73600/81252][#steps: 58300]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 45][#examples: 76800/81252][#steps: 58350]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 45][#examples: 80000/81252][#steps: 58400]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 45][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.130
	 BEST. Val. Loss: 

[Epoch: 47][#examples: 35840/81252][#steps: 60250]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 47][#examples: 39040/81252][#steps: 60300]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 47][#examples: 42240/81252][#steps: 60350]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 47][#examples: 45440/81252][#steps: 60400]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 47][#examples: 48640/81252][#steps: 60450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 47][#examples: 51840/81252][#steps: 60500]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 47][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.130
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0

[Epoch: 49][#examples: 10880/81252][#steps: 62400]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 49][#examples: 14080/81252][#steps: 62450]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 49][#examples: 17280/81252][#steps: 62500]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 49][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.130
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 49][#examples: 20480/81252][#steps: 62550]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.8

[Epoch: 50][#examples: 70400/81252][#steps: 64600]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 50][#examples: 73600/81252][#steps: 64650]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 50][#examples: 76800/81252][#steps: 64700]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 50][#examples: 80000/81252][#steps: 64750]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 50][Validatiing...]
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.419 | Val. Acc: 0.651 | Val. PPL:   4.131
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

Epoch: 51 | Time: 2m 22s
	Train Loss: 0.028 | Tr

[Epoch: 52][#examples: 45440/81252][#steps: 66750]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 52][#examples: 48640/81252][#steps: 66800]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 52][#examples: 51840/81252][#steps: 66850]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 52][#examples: 55040/81252][#steps: 66900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 52][#examples: 58240/81252][#steps: 66950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 52][#examples: 61440/81252][#steps: 67000]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 52][Validatiing...]
	 Early Stopping Patience: 96/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.131
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0

[Epoch: 54][#examples: 23680/81252][#steps: 68950]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 54][#examples: 26880/81252][#steps: 69000]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 54][Validatiing...]
	 Early Stopping Patience: 90/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.131
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 54][#examples: 30080/81252][#steps: 69050]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 9.838e-08
[Epoch: 54][#examples: 33280/81252][#steps: 69100]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 54][#examples: 36480/8125

[Epoch: 56][#examples: 1920/81252][#steps: 71150]
	Train Loss: 0.040 | Train PPL:   1.040 | lr: 9.838e-08
[Epoch: 56][#examples: 5120/81252][#steps: 71200]
	Train Loss: 0.033 | Train PPL:   1.033 | lr: 9.838e-08
[Epoch: 56][#examples: 8320/81252][#steps: 71250]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 56][#examples: 11520/81252][#steps: 71300]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 56][#examples: 14720/81252][#steps: 71350]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 56][#examples: 17920/81252][#steps: 71400]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 56][#examples: 21120/81252][#steps: 71450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 56][#examples: 24320/81252][#steps: 71500]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 3

[Epoch: 57][#examples: 74240/81252][#steps: 73550]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 57][#examples: 77440/81252][#steps: 73600]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 57][#examples: 80640/81252][#steps: 73650]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 57][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

Epoch: 58 | Time: 2m 22s
	Train Loss: 0.029 | Train PPL:   1.029
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
[Train]: Current Teacher Forcing 

[Epoch: 59][#examples: 49280/81252][#steps: 75700]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 59][#examples: 52480/81252][#steps: 75750]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 59][#examples: 55680/81252][#steps: 75800]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 59][#examples: 58880/81252][#steps: 75850]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 59][#examples: 62080/81252][#steps: 75900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 59][#examples: 65280/81252][#steps: 75950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 59][#examples: 68480/81252][#steps: 76000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 59][Validatiing...]
	 Early Stopping Patience: 9

[Epoch: 61][#examples: 24320/81252][#steps: 77850]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 61][#examples: 27520/81252][#steps: 77900]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 61][#examples: 30720/81252][#steps: 77950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 61][#examples: 33920/81252][#steps: 78000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 61][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
----------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 62][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

Epoch: 63 | Time: 2m 24s
	Train Loss: 0.028 | Train PPL:   1.029
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 63][#examples: 2560/81252][#steps: 80050]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 63][#examples: 5760/81252][#steps: 80100]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 63][#examples: 8960/81252][#steps: 80150]
	Train

[Epoch: 64][#examples: 55680/81252][#steps: 82150]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 64][#examples: 58880/81252][#steps: 82200]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 64][#examples: 62080/81252][#steps: 82250]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 64][#examples: 65280/81252][#steps: 82300]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 64][#examples: 68480/81252][#steps: 82350]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 64][#examples: 71680/81252][#steps: 82400]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 64][#examples: 74880/81252][#steps: 82450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 64][#examples: 78080/81252][#steps: 82500]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi))

[Epoch: 66][#examples: 33920/81252][#steps: 84350]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 66][#examples: 37120/81252][#steps: 84400]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 66][#examples: 40320/81252][#steps: 84450]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 66][#examples: 43520/81252][#steps: 84500]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 66][Validatiing...]
	 Early Stopping Patience: 94/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.129
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 66][#examples: 46720/8125

[Epoch: 68][#examples: 8960/81252][#steps: 86500]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 68][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.127
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 68][#examples: 12160/81252][#steps: 86550]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 68][#examples: 15360/81252][#steps: 86600]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 68][#examples: 18560/81252][#steps: 86650]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.83

[Epoch: 69][#examples: 71680/81252][#steps: 88750]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 69][#examples: 74880/81252][#steps: 88800]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 69][#examples: 78080/81252][#steps: 88850]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 69][#examples: 81280/81252][#steps: 88900]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 69][Validatiing...]
	 Early Stopping Patience: 94/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.129
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

Epoch: 70 | Time: 2m 21s
	Train Loss: 0.027 | Tr

[Epoch: 71][#examples: 49920/81252][#steps: 90950]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 71][#examples: 53120/81252][#steps: 91000]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 71][Validatiing...]
	 Early Stopping Patience: 88/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 71][#examples: 56320/81252][#steps: 91050]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 71][#examples: 59520/81252][#steps: 91100]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 71][#examples: 62720/8125

[Epoch: 73][#examples: 21760/81252][#steps: 93050]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 73][#examples: 24960/81252][#steps: 93100]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 73][#examples: 28160/81252][#steps: 93150]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 73][#examples: 31360/81252][#steps: 93200]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 73][#examples: 34560/81252][#steps: 93250]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 73][#examples: 37760/81252][#steps: 93300]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 73][#examples: 40960/81252][#steps: 93350]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 73][#examples: 44160/81252][#steps: 93400]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 73][#examples: 47360/81252][#steps: 93450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 73][#examples: 50560/81252][#

[Epoch: 75][#examples: 3200/81252][#steps: 95300]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 75][#examples: 6400/81252][#steps: 95350]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 75][#examples: 9600/81252][#steps: 95400]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 75][#examples: 12800/81252][#steps: 95450]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 75][#examples: 16000/81252][#steps: 95500]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 75][Validatiing...]
	 Early Stopping Patience: 75/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
--------------------------------------

[Epoch: 76][#examples: 65920/81252][#steps: 97550]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 76][#examples: 69120/81252][#steps: 97600]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 76][#examples: 72320/81252][#steps: 97650]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 76][#examples: 75520/81252][#steps: 97700]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 76][#examples: 78720/81252][#steps: 97750]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 76][Validatiing...]
	 Early Stopping Patience: 69/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Va

[Epoch: 78][#examples: 44160/81252][#steps: 99750]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 78][#examples: 47360/81252][#steps: 99800]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 78][#examples: 50560/81252][#steps: 99850]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 78][#examples: 53760/81252][#steps: 99900]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 78][#examples: 56960/81252][#steps: 99950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 78][#examples: 60160/81252][#steps: 100000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 78][Validatiing...]
	 Early Stopping Patience: 63/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 

[Epoch: 80][#examples: 22400/81252][#steps: 101950]
	Train Loss: 0.026 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 80][#examples: 25600/81252][#steps: 102000]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 80][Validatiing...]
	 Early Stopping Patience: 57/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.128
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 80][#examples: 28800/81252][#steps: 102050]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 80][#examples: 32000/81252][#steps: 102100]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 80][#examples: 35200/

[Epoch: 82][#examples: 640/81252][#steps: 104150]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 9.838e-08
[Epoch: 82][#examples: 3840/81252][#steps: 104200]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 82][#examples: 7040/81252][#steps: 104250]
	Train Loss: 0.032 | Train PPL:   1.033 | lr: 9.838e-08
[Epoch: 82][#examples: 10240/81252][#steps: 104300]
	Train Loss: 0.031 | Train PPL:   1.031 | lr: 9.838e-08
[Epoch: 82][#examples: 13440/81252][#steps: 104350]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 82][#examples: 16640/81252][#steps: 104400]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
[Epoch: 82][#examples: 19840/81252][#steps: 104450]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 82][#examples: 23040/81252][#steps: 104500]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (mul

[Epoch: 83][#examples: 72960/81252][#steps: 106550]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 83][#examples: 76160/81252][#steps: 106600]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 83][#examples: 79360/81252][#steps: 106650]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 83][Validatiing...]
	 Early Stopping Patience: 44/100
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.127
	 BEST. Val. Loss: 1.418 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

Epoch: 84 | Time: 2m 24s
	Train Loss: 0.028 | Train PPL:   1.029
	 Val. Loss: 1.418 | Val. Acc: 0.651 | Val. PPL:   4.127
[Train]: Current Teacher Forci

[Epoch: 85][#examples: 48000/81252][#steps: 108700]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 85][#examples: 51200/81252][#steps: 108750]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 85][#examples: 54400/81252][#steps: 108800]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 85][#examples: 57600/81252][#steps: 108850]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 85][#examples: 60800/81252][#steps: 108900]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 85][#examples: 64000/81252][#steps: 108950]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 85][#examples: 67200/81252][#steps: 109000]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 85][Validatiing...]
		 Better Valid Loss!

[Epoch: 87][#examples: 23040/81252][#steps: 110850]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 87][#examples: 26240/81252][#steps: 110900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 87][#examples: 29440/81252][#steps: 110950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 87][#examples: 32640/81252][#steps: 111000]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 87][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.417 | Val. Acc: 0.651 | Val. PPL:   4.127
	 BEST. Val. Loss: 1.417 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 88][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.417 | Val. Acc: 0.651 | Val. PPL:   4.126
	 BEST. Val. Loss: 1.417 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

Epoch: 89 | Time: 2m 22s
	Train Loss: 0.028 | Train PPL:   1.028
	 Val. Loss: 1.417 | Val. Acc: 0.651 | Val. PPL:   4.126
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 89][#examples: 1280/81252][#steps: 113050]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 89][#examples: 4480/81252][#steps: 113100]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 89][#examples: 7680/81252][#steps: 113150]
	Train Loss: 0.025 | Train PPL:   1.025 | l

[Epoch: 90][#examples: 57600/81252][#steps: 115200]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 90][#examples: 60800/81252][#steps: 115250]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 90][#examples: 64000/81252][#steps: 115300]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 90][#examples: 67200/81252][#steps: 115350]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 90][#examples: 70400/81252][#steps: 115400]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 90][#examples: 73600/81252][#steps: 115450]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 90][#examples: 76800/81252][#steps: 115500]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 90][Validatiing...]
		 Better Valid Loss!

[Epoch: 92][#examples: 29440/81252][#steps: 117300]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 92][#examples: 32640/81252][#steps: 117350]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 92][#examples: 35840/81252][#steps: 117400]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 92][#examples: 39040/81252][#steps: 117450]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 92][#examples: 42240/81252][#steps: 117500]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 92][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.417 | Val. Acc: 0.651 | Val. PPL:   4.126
	 BEST. Val. Loss: 1.417 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
------------------------------

[Epoch: 94][#examples: 1280/81252][#steps: 119400]
	Train Loss: 0.032 | Train PPL:   1.032 | lr: 9.838e-08
[Epoch: 94][#examples: 4480/81252][#steps: 119450]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 94][#examples: 7680/81252][#steps: 119500]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 94][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.417 | Val. Acc: 0.651 | Val. PPL:   4.126
	 BEST. Val. Loss: 1.417 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 94][#examples: 10880/81252][#steps: 119550]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 94][#examples: 14080/812

[Epoch: 95][#examples: 57600/81252][#steps: 121550]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 95][#examples: 60800/81252][#steps: 121600]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 95][#examples: 64000/81252][#steps: 121650]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 95][#examples: 67200/81252][#steps: 121700]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 95][#examples: 70400/81252][#steps: 121750]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 95][#examples: 73600/81252][#steps: 121800]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 95][#examples: 76800/81252][#steps: 121850]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[Epoch: 95][#examples: 80000/81252][#steps: 121900]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/5

[Epoch: 97][#examples: 32640/81252][#steps: 123700]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 9.838e-08
[Epoch: 97][#examples: 35840/81252][#steps: 123750]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 97][#examples: 39040/81252][#steps: 123800]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 97][#examples: 42240/81252][#steps: 123850]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 97][#examples: 45440/81252][#steps: 123900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 97][#examples: 48640/81252][#steps: 123950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 97][#examples: 51840/81252][#steps: 124000]
	Train Loss: 0.028 | Train PPL:   1.029 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 97][Validatiing...]
		 Better Valid Loss!

[Epoch: 99][#examples: 7680/81252][#steps: 125850]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 99][#examples: 10880/81252][#steps: 125900]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 99][#examples: 14080/81252][#steps: 125950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
[Epoch: 99][#examples: 17280/81252][#steps: 126000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 357/513

---------------------------------------
[Epoch: 99][Validatiing...]
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.417 | Val. Acc: 0.651 | Val. PPL:   4.125
	 BEST. Val. Loss: 1.417 | BEST. Val. Acc: 0.653 | Val. Loss: 1.418 | BEST. Val. Epoch: 35 | BEST. Val. Step: 45500
---------------------------------------

	BEST. Val. Acc Aux: 0.700
---------------------------------------

[Epoch: 99][#examples: 20480/8

[Epoch: 0][#examples: 22400/81252][#steps: 350]
	Train Loss: 2.514 | Train PPL:  12.351 | lr: 3.000e-03
[Epoch: 0][#examples: 25600/81252][#steps: 400]
	Train Loss: 2.317 | Train PPL:  10.141 | lr: 3.000e-03
[Epoch: 0][#examples: 28800/81252][#steps: 450]
	Train Loss: 2.146 | Train PPL:   8.553 | lr: 3.000e-03
[Epoch: 0][#examples: 32000/81252][#steps: 500]
	Train Loss: 2.004 | Train PPL:   7.417 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 239/513
[VAL]: The number of correct predictions (aux-task (multi)): 173/513

---------------------------------------
[Epoch: 0][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.585 | Val. Acc: 0.466 | Val. PPL:   4.879
	 BEST. Val. Loss: 1.585 | BEST. Val. Acc: 0.466 | Val. Loss: 1.585 | BEST. Val. Epoch: 0 | BEST. Val. Step: 500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! 

	 Early Stopping Patience: 98/100
	 Val. Loss: 1.318 | Val. Acc: 0.628 | Val. PPL:   3.737
	 BEST. Val. Loss: 1.301 | BEST. Val. Acc: 0.628 | Val. Loss: 1.318 | BEST. Val. Epoch: 1 | BEST. Val. Step: 2500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.612
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 315/513
[VAL]: The number of correct predictions (aux-task (multi)): 321/513

---------------------------------------
[Epoch: 1][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.301 | Val. Acc: 0.614 | Val. PPL:   3.673
	 BEST. Val. Loss: 1.301 | BEST. Val. Acc: 0.628 | Val. Loss: 1.318 | BEST. Val. Epoch: 1 | BEST. Val. Step: 2500
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.626
---------------------------------------

Epoch: 02 | T

	 Early Stopping Patience: 99/100
	 Val. Loss: 1.393 | Val. Acc: 0.643 | Val. PPL:   4.026
	 BEST. Val. Loss: 1.124 | BEST. Val. Acc: 0.643 | Val. Loss: 1.393 | BEST. Val. Epoch: 3 | BEST. Val. Step: 4500
---------------------------------------

	BEST. Val. Acc Aux: 0.663
---------------------------------------

[Epoch: 3][#examples: 47360/81252][#steps: 4550]
	Train Loss: 0.162 | Train PPL:   1.176 | lr: 1.771e-03
[Epoch: 3][#examples: 50560/81252][#steps: 4600]
	Train Loss: 0.162 | Train PPL:   1.176 | lr: 1.771e-03
[Epoch: 3][#examples: 53760/81252][#steps: 4650]
	Train Loss: 0.162 | Train PPL:   1.176 | lr: 1.771e-03
[Epoch: 3][#examples: 56960/81252][#steps: 4700]
	Train Loss: 0.163 | Train PPL:   1.177 | lr: 1.771e-03
[Epoch: 3][#examples: 60160/81252][#steps: 4750]
	Train Loss: 0.162 | Train PPL:   1.176 | lr: 1.771e-03
[Epoch: 3][#examples: 63360/81252][#steps: 4800]
	Train Loss: 0.162 | Train PPL:   1.176 | lr: 1.771e-03
[Epoch: 3][#examples: 66560/81252][#steps: 4850]
	Train 

[Epoch: 5][#examples: 12800/81252][#steps: 6550]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.046e-03
[Epoch: 5][#examples: 16000/81252][#steps: 6600]
	Train Loss: 0.074 | Train PPL:   1.077 | lr: 1.046e-03
[Epoch: 5][#examples: 19200/81252][#steps: 6650]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.046e-03
[Epoch: 5][#examples: 22400/81252][#steps: 6700]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.046e-03
[Epoch: 5][#examples: 25600/81252][#steps: 6750]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.046e-03
[Epoch: 5][#examples: 28800/81252][#steps: 6800]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.046e-03
[Epoch: 5][#examples: 32000/81252][#steps: 6850]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.046e-03
[Epoch: 5][#examples: 35200/81252][#steps: 6900]
	Train Loss: 0.073 | Train PPL:   1.076 | lr: 1.046e-03
[Epoch: 5][#examples: 38400/81252][#steps: 6950]
	Train Loss: 0.074 | Train PPL:   1.076 | lr: 1.046e-03
[Epoch: 5][#examples: 41600/81252][#steps: 7000]
	Train

[Epoch: 7][#examples: 640/81252][#steps: 8900]
	Train Loss: 0.031 | Train PPL:   1.032 | lr: 6.863e-04
[Epoch: 7][#examples: 3840/81252][#steps: 8950]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 6.863e-04
[Epoch: 7][#examples: 7040/81252][#steps: 9000]
	Train Loss: 0.043 | Train PPL:   1.044 | lr: 6.863e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 347/513
[VAL]: The number of correct predictions (aux-task (multi)): 343/513

---------------------------------------
[Epoch: 7][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.487 | Val. Acc: 0.676 | Val. PPL:   4.424
	 BEST. Val. Loss: 1.487 | BEST. Val. Acc: 0.676 | Val. Loss: 1.487 | BEST. Val. Epoch: 7 | BEST. Val. Step: 9000
---------------------------------------

	BEST. Val. Acc Aux: 0.678
---------------------------------------

[Epoch: 7][#examples: 10240/81252][#steps: 9050]
	Train Loss: 0.043 | Train

[Epoch: 8][#examples: 56960/81252][#steps: 11050]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 4.503e-04
[Epoch: 8][#examples: 60160/81252][#steps: 11100]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 4.503e-04
[Epoch: 8][#examples: 63360/81252][#steps: 11150]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 4.503e-04
[Epoch: 8][#examples: 66560/81252][#steps: 11200]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 4.503e-04
[Epoch: 8][#examples: 69760/81252][#steps: 11250]
	Train Loss: 0.039 | Train PPL:   1.039 | lr: 4.503e-04
[Epoch: 8][#examples: 72960/81252][#steps: 11300]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 4.503e-04
[Epoch: 8][#examples: 76160/81252][#steps: 11350]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 4.503e-04
[Epoch: 8][#examples: 79360/81252][#steps: 11400]
	Train Loss: 0.038 | Train PPL:   1.039 | lr: 4.503e-04
[VAL]: The number of correct predictions (main-task (multi)): 339/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

------------

[Epoch: 10][#examples: 25600/81252][#steps: 13100]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.393e-04
[Epoch: 10][#examples: 28800/81252][#steps: 13150]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.393e-04
[Epoch: 10][#examples: 32000/81252][#steps: 13200]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.393e-04
[Epoch: 10][#examples: 35200/81252][#steps: 13250]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.393e-04
[Epoch: 10][#examples: 38400/81252][#steps: 13300]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.393e-04
[Epoch: 10][#examples: 41600/81252][#steps: 13350]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.393e-04
[Epoch: 10][#examples: 44800/81252][#steps: 13400]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.393e-04
[Epoch: 10][#examples: 48000/81252][#steps: 13450]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.393e-04
[Epoch: 10][#examples: 51200/81252][#steps: 13500]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.393e-04
-----Val------
[VAL]: The number of c

[Epoch: 12][#examples: 640/81252][#steps: 15250]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.272e-04
[Epoch: 12][#examples: 3840/81252][#steps: 15300]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.272e-04
[Epoch: 12][#examples: 7040/81252][#steps: 15350]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.272e-04
[Epoch: 12][#examples: 10240/81252][#steps: 15400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.272e-04
[Epoch: 12][#examples: 13440/81252][#steps: 15450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.272e-04
[Epoch: 12][#examples: 16640/81252][#steps: 15500]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.272e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 12][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.338 | Val. A

[Epoch: 13][#examples: 66560/81252][#steps: 17550]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 6.759e-05
[Epoch: 13][#examples: 69760/81252][#steps: 17600]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 6.759e-05
[Epoch: 13][#examples: 72960/81252][#steps: 17650]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 6.759e-05
[Epoch: 13][#examples: 76160/81252][#steps: 17700]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 6.759e-05
[Epoch: 13][#examples: 79360/81252][#steps: 17750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 6.759e-05
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 345/513

---------------------------------------
[Epoch: 13][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.288 | Val. Acc: 0.673 | Val. PPL:   3.627
	 BEST. Val. Loss: 1.288 | BEST. Val. Acc: 0.673 | Val. Loss: 1.288 | BEST. Val. Epoch: 

[Epoch: 15][#examples: 35200/81252][#steps: 19600]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 3.592e-05
[Epoch: 15][#examples: 38400/81252][#steps: 19650]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 41600/81252][#steps: 19700]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 3.592e-05
[Epoch: 15][#examples: 44800/81252][#steps: 19750]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 48000/81252][#steps: 19800]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 51200/81252][#steps: 19850]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 54400/81252][#steps: 19900]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 57600/81252][#steps: 19950]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 60800/81252][#steps: 20000]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
-----Val------
[VAL]: The number of c

[Epoch: 17][#examples: 7040/81252][#steps: 21700]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 1.909e-05
[Epoch: 17][#examples: 10240/81252][#steps: 21750]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 1.909e-05
[Epoch: 17][#examples: 13440/81252][#steps: 21800]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.909e-05
[Epoch: 17][#examples: 16640/81252][#steps: 21850]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.909e-05
[Epoch: 17][#examples: 19840/81252][#steps: 21900]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.909e-05
[Epoch: 17][#examples: 23040/81252][#steps: 21950]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.909e-05
[Epoch: 17][#examples: 26240/81252][#steps: 22000]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.909e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 343/513

---------------------------------------
[Epoch: 17][Validatiing...]
	 Early Stopping Patience: 97

[Epoch: 18][#examples: 76160/81252][#steps: 24050]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 1.014e-05
[Epoch: 18][#examples: 79360/81252][#steps: 24100]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 1.014e-05
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 345/513

---------------------------------------
[Epoch: 18][Validatiing...]
	 Early Stopping Patience: 97/100
	 Val. Loss: 1.356 | Val. Acc: 0.669 | Val. PPL:   3.882
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

Epoch: 19 | Time: 2m 24s
	Train Loss: 0.020 | Train PPL:   1.021
	 Val. Loss: 1.356 | Val. Acc: 0.669 | Val. PPL:   3.882
[Train]: Current Teacher Forcing Ratio: 0.230
[Epoch: 19][#examples: 1280/81252][#steps: 24150]
	Train Loss: 0.017 | Train PPL:   1.017 | lr

[Epoch: 20][#examples: 54400/81252][#steps: 26250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 57600/81252][#steps: 26300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 60800/81252][#steps: 26350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 64000/81252][#steps: 26400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 67200/81252][#steps: 26450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 70400/81252][#steps: 26500]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 5.391e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 342/513

---------------------------------------
[Epoch: 20][Validatiing...]
	 Early Stopping Patience: 91/100
	 Val. Loss: 1.354 | Val. Acc: 0.669 | Val. PPL:   3.875
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0

[Epoch: 22][#examples: 32640/81252][#steps: 28450]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 2.865e-06
[Epoch: 22][#examples: 35840/81252][#steps: 28500]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 2.865e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 342/513

---------------------------------------
[Epoch: 22][Validatiing...]
	 Early Stopping Patience: 85/100
	 Val. Loss: 1.353 | Val. Acc: 0.669 | Val. PPL:   3.869
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 22][#examples: 39040/81252][#steps: 28550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.579e-06
[Epoch: 22][#examples: 42240/81252][#steps: 28600]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.579e-06
[Epoch: 22][#examples: 45440/8125

[Epoch: 24][#examples: 4480/81252][#steps: 30550]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 7680/81252][#steps: 30600]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.370e-06
[Epoch: 24][#examples: 10880/81252][#steps: 30650]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 14080/81252][#steps: 30700]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 17280/81252][#steps: 30750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 20480/81252][#steps: 30800]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 23680/81252][#steps: 30850]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.370e-06
[Epoch: 24][#examples: 26880/81252][#steps: 30900]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.370e-06
[Epoch: 24][#examples: 30080/81252][#steps: 30950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 33280/81252][#st

[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 25][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------

[Epoch: 27][#examples: 48640/81252][#steps: 35050]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 3.870e-07
[Epoch: 27][#examples: 51840/81252][#steps: 35100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 55040/81252][#steps: 35150]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 3.870e-07
[Epoch: 27][#examples: 58240/81252][#steps: 35200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 61440/81252][#steps: 35250]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 3.870e-07
[Epoch: 27][#examples: 64640/81252][#steps: 35300]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 3.870e-07
[Epoch: 27][#examples: 67840/81252][#steps: 35350]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 3.870e-07
[Epoch: 27][#examples: 71040/81252][#steps: 35400]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 3.870e-07
[Epoch: 27][#examples: 74240/81252][#steps: 35450]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 3.870e-07
[Epoch: 27][#examples: 77440/81252][#

[Epoch: 29][#examples: 26880/81252][#steps: 37250]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 2.057e-07
[Epoch: 29][#examples: 30080/81252][#steps: 37300]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 2.057e-07
[Epoch: 29][#examples: 33280/81252][#steps: 37350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.057e-07
[Epoch: 29][#examples: 36480/81252][#steps: 37400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.057e-07
[Epoch: 29][#examples: 39680/81252][#steps: 37450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.057e-07
[Epoch: 29][#examples: 42880/81252][#steps: 37500]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.057e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 29][Validatiing...]
	 Early Stopping Patience: 94/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0

[Epoch: 31][#examples: 5120/81252][#steps: 39450]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.093e-07
[Epoch: 31][#examples: 8320/81252][#steps: 39500]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.093e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 88/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 31][#examples: 11520/81252][#steps: 39550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 31][#examples: 14720/81252][#steps: 39600]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 31][#examples: 17920/81252]

[Epoch: 32][#examples: 67840/81252][#steps: 41700]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 32][#examples: 71040/81252][#steps: 41750]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 32][#examples: 74240/81252][#steps: 41800]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 32][#examples: 77440/81252][#steps: 41850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 32][#examples: 80640/81252][#steps: 41900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 32][Validatiing...]
	 Early Stopping Patience: 82/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Va

[Epoch: 34][#examples: 42880/81252][#steps: 43850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 34][#examples: 46080/81252][#steps: 43900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 34][#examples: 49280/81252][#steps: 43950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 34][#examples: 52480/81252][#steps: 44000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 34][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
----------------------------------

[Epoch: 36][#examples: 17920/81252][#steps: 46000]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 36][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 36][#examples: 21120/81252][#steps: 46050]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 36][#examples: 24320/81252][#steps: 46100]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 36][#examples: 27520/81252][#steps: 46150]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.8

[Epoch: 37][#examples: 77440/81252][#steps: 48200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 37][#examples: 80640/81252][#steps: 48250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 37][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

Epoch: 38 | Time: 2m 24s
	Train Loss: 0.022 | Train PPL:   1.022
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 38][#examples: 2560/81252][#steps: 48300]
	Train Loss: 0.021 | Train PPL:   1.022 | lr

[Epoch: 39][#examples: 55680/81252][#steps: 50400]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 39][#examples: 58880/81252][#steps: 50450]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 39][#examples: 62080/81252][#steps: 50500]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 39][Validatiing...]
	 Early Stopping Patience: 96/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 39][#examples: 65280/81252][#steps: 50550]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 39][#examples: 68480/8125

[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 41][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 41][#examples: 30720/81252][#steps: 52550]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 41][#examples: 33920/81252][#steps: 52600]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 41][#examples: 37120/81252][#steps: 52650]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 41][#examples: 40320/81252][#steps: 52700]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 41][#examples: 43520/81252][#steps: 5275

[Epoch: 43][#examples: 2560/81252][#steps: 54650]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 43][#examples: 5760/81252][#steps: 54700]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 43][#examples: 8960/81252][#steps: 54750]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 43][#examples: 12160/81252][#steps: 54800]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 43][#examples: 15360/81252][#steps: 54850]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 43][#examples: 18560/81252][#steps: 54900]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 43][#examples: 21760/81252][#steps: 54950]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 43][#examples: 24960/81252][#steps: 55000]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 3

[Epoch: 44][#examples: 74880/81252][#steps: 57050]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 44][#examples: 78080/81252][#steps: 57100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 44][#examples: 81280/81252][#steps: 57150]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 44][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

Epoch: 45 | Time: 2m 24s
	Train Loss: 0.022 | Train PPL:   1.022
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
[Train]: Current Teacher Forcing 

[Epoch: 46][#examples: 49920/81252][#steps: 59200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 46][#examples: 53120/81252][#steps: 59250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 46][#examples: 56320/81252][#steps: 59300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 46][#examples: 59520/81252][#steps: 59350]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 46][#examples: 62720/81252][#steps: 59400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 46][#examples: 65920/81252][#steps: 59450]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 46][#examples: 69120/81252][#steps: 59500]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 46][Validatiing...]
	 Early Stopping Patience: 9

[Epoch: 48][#examples: 28160/81252][#steps: 61400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 48][#examples: 31360/81252][#steps: 61450]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 48][#examples: 34560/81252][#steps: 61500]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 48][Validatiing...]
	 Early Stopping Patience: 93/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 48][#examples: 37760/81252][#steps: 61550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 48][#examples: 40960/8125

[Epoch: 50][#examples: 3200/81252][#steps: 63550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 50][#examples: 6400/81252][#steps: 63600]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 50][#examples: 9600/81252][#steps: 63650]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 50][#examples: 12800/81252][#steps: 63700]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 50][#examples: 16000/81252][#steps: 63750]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 50][#examples: 19200/81252][#steps: 63800]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 50][#examples: 22400/81252][#steps: 63850]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 50][#examples: 25600/81252][#steps: 63900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 50][#examples: 28800/81252][#steps: 63950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 50][#examples: 32000/81252][#ste

[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 51][Validatiing...]
	 Early Stopping Patience: 98/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 51][Validatiing...]
	 Early Stopping Patience: 97/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.352 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux:

[Epoch: 53][#examples: 47360/81252][#steps: 68050]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 53][#examples: 50560/81252][#steps: 68100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 53][#examples: 53760/81252][#steps: 68150]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 53][#examples: 56960/81252][#steps: 68200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 53][#examples: 60160/81252][#steps: 68250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 53][#examples: 63360/81252][#steps: 68300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 53][#examples: 66560/81252][#steps: 68350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 53][#examples: 69760/81252][#steps: 68400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 53][#examples: 72960/81252][#steps: 68450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 53][#examples: 76160/81252][#

[Epoch: 55][#examples: 22400/81252][#steps: 70200]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 55][#examples: 25600/81252][#steps: 70250]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 55][#examples: 28800/81252][#steps: 70300]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 55][#examples: 32000/81252][#steps: 70350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 55][#examples: 35200/81252][#steps: 70400]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 55][#examples: 38400/81252][#steps: 70450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 55][#examples: 41600/81252][#steps: 70500]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 55][Validatiing...]
		 Better Valid Loss! (at le

[Epoch: 57][#examples: 640/81252][#steps: 72400]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 57][#examples: 3840/81252][#steps: 72450]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 57][#examples: 7040/81252][#steps: 72500]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 57][Validatiing...]
	 Early Stopping Patience: 94/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 57][#examples: 10240/81252][#steps: 72550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 57][#examples: 13440/81252][#

[Epoch: 58][#examples: 63360/81252][#steps: 74650]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 58][#examples: 66560/81252][#steps: 74700]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 58][#examples: 69760/81252][#steps: 74750]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 58][#examples: 72960/81252][#steps: 74800]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 58][#examples: 76160/81252][#steps: 74850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 58][#examples: 79360/81252][#steps: 74900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 58][Validatiing...]
	 Early Stopping Patience: 88/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Los

[Epoch: 60][#examples: 41600/81252][#steps: 76850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 60][#examples: 44800/81252][#steps: 76900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 60][#examples: 48000/81252][#steps: 76950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 60][#examples: 51200/81252][#steps: 77000]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 60][Validatiing...]
	 Early Stopping Patience: 82/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 60][#examples: 54400/8125

[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 62][Validatiing...]
	 Early Stopping Patience: 76/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 62][#examples: 19840/81252][#steps: 79050]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 62][#examples: 23040/81252][#steps: 79100]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 62][#examples: 26240/81252][#steps: 79150]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 62][#examples: 29440/81252][#steps: 79200]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 62][#examples: 32640/81252][#steps: 7925

[Epoch: 64][#examples: 1280/81252][#steps: 81300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 64][#examples: 4480/81252][#steps: 81350]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
[Epoch: 64][#examples: 7680/81252][#steps: 81400]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 64][#examples: 10880/81252][#steps: 81450]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 64][#examples: 14080/81252][#steps: 81500]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 64][Validatiing...]
	 Early Stopping Patience: 69/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
--------------------------------------

[Epoch: 65][#examples: 64000/81252][#steps: 83550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 65][#examples: 67200/81252][#steps: 83600]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 65][#examples: 70400/81252][#steps: 83650]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 65][#examples: 73600/81252][#steps: 83700]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 65][#examples: 76800/81252][#steps: 83750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 65][#examples: 80000/81252][#steps: 83800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 65][Validatiing...]
	 Early Stopping Patience: 63/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Los

[Epoch: 67][#examples: 42240/81252][#steps: 85750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 67][#examples: 45440/81252][#steps: 85800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 67][#examples: 48640/81252][#steps: 85850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 67][#examples: 51840/81252][#steps: 85900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 67][#examples: 55040/81252][#steps: 85950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 67][#examples: 58240/81252][#steps: 86000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 67][Validatiing...]
	 Early Stopping Patience: 57/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0

[Epoch: 69][#examples: 20480/81252][#steps: 87950]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 69][#examples: 23680/81252][#steps: 88000]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 69][Validatiing...]
	 Early Stopping Patience: 51/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 69][#examples: 26880/81252][#steps: 88050]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 69][#examples: 30080/81252][#steps: 88100]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 69][#examples: 33280/8125

[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 70][Validatiing...]
	 Early Stopping Patience: 45/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

Epoch: 71 | Time: 2m 23s
	Train Loss: 0.021 | Train PPL:   1.021
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.864
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 71][#examples: 1920/81252][#steps: 90200]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 71][#examples: 5120/81252][#steps: 90250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 71][#examples: 8320/81252][#steps: 90300]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 

[Epoch: 72][#examples: 61440/81252][#steps: 92400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 72][#examples: 64640/81252][#steps: 92450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 72][#examples: 67840/81252][#steps: 92500]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 72][Validatiing...]
	 Early Stopping Patience: 39/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 72][#examples: 71040/81252][#steps: 92550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 72][#examples: 74240/8125

[Epoch: 74][#examples: 36480/81252][#steps: 94550]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 74][#examples: 39680/81252][#steps: 94600]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 74][#examples: 42880/81252][#steps: 94650]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 74][#examples: 46080/81252][#steps: 94700]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 74][#examples: 49280/81252][#steps: 94750]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 74][#examples: 52480/81252][#steps: 94800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 74][#examples: 55680/81252][#steps: 94850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 74][#examples: 58880/81252][#steps: 94900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 74][#examples: 62080/81252][#steps: 94950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 74][#examples: 65280/81252][#

[Epoch: 76][#examples: 14720/81252][#steps: 96750]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 76][#examples: 17920/81252][#steps: 96800]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 76][#examples: 21120/81252][#steps: 96850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 76][#examples: 24320/81252][#steps: 96900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 76][#examples: 27520/81252][#steps: 96950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 76][#examples: 30720/81252][#steps: 97000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 76][Validatiing...]
	 Early Stopping Patience: 26/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0

[Epoch: 77][#examples: 80640/81252][#steps: 99050]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 77][Validatiing...]
	 Early Stopping Patience: 20/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

Epoch: 78 | Time: 2m 24s
	Train Loss: 0.021 | Train PPL:   1.021
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 78][#examples: 2560/81252][#steps: 99100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 78][#examples: 5760/81252][#steps: 99150]
	Train Loss: 0.022 | Train PPL:   1.022 | lr:

[Epoch: 79][#examples: 58880/81252][#steps: 101250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 79][#examples: 62080/81252][#steps: 101300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 79][#examples: 65280/81252][#steps: 101350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 79][#examples: 68480/81252][#steps: 101400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 79][#examples: 71680/81252][#steps: 101450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 79][#examples: 74880/81252][#steps: 101500]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 79][Validatiing...]
	 Early Stopping Patience: 14/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. 

[Epoch: 81][#examples: 37120/81252][#steps: 103450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 81][#examples: 40320/81252][#steps: 103500]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 81][Validatiing...]
	 Early Stopping Patience: 8/100
	 Val. Loss: 1.352 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 81][#examples: 43520/81252][#steps: 103550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 81][#examples: 46720/81252][#steps: 103600]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 81][#examples: 49920/8

[Epoch: 83][#examples: 8960/81252][#steps: 105550]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 83][#examples: 12160/81252][#steps: 105600]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 83][#examples: 15360/81252][#steps: 105650]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 83][#examples: 18560/81252][#steps: 105700]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 83][#examples: 21760/81252][#steps: 105750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 83][#examples: 24960/81252][#steps: 105800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 83][#examples: 28160/81252][#steps: 105850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 83][#examples: 31360/81252][#steps: 105900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 83][#examples: 34560/81252][#steps: 105950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 83][#examples: 37760/

[Epoch: 85][#examples: 3200/81252][#steps: 108000]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 85][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 85][#examples: 6400/81252][#steps: 108050]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 85][#examples: 9600/81252][#steps: 108100]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 85][#examples: 12800/81252][#steps: 108150]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 85][#examples: 16000/812

[Epoch: 86][#examples: 62720/81252][#steps: 110200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 86][#examples: 65920/81252][#steps: 110250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 86][#examples: 69120/81252][#steps: 110300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 86][#examples: 72320/81252][#steps: 110350]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 86][#examples: 75520/81252][#steps: 110400]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 86][#examples: 78720/81252][#steps: 110450]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 86][Validatiing...]
	 Early Stopping Patience: 97/100
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Va

[Epoch: 88][#examples: 37760/81252][#steps: 112350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 88][#examples: 40960/81252][#steps: 112400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 88][#examples: 44160/81252][#steps: 112450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 88][#examples: 47360/81252][#steps: 112500]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 88][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
------------------------------

[Epoch: 90][#examples: 9600/81252][#steps: 114450]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 90][#examples: 12800/81252][#steps: 114500]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 90][Validatiing...]
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 90][#examples: 16000/81252][#steps: 114550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 90][#examples: 19200/81252][#steps: 114600]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 90][#examples: 22400/8

[Epoch: 91][#examples: 72320/81252][#steps: 116700]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 91][#examples: 75520/81252][#steps: 116750]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 91][#examples: 78720/81252][#steps: 116800]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 91][Validatiing...]
	 Early Stopping Patience: 96/100
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.863
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

Epoch: 92 | Time: 2m 23s
	Train Loss: 0.021 | Train PPL:   1.021
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.863
[Train]: Current Teacher Forci

[Epoch: 93][#examples: 47360/81252][#steps: 118850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 93][#examples: 50560/81252][#steps: 118900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 93][#examples: 53760/81252][#steps: 118950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 93][#examples: 56960/81252][#steps: 119000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 93][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.862
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
------------------------------

[Epoch: 95][#examples: 19200/81252][#steps: 120950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 95][#examples: 22400/81252][#steps: 121000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 95][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.862
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

[Epoch: 95][#examples: 25600/81252][#steps: 121050]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 95][#examples: 28800/81252][#steps: 121100]
	Train Loss: 0.021 | Train PPL:   1.021 | lr:

[Epoch: 96][#examples: 75520/81252][#steps: 123100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 96][#examples: 78720/81252][#steps: 123150]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 96][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.862
	 BEST. Val. Loss: 1.351 | BEST. Val. Acc: 0.673 | Val. Loss: 1.353 | BEST. Val. Epoch: 16 | BEST. Val. Step: 21000
---------------------------------------

	BEST. Val. Acc Aux: 0.680
---------------------------------------

Epoch: 97 | Time: 2m 27s
	Train Loss: 0.022 | Train PPL:   1.022
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.862
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 97][#examples: 640/81252][#steps: 123200]
	T

[Epoch: 98][#examples: 50560/81252][#steps: 125250]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 98][#examples: 53760/81252][#steps: 125300]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 98][#examples: 56960/81252][#steps: 125350]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 98][#examples: 60160/81252][#steps: 125400]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 98][#examples: 63360/81252][#steps: 125450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 98][#examples: 66560/81252][#steps: 125500]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 343/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 98][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.351 | Val. Acc: 0.669 | Val. PPL:   3.86

The model has 22,160,366 trainable parameters
Running multi-task experiment...
Renew Evaluation Records in the Burning Phase...
[Train]: Current Teacher Forcing Ratio: 0.800
[Epoch: 0][#examples: 3200/81252][#steps: 50]
	Train Loss: 3.933 | Train PPL:  51.084 | lr: 3.000e-03
[Epoch: 0][#examples: 6400/81252][#steps: 100]
	Train Loss: 3.608 | Train PPL:  36.895 | lr: 3.000e-03
[Epoch: 0][#examples: 9600/81252][#steps: 150]
	Train Loss: 3.483 | Train PPL:  32.555 | lr: 3.000e-03
[Epoch: 0][#examples: 12800/81252][#steps: 200]
	Train Loss: 3.395 | Train PPL:  29.815 | lr: 3.000e-03
[Epoch: 0][#examples: 16000/81252][#steps: 250]
	Train Loss: 3.296 | Train PPL:  27.003 | lr: 3.000e-03
[Epoch: 0][#examples: 19200/81252][#steps: 300]
	Train Loss: 3.150 | Train PPL:  23.328 | lr: 3.000e-03
[Epoch: 0][#examples: 22400/81252][#steps: 350]
	Train Loss: 2.938 | Train PPL:  18.872 | lr: 3.000e-03
[Epoch: 0][#examples: 25600/81252][#steps: 400]
	Train Loss: 2.724 | Train PPL:  15.234 | lr: 3.000e-0

[Epoch: 1][#examples: 62720/81252][#steps: 2250]
	Train Loss: 0.376 | Train PPL:   1.456 | lr: 3.000e-03
[Epoch: 1][#examples: 65920/81252][#steps: 2300]
	Train Loss: 0.375 | Train PPL:   1.455 | lr: 3.000e-03
[Epoch: 1][#examples: 69120/81252][#steps: 2350]
	Train Loss: 0.373 | Train PPL:   1.452 | lr: 3.000e-03
[Epoch: 1][#examples: 72320/81252][#steps: 2400]
	Train Loss: 0.372 | Train PPL:   1.450 | lr: 3.000e-03
[Epoch: 1][#examples: 75520/81252][#steps: 2450]
	Train Loss: 0.370 | Train PPL:   1.447 | lr: 3.000e-03
[Epoch: 1][#examples: 78720/81252][#steps: 2500]
	Train Loss: 0.369 | Train PPL:   1.446 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 322/513
[VAL]: The number of correct predictions (aux-task (multi)): 314/513

---------------------------------------
[Epoch: 1][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.429 | Val. Acc: 0.628

[Epoch: 3][#examples: 28160/81252][#steps: 4250]
	Train Loss: 0.192 | Train PPL:   1.211 | lr: 1.968e-03
[Epoch: 3][#examples: 31360/81252][#steps: 4300]
	Train Loss: 0.192 | Train PPL:   1.212 | lr: 1.968e-03
[Epoch: 3][#examples: 34560/81252][#steps: 4350]
	Train Loss: 0.191 | Train PPL:   1.211 | lr: 1.968e-03
[Epoch: 3][#examples: 37760/81252][#steps: 4400]
	Train Loss: 0.191 | Train PPL:   1.211 | lr: 1.968e-03
[Epoch: 3][#examples: 40960/81252][#steps: 4450]
	Train Loss: 0.191 | Train PPL:   1.210 | lr: 1.968e-03
[Epoch: 3][#examples: 44160/81252][#steps: 4500]
	Train Loss: 0.192 | Train PPL:   1.211 | lr: 1.968e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 334/513
[VAL]: The number of correct predictions (aux-task (multi)): 324/513

---------------------------------------
[Epoch: 3][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.285 | Val. Acc: 0.651

[Epoch: 5][#examples: 3200/81252][#steps: 6400]
	Train Loss: 0.096 | Train PPL:   1.101 | lr: 1.435e-03
[Epoch: 5][#examples: 6400/81252][#steps: 6450]
	Train Loss: 0.092 | Train PPL:   1.097 | lr: 1.435e-03
[Epoch: 5][#examples: 9600/81252][#steps: 6500]
	Train Loss: 0.091 | Train PPL:   1.096 | lr: 1.435e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 328/513
[VAL]: The number of correct predictions (aux-task (multi)): 343/513

---------------------------------------
[Epoch: 5][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.367 | Val. Acc: 0.639 | Val. PPL:   3.923
	 BEST. Val. Loss: 1.367 | BEST. Val. Acc: 0.639 | Val. Loss: 1.367 | BEST. Val. Epoch: 5 | BEST. Val. Step: 6500
---------------------------------------

	BEST. Val. Acc Aux: 0.671
---------------------------------------

[Epoch: 5][#examples: 12800/81252][#steps: 6550]
	Train Loss: 0.093 | Trai

[Epoch: 6][#examples: 59520/81252][#steps: 8550]
	Train Loss: 0.067 | Train PPL:   1.069 | lr: 9.414e-04
[Epoch: 6][#examples: 62720/81252][#steps: 8600]
	Train Loss: 0.067 | Train PPL:   1.069 | lr: 9.414e-04
[Epoch: 6][#examples: 65920/81252][#steps: 8650]
	Train Loss: 0.067 | Train PPL:   1.069 | lr: 9.414e-04
[Epoch: 6][#examples: 69120/81252][#steps: 8700]
	Train Loss: 0.067 | Train PPL:   1.069 | lr: 9.414e-04
[Epoch: 6][#examples: 72320/81252][#steps: 8750]
	Train Loss: 0.067 | Train PPL:   1.069 | lr: 9.414e-04
[Epoch: 6][#examples: 75520/81252][#steps: 8800]
	Train Loss: 0.067 | Train PPL:   1.070 | lr: 9.414e-04
[Epoch: 6][#examples: 78720/81252][#steps: 8850]
	Train Loss: 0.067 | Train PPL:   1.070 | lr: 9.414e-04
[VAL]: The number of correct predictions (main-task (multi)): 344/513
[VAL]: The number of correct predictions (aux-task (multi)): 353/513

---------------------------------------
[Epoch: 6][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc

[Epoch: 8][#examples: 28160/81252][#steps: 10600]
	Train Loss: 0.041 | Train PPL:   1.041 | lr: 6.177e-04
[Epoch: 8][#examples: 31360/81252][#steps: 10650]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 6.177e-04
[Epoch: 8][#examples: 34560/81252][#steps: 10700]
	Train Loss: 0.041 | Train PPL:   1.042 | lr: 6.177e-04
[Epoch: 8][#examples: 37760/81252][#steps: 10750]
	Train Loss: 0.041 | Train PPL:   1.042 | lr: 6.177e-04
[Epoch: 8][#examples: 40960/81252][#steps: 10800]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 6.177e-04
[Epoch: 8][#examples: 44160/81252][#steps: 10850]
	Train Loss: 0.042 | Train PPL:   1.042 | lr: 6.177e-04
[Epoch: 8][#examples: 47360/81252][#steps: 10900]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 6.177e-04
[Epoch: 8][#examples: 50560/81252][#steps: 10950]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 6.177e-04
[Epoch: 8][#examples: 53760/81252][#steps: 11000]
	Train Loss: 0.042 | Train PPL:   1.043 | lr: 6.177e-04
-----Val------
[VAL]: The number of correct pr

[Epoch: 10][#examples: 3200/81252][#steps: 12750]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 3.283e-04
[Epoch: 10][#examples: 6400/81252][#steps: 12800]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 3.283e-04
[Epoch: 10][#examples: 9600/81252][#steps: 12850]
	Train Loss: 0.029 | Train PPL:   1.029 | lr: 3.283e-04
[Epoch: 10][#examples: 12800/81252][#steps: 12900]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 3.283e-04
[Epoch: 10][#examples: 16000/81252][#steps: 12950]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 3.283e-04
[Epoch: 10][#examples: 19200/81252][#steps: 13000]
	Train Loss: 0.028 | Train PPL:   1.028 | lr: 3.283e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 347/513
[VAL]: The number of correct predictions (aux-task (multi)): 358/513

---------------------------------------
[Epoch: 10][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.452 | Val. 

	 Early Stopping Patience: 99/100
	 Val. Loss: 1.507 | Val. Acc: 0.673 | Val. PPL:   4.511
	 BEST. Val. Loss: 1.451 | BEST. Val. Acc: 0.673 | Val. Loss: 1.507 | BEST. Val. Epoch: 11 | BEST. Val. Step: 15000
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 11][#examples: 69120/81252][#steps: 15050]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 1.938e-04
[Epoch: 11][#examples: 72320/81252][#steps: 15100]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 1.938e-04
[Epoch: 11][#examples: 75520/81252][#steps: 15150]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 1.938e-04
[Epoch: 11][#examples: 78720/81252][#steps: 15200]
	Train Loss: 0.027 | Train PPL:   1.027 | lr: 1.938e-04
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 355/513

---------------------------------------
[Epoch: 11][Validati

[Epoch: 13][#examples: 34560/81252][#steps: 17050]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.030e-04
[Epoch: 13][#examples: 37760/81252][#steps: 17100]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.030e-04
[Epoch: 13][#examples: 40960/81252][#steps: 17150]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.030e-04
[Epoch: 13][#examples: 44160/81252][#steps: 17200]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.030e-04
[Epoch: 13][#examples: 47360/81252][#steps: 17250]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.030e-04
[Epoch: 13][#examples: 50560/81252][#steps: 17300]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.030e-04
[Epoch: 13][#examples: 53760/81252][#steps: 17350]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 1.030e-04
[Epoch: 13][#examples: 56960/81252][#steps: 17400]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 1.030e-04
[Epoch: 13][#examples: 60160/81252][#steps: 17450]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.030e-04
[Epoch: 13][#examples: 63360/81252][#

[Epoch: 15][#examples: 3200/81252][#steps: 19100]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 5.474e-05
[Epoch: 15][#examples: 6400/81252][#steps: 19150]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 5.474e-05
[Epoch: 15][#examples: 9600/81252][#steps: 19200]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 5.474e-05
[Epoch: 15][#examples: 12800/81252][#steps: 19250]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 5.474e-05
[Epoch: 15][#examples: 16000/81252][#steps: 19300]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 5.474e-05
[Epoch: 15][#examples: 19200/81252][#steps: 19350]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 5.474e-05
[Epoch: 15][#examples: 22400/81252][#steps: 19400]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 5.474e-05
[Epoch: 15][#examples: 25600/81252][#steps: 19450]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 5.474e-05
[Epoch: 15][#examples: 28800/81252][#steps: 19500]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 5.474e-05
-----Val------
[VAL]: The number of corr

[Epoch: 16][#examples: 78720/81252][#steps: 21550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.909e-05
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 352/513

---------------------------------------
[Epoch: 16][Validatiing...]
	 Early Stopping Patience: 94/100
	 Val. Loss: 1.519 | Val. Acc: 0.674 | Val. PPL:   4.567
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.676 | Val. Loss: 1.505 | BEST. Val. Epoch: 15 | BEST. Val. Step: 19500
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

Epoch: 17 | Time: 2m 16s
	Train Loss: 0.022 | Train PPL:   1.022
	 Val. Loss: 1.519 | Val. Acc: 0.674 | Val. PPL:   4.567
[Train]: Current Teacher Forcing Ratio: 0.290
[Epoch: 17][#examples: 640/81252][#steps: 21600]
	Train Loss: 0.027 | Train PPL:   1.028 | lr: 2.618e-05
[Epoch: 17][#examples: 3840/81252][#steps: 21650]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 

[Epoch: 18][#examples: 53760/81252][#steps: 23700]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.546e-05
[Epoch: 18][#examples: 56960/81252][#steps: 23750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.546e-05
[Epoch: 18][#examples: 60160/81252][#steps: 23800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.546e-05
[Epoch: 18][#examples: 63360/81252][#steps: 23850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.546e-05
[Epoch: 18][#examples: 66560/81252][#steps: 23900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.546e-05
[Epoch: 18][#examples: 69760/81252][#steps: 23950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.546e-05
[Epoch: 18][#examples: 72960/81252][#steps: 24000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.546e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 348/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 18][Validatiing...]
		 Better Valid Acc! (at lea

[Epoch: 20][#examples: 32000/81252][#steps: 25900]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 8.217e-06
[Epoch: 20][#examples: 35200/81252][#steps: 25950]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 8.217e-06
[Epoch: 20][#examples: 38400/81252][#steps: 26000]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 8.217e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 351/513

---------------------------------------
[Epoch: 20][Validatiing...]
	 Early Stopping Patience: 82/100
	 Val. Loss: 1.515 | Val. Acc: 0.674 | Val. PPL:   4.549
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Loss: 1.517 | BEST. Val. Epoch: 18 | BEST. Val. Step: 24000
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 20][#examples: 41600/81252][#steps: 26050]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 7.395e-06
[Epoch: 20][#examples: 44800/8125

[Epoch: 22][#examples: 7040/81252][#steps: 28050]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 3.930e-06
[Epoch: 22][#examples: 10240/81252][#steps: 28100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.930e-06
[Epoch: 22][#examples: 13440/81252][#steps: 28150]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 3.930e-06
[Epoch: 22][#examples: 16640/81252][#steps: 28200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.930e-06
[Epoch: 22][#examples: 19840/81252][#steps: 28250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.930e-06
[Epoch: 22][#examples: 23040/81252][#steps: 28300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.930e-06
[Epoch: 22][#examples: 26240/81252][#steps: 28350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.930e-06
[Epoch: 22][#examples: 29440/81252][#steps: 28400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.930e-06
[Epoch: 22][#examples: 32640/81252][#steps: 28450]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 3.930e-06
[Epoch: 22][#examples: 35840/81252][#s

[Epoch: 24][#examples: 1280/81252][#steps: 30500]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 2.089e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 24][Validatiing...]
	 Early Stopping Patience: 69/100
	 Val. Loss: 1.522 | Val. Acc: 0.673 | Val. PPL:   4.580
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Loss: 1.517 | BEST. Val. Epoch: 18 | BEST. Val. Step: 24000
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 24][#examples: 4480/81252][#steps: 30550]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 1.880e-06
[Epoch: 24][#examples: 7680/81252][#steps: 30600]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.880e-06
[Epoch: 24][#examples: 10880/81252][#steps: 30650]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.880e-06
[Epoch: 24][#examples: 14080/81252][

[Epoch: 25][#examples: 64000/81252][#steps: 32750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.110e-06
[Epoch: 25][#examples: 67200/81252][#steps: 32800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.110e-06
[Epoch: 25][#examples: 70400/81252][#steps: 32850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.110e-06
[Epoch: 25][#examples: 73600/81252][#steps: 32900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.110e-06
[Epoch: 25][#examples: 76800/81252][#steps: 32950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.110e-06
[Epoch: 25][#examples: 80000/81252][#steps: 33000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.110e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 63/100
	 Val. Loss: 1.522 | Val. Acc: 0.673 | Val. PPL:   4.582
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0

[Epoch: 27][#examples: 42240/81252][#steps: 34950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.899e-07
[Epoch: 27][#examples: 45440/81252][#steps: 35000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.899e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 27][Validatiing...]
	 Early Stopping Patience: 57/100
	 Val. Loss: 1.520 | Val. Acc: 0.674 | Val. PPL:   4.572
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Loss: 1.517 | BEST. Val. Epoch: 18 | BEST. Val. Step: 24000
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 27][#examples: 48640/81252][#steps: 35050]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.309e-07
[Epoch: 27][#examples: 51840/81252][#steps: 35100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.309e-07
[Epoch: 27][#examples: 55040/8125

[Epoch: 29][#examples: 14080/81252][#steps: 37050]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 2.821e-07
[Epoch: 29][#examples: 17280/81252][#steps: 37100]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 2.821e-07
[Epoch: 29][#examples: 20480/81252][#steps: 37150]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 2.821e-07
[Epoch: 29][#examples: 23680/81252][#steps: 37200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.821e-07
[Epoch: 29][#examples: 26880/81252][#steps: 37250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.821e-07
[Epoch: 29][#examples: 30080/81252][#steps: 37300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.821e-07
[Epoch: 29][#examples: 33280/81252][#steps: 37350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.821e-07
[Epoch: 29][#examples: 36480/81252][#steps: 37400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 2.821e-07
[Epoch: 29][#examples: 39680/81252][#steps: 37450]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 2.821e-07
[Epoch: 29][#examples: 42880/81252][#

[Epoch: 31][#examples: 1920/81252][#steps: 39400]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.499e-07
[Epoch: 31][#examples: 5120/81252][#steps: 39450]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.499e-07
[Epoch: 31][#examples: 8320/81252][#steps: 39500]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.499e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 44/100
	 Val. Loss: 1.520 | Val. Acc: 0.674 | Val. PPL:   4.572
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Loss: 1.517 | BEST. Val. Epoch: 18 | BEST. Val. Step: 24000
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 31][#examples: 11520/81252][#steps: 39550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.349e-07
[Epoch: 31][#examples: 14720/81252][

[Epoch: 32][#examples: 64640/81252][#steps: 41650]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 32][#examples: 67840/81252][#steps: 41700]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 32][#examples: 71040/81252][#steps: 41750]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 32][#examples: 74240/81252][#steps: 41800]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 32][#examples: 77440/81252][#steps: 41850]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 32][#examples: 80640/81252][#steps: 41900]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 32][Validatiing...]
	 Early Stopping Patience: 38/100
	 Val. Loss: 1.520 | Val. Acc: 0.674 | Val. PPL:   4.571
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Los

[Epoch: 34][#examples: 42880/81252][#steps: 43850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 34][#examples: 46080/81252][#steps: 43900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 34][#examples: 49280/81252][#steps: 43950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 34][#examples: 52480/81252][#steps: 44000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 34][Validatiing...]
	 Early Stopping Patience: 32/100
	 Val. Loss: 1.520 | Val. Acc: 0.674 | Val. PPL:   4.571
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Loss: 1.517 | BEST. Val. Epoch: 18 | BEST. Val. Step: 24000
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 34][#examples: 55680/8125

[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 36][Validatiing...]
	 Early Stopping Patience: 26/100
	 Val. Loss: 1.520 | Val. Acc: 0.674 | Val. PPL:   4.571
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Loss: 1.517 | BEST. Val. Epoch: 18 | BEST. Val. Step: 24000
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 36][#examples: 21120/81252][#steps: 46050]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 36][#examples: 24320/81252][#steps: 46100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 36][#examples: 27520/81252][#steps: 46150]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 36][#examples: 30720/81252][#steps: 46200]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 36][#examples: 33920/81252][#steps: 4625

[Epoch: 38][#examples: 2560/81252][#steps: 48300]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 38][#examples: 5760/81252][#steps: 48350]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 38][#examples: 8960/81252][#steps: 48400]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 38][#examples: 12160/81252][#steps: 48450]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 38][#examples: 15360/81252][#steps: 48500]
	Train Loss: 0.024 | Train PPL:   1.025 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 19/100
	 Val. Loss: 1.520 | Val. Acc: 0.674 | Val. PPL:   4.571
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Loss: 1.517 | BEST. Val. Epoch: 18 | BEST. Val. Step: 24000
--------------------------------------

[Epoch: 39][#examples: 65280/81252][#steps: 50550]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 39][#examples: 68480/81252][#steps: 50600]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 39][#examples: 71680/81252][#steps: 50650]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 39][#examples: 74880/81252][#steps: 50700]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 39][#examples: 78080/81252][#steps: 50750]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 39][#examples: 81280/81252][#steps: 50800]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 39][Validatiing...]
	 Early Stopping Patience: 13/100
	 Val. Loss: 1.520 | Val. Acc: 0.674 | Val. PPL:   4.571
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Los

[Epoch: 41][#examples: 43520/81252][#steps: 52750]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 41][#examples: 46720/81252][#steps: 52800]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 41][#examples: 49920/81252][#steps: 52850]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 41][#examples: 53120/81252][#steps: 52900]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 41][#examples: 56320/81252][#steps: 52950]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 9.838e-08
[Epoch: 41][#examples: 59520/81252][#steps: 53000]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 41][Validatiing...]
	 Early Stopping Patience: 7/100
	 Val. Loss: 1.520 | Val. Acc: 0.674 | Val. PPL:   4.571
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.

[Epoch: 43][#examples: 21760/81252][#steps: 54950]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 43][#examples: 24960/81252][#steps: 55000]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 349/513

---------------------------------------
[Epoch: 43][Validatiing...]
	 Early Stopping Patience: 1/100
	 Val. Loss: 1.520 | Val. Acc: 0.674 | Val. PPL:   4.571
	 BEST. Val. Loss: 1.505 | BEST. Val. Acc: 0.678 | Val. Loss: 1.517 | BEST. Val. Epoch: 18 | BEST. Val. Step: 24000
---------------------------------------

	BEST. Val. Acc Aux: 0.706
---------------------------------------

[Epoch: 43][#examples: 28160/81252][#steps: 55050]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 43][#examples: 31360/81252][#steps: 55100]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 43][#examples: 34560/81252

[Epoch: 0][#examples: 38400/81252][#steps: 600]
	Train Loss: 1.887 | Train PPL:   6.600 | lr: 3.000e-03
[Epoch: 0][#examples: 41600/81252][#steps: 650]
	Train Loss: 1.788 | Train PPL:   5.978 | lr: 3.000e-03
[Epoch: 0][#examples: 44800/81252][#steps: 700]
	Train Loss: 1.701 | Train PPL:   5.481 | lr: 3.000e-03
[Epoch: 0][#examples: 48000/81252][#steps: 750]
	Train Loss: 1.625 | Train PPL:   5.077 | lr: 3.000e-03
[Epoch: 0][#examples: 51200/81252][#steps: 800]
	Train Loss: 1.559 | Train PPL:   4.753 | lr: 3.000e-03
[Epoch: 0][#examples: 54400/81252][#steps: 850]
	Train Loss: 1.500 | Train PPL:   4.480 | lr: 3.000e-03
[Epoch: 0][#examples: 57600/81252][#steps: 900]
	Train Loss: 1.444 | Train PPL:   4.237 | lr: 3.000e-03
[Epoch: 0][#examples: 60800/81252][#steps: 950]
	Train Loss: 1.393 | Train PPL:   4.028 | lr: 3.000e-03
[Epoch: 0][#examples: 64000/81252][#steps: 1000]
	Train Loss: 1.349 | Train PPL:   3.853 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (main-t

[Epoch: 2][#examples: 640/81252][#steps: 2550]
	Train Loss: 0.347 | Train PPL:   1.414 | lr: 2.430e-03
[Epoch: 2][#examples: 3840/81252][#steps: 2600]
	Train Loss: 0.265 | Train PPL:   1.304 | lr: 2.430e-03
[Epoch: 2][#examples: 7040/81252][#steps: 2650]
	Train Loss: 0.255 | Train PPL:   1.290 | lr: 2.430e-03
[Epoch: 2][#examples: 10240/81252][#steps: 2700]
	Train Loss: 0.255 | Train PPL:   1.290 | lr: 2.430e-03
[Epoch: 2][#examples: 13440/81252][#steps: 2750]
	Train Loss: 0.255 | Train PPL:   1.290 | lr: 2.430e-03
[Epoch: 2][#examples: 16640/81252][#steps: 2800]
	Train Loss: 0.256 | Train PPL:   1.292 | lr: 2.430e-03
[Epoch: 2][#examples: 19840/81252][#steps: 2850]
	Train Loss: 0.256 | Train PPL:   1.291 | lr: 2.430e-03
[Epoch: 2][#examples: 23040/81252][#steps: 2900]
	Train Loss: 0.257 | Train PPL:   1.293 | lr: 2.430e-03
[Epoch: 2][#examples: 26240/81252][#steps: 2950]
	Train Loss: 0.256 | Train PPL:   1.292 | lr: 2.430e-03
[Epoch: 2][#examples: 29440/81252][#steps: 3000]
	Train Los

[Epoch: 3][#examples: 69760/81252][#steps: 4900]
	Train Loss: 0.164 | Train PPL:   1.178 | lr: 1.771e-03
[Epoch: 3][#examples: 72960/81252][#steps: 4950]
	Train Loss: 0.164 | Train PPL:   1.178 | lr: 1.771e-03
[Epoch: 3][#examples: 76160/81252][#steps: 5000]
	Train Loss: 0.164 | Train PPL:   1.178 | lr: 1.771e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 333/513
[VAL]: The number of correct predictions (aux-task (multi)): 340/513

---------------------------------------
[Epoch: 3][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.566 | Val. Acc: 0.649 | Val. PPL:   4.788
	 BEST. Val. Loss: 1.117 | BEST. Val. Acc: 0.649 | Val. Loss: 1.566 | BEST. Val. Epoch: 3 | BEST. Val. Step: 5000
---------------------------------------

		 Better Valid Acc on Auxiliary Task! (at least equal)
	BEST. Val. Acc Aux: 0.663
---------------------------------------

[Epoch: 3][#examples: 79360/81252][#steps: 5050]
	Train

[Epoch: 5][#examples: 35200/81252][#steps: 6900]
	Train Loss: 0.075 | Train PPL:   1.078 | lr: 1.162e-03
[Epoch: 5][#examples: 38400/81252][#steps: 6950]
	Train Loss: 0.075 | Train PPL:   1.078 | lr: 1.162e-03
[Epoch: 5][#examples: 41600/81252][#steps: 7000]
	Train Loss: 0.075 | Train PPL:   1.078 | lr: 1.162e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 333/513

---------------------------------------
[Epoch: 5][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.464 | Val. Acc: 0.667 | Val. PPL:   4.322
	 BEST. Val. Loss: 1.452 | BEST. Val. Acc: 0.667 | Val. Loss: 1.464 | BEST. Val. Epoch: 5 | BEST. Val. Step: 7000
---------------------------------------

	BEST. Val. Acc Aux: 0.663
---------------------------------------

[Epoch: 5][#examples: 44800/81252][#steps: 7050]
	Train Loss: 0.076 | Train PPL:   1.079 | lr: 1.162e-03
[Epoch

[Epoch: 7][#examples: 3840/81252][#steps: 8950]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 6.863e-04
[Epoch: 7][#examples: 7040/81252][#steps: 9000]
	Train Loss: 0.041 | Train PPL:   1.042 | lr: 6.863e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 342/513
[VAL]: The number of correct predictions (aux-task (multi)): 344/513

---------------------------------------
[Epoch: 7][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.574 | Val. Acc: 0.667 | Val. PPL:   4.825
	 BEST. Val. Loss: 1.574 | BEST. Val. Acc: 0.667 | Val. Loss: 1.574 | BEST. Val. Epoch: 7 | BEST. Val. Step: 9000
---------------------------------------

	BEST. Val. Acc Aux: 0.678
---------------------------------------

[Epoch: 7][#examples: 10240/81252][#steps: 9050]
	Train Loss: 0.039 | Train PPL:   1.040 | lr: 6.177e-04
[Epoch: 7][#examples: 13440/81252][#steps: 9100]
	Train Loss: 0.040 | Tra

[Epoch: 8][#examples: 60160/81252][#steps: 11100]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 3.647e-04
[Epoch: 8][#examples: 63360/81252][#steps: 11150]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 3.647e-04
[Epoch: 8][#examples: 66560/81252][#steps: 11200]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 3.647e-04
[Epoch: 8][#examples: 69760/81252][#steps: 11250]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 3.647e-04
[Epoch: 8][#examples: 72960/81252][#steps: 11300]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 3.647e-04
[Epoch: 8][#examples: 76160/81252][#steps: 11350]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 3.647e-04
[Epoch: 8][#examples: 79360/81252][#steps: 11400]
	Train Loss: 0.034 | Train PPL:   1.035 | lr: 3.647e-04
[VAL]: The number of correct predictions (main-task (multi)): 338/513
[VAL]: The number of correct predictions (aux-task (multi)): 345/513

---------------------------------------
[Epoch: 8][Validatiing...]
		 Better Valid Loss! (at least equal)
	 Early Stop

[Epoch: 10][#examples: 22400/81252][#steps: 13050]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 2.154e-04
[Epoch: 10][#examples: 25600/81252][#steps: 13100]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.154e-04
[Epoch: 10][#examples: 28800/81252][#steps: 13150]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.154e-04
[Epoch: 10][#examples: 32000/81252][#steps: 13200]
	Train Loss: 0.025 | Train PPL:   1.025 | lr: 2.154e-04
[Epoch: 10][#examples: 35200/81252][#steps: 13250]
	Train Loss: 0.025 | Train PPL:   1.026 | lr: 2.154e-04
[Epoch: 10][#examples: 38400/81252][#steps: 13300]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.154e-04
[Epoch: 10][#examples: 41600/81252][#steps: 13350]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.154e-04
[Epoch: 10][#examples: 44800/81252][#steps: 13400]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.154e-04
[Epoch: 10][#examples: 48000/81252][#steps: 13450]
	Train Loss: 0.026 | Train PPL:   1.026 | lr: 2.154e-04
[Epoch: 10][#examples: 51200/81252][#

[Epoch: 12][#examples: 640/81252][#steps: 15250]
	Train Loss: 0.015 | Train PPL:   1.015 | lr: 1.272e-04
[Epoch: 12][#examples: 3840/81252][#steps: 15300]
	Train Loss: 0.016 | Train PPL:   1.016 | lr: 1.272e-04
[Epoch: 12][#examples: 7040/81252][#steps: 15350]
	Train Loss: 0.017 | Train PPL:   1.018 | lr: 1.272e-04
[Epoch: 12][#examples: 10240/81252][#steps: 15400]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.272e-04
[Epoch: 12][#examples: 13440/81252][#steps: 15450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.272e-04
[Epoch: 12][#examples: 16640/81252][#steps: 15500]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 1.272e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 345/513

---------------------------------------
[Epoch: 12][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.244 | Val. A

	 Early Stopping Patience: 99/100
	 Val. Loss: 1.577 | Val. Acc: 0.671 | Val. PPL:   4.840
	 BEST. Val. Loss: 1.300 | BEST. Val. Acc: 0.671 | Val. Loss: 1.577 | BEST. Val. Epoch: 13 | BEST. Val. Step: 17500
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

[Epoch: 13][#examples: 66560/81252][#steps: 17550]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 6.759e-05
[Epoch: 13][#examples: 69760/81252][#steps: 17600]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 6.759e-05
[Epoch: 13][#examples: 72960/81252][#steps: 17650]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 6.759e-05
[Epoch: 13][#examples: 76160/81252][#steps: 17700]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 6.759e-05
[Epoch: 13][#examples: 79360/81252][#steps: 17750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 6.759e-05
[VAL]: The number of correct predictions (main-task (multi)): 344/513
[VAL]: The number of correct predictions (aux-task (multi)): 346/513

---------

[Epoch: 15][#examples: 32000/81252][#steps: 19550]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 35200/81252][#steps: 19600]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 38400/81252][#steps: 19650]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 41600/81252][#steps: 19700]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 44800/81252][#steps: 19750]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 48000/81252][#steps: 19800]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 51200/81252][#steps: 19850]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.592e-05
[Epoch: 15][#examples: 54400/81252][#steps: 19900]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 3.592e-05
[Epoch: 15][#examples: 57600/81252][#steps: 19950]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 3.592e-05
[Epoch: 15][#examples: 60800/81252][#

[Epoch: 17][#examples: 7040/81252][#steps: 21700]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.909e-05
[Epoch: 17][#examples: 10240/81252][#steps: 21750]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.909e-05
[Epoch: 17][#examples: 13440/81252][#steps: 21800]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.909e-05
[Epoch: 17][#examples: 16640/81252][#steps: 21850]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.909e-05
[Epoch: 17][#examples: 19840/81252][#steps: 21900]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 1.909e-05
[Epoch: 17][#examples: 23040/81252][#steps: 21950]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.909e-05
[Epoch: 17][#examples: 26240/81252][#steps: 22000]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.909e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 345/513

---------------------------------------
[Epoch: 17][Validatiing...]
	 Early Stopping Patience: 95

[Epoch: 18][#examples: 76160/81252][#steps: 24050]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.014e-05
[Epoch: 18][#examples: 79360/81252][#steps: 24100]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.014e-05
[VAL]: The number of correct predictions (main-task (multi)): 346/513
[VAL]: The number of correct predictions (aux-task (multi)): 346/513

---------------------------------------
[Epoch: 18][Validatiing...]
	 Early Stopping Patience: 95/100
	 Val. Loss: 1.589 | Val. Acc: 0.674 | Val. PPL:   4.899
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

Epoch: 19 | Time: 2m 15s
	Train Loss: 0.021 | Train PPL:   1.021
	 Val. Loss: 1.589 | Val. Acc: 0.674 | Val. PPL:   4.899
[Train]: Current Teacher Forcing Ratio: 0.230
[Epoch: 19][#examples: 1280/81252][#steps: 24150]
	Train Loss: 0.022 | Train PPL:   1.022 | lr

[Epoch: 20][#examples: 54400/81252][#steps: 26250]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 57600/81252][#steps: 26300]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 60800/81252][#steps: 26350]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 64000/81252][#steps: 26400]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 67200/81252][#steps: 26450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
[Epoch: 20][#examples: 70400/81252][#steps: 26500]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 5.391e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 347/513

---------------------------------------
[Epoch: 20][Validatiing...]
	 Early Stopping Patience: 89/100
	 Val. Loss: 1.591 | Val. Acc: 0.673 | Val. PPL:   4.907
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0

[Epoch: 22][#examples: 32640/81252][#steps: 28450]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 2.865e-06
[Epoch: 22][#examples: 35840/81252][#steps: 28500]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 2.865e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 347/513

---------------------------------------
[Epoch: 22][Validatiing...]
	 Early Stopping Patience: 83/100
	 Val. Loss: 1.594 | Val. Acc: 0.673 | Val. PPL:   4.921
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

[Epoch: 22][#examples: 39040/81252][#steps: 28550]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 2.579e-06
[Epoch: 22][#examples: 42240/81252][#steps: 28600]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 2.579e-06
[Epoch: 22][#examples: 45440/8125

[Epoch: 24][#examples: 4480/81252][#steps: 30550]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 1.370e-06
[Epoch: 24][#examples: 7680/81252][#steps: 30600]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 1.370e-06
[Epoch: 24][#examples: 10880/81252][#steps: 30650]
	Train Loss: 0.023 | Train PPL:   1.024 | lr: 1.370e-06
[Epoch: 24][#examples: 14080/81252][#steps: 30700]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 17280/81252][#steps: 30750]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.370e-06
[Epoch: 24][#examples: 20480/81252][#steps: 30800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 23680/81252][#steps: 30850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 26880/81252][#steps: 30900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 30080/81252][#steps: 30950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.370e-06
[Epoch: 24][#examples: 33280/81252][#st

[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 71/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.917
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 70/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.917
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux:

[Epoch: 27][#examples: 48640/81252][#steps: 35050]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 51840/81252][#steps: 35100]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 55040/81252][#steps: 35150]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 3.870e-07
[Epoch: 27][#examples: 58240/81252][#steps: 35200]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 61440/81252][#steps: 35250]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 64640/81252][#steps: 35300]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 67840/81252][#steps: 35350]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 71040/81252][#steps: 35400]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 74240/81252][#steps: 35450]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 3.870e-07
[Epoch: 27][#examples: 77440/81252][#

[Epoch: 29][#examples: 26880/81252][#steps: 37250]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 2.057e-07
[Epoch: 29][#examples: 30080/81252][#steps: 37300]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 2.057e-07
[Epoch: 29][#examples: 33280/81252][#steps: 37350]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 2.057e-07
[Epoch: 29][#examples: 36480/81252][#steps: 37400]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 2.057e-07
[Epoch: 29][#examples: 39680/81252][#steps: 37450]
	Train Loss: 0.023 | Train PPL:   1.023 | lr: 2.057e-07
[Epoch: 29][#examples: 42880/81252][#steps: 37500]
	Train Loss: 0.022 | Train PPL:   1.023 | lr: 2.057e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 29][Validatiing...]
	 Early Stopping Patience: 58/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.918
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0

[Epoch: 31][#examples: 5120/81252][#steps: 39450]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 1.093e-07
[Epoch: 31][#examples: 8320/81252][#steps: 39500]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 1.093e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 52/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.918
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

[Epoch: 31][#examples: 11520/81252][#steps: 39550]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 31][#examples: 14720/81252][#steps: 39600]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 31][#examples: 17920/81252]

[Epoch: 32][#examples: 67840/81252][#steps: 41700]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 32][#examples: 71040/81252][#steps: 41750]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 32][#examples: 74240/81252][#steps: 41800]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 32][#examples: 77440/81252][#steps: 41850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 32][#examples: 80640/81252][#steps: 41900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 32][Validatiing...]
	 Early Stopping Patience: 46/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.920
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Va

[Epoch: 34][#examples: 46080/81252][#steps: 43900]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 34][#examples: 49280/81252][#steps: 43950]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 34][#examples: 52480/81252][#steps: 44000]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 34][Validatiing...]
	 Early Stopping Patience: 40/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.920
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

[Epoch: 34][#examples: 55680/81252][#steps: 44050]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 34][#examples: 58880/8125

[Epoch: 36][#examples: 21120/81252][#steps: 46050]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 36][#examples: 24320/81252][#steps: 46100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 36][#examples: 27520/81252][#steps: 46150]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 36][#examples: 30720/81252][#steps: 46200]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 36][#examples: 33920/81252][#steps: 46250]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 36][#examples: 37120/81252][#steps: 46300]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 36][#examples: 40320/81252][#steps: 46350]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 36][#examples: 43520/81252][#steps: 46400]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 36][#examples: 46720/81252][#steps: 46450]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 9.838e-08
[Epoch: 36][#examples: 49920/81252][#

[Epoch: 38][#examples: 2560/81252][#steps: 48300]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 38][#examples: 5760/81252][#steps: 48350]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 38][#examples: 8960/81252][#steps: 48400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 38][#examples: 12160/81252][#steps: 48450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 38][#examples: 15360/81252][#steps: 48500]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 27/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.920
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
--------------------------------------

[Epoch: 39][#examples: 65280/81252][#steps: 50550]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 39][#examples: 68480/81252][#steps: 50600]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 39][#examples: 71680/81252][#steps: 50650]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 39][#examples: 74880/81252][#steps: 50700]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 39][#examples: 78080/81252][#steps: 50750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 39][#examples: 81280/81252][#steps: 50800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 39][Validatiing...]
	 Early Stopping Patience: 21/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.919
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Los

[Epoch: 41][#examples: 43520/81252][#steps: 52750]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 41][#examples: 46720/81252][#steps: 52800]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 41][#examples: 49920/81252][#steps: 52850]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 41][#examples: 53120/81252][#steps: 52900]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 41][#examples: 56320/81252][#steps: 52950]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 41][#examples: 59520/81252][#steps: 53000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 41][Validatiing...]
	 Early Stopping Patience: 15/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.919
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0

[Epoch: 43][#examples: 21760/81252][#steps: 54950]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 43][#examples: 24960/81252][#steps: 55000]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 43][Validatiing...]
	 Early Stopping Patience: 9/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.919
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

[Epoch: 43][#examples: 28160/81252][#steps: 55050]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 43][#examples: 31360/81252][#steps: 55100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 43][#examples: 34560/81252

[VAL]: The number of correct predictions (main-task (multi)): 345/513
[VAL]: The number of correct predictions (aux-task (multi)): 348/513

---------------------------------------
[Epoch: 44][Validatiing...]
	 Early Stopping Patience: 3/100
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.919
	 BEST. Val. Loss: 1.586 | BEST. Val. Acc: 0.676 | Val. Loss: 1.588 | BEST. Val. Epoch: 15 | BEST. Val. Step: 20000
---------------------------------------

	BEST. Val. Acc Aux: 0.688
---------------------------------------

Epoch: 45 | Time: 2m 15s
	Train Loss: 0.021 | Train PPL:   1.022
	 Val. Loss: 1.593 | Val. Acc: 0.673 | Val. PPL:   4.919
[Train]: Current Teacher Forcing Ratio: 0.200
[Epoch: 45][#examples: 3200/81252][#steps: 57200]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 45][#examples: 6400/81252][#steps: 57250]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 9.838e-08
[Epoch: 45][#examples: 9600/81252][#steps: 57300]
	Train Loss: 0.020 | Train PPL:   1.021 | lr: 9

[Epoch: 0][#examples: 19200/81252][#steps: 300]
	Train Loss: 2.748 | Train PPL:  15.605 | lr: 3.000e-03
[Epoch: 0][#examples: 22400/81252][#steps: 350]
	Train Loss: 2.485 | Train PPL:  12.004 | lr: 3.000e-03
[Epoch: 0][#examples: 25600/81252][#steps: 400]
	Train Loss: 2.274 | Train PPL:   9.714 | lr: 3.000e-03
[Epoch: 0][#examples: 28800/81252][#steps: 450]
	Train Loss: 2.100 | Train PPL:   8.169 | lr: 3.000e-03
[Epoch: 0][#examples: 32000/81252][#steps: 500]
	Train Loss: 1.956 | Train PPL:   7.069 | lr: 3.000e-03
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 277/513
[VAL]: The number of correct predictions (aux-task (multi)): 189/513

---------------------------------------
[Epoch: 0][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.279 | Val. Acc: 0.540 | Val. PPL:   3.592
	 BEST. Val. Loss: 1.279 | BEST. Val. Acc: 0.540 | Val. Loss: 1.279 | BEST. Val. Epoch: 0

[VAL]: The number of correct predictions (main-task (multi)): 318/513
[VAL]: The number of correct predictions (aux-task (multi)): 309/513

---------------------------------------
[Epoch: 1][Validatiing...]
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 99/100
	 Val. Loss: 1.606 | Val. Acc: 0.620 | Val. PPL:   4.981
	 BEST. Val. Loss: 1.274 | BEST. Val. Acc: 0.620 | Val. Loss: 1.606 | BEST. Val. Epoch: 1 | BEST. Val. Step: 2540
---------------------------------------

	BEST. Val. Acc Aux: 0.604
---------------------------------------

Epoch: 02 | Time: 2m 15s
	Train Loss: 0.342 | Train PPL:   1.408
	 Val. Loss: 1.606 | Val. Acc: 0.620 | Val. PPL:   4.981
Renew Evaluation Records in the Burning Phase...
[Train]: Current Teacher Forcing Ratio: 0.740
[Epoch: 2][#examples: 640/81252][#steps: 2550]
	Train Loss: 0.236 | Train PPL:   1.267 | lr: 2.187e-03
[Epoch: 2][#examples: 3840/81252][#steps: 2600]
	Train Loss: 0.231 | Train PPL:   1.259 | lr: 2.187e-03
[Epoch: 2][#examp

[Epoch: 3][#examples: 47360/81252][#steps: 4550]
	Train Loss: 0.142 | Train PPL:   1.153 | lr: 1.594e-03
[Epoch: 3][#examples: 50560/81252][#steps: 4600]
	Train Loss: 0.143 | Train PPL:   1.154 | lr: 1.594e-03
[Epoch: 3][#examples: 53760/81252][#steps: 4650]
	Train Loss: 0.143 | Train PPL:   1.154 | lr: 1.594e-03
[Epoch: 3][#examples: 56960/81252][#steps: 4700]
	Train Loss: 0.143 | Train PPL:   1.154 | lr: 1.594e-03
[Epoch: 3][#examples: 60160/81252][#steps: 4750]
	Train Loss: 0.143 | Train PPL:   1.153 | lr: 1.594e-03
[Epoch: 3][#examples: 63360/81252][#steps: 4800]
	Train Loss: 0.142 | Train PPL:   1.153 | lr: 1.594e-03
[Epoch: 3][#examples: 66560/81252][#steps: 4850]
	Train Loss: 0.142 | Train PPL:   1.153 | lr: 1.594e-03
[Epoch: 3][#examples: 69760/81252][#steps: 4900]
	Train Loss: 0.143 | Train PPL:   1.153 | lr: 1.594e-03
[Epoch: 3][#examples: 72960/81252][#steps: 4950]
	Train Loss: 0.143 | Train PPL:   1.153 | lr: 1.594e-03
[Epoch: 3][#examples: 76160/81252][#steps: 5000]
	Train

[Epoch: 5][#examples: 12800/81252][#steps: 6550]
	Train Loss: 0.056 | Train PPL:   1.058 | lr: 9.414e-04
[Epoch: 5][#examples: 16000/81252][#steps: 6600]
	Train Loss: 0.058 | Train PPL:   1.059 | lr: 9.414e-04
[Epoch: 5][#examples: 19200/81252][#steps: 6650]
	Train Loss: 0.056 | Train PPL:   1.058 | lr: 9.414e-04
[Epoch: 5][#examples: 22400/81252][#steps: 6700]
	Train Loss: 0.056 | Train PPL:   1.057 | lr: 9.414e-04
[Epoch: 5][#examples: 25600/81252][#steps: 6750]
	Train Loss: 0.056 | Train PPL:   1.058 | lr: 9.414e-04
[Epoch: 5][#examples: 28800/81252][#steps: 6800]
	Train Loss: 0.056 | Train PPL:   1.058 | lr: 9.414e-04
[Epoch: 5][#examples: 32000/81252][#steps: 6850]
	Train Loss: 0.056 | Train PPL:   1.058 | lr: 9.414e-04
[Epoch: 5][#examples: 35200/81252][#steps: 6900]
	Train Loss: 0.056 | Train PPL:   1.058 | lr: 9.414e-04
[Epoch: 5][#examples: 38400/81252][#steps: 6950]
	Train Loss: 0.056 | Train PPL:   1.058 | lr: 9.414e-04
[Epoch: 5][#examples: 41600/81252][#steps: 7000]
	Train

[Epoch: 7][#examples: 640/81252][#steps: 8900]
	Train Loss: 0.040 | Train PPL:   1.041 | lr: 5.559e-04
[Epoch: 7][#examples: 3840/81252][#steps: 8950]
	Train Loss: 0.038 | Train PPL:   1.038 | lr: 5.559e-04
[Epoch: 7][#examples: 7040/81252][#steps: 9000]
	Train Loss: 0.034 | Train PPL:   1.034 | lr: 5.559e-04
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 327/513
[VAL]: The number of correct predictions (aux-task (multi)): 339/513

---------------------------------------
[Epoch: 7][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.767 | Val. Acc: 0.637 | Val. PPL:   5.852
	 BEST. Val. Loss: 1.767 | BEST. Val. Acc: 0.637 | Val. Loss: 1.767 | BEST. Val. Epoch: 7 | BEST. Val. Step: 9000
---------------------------------------

	BEST. Val. Acc Aux: 0.684
---------------------------------------

[Epoch: 7][#examples: 10240/81252][#steps: 9050]
	Train Loss: 0.034 | Train

[Epoch: 8][#examples: 56960/81252][#steps: 11050]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 2.954e-04
[Epoch: 8][#examples: 60160/81252][#steps: 11100]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 2.954e-04
[Epoch: 8][#examples: 63360/81252][#steps: 11150]
	Train Loss: 0.029 | Train PPL:   1.030 | lr: 2.954e-04
[Epoch: 8][#examples: 66560/81252][#steps: 11200]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 2.954e-04
[Epoch: 8][#examples: 69760/81252][#steps: 11250]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 2.954e-04
[Epoch: 8][#examples: 72960/81252][#steps: 11300]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 2.954e-04
[Epoch: 8][#examples: 76160/81252][#steps: 11350]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 2.954e-04
[Epoch: 8][#examples: 79360/81252][#steps: 11400]
	Train Loss: 0.030 | Train PPL:   1.030 | lr: 2.954e-04
[VAL]: The number of correct predictions (main-task (multi)): 338/513
[VAL]: The number of correct predictions (aux-task (multi)): 345/513

------------

[Epoch: 10][#examples: 25600/81252][#steps: 13100]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.570e-04
[Epoch: 10][#examples: 28800/81252][#steps: 13150]
	Train Loss: 0.022 | Train PPL:   1.022 | lr: 1.570e-04
[Epoch: 10][#examples: 32000/81252][#steps: 13200]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 1.570e-04
[Epoch: 10][#examples: 35200/81252][#steps: 13250]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.570e-04
[Epoch: 10][#examples: 38400/81252][#steps: 13300]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.570e-04
[Epoch: 10][#examples: 41600/81252][#steps: 13350]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 1.570e-04
[Epoch: 10][#examples: 44800/81252][#steps: 13400]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 1.570e-04
[Epoch: 10][#examples: 48000/81252][#steps: 13450]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 1.570e-04
[Epoch: 10][#examples: 51200/81252][#steps: 13500]
	Train Loss: 0.021 | Train PPL:   1.022 | lr: 1.570e-04
-----Val------
[VAL]: The number of c

[Epoch: 12][#examples: 640/81252][#steps: 15250]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 8.344e-05
[Epoch: 12][#examples: 3840/81252][#steps: 15300]
	Train Loss: 0.021 | Train PPL:   1.021 | lr: 8.344e-05
[Epoch: 12][#examples: 7040/81252][#steps: 15350]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 8.344e-05
[Epoch: 12][#examples: 10240/81252][#steps: 15400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 8.344e-05
[Epoch: 12][#examples: 13440/81252][#steps: 15450]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 8.344e-05
[Epoch: 12][#examples: 16640/81252][#steps: 15500]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 8.344e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 337/513
[VAL]: The number of correct predictions (aux-task (multi)): 338/513

---------------------------------------
[Epoch: 12][Validatiing...]
		 Better Valid Loss! (at least equal)
		 Better Valid Acc! (at least equal)
	 Early Stopping Patience: 100/100
	 Val. Loss: 1.647 | Val. A

	 Early Stopping Patience: 99/100
	 Val. Loss: 1.666 | Val. Acc: 0.663 | Val. PPL:   5.291
	 BEST. Val. Loss: 1.657 | BEST. Val. Acc: 0.663 | Val. Loss: 1.666 | BEST. Val. Epoch: 13 | BEST. Val. Step: 17500
---------------------------------------

	BEST. Val. Acc Aux: 0.684
---------------------------------------

[Epoch: 13][#examples: 66560/81252][#steps: 17550]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 4.434e-05
[Epoch: 13][#examples: 69760/81252][#steps: 17600]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 4.434e-05
[Epoch: 13][#examples: 72960/81252][#steps: 17650]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 4.434e-05
[Epoch: 13][#examples: 76160/81252][#steps: 17700]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 4.434e-05
[Epoch: 13][#examples: 79360/81252][#steps: 17750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 4.434e-05
[VAL]: The number of correct predictions (main-task (multi)): 339/513
[VAL]: The number of correct predictions (aux-task (multi)): 339/513

---------

[Epoch: 15][#examples: 32000/81252][#steps: 19550]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 2.357e-05
[Epoch: 15][#examples: 35200/81252][#steps: 19600]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 2.357e-05
[Epoch: 15][#examples: 38400/81252][#steps: 19650]
	Train Loss: 0.017 | Train PPL:   1.018 | lr: 2.357e-05
[Epoch: 15][#examples: 41600/81252][#steps: 19700]
	Train Loss: 0.017 | Train PPL:   1.018 | lr: 2.357e-05
[Epoch: 15][#examples: 44800/81252][#steps: 19750]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 2.357e-05
[Epoch: 15][#examples: 48000/81252][#steps: 19800]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 2.357e-05
[Epoch: 15][#examples: 51200/81252][#steps: 19850]
	Train Loss: 0.017 | Train PPL:   1.018 | lr: 2.357e-05
[Epoch: 15][#examples: 54400/81252][#steps: 19900]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.357e-05
[Epoch: 15][#examples: 57600/81252][#steps: 19950]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.357e-05
[Epoch: 15][#examples: 60800/81252][#

[Epoch: 17][#examples: 3840/81252][#steps: 21650]
	Train Loss: 0.016 | Train PPL:   1.016 | lr: 1.252e-05
[Epoch: 17][#examples: 7040/81252][#steps: 21700]
	Train Loss: 0.014 | Train PPL:   1.014 | lr: 1.252e-05
[Epoch: 17][#examples: 10240/81252][#steps: 21750]
	Train Loss: 0.014 | Train PPL:   1.014 | lr: 1.252e-05
[Epoch: 17][#examples: 13440/81252][#steps: 21800]
	Train Loss: 0.015 | Train PPL:   1.015 | lr: 1.252e-05
[Epoch: 17][#examples: 16640/81252][#steps: 21850]
	Train Loss: 0.016 | Train PPL:   1.016 | lr: 1.252e-05
[Epoch: 17][#examples: 19840/81252][#steps: 21900]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 1.252e-05
[Epoch: 17][#examples: 23040/81252][#steps: 21950]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 1.252e-05
[Epoch: 17][#examples: 26240/81252][#steps: 22000]
	Train Loss: 0.017 | Train PPL:   1.018 | lr: 1.252e-05
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 

[Epoch: 18][#examples: 76160/81252][#steps: 24050]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 6.656e-06
[Epoch: 18][#examples: 79360/81252][#steps: 24100]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 6.656e-06
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 18][Validatiing...]
	 Early Stopping Patience: 87/100
	 Val. Loss: 1.696 | Val. Acc: 0.663 | Val. PPL:   5.451
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Loss: 1.689 | BEST. Val. Epoch: 17 | BEST. Val. Step: 22500
---------------------------------------

	BEST. Val. Acc Aux: 0.684
---------------------------------------

Epoch: 19 | Time: 2m 16s
	Train Loss: 0.018 | Train PPL:   1.018
	 Val. Loss: 1.696 | Val. Acc: 0.663 | Val. PPL:   5.451
[Train]: Current Teacher Forcing Ratio: 0.230
[Epoch: 19][#examples: 1280/81252][#steps: 24150]
	Train Loss: 0.024 | Train PPL:   1.024 | lr

[Epoch: 20][#examples: 54400/81252][#steps: 26250]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.537e-06
[Epoch: 20][#examples: 57600/81252][#steps: 26300]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.537e-06
[Epoch: 20][#examples: 60800/81252][#steps: 26350]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 3.537e-06
[Epoch: 20][#examples: 64000/81252][#steps: 26400]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 3.537e-06
[Epoch: 20][#examples: 67200/81252][#steps: 26450]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 3.537e-06
[Epoch: 20][#examples: 70400/81252][#steps: 26500]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 3.537e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 20][Validatiing...]
	 Early Stopping Patience: 81/100
	 Val. Loss: 1.696 | Val. Acc: 0.663 | Val. PPL:   5.454
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0

[Epoch: 22][#examples: 32640/81252][#steps: 28450]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.880e-06
[Epoch: 22][#examples: 35840/81252][#steps: 28500]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.880e-06
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 340/513

---------------------------------------
[Epoch: 22][Validatiing...]
	 Early Stopping Patience: 75/100
	 Val. Loss: 1.697 | Val. Acc: 0.663 | Val. PPL:   5.460
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Loss: 1.689 | BEST. Val. Epoch: 17 | BEST. Val. Step: 22500
---------------------------------------

	BEST. Val. Acc Aux: 0.684
---------------------------------------

[Epoch: 22][#examples: 39040/81252][#steps: 28550]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 1.692e-06
[Epoch: 22][#examples: 42240/81252][#steps: 28600]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.692e-06
[Epoch: 22][#examples: 45440/8125

[Epoch: 24][#examples: 4480/81252][#steps: 30550]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 8.991e-07
[Epoch: 24][#examples: 7680/81252][#steps: 30600]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 8.991e-07
[Epoch: 24][#examples: 10880/81252][#steps: 30650]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 8.991e-07
[Epoch: 24][#examples: 14080/81252][#steps: 30700]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 8.991e-07
[Epoch: 24][#examples: 17280/81252][#steps: 30750]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 8.991e-07
[Epoch: 24][#examples: 20480/81252][#steps: 30800]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 8.991e-07
[Epoch: 24][#examples: 23680/81252][#steps: 30850]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 8.991e-07
[Epoch: 24][#examples: 26880/81252][#steps: 30900]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 8.991e-07
[Epoch: 24][#examples: 30080/81252][#steps: 30950]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 8.991e-07
[Epoch: 24][#examples: 33280/81252][#st

[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 63/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.461
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Loss: 1.689 | BEST. Val. Epoch: 17 | BEST. Val. Step: 22500
---------------------------------------

	BEST. Val. Acc Aux: 0.684
---------------------------------------

[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 25][Validatiing...]
	 Early Stopping Patience: 62/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.461
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Loss: 1.689 | BEST. Val. Epoch: 17 | BEST. Val. Step: 22500
---------------------------------------

	BEST. Val. Acc Aux:

[Epoch: 27][#examples: 48640/81252][#steps: 35050]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.539e-07
[Epoch: 27][#examples: 51840/81252][#steps: 35100]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.539e-07
[Epoch: 27][#examples: 55040/81252][#steps: 35150]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.539e-07
[Epoch: 27][#examples: 58240/81252][#steps: 35200]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.539e-07
[Epoch: 27][#examples: 61440/81252][#steps: 35250]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.539e-07
[Epoch: 27][#examples: 64640/81252][#steps: 35300]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.539e-07
[Epoch: 27][#examples: 67840/81252][#steps: 35350]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.539e-07
[Epoch: 27][#examples: 71040/81252][#steps: 35400]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.539e-07
[Epoch: 27][#examples: 74240/81252][#steps: 35450]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 2.539e-07
[Epoch: 27][#examples: 77440/81252][#

[Epoch: 29][#examples: 26880/81252][#steps: 37250]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 1.349e-07
[Epoch: 29][#examples: 30080/81252][#steps: 37300]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.349e-07
[Epoch: 29][#examples: 33280/81252][#steps: 37350]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.349e-07
[Epoch: 29][#examples: 36480/81252][#steps: 37400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.349e-07
[Epoch: 29][#examples: 39680/81252][#steps: 37450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.349e-07
[Epoch: 29][#examples: 42880/81252][#steps: 37500]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 1.349e-07
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 29][Validatiing...]
	 Early Stopping Patience: 50/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.462
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0

[Epoch: 31][#examples: 5120/81252][#steps: 39450]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 31][#examples: 8320/81252][#steps: 39500]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 31][Validatiing...]
	 Early Stopping Patience: 44/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.462
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Loss: 1.689 | BEST. Val. Epoch: 17 | BEST. Val. Step: 22500
---------------------------------------

	BEST. Val. Acc Aux: 0.684
---------------------------------------

[Epoch: 31][#examples: 11520/81252][#steps: 39550]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 31][#examples: 14720/81252][#steps: 39600]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 31][#examples: 17920/81252]

[Epoch: 32][#examples: 67840/81252][#steps: 41700]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 32][#examples: 71040/81252][#steps: 41750]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 32][#examples: 74240/81252][#steps: 41800]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 32][#examples: 77440/81252][#steps: 41850]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 32][#examples: 80640/81252][#steps: 41900]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 32][Validatiing...]
	 Early Stopping Patience: 38/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.462
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Loss: 1.689 | BEST. Val. Epoch: 17 | BEST. Val. Step: 22500
---------------------------------------

	BEST. Va

[Epoch: 34][#examples: 46080/81252][#steps: 43900]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 34][#examples: 49280/81252][#steps: 43950]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 34][#examples: 52480/81252][#steps: 44000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 34][Validatiing...]
	 Early Stopping Patience: 32/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.462
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Loss: 1.689 | BEST. Val. Epoch: 17 | BEST. Val. Step: 22500
---------------------------------------

	BEST. Val. Acc Aux: 0.684
---------------------------------------

[Epoch: 34][#examples: 55680/81252][#steps: 44050]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 34][#examples: 58880/8125

[Epoch: 36][#examples: 21120/81252][#steps: 46050]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 36][#examples: 24320/81252][#steps: 46100]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 36][#examples: 27520/81252][#steps: 46150]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 36][#examples: 30720/81252][#steps: 46200]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 36][#examples: 33920/81252][#steps: 46250]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 36][#examples: 37120/81252][#steps: 46300]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 36][#examples: 40320/81252][#steps: 46350]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 36][#examples: 43520/81252][#steps: 46400]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 36][#examples: 46720/81252][#steps: 46450]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 36][#examples: 49920/81252][#

[Epoch: 38][#examples: 2560/81252][#steps: 48300]
	Train Loss: 0.017 | Train PPL:   1.017 | lr: 9.838e-08
[Epoch: 38][#examples: 5760/81252][#steps: 48350]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 38][#examples: 8960/81252][#steps: 48400]
	Train Loss: 0.020 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 38][#examples: 12160/81252][#steps: 48450]
	Train Loss: 0.019 | Train PPL:   1.020 | lr: 9.838e-08
[Epoch: 38][#examples: 15360/81252][#steps: 48500]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 38][Validatiing...]
	 Early Stopping Patience: 19/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.463
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Loss: 1.689 | BEST. Val. Epoch: 17 | BEST. Val. Step: 22500
--------------------------------------

[Epoch: 39][#examples: 65280/81252][#steps: 50550]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 39][#examples: 68480/81252][#steps: 50600]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 39][#examples: 71680/81252][#steps: 50650]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 39][#examples: 74880/81252][#steps: 50700]
	Train Loss: 0.018 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 39][#examples: 78080/81252][#steps: 50750]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 39][#examples: 81280/81252][#steps: 50800]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 39][Validatiing...]
	 Early Stopping Patience: 13/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.463
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Los

[Epoch: 41][#examples: 43520/81252][#steps: 52750]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 41][#examples: 46720/81252][#steps: 52800]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 41][#examples: 49920/81252][#steps: 52850]
	Train Loss: 0.018 | Train PPL:   1.018 | lr: 9.838e-08
[Epoch: 41][#examples: 53120/81252][#steps: 52900]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 41][#examples: 56320/81252][#steps: 52950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 41][#examples: 59520/81252][#steps: 53000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 41][Validatiing...]
	 Early Stopping Patience: 7/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.463
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.

[Epoch: 43][#examples: 21760/81252][#steps: 54950]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 43][#examples: 24960/81252][#steps: 55000]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
-----Val------
[VAL]: The number of correct predictions (main-task (multi)): 340/513
[VAL]: The number of correct predictions (aux-task (multi)): 341/513

---------------------------------------
[Epoch: 43][Validatiing...]
	 Early Stopping Patience: 1/100
	 Val. Loss: 1.698 | Val. Acc: 0.663 | Val. PPL:   5.463
	 BEST. Val. Loss: 1.680 | BEST. Val. Acc: 0.665 | Val. Loss: 1.689 | BEST. Val. Epoch: 17 | BEST. Val. Step: 22500
---------------------------------------

	BEST. Val. Acc Aux: 0.684
---------------------------------------

[Epoch: 43][#examples: 28160/81252][#steps: 55050]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 43][#examples: 31360/81252][#steps: 55100]
	Train Loss: 0.019 | Train PPL:   1.019 | lr: 9.838e-08
[Epoch: 43][#examples: 34560/81252