In [1]:
from mnmt.encoder import BasicEncoder
from mnmt.decoder import BasicFeedForwardDecoder
from mnmt.decoder import GreedyDecoder
from mnmt.attention import AdditiveAttention
from mnmt.decoder import BridgeLayer
from mnmt.model import Seq2Seq
from mnmt.datasets import *
from mnmt.inputter import ArgsFeeder
from mnmt.inputter import ModuleArgsFeeder
from mnmt.trainer.utils import *
from mnmt.trainer import Trainer
import sys

Loading DICT dataset...
Loading the Dataset into the container...
Field names: ['en', 'ch', 'pinyin_str', 'pinyin_char']
Data sizes: [(Train, 46620), (Valid, 5828), (Test, 5828)]
The first example of the training data is:
en :  ['k', 'a', 'r', 't', 'm', 'a', 'n']
ch :  ['卡', '特', '曼']
pinyin_str :  ['ka', 'te', 'man']
pinyin_char :  ['k', 'a', 't', 'e', 'm', 'a', 'n']
Loading NEWS dataset...
Loading the Dataset into the container...
Field names: ['en', 'ch', 'pinyin_str', 'pinyin_char']
Data sizes: [(Train, 81252), (Valid, 513), (Test, 1000)]
The first example of the training data is:
en :  ['k', 'u', 's', 'i', 'c', 'k']
ch :  ['库', '西', '克']
pinyin_str :  ['ku', 'xi', 'ke']
pinyin_char :  ['k', 'u', 'x', 'i', 'k', 'e']


In [2]:
def set_up_args(data_container, exp_num):
    """Build vocabularies and assemble the ``ArgsFeeder`` for an en -> ch Seq2Seq run.

    Args:
        data_container: Dataset container whose ``fields`` attribute iterates as
            ``(name, field)`` pairs. It must contain the ``'en'`` (source) and
            ``'ch'`` (target) fields.
            NOTE(review): the fields look like torchtext-style ``Field`` objects
            (``vocab`` / ``pad_token``) -- confirm against ``mnmt.datasets``.
        exp_num: Experiment number; forwarded to ``ArgsFeeder`` and used to build
            the ``experiments/exp{exp_num}/`` output paths.

    Returns:
        The ``ArgsFeeder`` holding one encoder and one decoder ``ModuleArgsFeeder``
        together with the training settings for this experiment.

    Raises:
        ValueError: if the source or target field is missing from
            ``data_container.fields``.
    """
    # Language names are defined once so the field lookup below and the
    # ArgsFeeder arguments cannot drift apart.
    src_lang, trg_lang = 'en', 'ch'

    # Vocabularies must exist before their sizes and pad indices can be read.
    build_vocabs(data_container, dict_min_freqs={'en': 1, 'ch': 1, 'pinyin_str': 1, 'pinyin_char': 1})

    fields_by_name = dict(data_container.fields)
    missing = [lang for lang in (src_lang, trg_lang) if lang not in fields_by_name]
    if missing:
        # Fail here with a readable message instead of an UnboundLocalError further down.
        raise ValueError(f"data_container is missing required field(s): {missing}")

    src_field = fields_by_name[src_lang]
    trg_field = fields_by_name[trg_lang]

    input_dim = len(src_field.vocab)
    output_dim = len(trg_field.vocab)
    # Use the same explicit `.stoi` lookup for both sides.
    src_pad_idx = src_field.vocab.stoi[src_field.pad_token]
    trg_pad_idx = trg_field.vocab.stoi[trg_field.pad_token]

    enc_args_feeder = ModuleArgsFeeder(input_dim=input_dim, embedding_dim=256, hidden_dim=512,
                                       embedding_dropout=0.1, rnn_type='LSTM',
                                       num_layers=2, rnn_dropout=0.2)
    dec_args_feeder = ModuleArgsFeeder(input_dim=output_dim, embedding_dim=256, hidden_dim=512,
                                       embedding_dropout=0.1, rnn_type='LSTM',
                                       num_layers=2, rnn_dropout=0.2)
    return ArgsFeeder(enc_args_feeder, [dec_args_feeder],
                      batch_size=64, src_pad_idx=src_pad_idx, trg_pad_idx=trg_pad_idx,
                      optim_choice='Adam', learning_rate=0.003, decay_patience=0,
                      lr_decay_factor=0.9, valid_criterion='ACC', early_stopping_patience=100,
                      total_epochs=100, report_interval=50, exp_num=exp_num, multi_task_ratio=1,
                      data_container=data_container,
                      src_lang=src_lang, trg_lang=trg_lang, auxiliary_name='pinyin_str', quiet_translate=True,
                      valid_out_path=f"experiments/exp{exp_num}/valid.out",
                      test_out_path=f"experiments/exp{exp_num}/test.out")


def test_seq2seq(args_feeder):
    """Assemble the attention-based Seq2Seq model described by ``args_feeder``.

    The encoder, additive attention, feed-forward decoder, bridge layer and
    greedy decoder are created in that order and wrapped in a ``Seq2Seq``
    that is moved to ``args_feeder.device``.

    Args:
        args_feeder: Experiment configuration; its encoder args feeder, first
            decoder args feeder and ``device`` are read here.

    Returns:
        The ``Seq2Seq`` model placed on ``args_feeder.device``.
    """
    dec_cfg = args_feeder.decoder_args_feeders[0]

    # Keep the construction order fixed: each module is created in turn, so
    # reordering could change seeded parameter initialisation.
    encoder = BasicEncoder(args_feeder)
    enc_cfg = args_feeder.encoder_args_feeder

    attention = AdditiveAttention(encoder_hidden_dim=enc_cfg.hidden_dim,
                                  decoder_hidden_dim=dec_cfg.hidden_dim)
    step_decoder = BasicFeedForwardDecoder(args_feeder, attention, decoder_index=0)

    bridge = BridgeLayer(encoder_hidden_dim=enc_cfg.hidden_dim,
                         decoder_hidden_dim=dec_cfg.hidden_dim,
                         num_of_states=2)

    greedy = GreedyDecoder(step_decoder, bridge, device=args_feeder.device)
    seq2seq = Seq2Seq(args_feeder, encoder, greedy, teacher_forcing_ratio=0.8)
    return seq2seq.to(args_feeder.device)


# Script-style entry point for this cell: one Seq2Seq experiment per dataset,
# executed back to back (DICT as exp 666, then NEWS as exp 667).
if __name__ == '__main__':
    # Seed once up front; both experiments below run sequentially after this call.
    # NOTE(review): presumably seeds every RNG used for init/training -- confirm
    # in mnmt.trainer.utils.
    set_reproducibility(seed=1234)
    dict_dataset = DICT['data_container']
    news_dataset = NEWS['data_container']
    
    try:
        # DICT: build the args, model and trainer, train from scratch
        # (burning_epoch=0), then call best_model_output().
        seq2seq_args_feeder = set_up_args(dict_dataset, exp_num=666)
        test_model = test_seq2seq(seq2seq_args_feeder)
        test_trainer = Trainer(seq2seq_args_feeder, test_model)
        test_trainer.run(burning_epoch=0)
        test_trainer.best_model_output()

        # NEWS: same setup under exp 667. The names seq2seq_args_feeder,
        # test_model and test_trainer are rebound, so after this cell they refer
        # to the NEWS experiment only.
        seq2seq_args_feeder = set_up_args(news_dataset, exp_num=667)
        test_model = test_seq2seq(seq2seq_args_feeder)
        test_trainer = Trainer(seq2seq_args_feeder, test_model)
        # Training is disabled for NEWS in this cell; only best_model_output runs.
        # NOTE(review): presumably relies on a best model already saved for
        # exp 667 by an earlier run -- confirm before a fresh Restart & Run All.
        # test_trainer.run(burning_epoch=10)
        test_trainer.best_model_output(test_ref_dict=NEWS['test-set-dict'])
    except KeyboardInterrupt:
        # A manual interrupt ends the cell with a short message instead of a traceback.
        print("Exiting loop")

The current device for PyTorch is cuda
Seq2Seq(
  (encoder): BasicEncoder(
    (embedding): Sequential(
      (0): Embedding(33, 256)
      (1): Dropout(p=0.1, inplace=False)
    )
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.2, bidirectional=True)
  )
  (decoder): GreedyDecoder(
    (feed_forward_decoder): BasicFeedForwardDecoder(
      (attention): AdditiveAttention(
        (additive_mapping): Linear(in_features=1536, out_features=512, bias=True)
        (v): Linear(in_features=512, out_features=1, bias=False)
      )
      (embedding): Sequential(
        (0): Embedding(631, 256)
        (1): Dropout(p=0.1, inplace=False)
      )
      (rnn): LSTM(1280, 512, num_layers=2, dropout=0.2)
      (prediction): Sequential(
        (0): Linear(in_features=1792, out_features=631, bias=True)
        (1): LogSoftmax()
      )
    )
    (bridge_layer): BridgeLayer(
      (bridge_layer): ModuleList(
        (0): Linear(in_features=1024, out_features=512, bias=True)
        (1): Linear(in_f