# Config

In [1]:
# Execute Config.py so that the names it defines become available in this notebook.
# Later cells use `config`, which is presumably created there (not visible here) — TODO confirm.
%run Config.py

# Data

In [2]:
# Run OpenNMT-py's preprocess.py on the Cornell movie-dialogs source/target line files,
# saving under the ../data/cornell_raw prefix. `!!` captures the command's output as a
# list of lines, which is what the cell displays.
# NOTE(review): the recorded output asks to back up existing .pt files, so this run
# appears not to have regenerated them — confirm before assuming the data is fresh.
!!python ../OpenNMT-py/preprocess.py --train_src "../data/cornell movie-dialogs corpus/src_movie_lines.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines.txt" --save_data ../data/cornell_raw

['Please backup existing pt files: ../data/cornell_raw.train*.pt, to avoid overwriting them!']

In [3]:
# Same preprocessing as above, saved under the ../data/cornell_raw_min_30_10 prefix, with
# a minimum word frequency of 30 and a sequence length of 10 passed for both source and
# target (per the option names). This is the dataset the rest of the notebook loads.
# NOTE(review): the recorded output asks to back up existing .pt files, so this run
# appears not to have regenerated them — confirm before assuming the data is fresh.
!!python ../OpenNMT-py/preprocess.py --train_src "../data/cornell movie-dialogs corpus/src_movie_lines.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines.txt" --save_data ../data/cornell_raw_min_30_10 --src_words_min_frequency 30 --tgt_words_min_frequency 30 --src_seq_length 10 --tgt_seq_length 10

['Please backup existing pt files: ../data/cornell_raw_min_30_10.train*.pt, to avoid overwriting them!']

In [4]:
# Load the vocabulary fields saved under the cornell_raw_min_30_10 prefix.
# NOTE(review): torch.load unpickles the file, so only load .pt files from a trusted source.
vocab_fields = torch.load("../data/cornell_raw_min_30_10.vocab.pt")

In [5]:
# Pull the underlying text field for each side out of the loaded vocab fields.
src_text_field, tgt_text_field = (
    vocab_fields[side].base_field for side in ("src", "tgt")
)
src_vocab, tgt_vocab = src_text_field.vocab, tgt_text_field.vocab

# stoi maps token strings to numerical identifiers; use it to find each side's padding id.
# Example from this data: the first source vocab entries are
# ['<unk>', '<blank>', 'I', 'you', 'the', 'to', 'a', 'of', 'and', 'You']
# and src_text_field.pad_token is '<blank>'.
src_padding = src_vocab.stoi[src_text_field.pad_token]
tgt_padding = tgt_vocab.stoi[tgt_text_field.pad_token]

In [6]:
# Record the vocabulary sizes and padding ids for both sides on the config object.
config.src_vocab_size, config.tgt_vocab_size = len(src_vocab), len(tgt_vocab)
config.src_padding, config.tgt_padding = src_padding, tgt_padding

In [7]:
# Store the target-vocabulary ids of the field's unk, init and eos tokens on the config.
config.tgt_unk, config.tgt_bos, config.tgt_eos = (
    tgt_vocab.stoi[getattr(tgt_text_field, token_attr)]
    for token_attr in ("unk_token", "init_token", "eos_token")
)

# seq2seq-DQN

In [8]:
# Execute modules/NoisyLinear.py so its definitions are available in this notebook.
%run modules/NoisyLinear.py

In [9]:
# Execute modules/DQN.py; presumably this is where `DQN` (used below) is defined — TODO confirm.
%run modules/DQN.py

In [10]:
# Execute modules/Model.py; presumably this is where `Model` (used below) is defined — TODO confirm.
%run modules/Model.py

In [11]:
# Build the model wrapper from the config, passing the DQN class itself (not an instance).
# Later cells read model.current_model and model.replay_memory from it.
model = Model(config, DQN)

In [12]:
# Display the module structure of the current model (the repr shown below).
model.current_model

DQN(
  (encoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(1462, 100, padding_idx=1)
      )
    )
  )
  (encoder): RNNEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(1462, 100, padding_idx=1)
        )
      )
    )
    (rnn): LSTM(100, 250, bidirectional=True)
  )
  (decoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(1472, 100, padding_idx=1)
      )
    )
  )
  (decoder): InputFeedRNNDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(1472, 100, padding_idx=1)
        )
      )
    )
    (dropout): Dropout(p=0.0)
    (rnn): StackedLSTM(
      (dropout): Dropout(p=0.0)
      (layers): ModuleList(
        (0): LSTMCell(600, 500)
      )
    )
    (attn): GlobalAttention(
      (linear

In [13]:
# Execute modules/MSELoss.py; presumably this is where `MSELoss` (used below) is defined — TODO confirm.
%run modules/MSELoss.py

In [14]:
# Pair an unreduced MSE criterion (reduction="none") with the current model's generator.
elementwise_mse = nn.MSELoss(reduction="none")
loss = MSELoss(elementwise_mse, model.current_model.generator)

In [15]:
class Reward:
    """Callable reward that currently returns the same value for every output.

    Args:
        config: configuration object; stored on the instance but not read
            anywhere in this class.
    """

    def __init__(self, config):
        self.config = config

    def __call__(self, src, output):
        """Return the reward for ``output`` given ``src``.

        Both arguments are ignored: the reward is always the constant 1.
        """
        return 1

# Instantiate the reward callable with the shared config; it is passed to the trainer below.
reward = Reward(config)

In [16]:
# Plain SGD over the current model's parameters, wrapped in OpenNMT's Optimizer
# with the same learning rate and max_grad_norm=2.
lr = 1
torch_optimizer = torch.optim.SGD(
    model.current_model.parameters(),
    lr=lr,
)
optim = onmt.utils.optimizers.Optimizer(
    torch_optimizer,
    learning_rate=lr,
    max_grad_norm=2,
)

In [17]:
#report_manager = onmt.utils.ReportMgr(report_every=1, start_time=None, tensorboard_writer=None)

In [18]:
# Execute modules/QLearning.py; presumably this is where `QLearning` (used below) is defined — TODO confirm.
%run modules/QLearning.py

In [19]:
# Set up the Q-learning trainer; the same loss object is used for training and validation.
trainer = QLearning(
    config,
    model,
    reward=reward,
    train_loss=loss,
    valid_loss=loss,
    optim=optim,
    gpu_verbose_level=100,
    # shard_size = 0
)

In [20]:
from itertools import chain, islice

# Iterate lazily over the preprocessed training shard, one example per batch, on CPU.
train_data_file = "../data/cornell_raw_min_30_10.train.0.pt"
train_iter = onmt.inputters.inputter.DatasetLazyIter(
    dataset_paths=[train_data_file],
    fields=vocab_fields,
    batch_size=1,
    batch_size_multiple=1,
    batch_size_fn=None,
    device="cpu",
    is_train=True,
    repeat=False,
    pool_factor=8192,
)

# Preload Experience Replay Buffer
#
# Only the first 100 examples are needed, so take them straight from the iterator
# with islice instead of materialising the whole shard into a list first.
# The emptiness guard keeps a re-run of this cell from pushing the examples again.
if len(model.replay_memory) == 0:
    data = list(islice(train_iter, 100))
    for example in data:
        # squeeze(1) drops a size-1 dimension at index 1 — assumed to be the batch axis
        # given batch_size=1; TODO confirm. The trailing 1 is pushed with every example
        # (presumably its reward — verify against replay_memory.push).
        model.replay_memory.push(example.src[0].squeeze(1), example.tgt.squeeze(1), 1)

In [21]:
#for i in model.target_model.parameters():
#    print(i.abs().sum())

In [22]:
#for t in model.replay_memory._storage:
    #print((t[1] == 0).sum())
    #print(t[1].size(0))

In [23]:
# Run training for 400 steps, validating every 200 steps (as the log line below reports).
# NOTE(review): the recorded run ended with a KeyboardInterrupt, so `result` was likely
# never assigned in that session — confirm before using it in later cells.
result = trainer.train(train_steps=400, valid_steps=200)

INFO:root:Start training loop and validate every 200 steps...


BETA 0.40006


INFO:root:Batch Length: tensor([9, 9, 8, 8, 8, 8, 7, 7, 6, 6, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3,
        3, 2, 2, 2, 2, 2, 2, 2], dtype=torch.int16)


[tensor([1287,  679,  546,    2,    2,    2,    2,    2,    2, 1287])]
[tensor([ 103,  961,  551,  911, 1006,  546,    2,    2,    2,    2])]
[tensor([1147, 1147, 1147,  968, 1057,  390, 1057, 1057,  390,  925])]
[tensor([1147, 1147, 1147,  968, 1057,  390, 1057, 1057,  390,  925])]
[tensor([1147, 1147, 1147,  968, 1057,  390, 1057, 1057,  390,  925])]
[tensor([ 751,  751,  227,  486, 1122,  671,  393,  760,  982,  313])]
[tensor([ 961,  864, 1056,  546,  546,    2,    2,    2,    2,    2])]
[tensor([184, 763, 770, 763, 551, 180, 887, 372, 551, 180])]
[tensor([ 196,  450,  897, 1437, 1437,  854,  854,  854,  450,  860])]
[tensor([ 196,  450,  897, 1437, 1437,  854,  854,  854,  450,  860])]
[tensor([1171, 1294, 1294,   60, 1113, 1113, 1297,  551, 1265,   60])]
[tensor([1401,  760,  136,  227,  390,  925, 1401,  760,  136,   96])]
[tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])]
[tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2])]
[tensor([ 961,  313,  313,  313,  313,  982,  313, 1201,  631,  313])]
[tens

INFO:root:tensor(5.8180, grad_fn=<DivBackward0>)


BETA 0.40012000000000003


INFO:root:Batch Length: tensor([9, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 2, 2,
        2, 2, 2, 2, 2, 2, 1, 1], dtype=torch.int16)


[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 3])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 0, 3])]
[tensor([0, 0, 3])]
[tensor([0, 0, 0, 3])]


INFO:root:tensor(3.8723, grad_fn=<DivBackward0>)


BETA 0.40018000000000004


INFO:root:Batch Length: tensor([10, 10,  9,  8,  7,  7,  7,  7,  7,  6,  6,  6,  5,  5,  5,  5,  5,  5,
         4,  4,  4,  4,  3,  3,  2,  2,  2,  2,  2,  2,  1,  1],
       dtype=torch.int16)


[tensor([3])]


INFO:root:Inference 0: No output sentence generated (just </s>)


[tensor([56,  3])]
[tensor([3])]


INFO:root:Inference 2: No output sentence generated (just </s>)


[tensor([72,  3])]
[tensor([3])]


INFO:root:Inference 4: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 5: No output sentence generated (just </s>)


[tensor([72,  3])]
[tensor([3])]


INFO:root:Inference 7: No output sentence generated (just </s>)


[tensor([72,  3])]
[tensor([56,  3])]
[tensor([72,  3])]
[tensor([3])]


INFO:root:Inference 11: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 12: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 13: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 14: No output sentence generated (just </s>)


[tensor([56, 56,  3])]
[tensor([3])]


INFO:root:Inference 16: No output sentence generated (just </s>)


[tensor([56, 56,  3])]
[tensor([3])]


INFO:root:Inference 18: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 19: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 20: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 21: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 22: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 23: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 24: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 25: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 26: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 27: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 28: No output sentence generated (just </s>)


[tensor([56, 56,  3])]
[tensor([56,  3])]
[tensor([3])]


INFO:root:Inference 31: No output sentence generated (just </s>)
INFO:root:tensor(2.3161, grad_fn=<DivBackward0>)


BETA 0.40024000000000004


INFO:root:Batch Length: tensor([8, 7, 7, 7, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 3, 2, 2, 2, 2,
        2, 2, 2, 2, 1, 1, 1, 1], dtype=torch.int16)


[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
[tensor([0, 0, 0

INFO:root:tensor(2.5808, grad_fn=<DivBackward0>)


BETA 0.40030000000000004


INFO:root:Batch Length: tensor([9, 9, 8, 8, 7, 7, 7, 7, 7, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 3, 2,
        2, 2, 2, 2, 2, 2, 2, 1], dtype=torch.int16)


[tensor([3])]


INFO:root:Inference 0: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 1: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 2: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 3: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 4: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 5: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 6: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 7: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 8: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 9: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 10: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 11: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 12: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 13: No output sentence generated (just </s>)


[tensor([0, 3])]
[tensor([3])]


INFO:root:Inference 15: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 16: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 17: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 18: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 19: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 20: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 21: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 22: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 23: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 24: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 25: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 26: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 27: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 28: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 29: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 30: No output sentence generated (just </s>)


[tensor([3])]


INFO:root:Inference 31: No output sentence generated (just </s>)
INFO:root:tensor(3.6689, grad_fn=<DivBackward0>)


KeyboardInterrupt: 