In [128]:
from IPython.display import display, Markdown
import numpy.lib.recfunctions as nlr
def show(x):
    """Pretty-print a tensor as a NumPy structured array.

    The tensor is detached from the autograd graph and copied to host memory
    before conversion, because ``Tensor.numpy()`` raises for tensors that
    require grad or that live on a non-CPU device.

    Args:
        x: a ``torch.Tensor``; its last dimension becomes the fields of the
           structured array that is printed.
    """
    print(nlr.unstructured_to_structured(x.detach().cpu().numpy()))

# Config

In [129]:
# Execute Config.py inside the notebook namespace. The `config` object and the `torch`
# name used below are not defined anywhere in this notebook, so they are presumably
# provided by that script.
%run Config.py
# Override the device setting so everything in this notebook runs on the CPU.
config.device = torch.device('cpu')

# Data

## Tokenization

In [68]:
# Tokenize the source-side lines with the OpenNMT-py Perl tokenizer (language: en).
# Input is read from stdin (src_movie_lines.txt); tokenized text is written to src_movie_lines_tok.txt.
!!perl ../OpenNMT-py/tools/tokenizer.perl  -l en \
< ../data/"cornell movie-dialogs corpus"/src_movie_lines.txt \
> ../data/"cornell movie-dialogs corpus"/src_movie_lines_tok.txt

^C


[]

In [257]:
# Tokenize the target-side lines with the OpenNMT-py Perl tokenizer (language: en).
# Input is read from stdin (tgt_movie_lines.txt); tokenized text is written to tgt_movie_lines_tok.txt.
!!perl ../OpenNMT-py/tools/tokenizer.perl  -l en \
< ../data/"cornell movie-dialogs corpus"/tgt_movie_lines.txt \
> ../data/"cornell movie-dialogs corpus"/tgt_movie_lines_tok.txt

['Tokenizer Version 1.1', 'Language: en', 'Number of threads: 1']

## Preprocessing

In [None]:
# Run OpenNMT-py preprocessing with default options; output prefix: ../data/cornell_raw.
# NOTE(review): --train_src is read from "data/..." while --train_tgt (and the tokenizer output)
# use "../data/..." — confirm both roots point at the same directory.
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw

In [None]:
# Preprocessing variant: word min-frequency 30 and sequence-length limit 10 on both sides
# (per the flag names); output prefix: ../data/cornell_raw_min_30_10_tok.
# NOTE(review): --train_src is read from "data/..." while --train_tgt uses "../data/..." —
# confirm both roots point at the same directory.
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw_min_30_10_tok --src_words_min_frequency 30 --tgt_words_min_frequency 30 --src_seq_length 10 --tgt_seq_length 10

In [None]:
# Preprocessing variant: word min-frequency 100 and sequence-length limit 10 on both sides
# (per the flag names); output prefix: ../data/cornell_raw_min_100_tok.
# NOTE(review): --train_src is read from "data/..." while --train_tgt uses "../data/..." —
# confirm both roots point at the same directory.
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw_min_100_tok --src_words_min_frequency 100 --tgt_words_min_frequency 100 --src_seq_length 10 --tgt_seq_length 10

In [7]:
# Preprocessing variant: word min-frequency 680 and sequence-length limit 12 on both sides
# (per the flag names); output prefix: ../data/cornell_raw_min_680_tok.
# NOTE(review): --train_src is read from "data/..." while --train_tgt uses "../data/..." —
# confirm both roots point at the same directory.
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw_min_680_tok --src_words_min_frequency 680 --tgt_words_min_frequency 680 --src_seq_length 12 --tgt_seq_length 12

['[2019-10-11 03:37:49,444 INFO] Extracting features...',
 '[2019-10-11 03:37:49,444 INFO]  * number of source features: 0.',
 '[2019-10-11 03:37:49,444 INFO]  * number of target features: 0.',
 '[2019-10-11 03:37:49,444 INFO] Building `Fields` object...',
 '[2019-10-11 03:37:49,444 INFO] Building & saving training data...',
 '[2019-10-11 03:37:49,444 INFO] Reading source and target files: data/cornell movie-dialogs corpus/src_movie_lines_tok.txt ../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt.',
 '[2019-10-11 03:37:49,506 INFO] Building shard 0.',
 '[2019-10-11 03:37:54,083 INFO]  * saving 0th train data shard to ../data/cornell_raw_min_680_tok.train.0.pt.',
 '[2019-10-11 03:37:55,793 INFO]  * tgt vocab size: 108.',
 '[2019-10-11 03:37:55,824 INFO]  * src vocab size: 111.']

In [130]:
# Load the "<dataset>.vocab.pt" file that belongs to the configured dataset prefix
# (presumably written by the preprocessing step above — confirm).
# NOTE(review): torch.load unpickles the file, so only load .vocab.pt files from a trusted source.
vocab_fields = torch.load(config.dataset + ".vocab.pt")

In [131]:
# Source side: take the base field, its vocabulary, and the numeric id of the padding token.
src_text_field = vocab_fields["src"].base_field
src_vocab = src_text_field.vocab
src_padding = src_vocab.stoi[src_text_field.pad_token]  # stoi maps token strings to numeric ids
# Example of the first vocabulary entries: ['<unk>', '<blank>', 'I', 'you', 'the', 'to', 'a', 'of', 'and', 'You']
# src_text_field.pad_token is '<blank>'

# Target side: same three lookups as for the source side.
tgt_text_field = vocab_fields['tgt'].base_field
tgt_vocab = tgt_text_field.vocab
tgt_padding = tgt_vocab.stoi[tgt_text_field.pad_token]

In [132]:
# Record vocabulary sizes and padding ids on the shared config object so that code
# receiving `config` can read them.
config.src_vocab_size = len(src_vocab)
config.tgt_vocab_size = len(tgt_vocab)
config.src_padding = src_padding
config.tgt_padding = tgt_padding

In [133]:
# Numeric ids of the special tokens: unknown-word on both sides, plus the target
# field's init (begin-of-sequence) and eos (end-of-sequence) tokens.
config.src_unk = src_vocab.stoi[src_text_field.unk_token]
config.tgt_unk = tgt_vocab.stoi[tgt_text_field.unk_token]
config.tgt_bos = tgt_vocab.stoi[tgt_text_field.init_token]
config.tgt_eos = tgt_vocab.stoi[tgt_text_field.eos_token]

In [134]:
# Keep references to the fields object and both vocabularies on the config as well.
config.vocab_fields = vocab_fields
config.src_vocab = src_vocab
config.tgt_vocab = tgt_vocab

## Data Loading

In [135]:
import onmt
from itertools import chain

# Path of training shard 0 for the configured dataset prefix.
train_data_file = config.dataset + ".train.0.pt"
# Lazy iterator over that single shard: batch size 1, no custom batch-size function,
# tensors placed on config.device, and repeat=False.
train_iter = onmt.inputters.inputter.DatasetLazyIter(dataset_paths=[train_data_file],
                                                     fields=vocab_fields,
                                                     batch_size=1,
                                                     batch_size_multiple=1,
                                                     batch_size_fn=None,
                                                     device=config.device,
                                                     is_train=True,
                                                     repeat=False,
                                                     pool_factor=8192)

# Materialise the iterator once so the batches can be filtered and reused later.
data = list(train_iter)

# Keep only batches in which neither the source nor the target contains the <unk> id.
filtered_data = []
for batch in data:
    if (batch.src[0].squeeze() == config.src_unk).any():
        continue  # source side contains <unk>
    if (batch.tgt.squeeze() == config.tgt_unk).any():
        continue  # target side contains <unk>
    filtered_data.append(batch)

INFO:root:Loading dataset from data/cornell_raw_min_680_tok.train.0.pt
INFO:root:number of examples: 90893


In [136]:
# Number of examples that survived the <unk> filter, stored on the config.
config.PRELOADING_SIZE = len(filtered_data)

In [137]:
def _show_md(text):
    """Render one line of Markdown in the cell output."""
    display(Markdown(text))


# --- Dataset overview ---
_show_md('#### Data')
_show_md(f'###### {config.PRELOADING_SIZE:,} records')
_show_md(f'###### {config.src_vocab_size:,} src vocabulary size')
_show_md(f'###### {config.tgt_vocab_size:,} tgt vocabulary size')

# --- Target vocabulary, skipping id 0 and escaping angle brackets so tokens
# such as <s> are shown literally instead of being parsed as HTML ---
_show_md('#### Vocabulary')
escaped_tokens = [
    token.replace('<', '&lt;').replace('>', '&gt;')
    for token, index in tgt_vocab.stoi.items()
    if index != 0
]
_show_md("###### [" + "] [".join(escaped_tokens) + "]")

# --- seq2seq hyperparameters ---
_show_md('#### seq2seq - Hyperparameter')
_show_md(f'###### Embedding Size: {config.emb_size}')
_show_md(f'###### RNN Size: {config.rnn_size}')
if config.SUPERVISED_PRETRAINING:
    _show_md('###### Pretrain Model')

# --- RL hyperparameters ---
_show_md('#### RL - Hyperparameter')
_show_md(f'###### Update Target Net every {config.target_update_freq} steps')
_show_md(f'###### N-Steps {config.N_STEPS}')
if config.DISTRIBUTIONAL:
    _show_md(f'###### Distributional RL with {config.QUANTILES} Quantiles')
if config.value_penalty:
    _show_md('###### Using value penalty')

#### Data

###### 1,311 records

###### 111 src vocabulary size

###### 108 tgt vocabulary size

#### Vocabulary

###### [&lt;blank&gt;] [&lt;s&gt;] [&lt;/s&gt;] [.] [?] [I] [,] [you] [&apos;s] [...] [!] [the] [&apos;t] [to] [a] [it] [You] [What] [me] [that] [No] [&apos;m] [is] [do] [know] [It] [&apos;re] [of] [don] [in] [--] [Yes] [what] [have] [not] [your] [for] [Yeah] [was] [my] [are] [be] [on] [&apos;ll] [That] [this] [Oh] [can] [He] [The] [about] [he] [We] [we] [here] [and] [him] [How] [like] [with] [want] [think] [Why] [just] [&apos;] [get] [right] [Well] [got] [up] [And] [out] [go] [did] [all] [there] [her] [&apos;ve] [Where] [going] [see] [one] [But] [so] [at] [A] [Who] [no] [She] [They] [say] [they] [Not] [Don] [mean] [&apos;d] [she] [Just] [didn] [Do] [&quot;] [now] [good] [So] [too] [back] [time]

#### seq2seq - Hyperparameter

###### Embedding Size: 100

###### RNN Size: 500

###### Pretrain Model

#### RL - Hyperparameter

###### Update Target Net every 10000 steps

###### N-Steps 4

###### Distributional RL with 51 Quantiles

# seq2seq-DQN

In [138]:
# Execute modules/DQN.py in the notebook namespace (expected to provide the `DQN` name used below).
%run modules/DQN.py

In [139]:
# Execute modules/Model.py in the notebook namespace (expected to provide the `Model` name used below).
%run modules/Model.py

In [140]:
# Build the model wrapper from the shared config and the DQN class.
model = Model(config, DQN)

In [141]:
# Bare expression: the cell output is the repr of the network held in model.current_model.
model.current_model

DQN(
  (encoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(111, 100, padding_idx=1)
      )
    )
  )
  (encoder): RNNEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(111, 100, padding_idx=1)
        )
      )
    )
    (rnn): GRU(100, 250, bidirectional=True)
  )
  (decoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(108, 100, padding_idx=1)
      )
    )
  )
  (decoder): InputFeedRNNDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(108, 100, padding_idx=1)
        )
      )
    )
    (dropout): Dropout(p=0.0)
    (rnn): StackedGRU(
      (dropout): Dropout(p=0.0)
      (layers): ModuleList(
        (0): GRUCell(600, 500)
      )
    )
    (attn): GlobalAttention(
      (linear_in): L

In [142]:
# Count every element of every trainable (requires_grad) parameter of the current network.
total_params = 0
for param in model.current_model.parameters():
    if param.requires_grad:
        total_params += param.numel()

display(Markdown(f'##### Total Number of Parameters: {total_params:,}'))

##### Total Number of Parameters: 9,525,018

In [143]:
# Execute modules/MSELoss.py in the notebook namespace (expected to provide the `MSELoss` name used below).
%run modules/MSELoss.py

In [144]:
# Loss wrapper built from an element-wise criterion and the model's generator.
# NOTE(review): despite the wrapper's name, the active criterion is SmoothL1Loss;
# plain MSE is kept only as the commented-out alternative.
loss = MSELoss(
    #nn.MSELoss(reduction="none"),
    nn.SmoothL1Loss(reduction="none"),  # reduction="none": no averaging/summing inside the criterion
    model.current_model.generator
)

In [145]:
# Execute modules/Reward.py in the notebook namespace (expected to provide the `Reward` name used below).
%run modules/Reward.py

In [146]:
# Reward configuration: a single reward named 'BLEU' with weight 1.
# The two lists are presumably matched by position — confirm in modules/Reward.py.
config.rewards = ['BLEU']
config.rewards_weights = [1]    

# Reward object built from the config set above.
reward = Reward(config)

In [147]:
# Alternative optimizer kept for reference:
#torch_optimizer = torch.optim.Adam(model.current_model.parameters(), lr=config.LR)
from lib.Ranger import Ranger
# Ranger (project-local lib/Ranger.py) optimizes the current network's parameters at config.LR.
torch_optimizer = Ranger(model.current_model.parameters(), lr=config.LR)
# Wrap it in OpenNMT's Optimizer with the same learning rate and max_grad_norm=2.
optim = onmt.utils.optimizers.Optimizer(torch_optimizer, learning_rate=config.LR, max_grad_norm=2)

In [148]:
#report_manager = onmt.utils.ReportMgr(report_every=1, start_time=None, tensorboard_writer=None)

In [149]:
# Execute modules/RLModelSaver.py in the notebook namespace (expected to provide `RLModelSaver`).
%run modules/RLModelSaver.py

In [150]:
# Checkpoint writer; "checkpoints/checkpoint" is presumably the path prefix for saved files — confirm in RLModelSaver.
model_saver = RLModelSaver("checkpoints/checkpoint", model, config, vocab_fields, optim)

In [151]:
import random
# Shuffle in place with a private RNG seeded with 42, so the order is reproducible and
# the global `random` state is left untouched.
# NOTE(review): because the shuffle is in place, re-running this cell without re-creating
# filtered_data shuffles the already-shuffled list and yields a different order.
random.Random(42).shuffle(filtered_data)

In [152]:
# Preload Experience Replay Buffer
if len(model.replay_memory) == 0:
    for example in filtered_data[150:]:
        model.replay_memory.preload(example.src[0].squeeze(1), example.tgt.squeeze(1), 1)
        model.sample_buffer.preload(example.src[0].squeeze(1), example.tgt.squeeze(1), None)

In [153]:
# Execute modules/QLearning.py in the notebook namespace (expected to provide the `QLearning` name used below).
%run modules/QLearning.py

In [154]:
# Assemble the trainer from the pieces built above. The same loss object is passed
# for both training and validation.
trainer = QLearning(config,
                    model,
                    reward=reward,
                    train_loss=loss,
                    valid_loss=loss,
                    optim=optim,
                    model_saver = model_saver)
                    #shard_size = 0

In [155]:
# Print the first ten (source || target) pairs as text for a quick visual check of the data.
display(Markdown('##### Examples'))
for x in filtered_data[0:10]:
    # reshape(-1) always yields a 1-D tensor, so .tolist() returns a list even for a
    # one-token sequence; .squeeze() would collapse that case to a 0-d tensor whose
    # .tolist() is a bare int that cannot be iterated.
    src_words = [src_vocab.itos[token] for token in x.src[0].reshape(-1).tolist()]
    tgt_words = [tgt_vocab.itos[token] for token in x.tgt.reshape(-1).tolist()]
    print(' '.join(src_words) + '  ||  ' + ' '.join(tgt_words))

##### Examples

Why are you here ?  ||  <s> I don &apos;t know . </s>
Oh , no !  ||  <s> Oh , no ! </s>
You want that ?  ||  <s> Yes . </s>
Why did he ?  ||  <s> Why did he what ? </s>
Oh -- Oh --  ||  <s> Oh -- are you -- are you all right ? </s>
What do you think it is ?  ||  <s> What do you think it is ? </s>
Where ?  ||  <s> Just ... I have to . </s>
Yeah . You ?  ||  <s> Yeah . </s>
No .  ||  <s> I , I just can &apos;t ... </s>
What are you doing ?  ||  <s> What ? </s>


In [156]:
# Interval settings stored on the config; presumably read by the trainer to decide how
# often samples / sigma values are saved — confirm in modules/QLearning.py.
config.SAVE_PRETRAIN_SAMPLE_EVERY = 6
config.SAVE_SAMPLE_EVERY = 1
config.SAVE_SIGMA_EVERY = 1

In [157]:
# Force the supervised-pretraining flag on, overriding whatever value the config held before.
config.SUPERVISED_PRETRAINING = True

In [160]:
# Supervised pretraining, run only when the config flag is set:
# 500 steps, with save_checkpoint_steps=100 passed through to trainer.pretrain.
if config.SUPERVISED_PRETRAINING:
    trainer.pretrain(train_steps=500, save_checkpoint_steps=100)

INFO:root:Start pretraining - training loop
INFO:root:Pretraining Step 1
INFO:root:Pretraining Step 2
INFO:root:Pretraining Step 3
INFO:root:Pretraining Step 4
INFO:root:Pretraining Step 5
INFO:root:Pretraining Step 6
INFO:root:Pretraining Step 7
INFO:root:Pretraining Step 8
INFO:root:Pretraining Step 9
INFO:root:Pretraining Step 10
INFO:root:Pretraining Step 11
INFO:root:Pretraining Step 12
INFO:root:Pretraining Step 13
INFO:root:Pretraining Step 14
INFO:root:Pretraining Step 15
INFO:root:Pretraining Step 16
INFO:root:Pretraining Step 17
INFO:root:Pretraining Step 18
INFO:root:Pretraining Step 19
INFO:root:Pretraining Step 20
INFO:root:Pretraining Step 21
INFO:root:Pretraining Step 22
INFO:root:Pretraining Step 23
INFO:root:Pretraining Step 24
INFO:root:Pretraining Step 25
INFO:root:Pretraining Step 26
INFO:root:Pretraining Step 27
INFO:root:Pretraining Step 28
INFO:root:Pretraining Step 29
INFO:root:Pretraining Step 30
INFO:root:Pretraining Step 31
INFO:root:Pretraining Step 32
INFO:

In [159]:
# Q-learning phase: 150 steps with save_checkpoint_steps=50; whatever trainer.train returns is kept in `result`.
result = trainer.train(train_steps=150, save_checkpoint_steps=50)

INFO:root:Start q-learning training loop
INFO:root:Q-Learning Step 1
INFO:root:Q-Learning Step 2
INFO:root:Q-Learning Step 3
INFO:root:Q-Learning Step 4
INFO:root:Q-Learning Step 5
INFO:root:Q-Learning Step 6
INFO:root:Q-Learning Step 7
INFO:root:Q-Learning Step 8
INFO:root:Sampling: Collecting new data
INFO:root:Q-Learning Step 9
INFO:root:Q-Learning Step 10
INFO:root:Q-Learning Step 11
INFO:root:Q-Learning Step 12
INFO:root:Q-Learning Step 13
INFO:root:Q-Learning Step 14
INFO:root:Q-Learning Step 15
INFO:root:Q-Learning Step 16
INFO:root:Sampling: Collecting new data
INFO:root:Q-Learning Step 17
INFO:root:Q-Learning Step 18
INFO:root:Q-Learning Step 19
INFO:root:Q-Learning Step 20
INFO:root:Q-Learning Step 21
INFO:root:Q-Learning Step 22
INFO:root:Q-Learning Step 23
INFO:root:Q-Learning Step 24
INFO:root:Sampling: Collecting new data
INFO:root:Q-Learning Step 25
INFO:root:Q-Learning Step 26
INFO:root:Q-Learning Step 27
INFO:root:Q-Learning Step 28
INFO:root:Q-Learning Step 29
INFO:ro

In [None]:
#for para in list(model.current_model.parameters()):
#    printa(para.grad.abs().sum())

In [27]:
# Analyze Replay Memory
#for i, x in enumerate(model.replay_memory._storage[2000:]):
#    if ' '.join([tgt_vocab.itos[token] for token in x[1].squeeze().tolist()]) != '<s> </s>':
#        print(i+2000, ' '.join([src_vocab.itos[token] for token in x[0].squeeze().tolist()]) + '  ||  ' + ' '.join([tgt_vocab.itos[token] for token in x[1].squeeze().tolist()]))

In [28]:
# Sum up rewards in replay memory
#for i in range(0,len(model.replay_memory),2000):
#    sum_ = sum([y[2] for y in model.replay_memory._storage[i:i+2000]])
#    print(i, i+2000, sum_)

In [29]:
# Take a look at priorities in prioritized replay memory
#for i in range(0,len(model.replay_memory),2000):
#    print(i, i+2000, sum([model.replay_memory._it_sum[y] for y in range(i, i+2000)]))