In [58]:
from IPython.display import display, Markdown
import numpy.lib.recfunctions as nlr
def show(x):
    """Pretty-print a tensor as a NumPy structured array.

    The last dimension of ``x`` becomes the fields of each printed record, so a
    2-D tensor is shown as one tuple per row.

    Args:
        x: A ``torch.Tensor`` with at least one dimension. It may require grad
            or live on a non-CPU device; it is detached and copied to the CPU
            before the conversion.

    Returns:
        None. The formatted array is written to stdout.
    """
    # Tensor.numpy() rejects tensors that require grad or live on an accelerator.
    # detach() and cpu() are no-ops for plain CPU tensors, so existing calls are unaffected.
    print(nlr.unstructured_to_structured(x.detach().cpu().numpy()))  # pretty print for tensors

# Config

In [59]:
# Execute Config.py inside this notebook's namespace. The cells below use `config`
# and `torch` without defining them here, so both presumably come from that script.
%run Config.py
# Override the device setting: everything in this notebook runs on the CPU.
config.device = torch.device('cpu')

# Data

## Tokenization

In [68]:
# Tokenize the source side of the Cornell corpus with the OpenNMT-py Perl tokenizer (`-l en`).
# `!!` runs the command in a shell and returns the captured output as a list of lines;
# the tokenized text itself is redirected to src_movie_lines_tok.txt.
!!perl ../OpenNMT-py/tools/tokenizer.perl  -l en \
< ../data/"cornell movie-dialogs corpus"/src_movie_lines.txt \
> ../data/"cornell movie-dialogs corpus"/src_movie_lines_tok.txt

^C


[]

In [257]:
# Tokenize the target side of the Cornell corpus with the same tokenizer and language setting;
# the tokenized text is redirected to tgt_movie_lines_tok.txt.
!!perl ../OpenNMT-py/tools/tokenizer.perl  -l en \
< ../data/"cornell movie-dialogs corpus"/tgt_movie_lines.txt \
> ../data/"cornell movie-dialogs corpus"/tgt_movie_lines_tok.txt

['Tokenizer Version 1.1', 'Language: en', 'Number of threads: 1']

## Preprocessing

In [None]:
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw

In [None]:
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw_min_30_10_tok --src_words_min_frequency 30 --tgt_words_min_frequency 30 --src_seq_length 10 --tgt_seq_length 10

In [None]:
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw_min_100_tok --src_words_min_frequency 100 --tgt_words_min_frequency 100 --src_seq_length 10 --tgt_seq_length 10

In [7]:
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw_min_680_tok --src_words_min_frequency 680 --tgt_words_min_frequency 680 --src_seq_length 12 --tgt_seq_length 12

['[2019-10-11 03:37:49,444 INFO] Extracting features...',
 '[2019-10-11 03:37:49,444 INFO]  * number of source features: 0.',
 '[2019-10-11 03:37:49,444 INFO]  * number of target features: 0.',
 '[2019-10-11 03:37:49,444 INFO] Building `Fields` object...',
 '[2019-10-11 03:37:49,444 INFO] Building & saving training data...',
 '[2019-10-11 03:37:49,444 INFO] Reading source and target files: data/cornell movie-dialogs corpus/src_movie_lines_tok.txt ../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt.',
 '[2019-10-11 03:37:49,506 INFO] Building shard 0.',
 '[2019-10-11 03:37:54,083 INFO]  * saving 0th train data shard to ../data/cornell_raw_min_680_tok.train.0.pt.',
 '[2019-10-11 03:37:55,793 INFO]  * tgt vocab size: 108.',
 '[2019-10-11 03:37:55,824 INFO]  * src vocab size: 111.']

In [60]:
# Load the fields/vocabulary object stored next to the dataset as "<config.dataset>.vocab.pt"
# (presumably written by the preprocessing step).
# NOTE(review): torch.load unpickles the file, so config.dataset must point at trusted data.
vocab_fields = torch.load(config.dataset + ".vocab.pt")

In [61]:
def _unpack_field(side):
    """Return ``(base_field, vocab, pad_index)`` for the ``side`` entry of ``vocab_fields``."""
    base_field = vocab_fields[side].base_field
    vocabulary = base_field.vocab
    # stoi: mapping token strings to numerical identifiers.
    pad_index = vocabulary.stoi[base_field.pad_token]
    return base_field, vocabulary, pad_index


# Source side. Values noted by the original author:
# ['<unk>', '<blank>', 'I', 'you', 'the', 'to', 'a', 'of', 'and', 'You']
# src_text_field.pad_token : '<blank>'
src_text_field, src_vocab, src_padding = _unpack_field("src")

# Target side.
tgt_text_field, tgt_vocab, tgt_padding = _unpack_field('tgt')

In [62]:
# Publish vocabulary sizes and padding indices on the shared config object;
# later cells read them back from `config` (e.g. the summary display).
config.src_vocab_size = len(src_vocab)
config.tgt_vocab_size = len(tgt_vocab)
config.src_padding = src_padding
config.tgt_padding = tgt_padding

In [63]:
# Record the numeric ids of the special tokens: unknown on both sides, plus the
# target field's init (begin-of-sequence) and eos (end-of-sequence) tokens.
config.src_unk = src_vocab.stoi[src_text_field.unk_token]
config.tgt_unk = tgt_vocab.stoi[tgt_text_field.unk_token]
config.tgt_bos = tgt_vocab.stoi[tgt_text_field.init_token]
config.tgt_eos = tgt_vocab.stoi[tgt_text_field.eos_token]

In [64]:
# Keep references to the fields and both vocabularies on the config object so that
# code receiving only `config` can reach them.
config.vocab_fields = vocab_fields
config.src_vocab = src_vocab
config.tgt_vocab = tgt_vocab

## Data Loading

In [65]:
import onmt
from itertools import chain

train_data_file = config.dataset + ".train.0.pt"
# batch_size=1: every element yielded by the iterator holds a single sentence pair.
train_iter = onmt.inputters.inputter.DatasetLazyIter(
    dataset_paths=[train_data_file],
    fields=vocab_fields,
    batch_size=1,
    batch_size_multiple=1,
    batch_size_fn=None,
    device=config.device,
    is_train=True,
    repeat=False,
    pool_factor=8192,
)

data = list(train_iter)


def _contains_unk(example):
    """Truthy when the source or the target of ``example`` holds an <unk> token."""
    src_has_unk = (example.src[0].squeeze() == config.src_unk).any()
    return src_has_unk or (example.tgt.squeeze() == config.tgt_unk).any()


filtered_data = []
max_length = 0
for example in data:
    # Filtering sentences with <unk> token
    if _contains_unk(example):
        continue
    # Track the longest sequence (source or target) among the kept examples.
    longest_side = max(example.src[0].size(0), example.tgt.size(0))
    max_length = max(max_length, longest_side)
    filtered_data.append(example)

INFO:root:Loading dataset from data/cornell_raw_min_680_tok.train.0.pt
INFO:root:number of examples: 90893


In [66]:
# Subtract the two sequence markers (<s>, </s>) that wrap each target sequence.
# NOTE(review): max_length is the maximum over source AND target lengths, so the -2 is
# only exact when the longest sequence is a target - confirm.
config.max_sequence_length = max_length - 2 # bos, eos
# Number of <unk>-free examples kept by the filtering loop.
config.PRELOADING_SIZE = len(filtered_data)

In [67]:
def _md(text):
    """Render ``text`` as one Markdown output of the current cell."""
    display(Markdown(text))


# --- dataset statistics ---
_md('#### Data')
_md(f'###### {config.PRELOADING_SIZE:,} records')
_md(f'###### {config.max_sequence_length:,} max sequence length')
_md(f'###### {config.src_vocab_size:,} src vocabulary size')
_md(f'###### {config.tgt_vocab_size:,} tgt vocabulary size')

# --- target vocabulary ---
_md('#### Vocabulary')
# Every target token except the one with index 0; angle brackets are escaped so that
# Markdown shows tokens such as <s> literally instead of treating them as HTML.
escaped_tokens = [
    token.replace('<', '&lt;').replace('>', '&gt;')
    for token, index in tgt_vocab.stoi.items()
    if index != 0
]
_md("###### [" + "] [".join(escaped_tokens) + "]")

# --- model hyperparameters ---
_md('#### seq2seq - Hyperparameter')
_md(f'###### Embedding Size: {config.emb_size}')
_md(f'###### RNN Size: {config.rnn_size}')

# --- reinforcement-learning hyperparameters ---
_md('#### RL - Hyperparameter')
_md(f'###### Update Target Net every {config.target_update_freq} steps')
_md(f'###### N-Steps {config.N_STEPS}')
if config.DISTRIBUTIONAL:
    _md(f'###### Distributional RL with {config.QUANTILES} Quantiles')
if config.value_penalty:
    _md('###### Using value penalty')

#### Data

###### 1,311 records

###### 12 max sequence length

###### 111 src vocabulary size

###### 108 tgt vocabulary size

#### Vocabulary

###### [&lt;blank&gt;] [&lt;s&gt;] [&lt;/s&gt;] [.] [?] [I] [,] [you] [&apos;s] [...] [!] [the] [&apos;t] [to] [a] [it] [You] [What] [me] [that] [No] [&apos;m] [is] [do] [know] [It] [&apos;re] [of] [don] [in] [--] [Yes] [what] [have] [not] [your] [for] [Yeah] [was] [my] [are] [be] [on] [&apos;ll] [That] [this] [Oh] [can] [He] [The] [about] [he] [We] [we] [here] [and] [him] [How] [like] [with] [want] [think] [Why] [just] [&apos;] [get] [right] [Well] [got] [up] [And] [out] [go] [did] [all] [there] [her] [&apos;ve] [Where] [going] [see] [one] [But] [so] [at] [A] [Who] [no] [She] [They] [say] [they] [Not] [Don] [mean] [&apos;d] [she] [Just] [didn] [Do] [&quot;] [now] [good] [So] [too] [back] [time]

#### seq2seq - Hyperparameter

###### Embedding Size: 100

###### RNN Size: 500

#### RL - Hyperparameter

###### Update Target Net every 10000 steps

###### N-Steps 4

###### Distributional RL with 51 Quantiles

# seq2seq-DQN

In [68]:
%run modules/DQN.py

In [69]:
%run modules/Model.py

In [70]:
# Instantiate the project's Model with the config and the DQN class; both names come
# from the modules/*.py scripts executed via %run. Later cells use model.current_model
# (the network) and model.replay_memory / model.sample_buffer.
model = Model(config, DQN)

In [71]:
model.current_model

DQN(
  (encoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(111, 100, padding_idx=1)
      )
    )
  )
  (encoder): RNNEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(111, 100, padding_idx=1)
        )
      )
    )
    (rnn): GRU(100, 250, bidirectional=True)
  )
  (decoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(108, 100, padding_idx=1)
      )
    )
  )
  (decoder): InputFeedRNNDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(108, 100, padding_idx=1)
        )
      )
    )
    (dropout): Dropout(p=0.0)
    (rnn): StackedGRU(
      (dropout): Dropout(p=0.0)
      (layers): ModuleList(
        (0): GRUCell(600, 500)
      )
    )
    (attn): GlobalAttention(
      (linear_in): L

In [72]:
# Count the elements of every parameter tensor that requires gradients.
total_params = 0
for parameter in model.current_model.parameters():
    if parameter.requires_grad:
        total_params += parameter.numel()

display(Markdown(f'##### Total Number of Parameters: {total_params:,}'))

##### Total Number of Parameters: 9,579,126

In [73]:
%run modules/MSELoss.py

In [74]:
# Combine an element-wise criterion with the network's generator.
# NOTE(review): despite the wrapper's name, the active criterion is SmoothL1Loss;
# the MSE variant is kept commented out as an alternative.
loss = MSELoss(
    #nn.MSELoss(reduction="none"),
    nn.SmoothL1Loss(reduction="none"),  # reduction="none": keep one loss value per element
    model.current_model.generator
)

In [75]:
%run modules/Reward.py

In [76]:
# Select the reward signals and their weights: here a single ROUGE reward with weight 1.
# The two lists are parallel (one weight per reward name) - presumably consumed by Reward.
config.rewards = ['ROUGE']
config.rewards_weights = [1]    

reward = Reward(config)

In [77]:
# Ranger (project copy under lib/) is used instead of Adam, which is kept commented out.
#torch_optimizer = torch.optim.Adam(model.current_model.parameters(), lr=config.LR)
from lib.Ranger import Ranger
torch_optimizer = Ranger(model.current_model.parameters(), lr=config.LR)
# Wrap the torch optimizer in OpenNMT's Optimizer, passing the same learning rate
# and a maximum gradient norm of 2.
optim = onmt.utils.optimizers.Optimizer(torch_optimizer, learning_rate=config.LR, max_grad_norm=2)

In [78]:
#report_manager = onmt.utils.ReportMgr(report_every=1, start_time=None, tensorboard_writer=None)

In [79]:
%run modules/RLModelSaver.py

In [80]:
# Checkpoint writer for the RL model; "checkpoints/checkpoint" is presumably the
# path prefix of the saved files - confirm in modules/RLModelSaver.py.
model_saver = RLModelSaver("checkpoints/checkpoint", model, config, vocab_fields, optim)

In [81]:
# Deterministic shuffle: a private Random(42) instance makes the permutation
# independent of the global random state.
# NOTE(review): the shuffle is in place, so re-running this cell without re-running the
# data-loading cell permutes the already shuffled list again and changes which examples
# fall into filtered_data[:150] and filtered_data[150:].
import random
random.Random(42).shuffle(filtered_data)

In [82]:
# Preload Experience Replay Buffer
# Runs only while the replay memory is still empty, so executing the cell again does
# not insert the examples a second time. The first 150 (shuffled) examples are skipped.
if not len(model.replay_memory):
    for example in filtered_data[150:]:
        # (buffer, third preload argument) pairs, in the original call order.
        for buffer, preload_value in ((model.replay_memory, 1), (model.sample_buffer, None)):
            buffer.preload(example.src[0].squeeze(1), example.tgt.squeeze(1), preload_value)

In [83]:
%run modules/QLearning.py

In [84]:
# Assemble the Q-learning trainer from the objects built in the previous cells.
# The same loss object is passed for both training and validation.
trainer = QLearning(config,
                    model,
                    reward=reward,
                    train_loss=loss,
                    valid_loss=loss,
                    optim=optim,
                    model_saver = model_saver)
                    #shard_size = 0

In [85]:
display(Markdown('##### Examples'))


def _decode(vocab, token_ids):
    """Turn a tensor of token ids into a space-separated string via ``vocab.itos``."""
    # reshape(-1) always yields a 1-D tensor, so tolist() returns a list even for a
    # one-token sentence. squeeze() collapses that case to a 0-d tensor, whose tolist()
    # is a bare int that cannot be iterated.
    return ' '.join(vocab.itos[token] for token in token_ids.reshape(-1).tolist())


# Print the first ten examples as "source  ||  target".
for x in filtered_data[0:10]:
    print(_decode(src_vocab, x.src[0]) + '  ||  ' + _decode(tgt_vocab, x.tgt))

##### Examples

Why ?  ||  <s> Why ? I don &apos;t know . </s>
No .  ||  <s> Why not ? </s>
No , you &apos;re not .  ||  <s> Why not ? </s>
Where are you going ?  ||  <s> I have to get out . </s>
That &apos;s my ...  ||  <s> Oh . </s>
Well she ...  ||  <s> She what ? </s>
What about you ?  ||  <s> I don &apos;t know . </s>
No .  ||  <s> Why not ? </s>
How do you know this ?  ||  <s> I know . </s>
Yeah . You ?  ||  <s> Yeah . </s>


In [29]:
# Sampling/logging cadence settings stored on the config object
# (presumably read by the trainer - the unit is not visible in this notebook).
config.SAVE_PRETRAIN_SAMPLE_EVERY = 6
config.SAVE_SIGMA_EVERY = 1
# SAVE_SAMPLE_EVERY used to be assigned twice in this cell (1, then 8); only the last
# value ever took effect, so the dead first assignment is dropped.
config.SAVE_SAMPLE_EVERY = 8

In [None]:
# Run 150 steps of the trainer's multitask mode; stop_pretrain_after=100 presumably ends
# the pretraining part after 100 of them - confirm in modules/QLearning.py.
trainer.multitask_train(train_steps = 150, stop_pretrain_after=100)

In [None]:
# Run 500 pretraining steps, requesting a checkpoint every 100 steps.
trainer.pretrain(train_steps=500, save_checkpoint_steps=100)

In [None]:
# Run 150 training steps, requesting a checkpoint every 50 steps.
trainer.train(train_steps=150, save_checkpoint_steps=50)

In [None]:
#for para in list(model.current_model.parameters()):
#    printa(para.grad.abs().sum())

In [27]:
# Analyze Replay Memory
#for i, x in enumerate(model.replay_memory._storage[2000:]):
#    if ' '.join([tgt_vocab.itos[token] for token in x[1].squeeze().tolist()]) != '<s> </s>':
#        print(i+2000, ' '.join([src_vocab.itos[token] for token in x[0].squeeze().tolist()]) + '  ||  ' + ' '.join([tgt_vocab.itos[token] for token in x[1].squeeze().tolist()]))

In [28]:
# Sum up rewards in replay memory
#for i in range(0,len(model.replay_memory),2000):
#    sum_ = sum([y[2] for y in model.replay_memory._storage[i:i+2000]])
#    print(i, i+2000, sum_)

In [29]:
# Take a look at priorities in prioritized replay memory
#for i in range(0,len(model.replay_memory),2000):
#    print(i, i+2000, sum([model.replay_memory._it_sum[y] for y in range(i, i+2000)]))

In [61]:
# Manual walk-through of one Q-learning update, reproducing the trainer's steps outside
# the trainer for inspection. `torch`, `F` and `config` are not imported in this cell and
# must already be in the notebook namespace.
# NOTE(review): as recorded, the cell fails on its last line with a size mismatch
# (11 vs 10 along dimension 0); see the note there.
batch = trainer.model.sample_from_memory(100)
# `rewardx` (not `reward`) keeps the notebook-level `reward` object from being overwritten.
true_batch, src, tgt, src_lengths, tgt_lengths, src_raw, tgt_raw, rewardx, per = trainer._process_batch(batch)

# Refresh the noise of both networks (the target network without tracking gradients).
trainer.current_model.update_noise()
with torch.no_grad():
    trainer.target_model.update_noise()

# pass through encoder and decoder
trainer.optim.zero_grad()
current_net__outputs, current_net_attns = trainer.current_model(src, tgt, src_lengths, bptt=False)
target_net__outputs, target_net_attns = trainer.target_model(src, tgt, src_lengths, bptt=False)

# pass through generator
current_net__q_outputs = trainer.current_model.generator(current_net__outputs)
target_net__q_outputs = trainer.target_model.generator(target_net__outputs).detach() # detach from graph, don't backpropagate

# another generation with new noise for action selection
with torch.no_grad():
    trainer.current_model.update_noise(inplace = False)
    current_net__q_outputs_decorrelated = trainer.current_model.generator(current_net__outputs)

# calc q values
q_values = trainer.model.get_current_q_values(current_net__q_outputs, tgt)    
next_q_values = trainer.model.get_next_q_values(current_net__q_outputs_decorrelated, target_net__q_outputs)

# construct reward tensor
# idxes holds the first tgt.size(0) columns of the trainer's index tensor. The eq(...) mask
# selects, per sequence, the position tgt_length - 1, so the reward is placed only there.
# After the transpose, [1:] drops the first row (presumably the <s> step).
idxes = trainer.index_tensor[:,:tgt.size(0)]
rewards = (idxes.eq((tgt_lengths - 1).unsqueeze(1)).t().float() * torch.Tensor(rewardx).to(config.device))[1:].unsqueeze(2)   

if config.N_STEPS > 1:
    # one-sided exponential n-step decay of rewards via convolution
    # permute moves dim 0 to the end for conv1d and restores the order afterwards.
    rewards = F.conv1d(rewards.permute(1, 2, 0), trainer.kernel, padding=trainer.padding).permute(2, 0, 1)

# mask bc padding
# 1.0 where the position index is below the sequence length, 0.0 otherwise; like the
# rewards, the first row is dropped, leaving tgt.size(0) - 1 rows.
mask_raw = idxes.lt(tgt_lengths.unsqueeze(1)).t()[1:].float().unsqueeze(2)

# NOTE(review): zero_pad is sized with config.BATCH_SIZE, while the batch above was sampled
# with a literal 100 - confirm the two agree.
if config.DISTRIBUTIONAL:
    # Extra trailing axis so rewards and mask broadcast against the quantile dimension.
    rewards = rewards.unsqueeze(3)
    mask = mask_raw.unsqueeze(3)
    zero_pad = torch.zeros((config.N_STEPS,config.BATCH_SIZE,1,config.QUANTILES), device=config.device)
else:
    mask = mask_raw
    zero_pad = torch.zeros((config.N_STEPS,config.BATCH_SIZE,1), device=config.device)

# NOTE(review): this is the line that raises the recorded RuntimeError. mask has
# tgt.size(0) - 1 rows because of the [1:] slice, so q_values presumably has one more
# row along dimension 0 (11 vs 10) - TODO confirm the shape returned by
# get_current_q_values and trim whichever tensor is off by one step.
masked_q_values = q_values * mask

RuntimeError: The size of tensor a (11) must match the size of tensor b (10) at non-singleton dimension 0