In [None]:
from IPython.display import display, Markdown

# Config

In [2]:
# Execute Config.py; IPython's %run loads the script's top-level names into this notebook.
# NOTE(review): later cells use `config`, `torch` and `nn` without importing them here —
# presumably they are provided by Config.py; confirm.
%run Config.py

# Data

## Tokenization

In [256]:
# Tokenize the source side of the corpus with OpenNMT-py's tools/tokenizer.perl (language: en).
# Reads src_movie_lines.txt on stdin and writes src_movie_lines_tok.txt next to it.
# `!!` captures the command's output lines and shows them as a list.
!!perl ../OpenNMT-py/tools/tokenizer.perl  -l en \
< ../data/"cornell movie-dialogs corpus"/src_movie_lines.txt \
> ../data/"cornell movie-dialogs corpus"/src_movie_lines_tok.txt

['Tokenizer Version 1.1', 'Language: en', 'Number of threads: 1']

In [257]:
# Tokenize the target side of the corpus with OpenNMT-py's tools/tokenizer.perl (language: en).
# Reads tgt_movie_lines.txt on stdin and writes tgt_movie_lines_tok.txt next to it.
# `!!` captures the command's output lines and shows them as a list.
!!perl ../OpenNMT-py/tools/tokenizer.perl  -l en \
< ../data/"cornell movie-dialogs corpus"/tgt_movie_lines.txt \
> ../data/"cornell movie-dialogs corpus"/tgt_movie_lines_tok.txt

['Tokenizer Version 1.1', 'Language: en', 'Number of threads: 1']

## Preprocessing

In [2]:
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw

['Please backup existing pt files: ../data/cornell_raw.train*.pt, to avoid overwriting them!']

In [3]:
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw_min_30_10_tok --src_words_min_frequency 30 --tgt_words_min_frequency 30 --src_seq_length 10 --tgt_seq_length 10

['Please backup existing pt files: ../data/cornell_raw_min_30_10_tok.train*.pt, to avoid overwriting them!']

In [46]:
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw_min_100_tok --src_words_min_frequency 100 --tgt_words_min_frequency 100 --src_seq_length 10 --tgt_seq_length 10

['[2019-09-12 02:53:40,588 INFO] Extracting features...',
 '[2019-09-12 02:53:40,590 INFO]  * number of source features: 0.',
 '[2019-09-12 02:53:40,591 INFO]  * number of target features: 0.',
 '[2019-09-12 02:53:40,591 INFO] Building `Fields` object...',
 '[2019-09-12 02:53:40,591 INFO] Building & saving training data...',
 '[2019-09-12 02:53:40,591 INFO] Reading source and target files: ../data/cornell movie-dialogs corpus/src_movie_lines_tok.txt ../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt.',
 '[2019-09-12 02:53:40,770 INFO] Building shard 0.',
 '[2019-09-12 02:53:48,692 INFO]  * saving 0th train data shard to ../data/cornell_raw_min_100_tok.train.0.pt.',
 '[2019-09-12 02:53:49,922 INFO]  * tgt vocab size: 379.',
 '[2019-09-12 02:53:49,948 INFO]  * src vocab size: 380.']

In [137]:
!!python ../OpenNMT-py/preprocess.py --train_src "data/cornell movie-dialogs corpus/src_movie_lines_tok.txt" --train_tgt "../data/cornell movie-dialogs corpus/tgt_movie_lines_tok.txt" --save_data ../data/cornell_raw_min_1000_tok --src_words_min_frequency 1000 --tgt_words_min_frequency 1000 --src_seq_length 10 --tgt_seq_length 10 

['Please backup existing pt files: ../data/cornell_raw_min_1000_tok.train*.pt, to avoid overwriting them!']

In [3]:
# Load the vocabulary/fields file of the min-frequency-1000 dataset.
# NOTE(review): the preprocessing cell saves under ../data/ while this loads from data/ —
# confirm both resolve to the same directory.
vocab_fields = torch.load("data/cornell_raw_min_1000_tok.vocab.pt")

In [4]:
def _field_vocab_padding(side):
    """Return (base text field, its vocab, id of its pad token) for side 'src' or 'tgt'."""
    text_field = vocab_fields[side].base_field
    vocabulary = text_field.vocab
    # stoi: mapping token strings to numerical identifiers.
    return text_field, vocabulary, vocabulary.stoi[text_field.pad_token]


# Author's notes for the source side:
# ['<unk>', '<blank>', 'I', 'you', 'the', 'to', 'a', 'of', 'and', 'You']
# src_text_field.pad_token : '<blank>'
src_text_field, src_vocab, src_padding = _field_vocab_padding("src")
tgt_text_field, tgt_vocab, tgt_padding = _field_vocab_padding("tgt")

In [5]:
# Publish the padding ids and vocabulary sizes of both sides on the config object.
config.src_padding, config.tgt_padding = src_padding, tgt_padding
config.src_vocab_size, config.tgt_vocab_size = len(src_vocab), len(tgt_vocab)

In [6]:
# Ids of the special tokens. The <unk> ids are used by the data-loading cell to drop
# examples that contain unknown words.
config.src_unk = src_vocab.stoi[src_text_field.unk_token]
config.tgt_unk, config.tgt_bos, config.tgt_eos = (
    tgt_vocab.stoi[special]
    for special in (
        tgt_text_field.unk_token,
        tgt_text_field.init_token,
        tgt_text_field.eos_token,
    )
)

In [7]:
# Keep the vocab objects themselves on the config as well.
config.src_vocab, config.tgt_vocab = src_vocab, tgt_vocab

## Data Loading

In [8]:
import onmt
from itertools import chain

train_data_file = "data/cornell_raw_min_1000_tok.train.0.pt"

# A single, non-repeating pass over the shard with one example per batch.
train_iter = onmt.inputters.inputter.DatasetLazyIter(
    dataset_paths=[train_data_file],
    fields=vocab_fields,
    batch_size=1,
    batch_size_multiple=1,
    batch_size_fn=None,
    device=config.device,
    is_train=True,
    repeat=False,
    pool_factor=8192,
)


def _has_unk(batch):
    """Truthy when the batch's source or target tensor contains that side's <unk> id."""
    src_has_unk = (batch.src[0].squeeze() == config.src_unk).any()
    return src_has_unk or (batch.tgt.squeeze() == config.tgt_unk).any()


# Materialise the iterator, then keep only the batches without any <unk> token.
data = list(train_iter)
filtered_data = [batch for batch in data if not _has_unk(batch)]

In [9]:
# Number of <unk>-free examples; the same list is preloaded into the replay buffers further down.
config.PRELOADING_SIZE = len(filtered_data)

In [10]:
def _md(text):
    """Render one line of Markdown as cell output."""
    display(Markdown(text))


# Dataset summary
_md('#### Data')
_md(f'###### {config.PRELOADING_SIZE:,} records')
_md(f'###### {config.src_vocab_size:,} src vocabulary size')
_md(f'###### {config.tgt_vocab_size:,} tgt vocabulary size')

# Network hyperparameters
_md('#### seq2seq - Hyperparameter')
_md(f'###### Embedding Size: {config.emb_size}')
_md(f'###### RNN Size: {config.rnn_size}')

# Reinforcement-learning hyperparameters
_md('#### RL - Hyperparameter')
_md(f'###### Update Target Net every {config.target_update_freq} steps')
_md(f'###### Pretraining Iterations {config.PRETRAIN_ITER}')
_md(f'###### N-Steps {config.N_STEPS}')

#### Data

###### 492 records

###### 64 src vocabulary size

###### 61 tgt vocabulary size

#### seq2seq - Hyperparameter

###### Embedding Size: 100

###### RNN Size: 500

#### RL - Hyperparameter

###### Update Target Net every 10000 steps

###### Pretraining Iterations 0

###### N-Steps 3

# seq2seq-DQN

In [11]:
# Execute modules/DQN.py in this notebook; presumably it defines the `DQN` class passed to
# Model below — confirm.
%run modules/DQN.py

In [12]:
# Execute modules/Model.py in this notebook; presumably it defines the `Model` class
# instantiated in the next cell — confirm.
%run modules/Model.py

In [13]:
# Build the model wrapper from the config, passing the DQN class itself (not an instance).
model = Model(config, DQN)

In [14]:
# Display the module structure of model.current_model as the cell output.
model.current_model

DQN(
  (encoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(64, 100, padding_idx=1)
      )
    )
  )
  (encoder): RNNEncoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(64, 100, padding_idx=1)
        )
      )
    )
    (rnn): GRU(100, 250, bidirectional=True)
  )
  (decoder_embeddings): Embeddings(
    (make_embedding): Sequential(
      (emb_luts): Elementwise(
        (0): Embedding(61, 100, padding_idx=1)
      )
    )
  )
  (decoder): InputFeedRNNDecoder(
    (embeddings): Embeddings(
      (make_embedding): Sequential(
        (emb_luts): Elementwise(
          (0): Embedding(61, 100, padding_idx=1)
        )
      )
    )
    (dropout): Dropout(p=0.0)
    (rnn): StackedGRU(
      (dropout): Dropout(p=0.0)
      (layers): ModuleList(
        (0): GRUCell(600, 500)
      )
    )
    (attn): GlobalAttention(
      (linear_in): Linea

In [15]:
# Count the elements of every parameter that has requires_grad set.
total_params = 0
for weight in model.current_model.parameters():
    if weight.requires_grad:
        total_params += weight.numel()

display(Markdown(f'##### Total Number of Parameters: {total_params:,}'))

##### Total Number of Parameters: 3,514,188

In [16]:
# Execute modules/MSELoss.py in this notebook; presumably it defines the `MSELoss` wrapper
# used in the next cell — confirm.
%run modules/MSELoss.py

In [17]:
# Element-wise smooth-L1 criterion (reduction="none").
# nn.MSELoss(reduction="none") is the alternative the author left disabled.
criterion = nn.SmoothL1Loss(reduction="none")
loss = MSELoss(criterion, model.current_model.generator)

In [18]:
# Execute modules/Reward.py in this notebook; presumably it defines the `Reward` class
# instantiated in the next cell — confirm.
%run modules/Reward.py

In [19]:
# Configure a single reward, 'BLEU', with weight 1, then build the reward object from the config.
config.rewards, config.rewards_weights = ['BLEU'], [1]
reward = Reward(config)

In [20]:
# Adam over the current model's parameters, wrapped in OpenNMT-py's Optimizer with the same
# learning rate and max_grad_norm=2.
torch_optimizer = torch.optim.Adam(
    model.current_model.parameters(),
    lr=config.LR,
)
optim = onmt.utils.optimizers.Optimizer(
    torch_optimizer,
    learning_rate=config.LR,
    max_grad_norm=2,
)

In [21]:
#report_manager = onmt.utils.ReportMgr(report_every=1, start_time=None, tensorboard_writer=None)

In [22]:
# Execute modules/RLModelSaver.py in this notebook; presumably it defines the `RLModelSaver`
# class instantiated in the next cell — confirm.
%run modules/RLModelSaver.py

In [23]:
# Saver object handed to the trainer below.
# NOTE(review): "checkpoints/checkpoint" is presumably the checkpoint path prefix — confirm
# in RLModelSaver.
model_saver = RLModelSaver("checkpoints/checkpoint", model, config, vocab_fields, optim)

In [24]:
def _seq_pair(example):
    """Return the example's source and target tensors, each with dimension 1 squeezed out."""
    return example.src[0].squeeze(1), example.tgt.squeeze(1)


# Preload Experience Replay Buffer (only when the replay memory is still empty).
if len(model.replay_memory) == 0:
    for example in filtered_data:
        # The replay memory gets 1 as the third argument, the sample buffer gets None.
        model.replay_memory.preload(*_seq_pair(example), 1)
        model.sample_buffer.preload(*_seq_pair(example), None)

In [25]:
# Execute modules/QLearning.py in this notebook; presumably it defines the `QLearning`
# trainer instantiated in the next cell — confirm.
%run modules/QLearning.py

In [26]:
# Wire reward, loss, optimizer and checkpoint saver into the Q-learning trainer.
# The same loss object is passed as both the training and the validation loss.
trainer_kwargs = dict(
    reward=reward,
    train_loss=loss,
    valid_loss=loss,
    optim=optim,
    model_saver=model_saver,
    # shard_size = 0  (left disabled by the author)
)
trainer = QLearning(config, model, **trainer_kwargs)

In [27]:
def _decode(vocab, token_tensor):
    """Return the space-joined vocab strings for every token id in `token_tensor`.

    The tensor is flattened with reshape(-1) rather than squeeze(): squeezing a
    one-token sequence yields a 0-dim tensor whose tolist() is a bare int, which
    cannot be iterated. Flattening always produces a list of ids.
    """
    return ' '.join(vocab.itos[token] for token in token_tensor.reshape(-1).tolist())


display(Markdown('##### Examples'))
# Print the first ten filtered examples as "source  ||  target".
for sample in filtered_data[:10]:
    print(_decode(src_vocab, sample.src[0]) + '  ||  ' + _decode(tgt_vocab, sample.tgt))

##### Examples

No .  ||  <s> What do you want ? </s>
Yeah .  ||  <s> Why ? </s>
You are ?  ||  <s> Yeah . </s>
No !  ||  <s> Yes ! </s>
What &apos;s that ?  ||  <s> What ? </s>
Why .  ||  <s> Why what ? </s>
You know what for .  ||  <s> No I don &apos;t . </s>
No .  ||  <s> Why not ? </s>
I don &apos;t know .  ||  <s> You don &apos;t know . </s>
Yeah I can &apos;t .  ||  <s> Why ? </s>


In [28]:
#trainer.pretrain(500, 5000)

In [29]:
#for param in model.current_model.encoder.parameters():
#    param.requires_grad = False

#for param in model.current_model.decoder.parameters():
#    param.requires_grad = False

In [30]:
#config.SAVE_GRAD_FLOW_EVERY = 1

In [31]:
# Start training: one million training steps, save_checkpoint_steps set to 1,000.
result = trainer.train(
    train_steps=1_000_000,
    save_checkpoint_steps=1_000,
)

INFO:root:Start training loop
INFO:root:Step 1
INFO:root:Sampling: Collecting new data
INFO:root:Step 2
INFO:root:Sampling: Collecting new data
INFO:root:Step 3
INFO:root:Sampling: Collecting new data
INFO:root:Step 4
INFO:root:Sampling: Collecting new data
INFO:root:Step 5
INFO:root:Sampling: Collecting new data
INFO:root:Step 6
INFO:root:Sampling: Collecting new data
INFO:root:Step 7
INFO:root:Sampling: Collecting new data
INFO:root:Step 8
INFO:root:Sampling: Collecting new data
INFO:root:Step 9
INFO:root:Sampling: Collecting new data
INFO:root:Step 10
INFO:root:Sampling: Collecting new data
INFO:root:Step 11
INFO:root:Sampling: Collecting new data
INFO:root:Step 12
INFO:root:Sampling: Collecting new data
INFO:root:Step 13
INFO:root:Sampling: Collecting new data
INFO:root:Step 14
INFO:root:Sampling: Collecting new data
INFO:root:Step 15
INFO:root:Sampling: Collecting new data
INFO:root:Step 16
INFO:root:Sampling: Collecting new data
INFO:root:Step 17
INFO:root:Sampling: Collecting ne

KeyboardInterrupt: 

In [26]:
#for para in list(model.current_model.parameters()):
#    printa(para.grad.abs().sum())

In [27]:
# Analyze Replay Memory
#for i, x in enumerate(model.replay_memory._storage[2000:]):
#    if ' '.join([tgt_vocab.itos[token] for token in x[1].squeeze().tolist()]) != '<s> </s>':
#        print(i+2000, ' '.join([src_vocab.itos[token] for token in x[0].squeeze().tolist()]) + '  ||  ' + ' '.join([tgt_vocab.itos[token] for token in x[1].squeeze().tolist()]))

In [28]:
# Sum up rewards in replay memory
#for i in range(0,len(model.replay_memory),2000):
#    sum_ = sum([y[2] for y in model.replay_memory._storage[i:i+2000]])
#    print(i, i+2000, sum_)

In [29]:
# Take a look at priorities in prioritized replay memory
#for i in range(0,len(model.replay_memory),2000):
#    print(i, i+2000, sum([model.replay_memory._it_sum[y] for y in range(i, i+2000)]))