In [1]:
# Load IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to the imported helper packages take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
# Select matplotlib's interactive "notebook" backend for any figures.
%matplotlib notebook

In [2]:
from my_utils import Dictionary

# Number of distinct symbols used by the toy task.
n_unique = 10

# The source vocabulary is seeded with '<EOS>'; the target vocabulary is
# seeded with both '<BOS>' and '<EOS>'.
src_dict = Dictionary(['<EOS>'])
tgt_dict = Dictionary(['<BOS>', '<EOS>'])

# Register the symbols "0" .. str(n_unique - 1) in both vocabularies.
for symbol in map(str, range(n_unique)):
    src_dict.add_word(symbol)
    tgt_dict.add_word(symbol)

In [3]:
from my_utils.toy_data import invert_seq
# Build the train and test splits with the same generator settings; the first
# argument (5000 / 100) is presumably the number of examples per split.
train, test = (
    invert_seq(n_examples, n_unique=n_unique)
    for n_examples in (5000, 100)
)

In [4]:
from my_utils import DataLoader
from torch_models.utils import seq2seq

def numericalize(dataset, src_dict, tgt_dict):
    """Map every token of every (source, target) pair through its dictionary.

    Args:
        dataset: iterable of ``(src, tgt)`` pairs, each side an iterable of tokens.
        src_dict: callable applied to each source token.
        tgt_dict: callable applied to each target token.

    Returns:
        A list of ``(src_ids, tgt_ids)`` tuples, where each element is the list
        of values returned by the corresponding dictionary call.
    """
    converted = []
    for src_tokens, tgt_tokens in dataset:
        # Source side is converted before the target side, pair by pair.
        src_ids = list(map(src_dict, src_tokens))
        tgt_ids = list(map(tgt_dict, tgt_tokens))
        converted.append((src_ids, tgt_ids))
    return converted

# A single transform object is passed as trans_func to both loaders.
trans_func = seq2seq()

# Training loader: numericalized training pairs, 64 per batch.
train_loader = DataLoader(
    numericalize(train, src_dict, tgt_dict),
    batch_size=64,
    trans_func=trans_func,
)

# Test loader: numericalized test pairs, 10 per batch.
test_loader = DataLoader(
    numericalize(test, src_dict, tgt_dict),
    batch_size=10,
    trans_func=trans_func,
)

In [5]:
from torch_models.models.transformer import Transformer
# Model width and number of attention heads.
embed_size = 24
n_head = 4

# Single-layer Transformer with dropout disabled. Vocabulary sizes and the
# special-token ids are read from the two dictionaries.
model = Transformer(
    size=embed_size,
    n_head=n_head,
    n_layers=1,
    dropout=0,
    src_vocab_size=len(src_dict),
    tgt_vocab_size=len(tgt_dict),
    src_EOS=src_dict('<EOS>'),
    tgt_BOS=tgt_dict('<BOS>'),
    tgt_EOS=tgt_dict('<EOS>'),
)
print(model)

Transformer(
  (encoder): TransformerEncoder(
    (embedding): TransformerEmbedding(
      (embedding): Embedding(12, 24, padding_idx=11)
    )
    (pe): PositionalEncoding(
      (dropout): Dropout(p=0)
    )
    (layers): ModuleList(
      (0): EncoderLayer(
        (attention): MultiHeadedAttention(
          (Q_linear): Linear(in_features=24, out_features=24, bias=True)
          (K_linear): Linear(in_features=24, out_features=24, bias=True)
          (V_linear): Linear(in_features=24, out_features=24, bias=True)
          (out_linear): Linear(in_features=24, out_features=24, bias=True)
          (attention): DotAttn()
          (dropout): Dropout(p=0)
        )
        (fc): PositionwiseFeedForward(
          (w_1): Linear(in_features=24, out_features=96, bias=True)
          (w_2): Linear(in_features=96, out_features=24, bias=True)
          (dropout): Dropout(p=0)
        )
        (layer_norms): ModuleList(
          (0): LayerNorm(torch.Size([24]), eps=1e-05, elementwise_affin

In [7]:
from my_utils.misc.logging import init_logger
# Initialise logging before training starts; presumably this is what produces
# the timestamped INFO lines in the cell output (confirm in my_utils.misc.logging).
init_logger()
from my_utils import Trainer, EvaluatorSeq
from torch.optim import Adam, SGD

# Adam with its default hyper-parameters over all model parameters.
optimizer = Adam(model.parameters())

# Evaluator bound to the test loader, configured with the BLEU measure.
evaluator = EvaluatorSeq(
    model,
    test_loader,
    measure='BLEU',
)

# Train on the training loader with max_epoch=5, handing the evaluator to the
# trainer; no score monitor is used.
trainer = Trainer(model, train_loader)
trainer.train_epoch(
    optimizer,
    max_epoch=5,
    evaluator=evaluator,
    score_monitor=None,
)

[2018-10-23 20:21:18,640 INFO] steps [79/79]	loss: 0.22773701989952522	
[2018-10-23 20:21:19,438 INFO] Evaluator BLEU: 0.8265569658903429	
[2018-10-23 20:21:22,466 INFO] steps [79/79]	loss: 0.10602986784298209	
[2018-10-23 20:21:23,292 INFO] Evaluator BLEU: 0.8622491239580674	


In [8]:
# Sanity check on the trained model: the evaluator's score must exceed 0.8.
# The score is bound to a name first so that a failing assertion reports the
# value that was actually obtained instead of a bare AssertionError.
bleu_score = evaluator.evaluate()
assert 0.8 < bleu_score, 'BLEU %r is not above the 0.8 threshold' % (bleu_score,)

In [21]:
# Decode a few examples from one training batch to inspect the model's output.
n_show = 10  # number of examples to display

# Use the iterator that iter() returns rather than discarding it and calling
# next() on the loader itself, which only works when the loader happens to be
# its own iterator.
inputs, targets = next(iter(train_loader))
inputs = inputs[:n_show]
targets = targets[:n_show]
generated = model.predict(inputs)

print('======= input ======')
for seq in inputs:
    # Input elements expose .item(), so unwrap each one before the lookup.
    print([src_dict[s.item()] for s in seq])
print('======= output ======')
for seq in generated[:n_show]:
    # Predicted ids are used directly as dictionary indices.
    print([tgt_dict[s] for s in seq])

['6', '9', '6', '6']
['6', '6', '4', '4']
['4', '4', '8']
['5', '1', '9', '8', '6']
['0', '6', '1', '3']
['5', '3', '1', '6']
['0', '7', '0', '8', '9']
['5', '9', '6', '3', '0']
['9', '4', '2']
['7', '2', '2']
['6', '6', '9', '6']
['4', '4', '6', '6']
['8', '4', '8']
['6', '8', '9', '1', '5']
['3', '1', '6', '0']
['6', '1', '3', '5']
['9', '8', '0', '7', '0']
['0', '3', '6', '9', '5']
['2', '4', '9']
['2', '2', '7']


In [9]:
import numpy as np
# Elementwise comparison reduced with all(): True only if every position matches.
np.equal(np.array([3, 2, 1]), np.array([1, 2, 3])).all()

False