In [1]:
# Load the autoreload extension and use mode 2 so that edited project modules are
# re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Select matplotlib's interactive "notebook" backend for any figures drawn later.
%matplotlib notebook

## Inverting a sequence

### Grammar
#### Invert a sequence of digits
Example: `'1', '2', '5', '2', '2', '6', '5', '1'` → `'1', '5', '6', '2', '2', '5', '2', '1'`

In [2]:
import random
def invert_seq(data_size=5000, min_len=3, max_len=9, seed=None):
    """Generate (source, target) pairs for the sequence-inversion toy task.

    Every source is a list of single-digit strings ('0'-'9') whose length is drawn
    uniformly from ``[min_len, max_len]``; the target is the source reversed.

    Args:
        data_size: number of pairs to generate.
        min_len: smallest sequence length (inclusive).
        max_len: largest sequence length (inclusive).
        seed: optional seed. When given, a private ``random.Random(seed)`` is used, so
            the result is reproducible and the global ``random`` state is untouched.
            When ``None`` the module-level generator is used, exactly as before.

    Returns:
        A list of ``(seq, target)`` tuples, each element being a list of str.

    Raises:
        ValueError: if the length bounds are negative or ``min_len > max_len``.
    """
    if not 0 <= min_len <= max_len:
        raise ValueError(
            'expected 0 <= min_len <= max_len, got min_len={}, max_len={}'.format(min_len, max_len))

    # Fall back to the shared module-level generator so callers that rely on
    # random.seed(...) keep getting the same stream as before.
    rng = random if seed is None else random.Random(seed)

    dataset = []
    for _ in range(data_size):
        length = rng.randint(min_len, max_len)
        seq = [str(rng.randint(0, 9)) for _ in range(length)]
        dataset.append((seq, seq[::-1]))
    return dataset

In [3]:
from my_utils import Dictionary

# Source vocabulary: created with Dictionary's defaults.
src_dict = Dictionary()
# Target vocabulary: the begin/end-of-sequence markers are handed to the constructor,
# i.e. before any digit is registered.
tgt_dict = Dictionary(['<BOS>', '<EOS>'])

# Register the ten digit tokens '0'..'9', in ascending order, in both vocabularies.
for n in map(str, range(10)):
    src_dict.add_word(n)
    tgt_dict.add_word(n)

In [22]:
# Seed the stdlib RNG that invert_seq draws from so the toy dataset is the same on every
# Restart & Run All. (torch is not seeded here, so weight initialisation still varies.)
random.seed(0)
train = invert_seq(100)
test = invert_seq(100)

In [28]:
from my_utils import DataLoader
from torch_models.utils import seq2seq, get_device

def numericalize(dataset, src_dict, tgt_dict):
    """Convert every token of every (source, target) pair with the given vocabularies.

    ``src_dict`` and ``tgt_dict`` are invoked as callables, once per token, and whatever
    they return is stored in place of the token.

    Args:
        dataset: iterable of ``(src_tokens, tgt_tokens)`` pairs.
        src_dict: callable applied to each source token.
        tgt_dict: callable applied to each target token.

    Returns:
        A new list of ``(src_ids, tgt_ids)`` tuples (two lists each), in input order.
    """
    encoded_pairs = []
    for source_tokens, target_tokens in dataset:
        source_ids = list(map(src_dict, source_tokens))
        target_ids = list(map(tgt_dict, target_tokens))
        encoded_pairs.append((source_ids, target_ids))
    return encoded_pairs

# get_device() comes from torch_models.utils; its result is kept in `device`
# (the recorded run reports "cpu").
device = get_device()

# Wrap the id-encoded splits in loaders; `seq2seq` is supplied as each loader's trans_func.
train_loader = DataLoader(
    numericalize(train, src_dict, tgt_dict),
    batch_size=10,
    trans_func=seq2seq,
)
test_loader = DataLoader(
    numericalize(test, src_dict, tgt_dict),
    batch_size=16,
    trans_func=seq2seq,
)

===== Device =====
cpu


In [29]:
from torch_models.models import MLP, LSTMEncoder
import torch.nn as nn
import torch

class Seq2Seq(nn.Module):
    """LSTM encoder-decoder built from two ``LSTMEncoder`` modules and an output ``MLP``.

    The hidden state returned by the encoder is passed to the decoder as its initial
    state, and ``out_mlp`` maps decoder outputs (width ``hidden_size``) to
    ``tgt_vocab_size`` scores.
    """

    def __init__(self, embed_size, hidden_size, src_vocab_size, tgt_vocab_size, tgt_BOS, tgt_EOS, num_layers=1):
        """Build the encoder, the decoder and the output layer.

        Args:
            embed_size: embedding width handed to both LSTMEncoders.
            hidden_size: LSTM hidden width; also the input width of ``out_mlp``.
            src_vocab_size: vocabulary size handed to the encoder.
            tgt_vocab_size: vocabulary size handed to the decoder and output width of ``out_mlp``.
            tgt_BOS: target-vocabulary id used as the first decoder input.
            tgt_EOS: target-vocabulary id that terminates generation.
            num_layers: number of LSTM layers for both encoder and decoder.
        """
        super().__init__()
        self.encoder = LSTMEncoder(embed_size, hidden_size, src_vocab_size, bidirectional=False, num_layers=num_layers)
        self.decoder = LSTMEncoder(embed_size, hidden_size, tgt_vocab_size, bidirectional=False, num_layers=num_layers)
        self.out_mlp = MLP(dims=[hidden_size, tgt_vocab_size])

        self.tgt_BOS = tgt_BOS
        self.tgt_EOS = tgt_EOS

    def forward(self, inputs):
        """Score the first target token for every sequence in ``inputs``.

        The decoder is run for a single step on a BOS token per input sequence, so the
        returned value is ``out_mlp.forward`` applied to that one-step decoder output.
        """
        # encoding
        _, enc_hiddens = self.encoder.forward(inputs)
        # decoding: one BOS token per sequence, created on the encoder state's device
        BOSs = torch.tensor([[self.tgt_BOS] for _ in range(len(inputs))],
                            dtype=torch.long, device=enc_hiddens.device)
        decoded, _ = self.decoder.forward(BOSs, enc_hiddens)
        decoded = decoded.squeeze(1)
        out = self.out_mlp.forward(decoded)
        return out

    def generate(self, inputs, threshold=100):
        """Greedily decode one target sequence per input sequence.

        Args:
            inputs: batch accepted by ``self.encoder.forward``.
            threshold: maximum number of decoding steps per sequence.

        Returns:
            A list (one entry per batch element) of lists of predicted token ids.
            The BOS and EOS ids are not included.
        """
        generated = []
        # Inference only: the results are plain Python ints, so no autograd graph is needed.
        with torch.no_grad():
            # encoding
            _, enc_hiddens = self.encoder.forward(inputs)  # (num_layers * num_directions, batch, hidden_size)
            n_batch = enc_hiddens.shape[1]
            for i in range(n_batch):
                tgt_seq = []
                current_token = self.tgt_BOS
                # Keep a batch dimension of 1 for the sequence being decoded.
                hidden = enc_hiddens[:, i].unsqueeze(1)
                for _ in range(threshold):
                    step_input = torch.tensor([[current_token]], dtype=torch.long, device=enc_hiddens.device)
                    decoded, hidden = self.decoder.forward(step_input, hidden)
                    out = self.out_mlp.predict(decoded.squeeze(1)).item()
                    if out == self.tgt_EOS:
                        break
                    tgt_seq.append(out)
                    current_token = out
                generated.append(tgt_seq)
        return generated

    def fit(self, inputs, targets, optimizer):
        """Run one teacher-forced training step and return what ``out_mlp.fit`` returns.

        The decoder input is ``BOS + target`` and the expected output is ``target + EOS``.
        The optimizer is only forwarded to ``out_mlp.fit``; presumably that call performs
        the backward pass and the update -- confirm in MLP.fit.
        """
        # encoding
        _, enc_hiddens = self.encoder.forward(inputs)
        # decoding
        BOS_targets = self._append_BOS(targets)
        decoded, _ = self.decoder.forward(BOS_targets, enc_hiddens)
        decoded = self._flatten_and_unpad(decoded)
        # predicting
        targets_EOS = self._append_EOS_flatten(targets)
        loss = self.out_mlp.fit(decoded, targets_EOS, optimizer)
        return loss

    def _append_BOS(self, targets):
        """Return a list with the BOS id prepended to every 1-D target tensor."""
        # The marker is created on each target's device so the concatenation also works off-CPU.
        return [
            torch.cat((torch.tensor([self.tgt_BOS], dtype=torch.long, device=target.device), target))
            for target in targets
        ]

    def _append_EOS_flatten(self, targets):
        """Append the EOS id to every 1-D target tensor and concatenate them into one tensor."""
        EOS_targets = [
            torch.cat((target, torch.tensor([self.tgt_EOS], dtype=torch.long, device=target.device)))
            for target in targets
        ]
        return torch.cat(EOS_targets)

    def _flatten_and_unpad(self, decoded):
        """Flatten decoder outputs to rows and drop every row that contains ``-inf``.

        Rows containing ``-inf`` are treated as padding (presumably the decoder fills
        padded time steps with ``-inf`` -- confirm in LSTMEncoder). When every row is
        padding an empty ``(0, hidden_size)`` tensor is returned instead of raising.
        """
        decoded = decoded.reshape(-1, self.decoder.hidden_size)  # (batch * seq_len, hidden_size)
        is_padding = (decoded == float('-inf')).any(dim=1)
        return decoded[~is_padding]

In [30]:
# embed_size is 13 to match the 13 rows of the embedding table shown in the printed model,
# so that the identity matrix below gives every index its own one-hot vector.
model = Seq2Seq(
    embed_size=13,
    hidden_size=300,
    src_vocab_size=len(src_dict),
    tgt_vocab_size=len(tgt_dict),
    tgt_BOS=tgt_dict('<BOS>'),
    tgt_EOS=tgt_dict('<EOS>'),
    num_layers=1,
)

# Replace the decoder's embedding table with fixed one-hot vectors and freeze it.
# NOTE(review): this also overwrites the padding row (padding_idx=12 in the printed model)
# with a non-zero vector -- confirm that is intended.
model.decoder.embedding.weight.data = torch.eye(13)
model.decoder.embedding.weight.requires_grad = False

# The encoder reuses the very same embedding module as the decoder.
# NOTE(review): src_dict and tgt_dict assign ids independently, so the same id may stand
# for different tokens on each side -- verify this sharing is what is wanted.
model.encoder.embedding = model.decoder.embedding

print(model)

Seq2Seq(
  (encoder): LSTMEncoder(
    (embedding): Embedding(13, 13, padding_idx=12)
    (rnn): LSTM(13, 300)
  )
  (decoder): LSTMEncoder(
    (embedding): Embedding(13, 13, padding_idx=12)
    (rnn): LSTM(13, 300)
  )
  (out_mlp): MLP(
    (fc_out): Linear(in_features=300, out_features=12, bias=True)
    (dropout): Dropout(p=0)
    (criterion): CrossEntropyLoss()
    (activation): Tanh()
  )
)


In [31]:
from my_utils import Trainer, EvaluatorC
from torch.optim import Adam

# Hand Adam only the parameters that still require gradients (the embedding is frozen).
optimizer = Adam(p for p in model.parameters() if p.requires_grad)

# Evaluation is currently switched off; the evaluator construction is kept for reference.
# evaluator = EvaluatorC(model, test_loader)

trainer = Trainer(model, train_loader)
trainer.train(
    optimizer,
    max_epoch=1000,
    evaluator=None,
    score_monitor=None,
    show_log=True,
    hook_func=None,
)

epoch 0  	loss: 2.4804214954376222	
epoch 1  	loss: 2.4440106391906737	
epoch 2  	loss: 2.3740649700164793	
epoch 3  	loss: 2.239237356185913	
epoch 4  	loss: 2.294962024688721	
epoch 5  	loss: 2.3219133377075196	
epoch 6  	loss: 2.333459210395813	
epoch 7  	loss: 2.1924530982971193	
epoch 8  	loss: 2.4210618495941163	
epoch 9  	loss: 2.448764371871948	
epoch 10 	loss: 2.3204408168792723	
epoch 11 	loss: 2.2321518421173097	
epoch 12 	loss: 2.1897303104400634	
epoch 13 	loss: 2.1656079292297363	
epoch 14 	loss: 2.1162369728088377	
epoch 15 	loss: 2.116805815696716	
epoch 16 	loss: 2.110946774482727	
epoch 17 	loss: 2.0855512857437133	
epoch 18 	loss: 2.073363208770752	
epoch 19 	loss: 2.0471068143844606	
epoch 20 	loss: 2.044719433784485	
epoch 21 	loss: 2.0282179594039915	
epoch 22 	loss: 2.008611631393433	
epoch 23 	loss: 1.9876764297485352	
epoch 24 	loss: 1.9866745948791504	
epoch 25 	loss: 1.992501473426819	
epoch 26 	loss: 1.9753814697265626	
epoch 27 	loss: 1.9550715684890747	
ep

KeyboardInterrupt: 

In [None]:
# Take one batch from the training loader and decode it greedily with the model.
# Seq2Seq.generate is used directly, so this cell does not depend on a helper that is
# only defined further down the notebook.
inputs, target = next(iter(train_loader))
generated = model.generate(inputs)

print('======= input ======')
for seq in inputs:
    # Map source ids back to tokens for display.
    print([src_dict(s.item()) for s in seq])
print('======= output ======')
for seq in generated:
    # generate() already returns plain ints, so no .item() is needed here.
    print([tgt_dict(s) for s in seq])

In [None]:
def generate(self, inputs, threshold=100):
    """Greedily decode one target sequence per input sequence with the model ``self``.

    Stand-alone counterpart of ``Seq2Seq.generate``: ``self`` is any object exposing
    ``encoder``, ``decoder``, ``out_mlp``, ``tgt_BOS`` and ``tgt_EOS``.

    Args:
        self: the model to decode with.
        inputs: batch accepted by ``self.encoder.forward``.
        threshold: maximum number of decoding steps per sequence.

    Returns:
        A list (one entry per batch element) of lists of predicted token ids.
        The BOS and EOS ids are not included.
    """
    generated = []
    # Inference only: the results are plain Python ints, so no autograd graph is needed.
    with torch.no_grad():
        # encoding
        _, enc_hiddens = self.encoder.forward(inputs)  # (num_layers * num_directions, batch, hidden_size)
        n_batch = enc_hiddens.shape[1]
        for i in range(n_batch):
            tgt_seq = []
            current_token = self.tgt_BOS
            # Keep a batch dimension of 1 for the sequence being decoded.
            hidden = enc_hiddens[:, i].unsqueeze(1)
            for _ in range(threshold):
                # Build the step input on the same device as the encoder state.
                step_input = torch.tensor([[current_token]], dtype=torch.long, device=enc_hiddens.device)
                decoded, hidden = self.decoder.forward(step_input, hidden)
                out = self.out_mlp.predict(decoded.squeeze(1)).item()
                if out == self.tgt_EOS:
                    break
                tgt_seq.append(out)
                current_token = out
            generated.append(tgt_seq)
    return generated

In [22]:
# Sanity check: look up target-vocabulary id 11 (the recorded output below is '9').
tgt_dict(11)

'9'