In [1]:
import torch
import torch.nn as nn
from torch import optim

In [48]:
class Seq2Seq(nn.Module):
    def __init__(self, emb_dim, hidden_dim, vocab_size, max_len=10):
        super(Seq2Seq, self).__init__()
        
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.max_len = max_len

        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.encoder = nn.GRU(emb_dim, hidden_dim)
        self.decoder = nn.GRU(hidden_dim + emb_dim, hidden_dim)
        self.Wb = nn.Linear(hidden_dim, vocab_size)
        self.softmax = nn.Softmax(dim=1)
    
    def encoding(self, inp):
        encoded = []
        h_t = self.h_0()
        for i, w in enumerate(inp):
            x = self.emb(torch.tensor(w).long()).view((1, 1, emb_dim))
            output, h_t = self.encoder(x, h_t)
            encoded.append(h_t)
        return encoded
    
    def decoding(self, encoded, target):
        s_t = self.h_0()
        c_t = torch.mean(torch.cat(encoded), dim=0).view((1, 1, hidden_dim))
        y_t = self.emb(torch.tensor(w2id['<B>']).long()).view((1, 1, -1))
        
        losses = []
        for wid in target:
            x = torch.cat([c_t, y_t], dim=2)
            output, s_t = self.decoder(x, s_t)

            probs = self.softmax(self.Wb(s_t)[0])
            losses.append(probs)
            
            y_t = self.emb(torch.tensor(wid).long()).view((1, 1, -1))
        return losses
    
    def generating(self, encoded):
        s_t = self.h_0()
        c_t = torch.mean(torch.cat(encoded), dim=0).view((1, 1, hidden_dim))
        y_t = self.emb(torch.tensor(w2id['<B>']).long()).view((1, 1, -1))
        
        words = []
        for wid in range(self.max_len):
            x = torch.cat([c_t, y_t], dim=2)
            output, s_t = self.decoder(x, s_t)

            probs = self.softmax(self.Wb(s_t)[0])
            idx = torch.argmax(probs)
            y_t = self.emb(idx).view((1, 1, -1))
            words.append(idx)
        return words
    
    def forward(self, inp, target=None):
        encoded = self.encoding(inp)
        
        if target:
            return self.decoding(encoded, target)
        else:
            return self.generating(encoded)

    def h_0(self):
        return torch.zeros((1, 1, self.hidden_dim))

In [49]:
# preprocess
corpus = ['<B> Eu gosto de você . <E>', '<B> Eu não gosto de você . <E>', '<B> Eu não amo você . <E>']
corpus = [w.split() for w in corpus]

vocab = []
for snt in corpus:
    for w in snt:
        vocab.append(w)
vocab = set(vocab)
w2id = { w:i for i, w in enumerate(vocab) }
id2w = { i:w for i, w in enumerate(vocab) }

In [50]:
emb_dim, hidden_dim, vocab_size = 5, 10, len(vocab)
model = Seq2Seq(emb_dim, hidden_dim, vocab_size)
print(model)
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

Seq2Seq(
  (emb): Embedding(9, 5)
  (encoder): GRU(5, 10)
  (decoder): GRU(15, 10)
  (Wb): Linear(in_features=10, out_features=9, bias=True)
  (softmax): Softmax(dim=1)
)


In [51]:
for epoch in range(5):
    print('Epoch: ', epoch+1)
    for row in corpus:
        optimizer.zero_grad()
        snt_ids = [w2id[w] for w in row]
        output = model(snt_ids, snt_ids)

        loss = criterion(torch.cat(output, dim=0), torch.tensor(snt_ids))
        print(loss)
        loss.backward()
        optimizer.step()

Epoch:  1
tensor(2.1972, grad_fn=<NllLossBackward>)
tensor(2.1913, grad_fn=<NllLossBackward>)
tensor(2.1948, grad_fn=<NllLossBackward>)
Epoch:  2
tensor(2.1818, grad_fn=<NllLossBackward>)
tensor(2.1788, grad_fn=<NllLossBackward>)
tensor(2.1812, grad_fn=<NllLossBackward>)
Epoch:  3
tensor(2.1672, grad_fn=<NllLossBackward>)
tensor(2.1646, grad_fn=<NllLossBackward>)
tensor(2.1634, grad_fn=<NllLossBackward>)
Epoch:  4
tensor(2.1497, grad_fn=<NllLossBackward>)
tensor(2.1474, grad_fn=<NllLossBackward>)
tensor(2.1412, grad_fn=<NllLossBackward>)
Epoch:  5
tensor(2.1281, grad_fn=<NllLossBackward>)
tensor(2.1257, grad_fn=<NllLossBackward>)
tensor(2.1141, grad_fn=<NllLossBackward>)


In [53]:
for snt_ids in corpus:
    print([id2w[int(w)] for w in model(snt_ids)])

[tensor(5), tensor(2), tensor(4), tensor(1), tensor(1), tensor(1), tensor(1), tensor(1), tensor(1), tensor(1)]


['Eu', 'gosto', '.', '<E>', '<E>', '<E>', '<E>', '<E>', '<E>', '<E>']

In [38]:
# init
emb_dim, hidden_dim, vocab_size = 5, 10, len(vocab)

emb = nn.Embedding(vocab_size, emb_dim)
encoder = nn.GRU(emb_dim, hidden_dim)
decoder = nn.GRU(hidden_dim + emb_dim, hidden_dim)
Wb = nn.Linear(hidden_dim, vocab_size)
softmax = nn.LogSoftmax()

In [39]:
# encoder
snt = corpus[0]
snt_ids = [w2id[w] for w in snt]

encoded = []
h_t = torch.zeros((1, 1, hidden_dim))
for i, snt_id in enumerate(snt_ids):
    x = emb(torch.tensor(snt_id).long()).view((1, 1, emb_dim))
    
    output, h_t = encoder(x, h_t)
    encoded.append(h_t)

In [41]:
# decoder
s_t = torch.zeros((1, 1, hidden_dim))
c_t = torch.mean(torch.cat(encoded), dim=0).view((1, 1, hidden_dim))
y_t = emb(torch.tensor(w2id['<B>']).long()).view((1, 1, -1))
for i in range(5):
    x = torch.cat([c_t, y_t], dim=2)
    output, s_t = decoder(x, s_t)
    
    word_dist = softmax(Wb(s_t)[0])
    print(word_dist)
    idx = torch.argmax(word_dist)
    y_t = emb(idx).view((1, 1, -1))
    print(idx)

tensor([[-1.9836, -2.2431, -1.8425, -2.4867, -1.9412, -2.2557, -2.0177, -2.0143]],
       grad_fn=<LogSoftmaxBackward>)
tensor(2)
tensor([[-2.0522, -2.1550, -2.0337, -2.3604, -1.9261, -2.3631, -1.8979, -1.9611]],
       grad_fn=<LogSoftmaxBackward>)
tensor(6)
tensor([[-1.8888, -2.1833, -1.7217, -2.5176, -2.0078, -2.3238, -2.1533, -2.0528]],
       grad_fn=<LogSoftmaxBackward>)
tensor(2)
tensor([[-2.0029, -2.0828, -2.0165, -2.3504, -1.9493, -2.4000, -1.9807, -1.9594]],
       grad_fn=<LogSoftmaxBackward>)
tensor(4)
tensor([[-2.0358, -2.1827, -1.8251, -2.3776, -1.8816, -2.3632, -2.1038, -2.0081]],
       grad_fn=<LogSoftmaxBackward>)
tensor(2)


  if __name__ == '__main__':


In [10]:
word_dist[0][1]

tensor(0.1290, grad_fn=<SelectBackward>)

In [48]:
c_t.size()

torch.Size([1, 10])