# Seq2seq Model Simple Illustration

Start from a very simple example:
  
I am Licor -> Je suis Licor  
I am fine -> Je vais bien

We want the model to learn the translation of those two pairs

In [1]:
source = ['I am Licor', 'I am fine']
target = ['Je suis Licor', 'Je vais bien']

In [15]:
# Build a dictionary
src_itos = {0: 'I', 1: 'am', 2: 'Licor', 3: 'fine', 4:'<GO>'}
tgt_itos = {0: 'Je', 1: 'suis', 2: 'Licor', 3: 'vais', 4: 'bien', 5:'<GO>'}

src_stoi = {v: k for k, v in src_itos.items()}
tgt_stoi = {v: k for k, v in tgt_itos.items()}

In [12]:
# Vectorize the sentences and group into pair
pairs = []
for p in zip(source, target):
    s_idx = [src_stoi.get(w) for w in p[0].split()]
    t_idx = [tgt_stoi.get(w) for w in p[1].split()]
    pairs.append((s_idx, t_idx))

In [13]:
pairs

[([0, 1, 2], [0, 1, 2]), ([0, 1, 3], [0, 3, 4])]

In [14]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cpu")

## Seq2seq model with GRU

In [9]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [10]:
class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

### Training

In [25]:
# prepare training data
input_seq = []
output_seq = []
for src, tgt in pairs:
    src_idx = torch.LongTensor([src], device=device).view(-1, 1)
    tgt_idx = torch.LongTensor([tgt], device=device).view(-1, 1)
    input_seq.append(src_idx)
    output_seq.append(tgt_idx)

# create encoder, decoder and optimizer
INPUT_SIZE = len(src_itos)
OUTPUT_SIZE = len(tgt_itos)
HIDDEN_SIZE = 32
LR = 0.01

encoder = Encoder(INPUT_SIZE, HIDDEN_SIZE).to(device)
decoder = Decoder(HIDDEN_SIZE, OUTPUT_SIZE)

encoder_optimizer = optim.Adam(encoder.parameters(), lr=LR)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=LR)

criterion = nn.NLLLoss()

# initializa hidden layer in encoder
encoder_hidden = encoder.initHidden()

# clear gradient
encoder_optimizer.zero_grad()
decoder_optimizer.zero_grad()

# start training
for _ in range(10):
    for idx in range(len(input_seq)):
        input_tensor = input_seq[idx]
        output_tensor = output_seq[idx]
        loss = 0
        for i in range(len(input_tensor)):
            encoder_output, encoder_hidden = encoder(input_tensor[i], encoder_hidden)

        decoder_input = torch.tensor([[tgt_stoi['<GO>']]], device=device)
        decoder_hidden = encoder_hidden
        for i in range(len(output_tensor)):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            loss += criterion(decoder_output, output_tensor[i])
            decoder_input = output_tensor[i]  
        print(loss.item())
        loss.backward(retain_graph=True)

        encoder_optimizer.step()
        decoder_optimizer.step()

5.518531322479248
5.243597984313965
3.9942703247070312
4.468466758728027
3.1920394897460938
3.6199026107788086
2.5253758430480957
2.723552942276001
1.9895079135894775
1.9824979305267334
1.5752532482147217
1.4385730028152466
1.1577732563018799
1.0526231527328491
0.7395479083061218
0.7881637811660767
0.4203367233276367
0.5264965295791626
0.23585759103298187
0.2521694004535675


In [47]:
input_sent = 'I am fine'

def sent_to_tensor(s):
    s_idx = [src_stoi.get(w) for w in s.split()]
    return torch.LongTensor([s_idx], device=device).view(-1, 1)

with torch.no_grad():
    input_tensor = sent_to_tensor(input_sent)
    input_length = input_tensor.size()[0]
    encoder_hidden = encoder.initHidden()

    for i in range(len(input_tensor)):
        encoder_output, encoder_hidden = encoder(input_tensor[i], encoder_hidden)

    decoder_input = torch.tensor([[tgt_stoi['<GO>']]], device=device)
    decoder_hidden = encoder_hidden

    decoded_words = []

    for i in range(3):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.data.topk(1)
        decoded_words.append(tgt_itos[topi.item()])

        decoder_input = topi.squeeze().detach()
    print(' '.join(decoded_words))

Je vais bien
