# Implementation: Encoder-Decoder

## Implementation: Encoder

In [1]:
import torch.nn as nn
import torch

class EncoderLSTM(nn.Module):
    """Encoder half of the seq2seq model: embedding, dropout and a stacked LSTM.

    Args:
        input_size: length of the one-hot input, i.e. number of rows in the
            embedding table.
        embedding_size: dimensionality of one embedded input token.
        hidden_size: dimensionality of the LSTM hidden representation.
        num_layers: number of stacked layers in the LSTM.
        p: dropout probability, handed both to the ``nn.Dropout`` module and
            to the ``dropout`` argument of ``nn.LSTM``.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):
        super().__init__()

        # Keep the hyperparameters around for later inspection.
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Sub-modules are created in a fixed order (dropout, embedding, LSTM)
        # so that parameter registration and random initialisation are stable.
        self.dropout = nn.Dropout(p)
        self.embedding = nn.Embedding(
            num_embeddings=input_size,
            embedding_dim=embedding_size,
        )
        self.LSTM = nn.LSTM(
            input_size=embedding_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=p,
        )

<p align="center">
<img src="./img/encoder_decoder.png">
</p>

In [4]:
def forward(self, x):
    """Encode a batch of token indices and return the LSTM's final states.

    Args:
        x: tensor of token indices fed to ``self.embedding``; the shape
            comments below assume a [sequence length, batch size] layout.

    Returns:
        Tuple ``(hidden_state, cell_state)``, each of shape
        [num_layers, batch_size, hidden_size].
    """
    # Look up embeddings, then regularise them with dropout.
    # shape: [sequence length, batch size, embedding dims]
    embedded = self.embedding(x)
    embedded = self.dropout(embedded)

    # The LSTM returns (per-step outputs, (h_n, c_n)); only the final
    # states are needed by the decoder, so the per-step outputs are dropped.
    final_states = self.LSTM(embedded)[1]
    hidden_state, cell_state = final_states

    return hidden_state, cell_state

# Attach the free function `forward` to the class so it is used as the
# module's forward pass (the EncoderLSTM class body only defines __init__).
EncoderLSTM.forward = forward

## Implementation: Decoder

In [3]:
class DecoderLSTM(nn.Module):
    """Decoder half of the seq2seq model; decodes a single time step per call.

    Args:
        input_size: length of the one-hot input, i.e. number of rows in the
            embedding table.
        embedding_size: word embedding size.
        hidden_size: dimensionality of the LSTM hidden representation.
        num_layers: number of stacked layers in the LSTM.
        p: dropout probability, handed both to the ``nn.Dropout`` module and
            to the ``dropout`` argument of ``nn.LSTM``.
        output_size: length of the one-hot output (size of the final linear
            projection).
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p, output_size):
        super().__init__()

        # Keep the hyperparameters around for later inspection.
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size

        # Creation order (dropout, embedding, LSTM, fc) is kept fixed so
        # parameter registration and random initialisation are stable.
        self.dropout = nn.Dropout(p)
        self.embedding = nn.Embedding(
            num_embeddings=input_size,
            embedding_dim=embedding_size,
        )
        self.LSTM = nn.LSTM(
            input_size=embedding_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=p,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x, hidden_state, cell_state):
        """Run one decoding step.

        Args:
            x: token indices for the current step, shape [batch_size].
            hidden_state: LSTM hidden state,
                shape [num_layers, batch_size, hidden_size].
            cell_state: LSTM cell state, same shape as ``hidden_state``.

        Returns:
            Tuple ``(predictions, hidden_state, cell_state)`` where
            ``predictions`` has shape [1, batch_size, output_size] and the
            two states are the updated LSTM states.
        """
        # Add a leading sequence axis of length 1: [batch_size] -> [1, batch_size]
        step_tokens = x.unsqueeze(0)

        # shape: [1, batch_size, embedding dims]
        embedded = self.embedding(step_tokens)
        embedded = self.dropout(embedded)

        # lstm_out shape: [1, batch_size, hidden_size]
        # states shape:   [num_layers, batch_size, hidden_size]
        previous_states = (hidden_state, cell_state)
        lstm_out, next_states = self.LSTM(embedded, previous_states)
        hidden_state, cell_state = next_states

        # Project onto the output vocabulary: [1, batch_size, output_size]
        predictions = self.fc(lstm_out)

        return predictions, hidden_state, cell_state


## Implementation: Seq2seq Interface

In [5]:
class Seq2seq(nn.Module):
    """Encoder-decoder wrapper that decodes greedily, one target step at a time.

    Args:
        EncoderLSTM: encoder module; called with ``source`` and expected to
            return ``(hidden_state, cell_state)``.
        DecoderLSTM: decoder module; called with
            ``(x, hidden_state, cell_state)`` and expected to return
            ``(predictions, hidden_state, cell_state)``. It must expose an
            ``output_size`` attribute giving the target vocabulary size.
    """

    def __init__(self, EncoderLSTM, DecoderLSTM):
        super(Seq2seq, self).__init__()
        self.EncoderLSTM = EncoderLSTM
        self.DecoderLSTM = DecoderLSTM

    def forward(self, source, target):
        """Encode ``source`` and greedily decode ``target.shape[0]`` steps.

        Args:
            source: token indices, shape [input language seq len, batch_size].
            target: token indices, shape [output language seq len, batch_size].
                Only its first row (the <sos> trigger tokens) and its length
                are used.

        Returns:
            Tensor of shape [output language seq len, batch_size,
            target_vocab_size]. Row 0 is left as zeros because decoding
            starts from the <sos> token at position 0.
        """
        batch_size = source.shape[1]
        target_len = target.shape[0]
        # Take the vocabulary size from the decoder itself rather than from a
        # global dataset object, so the module is self-contained.
        target_vocab_size = self.DecoderLSTM.output_size

        # Allocate on the same device as the inputs so the per-step
        # assignment below does not mix CPU and GPU tensors.
        outputs = torch.zeros(
            target_len, batch_size, target_vocab_size, device=source.device
        )
        hs, cs = self.EncoderLSTM(source)

        x = target[0]  # Trigger token <sos>; shape: [batch_size]

        for i in range(1, target_len):
            output, hs, cs = self.DecoderLSTM(x, hs, cs)
            # The decoder returns [1, batch_size, vocab]; drop the length-1
            # sequence axis so the vocabulary is the last axis of a 2-D tensor.
            if output.dim() == 3:
                output = output.squeeze(0)
            outputs[i] = output
            # Greedy choice over the vocabulary axis -> shape [batch_size].
            # (Reducing over axis 1 of the unsqueezed tensor would have
            # collapsed the batch axis instead.)
            x = output.argmax(dim=-1)
        return outputs  # shape: [output language seq len, batch_size, target_vocab_size]