In [1]:
from ipynb.fs.full.batch import *
from ipynb.fs.full.vocab import *
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class LstmEncoder(nn.Module):
    """LSTM encoder that embeds source-token indices and feeds them to an LSTM.

    The embedding dimension and the LSTM hidden size are both ``word_dim``.
    The LSTM is built with ``batch_first=True``.

    Args:
        in_vocab_size: Number of words in the input (source) vocabulary.
        word_dim: Size of each word embedding; also used as the LSTM hidden size.
        num_layer: Number of stacked LSTM layers.
        bidirection: If true, the LSTM runs in both directions.
        pad_idx: Optional index of the padding token. When given, it is passed
            to ``nn.Embedding`` as ``padding_idx`` (see the PyTorch docs).
    """

    def __init__(self, in_vocab_size, word_dim, num_layer=1, bidirection=False, pad_idx=None):
        super().__init__()

        self.hidden_size = word_dim      # hidden size of the LSTM
        self.num_layer = num_layer       # number of stacked LSTM layers
        self.bidirection = bidirection   # whether the LSTM is bidirectional

        # The LSTM is created before the embedding so that parameter
        # registration order (and seeded initialization) stays unchanged.
        self.lstm = nn.LSTM(
            input_size=word_dim,
            hidden_size=word_dim,
            num_layers=num_layer,
            batch_first=True,
            bidirectional=bidirection,
        )

        # ``padding_idx=None`` is nn.Embedding's default, so one call covers
        # both the "with padding index" and "without padding index" cases.
        self.word_embeds = nn.Embedding(
            num_embeddings=in_vocab_size,
            embedding_dim=word_dim,
            padding_idx=pad_idx,
        )

    def initialize_hidden(self, batch_size):
        """Return all-zero initial ``(hidden, cell)`` states for the LSTM.

        Each tensor has shape
        ``(num_layer * num_directions, batch_size, hidden_size)`` where
        ``num_directions`` is 2 for a bidirectional encoder and 1 otherwise.
        The tensors use the dtype and device of the embedding weights.
        """
        # A bidirectional LSTM keeps one state per layer *per direction*.
        num_directions = 2 if self.bidirection else 1
        shape = (self.num_layer * num_directions, batch_size, self.hidden_size)

        weight = self.word_embeds.weight
        return (
            torch.zeros(shape, dtype=weight.dtype, device=weight.device),
            torch.zeros(shape, dtype=weight.dtype, device=weight.device),
        )

    def forward(self, inputs, hidden):
        """Embed ``inputs`` and run them through the LSTM.

        Args:
            inputs: Tensor of word indices. Because the LSTM uses
                ``batch_first=True`` this is presumably ``(batch, seq_len)``
                -- confirm against the caller.
            hidden: Tuple ``(hidden_state, cell_state)``, e.g. the value
                returned by ``initialize_hidden``.

        Returns:
            ``(output, hidden)`` exactly as returned by ``nn.LSTM``.
        """
        # look up the feature vector for every word index
        embed = self.word_embeds(inputs)

        # pass the feature vectors and the (hidden, cell) state tuple
        output, hidden = self.lstm(embed, hidden)
        return output, hidden

class LstmDecoder(nn.Module):
    """LSTM decoder that predicts log-probabilities over the target vocabulary.

    Each call to ``forward`` decodes a single time step. The embedding
    dimension and the LSTM hidden size are both ``word_dim``.

    Args:
        tar_vocab_size: Number of words in the target vocabulary.
        word_dim: Size of each word embedding; also used as the LSTM hidden size.
        num_layer: Number of stacked LSTM layers.
        pad_idx: Optional index of the padding token. When given, it is passed
            to ``nn.Embedding`` as ``padding_idx`` (see the PyTorch docs).
    """

    def __init__(self, tar_vocab_size, word_dim, num_layer=1, pad_idx=None):
        super().__init__()

        self.hidden_size = word_dim     # hidden size of the LSTM
        self.num_layer = num_layer      # number of stacked LSTM layers

        # Sub-modules are created in the original order (LSTM, classifier,
        # embedding) so seeded initialization is unchanged.
        self.lstm = nn.LSTM(input_size=word_dim, hidden_size=word_dim, num_layers=num_layer)

        # linear projection from the LSTM output to target-vocabulary scores
        self.classifier = nn.Linear(self.hidden_size, tar_vocab_size)

        # ``padding_idx=None`` is nn.Embedding's default, so one call covers
        # both the "with padding index" and "without padding index" cases.
        self.word_embeds = nn.Embedding(
            num_embeddings=tar_vocab_size,
            embedding_dim=word_dim,
            padding_idx=pad_idx,
        )

    def forward(self, inputs, hidden):
        """Decode one time step.

        Args:
            inputs: Tensor of target word indices for the current step. All
                elements are flattened and treated as a batch, so a single
                index (any shape with one element) gives a batch of size 1.
            hidden: Tuple ``(hidden_state, cell_state)``; each must have shape
                ``(num_layer, batch, hidden_size)`` where ``batch`` is the
                number of elements in ``inputs``.

        Returns:
            ``(log_probs, hidden)`` where ``log_probs`` has shape
            ``(batch, tar_vocab_size)`` and ``hidden`` is the updated
            ``(hidden_state, cell_state)`` tuple from the LSTM.
        """
        # The LSTM is sequence-first, so lay the indices out as
        # (seq_len=1, batch) before embedding -> (1, batch, word_dim).
        embed = self.word_embeds(inputs.reshape(1, -1))

        # pass the feature vectors and the (hidden, cell) state tuple
        outputs, hidden = self.lstm(embed, hidden)

        # drop the length-1 time axis, project to vocabulary size and
        # normalize into log-probabilities
        outputs = F.log_softmax(self.classifier(outputs.squeeze(0)), dim=1)
        return outputs, hidden