# Importing all the necessary libraries

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

# Encoder class

In [11]:
# Define the Encoder class
class Encoder(nn.Module):
    """Embed a batch of source token indices and summarize it with a recurrent stack.

    Args:
        input_size: Number of rows in the embedding table (source vocabulary size).
        embedding_size: Width of each embedding vector.
        hidden_size: Hidden-state width of every recurrent layer.
        num_layers: Number of stacked recurrent layers.
        rnn_cell: Case-insensitive cell selector. ``'lstm'`` and ``'gru'`` pick
            ``nn.LSTM`` / ``nn.GRU``; any other string builds a plain ``nn.RNN``.
        dropout: Dropout probability applied to the embeddings and, when more
            than one layer is stacked, between the recurrent layers.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, rnn_cell='lstm', dropout=0.5):
        super().__init__()
        # Lookup table: token index -> dense vector.
        self.embedding = nn.Embedding(input_size, embedding_size)
        # Regularization applied to the embedded tokens.
        self.dropout = nn.Dropout(p=dropout)
        # Kept as plain attributes so callers can inspect the configuration.
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Anything that is not 'lstm' or 'gru' falls back to the vanilla RNN.
        recurrent_cls = {'lstm': nn.LSTM, 'gru': nn.GRU}.get(rnn_cell.lower(), nn.RNN)
        # Inter-layer dropout is switched off for a single-layer stack.
        inter_layer_dropout = dropout if num_layers != 1 else 0
        self.rnn = recurrent_cls(
            embedding_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )

    def forward(self, x):
        """Return the recurrent stack's final state for ``x``.

        Args:
            x: Integer token indices laid out as (batch_size, sequence_length).

        Returns:
            The second value returned by the recurrent module: an ``(h_n, c_n)``
            tuple for an LSTM, a single ``h_n`` tensor for GRU / RNN. The
            per-step outputs are discarded.
        """
        token_vectors = self.embedding(x)
        rnn_input = self.dropout(token_vectors)
        _, final_state = self.rnn(rnn_input)
        return final_state

# Decoder class

In [12]:
# Define the Decoder class
class Decoder(nn.Module):
    """Single-step recurrent decoder that maps one token per sequence to vocabulary scores.

    Args:
        output_size: Target vocabulary size; used for both the embedding table
            and the width of the final projection.
        embedding_size: Width of each embedding vector.
        hidden_size: Hidden-state width of every recurrent layer.
        num_layers: Number of stacked recurrent layers.
        rnn_cell: Case-insensitive cell selector. ``'lstm'`` and ``'gru'`` pick
            ``nn.LSTM`` / ``nn.GRU``; any other string builds a plain ``nn.RNN``.
        dropout: Dropout probability applied to the embeddings, to the
            recurrent output before projection, and (for stacks deeper than
            one layer) between the recurrent layers.
    """

    def __init__(self, output_size, embedding_size, hidden_size, num_layers, rnn_cell='lstm', dropout=0.5):
        super().__init__()
        # Lookup table: target token index -> dense vector.
        self.embedding = nn.Embedding(output_size, embedding_size)
        # One dropout module shared by the embedding and pre-projection paths.
        self.dropout = nn.Dropout(p=dropout)
        # Configuration kept for callers (Seq_to_Seq reads output_size).
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Anything that is not 'lstm' or 'gru' falls back to the vanilla RNN.
        recurrent_cls = {'lstm': nn.LSTM, 'gru': nn.GRU}.get(rnn_cell.lower(), nn.RNN)
        # Inter-layer dropout is switched off for a single-layer stack.
        inter_layer_dropout = dropout if num_layers != 1 else 0
        self.rnn = recurrent_cls(
            embedding_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )

        # Projects each hidden vector onto the target vocabulary.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        """Advance the decoder by one time step.

        Args:
            x: Token indices for the current step, one per batch element
                (shape ``(batch_size,)``).
            hidden: Recurrent state passed straight to the recurrent module.

        Returns:
            ``(prediction, hidden)``: the vocabulary scores of shape
            ``(batch_size, output_size)`` and the updated recurrent state.
        """
        # Insert a length-1 time axis so the batch-first RNN sees one step.
        step_embedded = self.embedding(x.unsqueeze(1))
        rnn_out, next_hidden = self.rnn(self.dropout(step_embedded), hidden)
        # Drop the time axis again before projecting to vocabulary scores.
        step_features = self.dropout(rnn_out.squeeze(1))
        logits = self.fc(step_features)
        return logits, next_hidden


# Sequence to Sequence model for the above encoder and decoder

In [13]:
# Define the Seq_to_Seq model
class Seq_to_Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with optional teacher forcing.

    Args:
        encoder: Callable module; ``encoder(source)`` must return the initial
            decoder state.
        decoder: Callable module exposing ``output_size``;
            ``decoder(tokens, state)`` must return ``(scores, new_state)``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teaching_force_ratio=0.5):
        """Run the encoder once, then unroll the decoder over the target length.

        Args:
            source: Batch of source sequences; its first dimension is the batch size.
            target: Batch of target token indices, shape ``(batch, target_len)``.
                Column 0 is fed to the decoder as the first input.
            teaching_force_ratio: Probability, drawn once per time step for the
                whole batch, of feeding the ground-truth token instead of the
                decoder's own arg-max prediction as the next input.

        Returns:
            Tensor of shape ``(batch, target_len, decoder.output_size)`` on the
            source's device. Position 0 is never written and stays all zeros.
        """
        batch_size, target_len = source.size(0), target.size(1)

        # Buffer for the per-step vocabulary scores.
        outputs = torch.zeros(
            batch_size,
            target_len,
            self.decoder.output_size,
            device=source.device,
        )

        # The encoder's final state seeds the decoder's recurrent state.
        state = self.encoder(source)
        # Decoding starts from the first target column (the start token).
        step_input = target[:, 0]

        for t in range(1, target_len):
            step_scores, state = self.decoder(step_input, state)
            outputs[:, t] = step_scores

            # One random draw per step decides the next input for the whole batch.
            if torch.rand(1) < teaching_force_ratio:
                step_input = target[:, t]
            else:
                step_input = step_scores.argmax(1)

        return outputs

# Printing the model

In [14]:
# Example hyperparameters used to build and print a demo model.
# Vocabulary sizes.
INPUT_DIM = 256  # size of the Latin character set
OUTPUT_DIM = 256  # size of the Devanagari character set
# Embedding widths for the encoder and decoder.
ENC_EMB_DIM = 64
DEC_EMB_DIM = 64
# Hidden width shared by both recurrent stacks.
HID_DIM = 512
# Depth of each recurrent stack.
ENC_LAYERS = 2
DEC_LAYERS = 2
# Recurrent cell type for each side.
ENC_RNN_CELL = 'lstm'
DEC_RNN_CELL = 'lstm'

# Build the two halves of the model (dropout is left at its default).
encoder = Encoder(
    input_size=INPUT_DIM,
    embedding_size=ENC_EMB_DIM,
    hidden_size=HID_DIM,
    num_layers=ENC_LAYERS,
    rnn_cell=ENC_RNN_CELL,
)
decoder = Decoder(
    output_size=OUTPUT_DIM,
    embedding_size=DEC_EMB_DIM,
    hidden_size=HID_DIM,
    num_layers=DEC_LAYERS,
    rnn_cell=DEC_RNN_CELL,
)

# Wire them together and show the resulting module tree.
model = Seq_to_Seq(encoder=encoder, decoder=decoder)
print(model)


Seq_to_Seq(
  (encoder): Encoder(
    (embedding): Embedding(256, 64)
    (dropout): Dropout(p=0.5, inplace=False)
    (rnn): LSTM(64, 512, num_layers=2, batch_first=True, dropout=0.5)
  )
  (decoder): Decoder(
    (embedding): Embedding(256, 64)
    (dropout): Dropout(p=0.5, inplace=False)
    (rnn): LSTM(64, 512, num_layers=2, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=512, out_features=256, bias=True)
  )
)
