# **Importing all the necessary libraries**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import pandas as pd
import numpy as np
import pandas as pd

# **Encoder class**

In [2]:
class Encoder(nn.Module):
    """Embed source token indices and summarise them with a recurrent stack.

    Args:
        input_size: Number of rows in the source embedding table.
        embedding_size: Width of each embedding vector.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        rnn_cell: 'lstm' or 'gru' (case-insensitive); any other value
            selects a plain ``nn.RNN``.
        dropout: Probability used for the embedding dropout and, when more
            than one layer is stacked, for the dropout between layers.
    """

    def __init__(self, input_size, embedding_size, hidden_size, num_layers, rnn_cell='lstm', dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=embedding_size)
        self.dropout = nn.Dropout(dropout)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Unknown cell names fall back to the vanilla RNN.
        recurrent_cls = {'lstm': nn.LSTM, 'gru': nn.GRU}.get(rnn_cell.lower(), nn.RNN)
        # Inter-layer dropout is only passed on when there is more than one layer.
        inter_layer_dropout = dropout if num_layers != 1 else 0
        self.rnn = recurrent_cls(
            embedding_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )

    def forward(self, x):
        """Return the final recurrent state for the index batch ``x``.

        The per-step outputs of the recurrent stack are discarded; for an
        LSTM the returned state is the ``(hidden, cell)`` tuple.
        """
        _, hidden = self.rnn(self.dropout(self.embedding(x)))
        return hidden

# **Decoder class**

In [3]:
class Decoder(nn.Module):
    """Single-step recurrent decoder that scores the next target token.

    Args:
        output_size: Number of target tokens (embedding rows and logits).
        embedding_size: Width of each embedding vector.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        rnn_cell: 'lstm' or 'gru' (case-insensitive); any other value
            selects a plain ``nn.RNN``.
        dropout: Probability used for the dropout layers and, when more
            than one layer is stacked, for the dropout between layers.
    """

    def __init__(self, output_size, embedding_size, hidden_size, num_layers, rnn_cell='lstm', dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=output_size, embedding_dim=embedding_size)
        self.dropout = nn.Dropout(dropout)

        self.output_size = output_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        cell_name = rnn_cell.lower()
        if cell_name == 'lstm':
            recurrent_cls = nn.LSTM
        elif cell_name == 'gru':
            recurrent_cls = nn.GRU
        else:
            recurrent_cls = nn.RNN
        # Inter-layer dropout is only passed on for stacks deeper than one layer.
        rnn_options = {'batch_first': True, 'dropout': 0 if num_layers == 1 else dropout}
        self.rnn = recurrent_cls(embedding_size, hidden_size, num_layers, **rnn_options)

        # Projects the recurrent output onto the target vocabulary.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        """Advance the decoder by one step.

        ``x`` holds one token index per batch element; ``hidden`` is the
        recurrent state carried over from the previous step. Returns the
        logits for this step together with the updated state.
        """
        # (batch,) -> (batch, 1): a sequence of length one per example.
        step_tokens = x.unsqueeze(1)
        embedded = self.embedding(step_tokens)
        embedded = self.dropout(embedded)

        rnn_out, hidden = self.rnn(embedded, hidden)

        # Drop the length-one time axis, regularise, then project to logits.
        features = self.dropout(rnn_out.squeeze(1))
        logits = self.fc(features)
        return logits, hidden


# **Sequence to Sequence model for the above encoder and decoder**

In [4]:
class Seq_to_Seq(nn.Module):
    """Encoder-decoder wrapper that decodes one target position at a time."""

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, source, target, teaching_force_ratio=0.5):
        """Produce decoder logits for every target position after the first.

        Args:
            source: Batch of source index sequences, batch dimension first.
            target: Batch of target index sequences; column 0 seeds the decoder.
            teaching_force_ratio: Probability, drawn once per step for the
                whole batch, of feeding the true token instead of the
                decoder's own argmax as the next input.

        Returns:
            Tensor of shape (batch, target_len, decoder.output_size).
            Position 0 is never written and stays all zeros.
        """
        n_examples = source.size(0)
        n_steps = target.size(1)
        logits = torch.zeros(n_examples, n_steps, self.decoder.output_size, device=source.device)

        # The encoder's final state initialises the decoder.
        state = self.encoder(source)
        step_input = target[:, 0]

        for step in range(1, n_steps):
            step_logits, state = self.decoder(step_input, state)
            logits[:, step] = step_logits
            # One random draw per step decides between ground truth and prediction.
            if torch.rand(1) < teaching_force_ratio:
                step_input = target[:, step]
            else:
                step_input = step_logits.argmax(1)

        return logits

# **Printing the model**

In [5]:
# Embedding-table sizes handed to the encoder (source side) and decoder
# (target side).
INPUT_DIM, OUTPUT_DIM = 50, 100

# Embedding width, shared by encoder and decoder.
ENC_EMB_DIM = DEC_EMB_DIM = 64

# Width of the recurrent hidden state in both networks.
HID_DIM = 512

# Depth of the recurrent stack in each network.
ENC_LAYERS = DEC_LAYERS = 2

# Recurrent cell type for each network.
ENC_RNN_CELL = DEC_RNN_CELL = 'lstm'

# Build the two halves of the model from the configuration above.
encoder = Encoder(
    input_size=INPUT_DIM,
    embedding_size=ENC_EMB_DIM,
    hidden_size=HID_DIM,
    num_layers=ENC_LAYERS,
    rnn_cell=ENC_RNN_CELL,
)
decoder = Decoder(
    output_size=OUTPUT_DIM,
    embedding_size=DEC_EMB_DIM,
    hidden_size=HID_DIM,
    num_layers=DEC_LAYERS,
    rnn_cell=DEC_RNN_CELL,
)

# Prefer the GPU when one is visible to PyTorch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Assemble the full model on the selected device and show its structure.
model = Seq_to_Seq(encoder, decoder).to(device)
print(model)


Using device: cuda
Seq_to_Seq(
  (encoder): Encoder(
    (embedding): Embedding(50, 64)
    (dropout): Dropout(p=0.5, inplace=False)
    (rnn): LSTM(64, 512, num_layers=2, batch_first=True, dropout=0.5)
  )
  (decoder): Decoder(
    (embedding): Embedding(100, 64)
    (dropout): Dropout(p=0.5, inplace=False)
    (rnn): LSTM(64, 512, num_layers=2, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=512, out_features=100, bias=True)
  )
)


# **A function to create a vocabulary set from the given text**

In [6]:
def create_vocab(text):
    """Return the set of characters used in ``text`` plus the special tokens.

    ``text`` is an iterable of words; every character of every word is
    collected, and '<pad>', '<sos>' and '<eos>' are always included.
    """
    special_tokens = {'<pad>', '<sos>', '<eos>'}
    characters = set()
    for word in text:
        characters.update(word)
    return characters | special_tokens

# **A function to load data from a CSV file**

In [7]:
def load_data(path):
    """Load a two-column, header-less CSV of (latin, bangla) word pairs.

    Every cell is read as a literal string. Without ``dtype=str`` and
    ``keep_default_na=False`` pandas would turn words such as "nan", "null"
    or "NA" into float NaN and all-digit words into integers, and the
    per-character iteration done on these words would then fail.

    Args:
        path: Location of the CSV file; forwarded to ``pandas.read_csv``.

    Returns:
        A ``(latin, bangla)`` tuple of pandas Series, one entry per row.
    """
    df = pd.read_csv(
        path,
        header=None,  # the file has no header row
        names=['latin', 'bangla'],
        dtype=str,  # never infer numeric types for words
        keep_default_na=False,  # keep "nan"/"null"/"NA" as ordinary words
    )
    return df['latin'], df['bangla']

# **Load Latin and bangla training data**

In [8]:
# Read the Bengali training split into two parallel Series: the Latin-script
# words and their Bangla-script counterparts (see load_data for the columns).
latin_train, bangla_train = load_data('/kaggle/input/aksharantar/aksharantar_sampled/ben/ben_train.csv')

# **Print the loaded Latin and Bangla training data**

In [9]:
# Show the Latin training words, a blank line, then the Bangla training words.
print(latin_train, bangla_train, sep='\n\n')

0        namdharirao
1        hindukusher
2        farajikandi
3           moubarak
4             chiung
            ...     
51195       silmadar
51196        jonnote
51197      handibage
51198         borpar
51199     bideshikei
Name: latin, Length: 51200, dtype: object

0            নামধারীরাও
1           হিন্দুকুশের
2           ফরাজীকান্দি
3                মুবারক
4                চিয়ুং
              ...      
51195          সিলমাদার
51196            জন্যতে
51197    হ্যান্ডিব্যাগে
51198             বরপার
51199         বিদেশীকেই
Name: bangla, Length: 51200, dtype: object


# **Create two vocabularies from the Latin and Bangla training data**

In [10]:
# Build one character vocabulary per script from the training words.
latin_vocab, bangla_vocab = map(create_vocab, (latin_train, bangla_train))

# **Print the created Latin and Bangla vocabularies**

In [11]:
# Show the Latin vocabulary, a blank line, then the Bangla vocabulary.
print(latin_vocab, bangla_vocab, sep='\n\n')

{'<sos>', 'p', '<eos>', 'z', 'h', '<pad>', 'k', 'j', 'u', 'w', 'i', 'x', 'f', 'o', 'v', 'r', 'y', 'l', 'c', 's', 'e', 'a', 'g', 't', 'm', 'q', 'n', 'b', 'd'}

{'ৈ', 'চ', 'ঊ', 'দ', 'ল', '২', 'ফ', 'ন', '<sos>', '<eos>', 'ক', 'খ', 'প', 'ঝ', 'স', '<pad>', 'ও', 'আ', 'ঘ', 'ু', 'ষ', 'ধ', 'ঁ', 'ঞ', 'শ', 'ৌ', 'ট', 'ঃ', 'ঠ', 'ম', 'ূ', 'গ', 'ি', 'ৎ', 'য', 'ৃ', 'ঐ', 'র', 'ছ', 'ো', 'এ', 'হ', 'উ', 'থ', 'ঢ', 'অ', 'ে', 'ী', 'ব', 'ণ', 'ভ', 'ঔ', 'ই', 'ত', 'ঙ', 'ড', 'ঋ', 'ঈ', '্', 'জ', '়', 'ং', 'া'}


# **Map each token in the Latin and Bangla vocabularies to a unique index, then print the two token-to-index dictionaries**


In [12]:
# Number the tokens of each vocabulary in sorted order so the mapping is
# reproducible regardless of set iteration order.
latin_token_to_index = dict(zip(sorted(latin_vocab), range(len(latin_vocab))))
bangla_token_to_index = dict(zip(sorted(bangla_vocab), range(len(bangla_vocab))))

# Show the Latin mapping, a blank line, then the Bangla mapping.
print(latin_token_to_index, bangla_token_to_index, sep='\n\n')

{'<eos>': 0, '<pad>': 1, '<sos>': 2, 'a': 3, 'b': 4, 'c': 5, 'd': 6, 'e': 7, 'f': 8, 'g': 9, 'h': 10, 'i': 11, 'j': 12, 'k': 13, 'l': 14, 'm': 15, 'n': 16, 'o': 17, 'p': 18, 'q': 19, 'r': 20, 's': 21, 't': 22, 'u': 23, 'v': 24, 'w': 25, 'x': 26, 'y': 27, 'z': 28}

{'<eos>': 0, '<pad>': 1, '<sos>': 2, 'ঁ': 3, 'ং': 4, 'ঃ': 5, 'অ': 6, 'আ': 7, 'ই': 8, 'ঈ': 9, 'উ': 10, 'ঊ': 11, 'ঋ': 12, 'এ': 13, 'ঐ': 14, 'ও': 15, 'ঔ': 16, 'ক': 17, 'খ': 18, 'গ': 19, 'ঘ': 20, 'ঙ': 21, 'চ': 22, 'ছ': 23, 'জ': 24, 'ঝ': 25, 'ঞ': 26, 'ট': 27, 'ঠ': 28, 'ড': 29, 'ঢ': 30, 'ণ': 31, 'ত': 32, 'থ': 33, 'দ': 34, 'ধ': 35, 'ন': 36, 'প': 37, 'ফ': 38, 'ব': 39, 'ভ': 40, 'ম': 41, 'য': 42, 'র': 43, 'ল': 44, 'শ': 45, 'ষ': 46, 'স': 47, 'হ': 48, '়': 49, 'া': 50, 'ি': 51, 'ী': 52, 'ু': 53, 'ূ': 54, 'ৃ': 55, 'ে': 56, 'ৈ': 57, 'ো': 58, 'ৌ': 59, '্': 60, 'ৎ': 61, '২': 62}


# **Defining a Dataset class for handling Latin and Bangla word pairs**

In [13]:
class AksharantarDataset(Dataset):
    """Dataset of (Latin word, Bangla word) pairs encoded as index tensors.

    Args:
        latin_words: pandas Series of source words (accessed with ``.iloc``).
        bangla_words: pandas Series of target words, aligned with
            ``latin_words``.
        latin_token_to_index: Mapping from source characters to indices.
        bangla_token_to_index: Mapping from target characters to indices;
            must contain the '<sos>' and '<eos>' tokens.
    """

    def __init__(self, latin_words, bangla_words, latin_token_to_index, bangla_token_to_index):
        # Word collections for both scripts.
        self.latin_words = latin_words
        self.bangla_words = bangla_words
        # Character-to-index lookups for both scripts.
        self.latin_token_to_index = latin_token_to_index
        self.bangla_token_to_index = bangla_token_to_index

    def __len__(self):
        """Return the number of word pairs."""
        return len(self.latin_words)

    def __getitem__(self, index):
        """Return the pair at position ``index`` as two ``torch.long`` tensors.

        The target sequence is wrapped in the '<sos>' and '<eos>' indices.

        Raises:
            KeyError: If a character is missing from the relevant mapping.
        """
        latin_word = self.latin_words.iloc[index]
        bangla_word = self.bangla_words.iloc[index]

        # Use the mappings given to this instance, not module-level globals,
        # so the dataset works with whatever vocabularies it was built with.
        source_map = self.latin_token_to_index
        target_map = self.bangla_token_to_index

        latin_indices = [source_map[char] for char in latin_word]
        bangla_indices = [
            target_map['<sos>'],
            *(target_map[char] for char in bangla_word),
            target_map['<eos>'],
        ]
        return (
            torch.tensor(latin_indices, dtype=torch.long),
            torch.tensor(bangla_indices, dtype=torch.long),
        )

# **Defining a function for padding sequences and packing batches**

In [14]:
def packet_fn(batch):
    """Collate (latin, bangla) tensor pairs into two padded batch tensors.

    Each side is right-padded to the longest sequence in the batch with the
    '<pad>' index of its own module-level token-to-index mapping. The result
    is batch-first.
    """
    latin_seqs, bangla_seqs = zip(*batch)
    latin_pad = latin_token_to_index['<pad>']
    bangla_pad = bangla_token_to_index['<pad>']
    return (
        pad_sequence(latin_seqs, batch_first=True, padding_value=latin_pad),
        pad_sequence(bangla_seqs, batch_first=True, padding_value=bangla_pad),
    )

# **Load the training data into an AksharantarDataset and create the train_loader with DataLoader**

In [15]:
# Wrap the training words and both token-to-index mappings in a dataset.
train_dataset = AksharantarDataset(
    latin_words=latin_train,
    bangla_words=bangla_train,
    latin_token_to_index=latin_token_to_index,
    bangla_token_to_index=bangla_token_to_index,
)
# Serve shuffled mini-batches of 32 pairs; packet_fn pads each batch to a
# common length.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=packet_fn,
)

# **Print an example from the dataset**

In [16]:
# Print one encoded (Latin, Bangla) pair as a sanity check; the Bangla tensor
# starts with the <sos> index and ends with the <eos> index.
print(train_dataset[4897])
# for i,j in train_loader:
#     print(i,'\n\n\n',j)

(tensor([13, 17, 22,  3, 15, 21,  3, 20]), tensor([ 2, 17, 58, 27, 41, 47, 43,  0]))


# **A function for calculating accuracy per batch, ignoring the padding token**

In [17]:

def categorical_accuracy(preds, y, ignore_index):
    """Return the fraction of correct predictions, skipping ignored targets.

    Args:
        preds: Scores with one row per position and one column per class.
        y: Target class index for each position.
        ignore_index: Target value (typically padding) excluded from the
            accuracy.

    Returns:
        A one-element float tensor on ``y``'s device.
    """
    keep = y != ignore_index
    kept_targets = y[keep]
    kept_predictions = preds.argmax(dim=1)[keep]
    n_correct = (kept_predictions == kept_targets).sum()
    n_kept = torch.tensor([kept_targets.shape[0]], dtype=torch.float32, device=y.device)
    return n_correct / n_kept

# **Defining the Training function**

In [18]:
def train(model, iterator, optimizer, criterion, clip, device, ignore_index):
    """Run one training epoch and return ``(mean_loss, mean_accuracy)``.

    Args:
        model: Network called as ``model(source, target)``.
        iterator: Iterable of ``(source, target)`` batches that supports
            ``len()``.
        optimizer: Optimizer updating the model parameters.
        criterion: Loss applied to flattened logits and targets.
        clip: Maximum gradient norm passed to ``clip_grad_norm_``.
        device: Device the batches are moved to.
        ignore_index: Target index excluded from the accuracy.

    Both returned values are averages over the batches of the iterator.
    """
    model.train()
    running_loss, running_acc = 0, 0

    for source, target in iterator:
        source, target = source.to(device), target.to(device)

        # Clear gradients left over from the previous batch before backpropagation.
        optimizer.zero_grad()
        logits = model(source, target)

        # Drop position 0 (the <sos> slot) and flatten batch and time together.
        n_classes = logits.shape[-1]
        flat_logits = logits[:, 1:].reshape(-1, n_classes)
        flat_target = target[:, 1:].reshape(-1)

        loss = criterion(flat_logits, flat_target)
        acc = categorical_accuracy(flat_logits, flat_target, ignore_index)

        loss.backward()
        # Cap the gradient norm to guard against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        running_loss += loss.item()
        running_acc += acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

# **Defining the Evaluation function**

In [19]:
def evaluate(model, iterator, criterion, device, ignore_index):
    """Score the model on ``iterator`` and return ``(mean_loss, mean_accuracy)``.

    The model is put in evaluation mode, gradients are disabled, and the
    model is called with a teacher-forcing ratio of 0. Both returned values
    are averages over the batches of the iterator.
    """
    model.eval()
    running_loss, running_acc = 0, 0

    with torch.no_grad():
        for source, target in iterator:
            source, target = source.to(device), target.to(device)

            # Ratio 0: the decoder always consumes its own predictions.
            logits = model(source, target, 0)

            # Drop position 0 (the <sos> slot) and flatten batch and time together.
            n_classes = logits.shape[-1]
            flat_logits = logits[:, 1:].reshape(-1, n_classes)
            flat_target = target[:, 1:].reshape(-1)

            loss = criterion(flat_logits, flat_target)
            acc = categorical_accuracy(flat_logits, flat_target, ignore_index)

            running_loss += loss.item()
            running_acc += acc.item()

    n_batches = len(iterator)
    return running_loss / n_batches, running_acc / n_batches

# **Load the validation data into an AksharantarDataset and create the valid_loader with DataLoader**

In [20]:
# Load validation data by reading a CSV file
latin_valid, bangla_valid = load_data('/kaggle/input/aksharantar/aksharantar_sampled/ben/ben_valid.csv')

# Create a validation dataset using the AksharantarDataset class.
valid_dataset = AksharantarDataset(latin_valid, bangla_valid, latin_token_to_index, bangla_token_to_index)

# Create a DataLoader to batch the validation dataset.
# 'collate_fn=packet_fn' pads every batch to a common length.
# 'shuffle=False': shuffling only matters for training. Evaluation does not
# update the model, so a fixed order keeps the validation batches (and the
# per-batch averaged metrics) identical from epoch to epoch.
valid_loader = DataLoader(valid_dataset, batch_size=32, collate_fn=packet_fn, shuffle=False)

# **The training process for specified number of epochs**

In [22]:
# Training schedule: number of passes over the training data and the
# gradient-norm ceiling used while clipping.
NUM_EPOCHS = 5
CLIP = 1

# Adam with its default hyper-parameters.
optimizer = torch.optim.Adam(model.parameters())

# Padded target positions are excluded from both the loss and the accuracy.
ignore_index = bangla_token_to_index['<pad>']
criterion = nn.CrossEntropyLoss(ignore_index=ignore_index).to(device)

# Alternate one training pass with one validation pass and report both.
for epoch in range(NUM_EPOCHS):
    train_loss, train_accuracy = train(
        model, train_loader, optimizer, criterion, CLIP, device, ignore_index
    )
    val_loss, val_accuracy = evaluate(
        model, valid_loader, criterion, device, ignore_index
    )

    print(f'Epoch: {epoch+1}')
    print(f'\tTrain_Loss: {train_loss:.3f}, Train_Accuracy: {train_accuracy*100:.2f}%')
    print(f'\tVal_Loss: {val_loss:.3f},  Val_Accuracy: {val_accuracy*100:.2f}%')


Epoch: 1
	Train_Loss: 2.047, Train_Accuracy: 43.99%
	Val_Loss: 1.632,  Val_Accuracy: 53.12%
Epoch: 2
	Train_Loss: 1.302, Train_Accuracy: 61.66%
	Val_Loss: 1.362,  Val_Accuracy: 61.50%
Epoch: 3
	Train_Loss: 1.046, Train_Accuracy: 68.65%
	Val_Loss: 1.282,  Val_Accuracy: 64.67%
Epoch: 4
	Train_Loss: 0.901, Train_Accuracy: 72.78%
	Val_Loss: 1.200,  Val_Accuracy: 67.70%
Epoch: 5
	Train_Loss: 0.808, Train_Accuracy: 75.66%
	Val_Loss: 1.213,  Val_Accuracy: 68.74%


# **Load the test data into an AksharantarDataset and create the test_loader with DataLoader**

In [24]:
# Read the Bengali test split.
latin_test, bangla_test = load_data('/kaggle/input/aksharantar/aksharantar_sampled/ben/ben_test.csv')

# Encode the test words with the same token-to-index mappings used for training.
test_dataset = AksharantarDataset(
    latin_words=latin_test,
    bangla_words=bangla_test,
    latin_token_to_index=latin_token_to_index,
    bangla_token_to_index=bangla_token_to_index,
)

# One word pair per batch, in file order, so no padding is added and each
# prediction lines up with its input.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    collate_fn=packet_fn,
)

# **A function to convert an array of indices back into a string, excluding any indices corresponding to special tokens like padding, start, or end of sequence tokens, which should not appear in the final output string**

In [25]:
def decode_indices(indices, index_to_token):
    """Turn a sequence of indices into text, dropping special tokens.

    Special tokens are recognised by the token text found in
    ``index_to_token`` itself, so the function is correct for any
    vocabulary passed in rather than relying on one particular
    module-level mapping having the same special-token indices.

    Args:
        indices: Iterable of hashable indices (Python or NumPy integers).
        index_to_token: Mapping from index to token string.

    Returns:
        The concatenation of all tokens whose index is known and whose text
        is not '<pad>', '<sos>' or '<eos>'. Unknown indices are skipped.
    """
    special_tokens = {'<pad>', '<sos>', '<eos>'}
    pieces = []
    for index in indices:
        token = index_to_token.get(index)
        # Skip indices outside the mapping as well as the special tokens.
        if token is None or token in special_tokens:
            continue
        pieces.append(token)
    return ''.join(pieces)

# **Creating the prediction function to generate outputs for all samples in the test_loader**

In [26]:
def predict(model, iterator, device):
    """Run greedy inference over ``iterator`` and collect the predictions.

    The model is put in evaluation mode and called with a teacher-forcing
    ratio of 0 under ``torch.no_grad()``.

    Returns:
        A list with one ``(source, predicted)`` tuple per batch. Both
        entries are NumPy arrays: the source indices and the argmax over
        the last (class) axis of the model output.
    """
    model.eval()
    collected = []
    with torch.no_grad():
        for source, target in iterator:
            source = source.to(device)
            # Ratio 0: the model relies entirely on its own predictions.
            logits = model(source, target.to(device), 0)
            predicted = logits.argmax(2)
            # Move to CPU NumPy arrays for easy post-processing.
            collected.append((source.cpu().numpy(), predicted.cpu().numpy()))
    return collected

# **Creating dictionaries to map indices back to its corresponding characters**

In [27]:
# Invert both token-to-index mappings so predicted indices can be turned
# back into characters.
latin_index_to_token = dict(zip(latin_token_to_index.values(), latin_token_to_index.keys()))
bangla_index_to_token = dict(zip(bangla_token_to_index.values(), bangla_token_to_index.keys()))

# **Displaying results: Each input text from the test dataset and its corresponding predicted output text are printed. This helps in visually assessing the accuracy and quality of the transliterations produced by the model**

In [28]:
# Run the model over the whole test loader.
test_predictions = predict(model, test_loader, device)

# Each entry holds the source indices and predicted indices for one batch;
# walk the rows of both arrays in lockstep and print them as text.
for source_indices, output_indices in test_predictions:
    for source_row, output_row in zip(source_indices, output_indices):
        input_text = decode_indices(source_row, latin_index_to_token)
        predicted_text = decode_indices(output_row, bangla_index_to_token)
        print(f'Input Text: {input_text} -> Predicted Text: {predicted_text}')

Input Text: saphallya -> Predicted Text: সাফল্লা
Input Text: kaarentabaahee -> Predicted Text: কারেন্তবাহাই
Input Text: mashterpiece -> Predicted Text: ম্যাস্টারপি
Input Text: cheeken -> Predicted Text: চিনেন
Input Text: ekdaala -> Predicted Text: একদালা
Input Text: neerbachokra -> Predicted Text: নির্বাচকরা
Input Text: neture -> Predicted Text: নিতুরে
Input Text: michilkey -> Predicted Text: মিছিলকে
Input Text: chitfund -> Predicted Text: চিটফুন্ড
Input Text: panchanan -> Predicted Text: পঞ্চনন
Input Text: manna -> Predicted Text: নন্ন
Input Text: portillo -> Predicted Text: পর্টিল্ল
Input Text: quess -> Predicted Text: কুসে
Input Text: budh -> Predicted Text: বুধ
Input Text: siel -> Predicted Text: লিল
Input Text: zogina -> Predicted Text: জিগিনা
Input Text: hdfc -> Predicted Text: হ্ডিক
Input Text: konistha -> Predicted Text: কনিষ্ঠা
Input Text: godfrey -> Predicted Text: গড্র্যে
Input Text: mardok -> Predicted Text: মার্ডক
Input Text: shasanbhar -> Predicted Text: শাসভার
Input Text