## **Import Libraries**

In [1]:
# Import core libraries for deep learning and scientific computing, neural network building blocks
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F #Functional Utilities
import torch.optim as optim  #For Optimizer

# Import libraries for data manipulation and analysis
import pandas as pd
import csv

# Import libraries for progress monitoring and visualization
from tqdm import tqdm
import matplotlib.pyplot as plt

# Import libraries for logging and experimentation tracking
import wandb  

# Import libraries for utility functions
import random  
import heapq  

In [2]:
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)

2.6.0+cu124
True
12.4


## **SET DEVICE (CPU / GPU)**

In [3]:
# This function determines the appropriate device ("cpu" or "cuda") to use for training.
def set_device():
    """Sets the training device to either "cpu" or "cuda" based on availability.

    Returns:
        str: The chosen device ("cpu" or "cuda").
    """
    device = "cpu"  # Default device is CPU

    # Check if a CUDA GPU is available
    if torch.cuda.is_available():
        device = "cuda"  # Use GPU if available for faster training

    return device  # Return the chosen device

# Call the function to determine the training device
device = set_device()

# Print the chosen device ("cpu" or "cuda")
print(device)


cuda


In [4]:
!wandb login 6ae5555f295dc1469adf2104179b22cabc458450

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


## **LOAD DATA**

In [20]:
import csv
import numpy as np

def load_data(lang='hin'):
    base_path = f'/kaggle/input/dakshina/dakshina_dataset_v1.0/{lang}/lexicons'
    train_path = '/kaggle/input/dakshina/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv'
    val_path = '/kaggle/input/dakshina/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv'
    test_path = '/kaggle/input/dakshina/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv'

    data_lists = []
    for path in [train_path, val_path, test_path]:
        with open(path, 'r', encoding='utf-8') as file:
            reader = csv.reader(file, delimiter='\t') #read csv file
            data_lists.append([[f"{row[1]}$", f"#{row[0]}$"] for row in reader])

    data_set = []
    for i in range(0, 6):
        data_set.append([list_item[i % 2] for list_item in data_lists[i // 2]])

    train_x, train_y, val_x, val_y, test_x, test_y = data_set

    train_x, train_y = np.array(train_x), np.array(train_y)
    val_x, val_y = np.array(val_x), np.array(val_y)
    test_x, test_y = np.array(test_x), np.array(test_y)

    max_decoder_length = max(len(s) for s in np.concatenate((train_y, val_y, test_y)))
    max_encoder_length = max(len(s) for s in np.concatenate((train_x, val_x, test_x)))
    print(train_x)
    print(train_y)
    print(val_x)
    print(val_y)
    print(test_x)
    print(test_y)
    return {
        "train_x": train_x,
        "train_y": train_y,
        "val_x": val_x,
        "val_y": val_y,
        "test_x": test_x,
        "test_y": test_y,
        "max_decoder_length": max_decoder_length,
        "max_encoder_length": max_encoder_length
    }


In [6]:
def create_corpus(dictionary : dict):
    """
    Creates vocabulary dictionaries for input and output sequences.

    Args:
        dict : A dictionary containing train_y, val_y, test_y
    Returns:
        dict: A dictionary containing vocabulary information.
    """
    train_y = dictionary["train_y"]
    val_y = dictionary["val_y"]
    test_y = dictionary["test_y"]

    # Define English vocabulary
    english_vocab = "#$abcdefghijklmnopqrstuvwxyz"

    # Combine target sequences from all datasets to create a complete vocabulary
    all_chars = set.union((set(char for word in train_y for char in word)),
                            set(char for word in val_y for char in word),
                            set(char for word in test_y for char in word))
    all_chars.add('')
    all_chars = sorted(all_chars)

    # Create input vocabulary dictionary (includes the empty string)
    input_corpus_dict = {char: idx+1 for idx, char in enumerate(english_vocab)}
    input_corpus_dict[''] = 0
    input_corpus_length = len(input_corpus_dict)
    

    # Create output vocabulary dictionary (includes the empty string)
    output_corpus_dict = {char: idx for idx, char in enumerate(all_chars)}
    output_corpus_length = len(output_corpus_dict)

    # Create dictionaries for reversed lookups (character -> index)
    reversed_input_corpus = {v: k for k, v in input_corpus_dict.items()}
    reversed_output_corpus = {v: k for k, v in output_corpus_dict.items()}

    # Return a dictionary containing all vocabulary information
    return {
        "input_corpus_length": input_corpus_length,
        "output_corpus_length": output_corpus_length,
        "input_corpus_dict": input_corpus_dict,
        "output_corpus_dict": output_corpus_dict,
        "reversed_input_corpus": reversed_input_corpus,
        "reversed_output_corpus": reversed_output_corpus
    }


In [7]:
def create_tensor(data_dict, corpus_dict):
    """
    Creates PyTorch tensors for training and validation data.

    Args:
        data_dict (dict) : Dictionary contaning datasets
        corpus_dict (dict): Dictionary containing vocabulary information.

    Returns:
        dict: A dictionary containing PyTorch tensors for training and validation.
    """

    # Get maximum sequence length
    max_len = max(data_dict["max_encoder_length"], data_dict["max_decoder_length"])

    # Function to convert sequences to tensors with padding
    def create_padded_tensor(sequences, vocab_dict, max_len):
        tensor = np.zeros((max_len, len(sequences)), dtype='int64')
        for i, seq in enumerate(sequences):
            for j, char in enumerate(seq):
                tensor[j, i] = vocab_dict.get(char, 0)  # Use default of 0 for missing characters
        return torch.tensor(tensor)

    # Create tensors for training data
    train_input = create_padded_tensor(data_dict["train_x"], corpus_dict["input_corpus_dict"], max_len)
    train_output = create_padded_tensor(data_dict["train_y"], corpus_dict["output_corpus_dict"], max_len)

    # Create tensors for validation data
    val_input = create_padded_tensor(data_dict["val_x"], corpus_dict["input_corpus_dict"], max_len)
    val_output = create_padded_tensor(data_dict["val_y"], corpus_dict["output_corpus_dict"], max_len)

    # Create tensors for testing data
    test_input = create_padded_tensor(data_dict["test_x"], corpus_dict["input_corpus_dict"], max_len)
    test_output = create_padded_tensor(data_dict["test_y"], corpus_dict["output_corpus_dict"], max_len)

    # Return dictionary containing tensors
    return {
        "train_input": train_input,
        "train_output": train_output,
        "val_input": val_input,
        "val_output": val_output,
        "test_input" : test_input,
        "test_output" : test_output
    }


In [8]:
def preprocess_data(lang : str):
    dictionary1 = load_data(lang)
    dictionary2 = create_corpus(dictionary1)
    dictionary3 = create_tensor(dictionary1, dictionary2) 
    dictionary4 = {
        "train_input": dictionary3["train_input"],
        "train_output": dictionary3["train_output"],
        "val_input": dictionary3["val_input"],
        "val_output": dictionary3["val_output"],
        "test_input" : dictionary3["test_input"],
        "test_output" : dictionary3["test_output"],
        "input_corpus_length" : dictionary2["input_corpus_length"],
        "output_corpus_length" : dictionary2["output_corpus_length"],
        "input_corpus_dict" : dictionary2["input_corpus_dict"],
        "output_corpus_dict" : dictionary2["output_corpus_dict"],
        "reversed_input_corpus" : dictionary2["reversed_input_corpus"],
        "reversed_output_corpus" : dictionary2["reversed_output_corpus"],
        "train_x" : dictionary1["train_x"],
        "train_y" : dictionary1["train_y"],
        "val_x" : dictionary1["val_x"],
        "val_y" : dictionary1["val_y"],
        "test_x" : dictionary1["test_x"],
        "test_y" : dictionary1["test_y"],
        "max_decoder_length" : dictionary1["max_decoder_length"],
        "max_encoder_length" : dictionary1["max_encoder_length"]
    }

    return dictionary4


## **Encoder Class**

In [9]:
class Encoder(nn.Module):
    """
    Encoder class for sequence-to-sequence models.
    Args:
        PARAM (dict): Encoder hyperparameters.
            - input_size (int): Size of the input vocabulary.
            - embedding_size (int): Dimensionality of word embeddings.
            - hidden_size (int): Size of the hidden state in RNN cells.
            - num_layers (int): Number of stacked RNN layers.
            - drop_prob (float): Dropout probability for regularization.
            - cell_type (str): Type of RNN cell (LSTM, GRU, RNN).
            - bidirectional (bool): Whether to use a bidirectional RNN.
    """

    def __init__(self, PARAM):
        super(Encoder, self).__init__()

        # Hyperparameters
        self.input_size = PARAM["encoder_input_size"]
        self.embedding_size = PARAM["embedding_size"]
        self.hidden_size = PARAM["hidden_size"]
        self.num_layers = PARAM["num_layers"]
        self.drop_prob = PARAM["drop_prob"]
        self.cell_type = PARAM["cell_type"]
        self.bidirectional = PARAM["bidirectional"]

        # Layers
        self.dropout = nn.Dropout(self.drop_prob)
        self.embedding = nn.Embedding(self.input_size, self.embedding_size)

        # Select RNN cell based on cell_type
        cell_map = {
        "LSTM": nn.LSTM,
        "GRU": nn.GRU,
        "RNN": nn.RNN
        }
        self.cell = cell_map[self.cell_type](
            self.embedding_size, self.hidden_size, self.num_layers,
            dropout=self.drop_prob, bidirectional=self.bidirectional
        )

    def forward(self, x):
        """
        Forward pass of the Encoder.
        Args:
            x : Input sequence of word indices.
        Returns:
            torch.Tensor or tuple : Hidden state (or hidden & cell states for LSTMs)
        """

        embedding = self.embedding(x) # embadding layer 
        drops = self.dropout(embedding) # Dropout on embadding 
        if self.cell_type == "RNN" or self.cell_type == "GRU": 
            _, hidden = self.cell(drops) 
            return hidden
        elif self.cell_type == "LSTM":
            _, (hidden, cells) = self.cell(drops)
            return hidden, cells
        else:
            raise ValueError(f"Invalid RNN cell type: {self.cell_type}") # Raise a error on invalid cell type


## **Decoder** 

In [10]:
class Decoder(nn.Module):
    """
    Decoder class for sequence-to-sequence models.

    Args:
        PARAM (dict): Decoder hyperparameters.
            - input_size (int): Size of the decoder vocabulary.
            - embedding_size (int): Dimensionality of word embeddings.
            - hidden_size (int): Size of the hidden state in RNN cells.
            - output_size (int): Size of the output vocabulary.
            - num_layers (int): Number of stacked RNN layers.
            - drop_prob (float): Dropout probability for regularization.
            - cell_type (str): Type of RNN cell (LSTM, GRU, RNN).
            - bidirectional (bool): Whether to use a bidirectional RNN.
    """

    def __init__(self, PARAM):
        super(Decoder, self).__init__()

        # Hyperparameters
        self.input_size = PARAM["decoder_input_size"]
        self.embedding_size = PARAM["embedding_size"]
        self.hidden_size = PARAM["hidden_size"]
        self.output_size = PARAM["decoder_output_size"]
        self.num_layers = PARAM["num_layers"]
        self.drop_prob = PARAM["drop_prob"]
        self.cell_type = PARAM["cell_type"]
        self.bidirectional = PARAM["bidirectional"]

        # Layers
        self.dropout = nn.Dropout(self.drop_prob)
        self.embedding = nn.Embedding(self.input_size, self.embedding_size)
        self.cell_map = {
            "LSTM": nn.LSTM,
            "GRU": nn.GRU,
            "RNN": nn.RNN
        }
        self.cell = self.cell_map[self.cell_type](
            self.embedding_size, self.hidden_size, self.num_layers,
            dropout=self.drop_prob, bidirectional=self.bidirectional
        )

        # Final linear layer for output prediction
        self.fc = nn.Linear(self.hidden_size * (2 if self.bidirectional else 1), self.output_size)

    def forward(self, x, hidden, cell=None):
        """
        Forward pass of the Decoder.

        Args:
            x (torch.Tensor): Input sequence of word indices (single token for teacher forcing).
            hidden (torch.Tensor): Hidden state from the encoder.
            cell (torch.Tensor, optional): Cell state for LSTMs (default: None).

        Returns:
            tuple(torch.Tensor): Predicted output logits, hidden state (and cell state for LSTMs).
        """

        x = x.unsqueeze(0)  # Add batch dimension for single token
        embedding = self.embedding(x)
        drops = self.dropout(embedding)

        if self.cell_type == "RNN" or self.cell_type == "GRU":
            outputs, hidden = self.cell(drops, hidden)
        elif self.cell_type == "LSTM":
            outputs, (hidden, cell) = self.cell(drops, (hidden, cell))
        predictions = self.fc(outputs).squeeze(0)  # Remove batch dimension

        if self.cell_type == "LSTM":
            predictions = F.log_softmax(predictions, dim=1)
            return predictions, hidden, cell
        return predictions, hidden


## **Seq2Seq Class**

In [11]:
class Seq2Seq(nn.Module):
    """
    Seq2Seq model for sequence-to-sequence tasks.

    Args:
        encoder (Encoder): Encoder module.
        decoder (Decoder): Decoder module.
        param (dict): Model hyperparameters.
            - tfr (float): Teacher forcing ratio for training.
        processed_data (dict) : containing all information of processed data
    """

    def __init__(self, encoder, decoder, param, p_data):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.teacher_forcing_ratio = param["tfr"]  # Teacher forcing ratio
        self.processed_data = p_data

    def forward(self, src, target):
        """
        Forward pass of the Seq2Seq model.

        Args:
            src (torch.Tensor): Source sequence of word indices.
            target (torch.Tensor): Target sequence of word indices.

        Returns:
            torch.Tensor: Predicted output logits for each target word.
        """

        batch_size = src.shape[1]
        target_len = target.shape[0]
        target_vocab_size = self.processed_data["output_corpus_length"]

        # Initialize outputs tensor
        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)

        # Get encoder hidden state(s)
        if self.encoder.cell_type == "LSTM":
            encoder_hidden, cell = self.encoder(src)
        elif self.encoder.cell_type == "GRU" or self.encoder.cell_type == "RNN":
            encoder_hidden = self.encoder(src)

        # Start with first target word
        x = target[0]

        for t in range(1, target_len):
            # Decode with teacher forcing or predicted output
            if self.encoder.cell_type == "LSTM":
                y, encoder_hidden, cell = self.decoder(x, encoder_hidden, cell) 
            else:
                y, encoder_hidden = self.decoder(x, encoder_hidden, None)  

            outputs[t] = y
            if random.random() < self.teacher_forcing_ratio:
                x = target[t]
            else:
                x = y.argmax(dim=1)

        return outputs


## **Setting Optimizer**

In [12]:
def set_optimizer(name, model, learning_rate):
    """
    Creates an optimizer object based on the specified name and learning rate.
    Args:
        name (str): Name of the optimizer (e.g., "adam", "sgd", "rmsprop", "adagrad").
        model (nn.Module): The PyTorch model to be optimized.
        learning_rate (float): The learning rate to use for training.
    Returns:
        torch.optim.Optimizer: The created optimizer object.
    """

    # Define the optimizer based on the provided name
    optimizer = None
    if name == "adam":
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif name == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    elif name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
    elif name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)
    else:
        # Raise an error if the optimizer name is invalid
        raise ValueError(f"Invalid optimizer name: {name}")

    # Ensure an optimizer was created
    if optimizer is None:
        raise ValueError("Failed to create optimizer. Please check the provided name.")

    return optimizer


## **BEAM SEARCH**

In [13]:
def beam_search(params, model, word, device, processed_data):
    """
    Beam search decoding for sequence-to-sequence models.

    Args:
        params (dict): Model hyperparameters.
            - encoder_cell_type (str): Type of RNN cell (LSTM, GRU, RNN).
            - beam_width (int): Beam width for beam search decoding.
            - length_penalty (float): Penalty for longer sequences.
        model (nn.Module): Seq2Seq model for sequence translation.
        word (str): Input word to translate.
        device (torch.device): Device to use for computations (CPU or GPU).
        max_encoder_length (int): Maximum length of the encoder input sequence.
        input_corpus_dict (dict): Dictionary mapping input characters to integer indices.
        output_corpus_dict (dict): Dictionary mapping integer indices to output characters.
        reverse_output_corpus (dict): Dictionary mapping output characters to integer indices (for reversing prediction).

    Returns:
        str: Translated sentence.
    """

    input_corpus_dict = processed_data["input_corpus_dict"]
    output_corpus_dict = processed_data["output_corpus_dict"]
    max_encoder_length = processed_data["max_encoder_length"]
    reversed_output_corpus = processed_data["reversed_output_corpus"]
    # Preprocess input sentence
    data = torch.zeros((max_encoder_length + 1, 1), dtype=torch.int32).to(device)
    for i, char in enumerate(word):
        data[i, 0] = input_corpus_dict[char]
    data[i + 1, 0] = input_corpus_dict['$']  # Add end-of-sentence marker

    # Encode input sentence
    with torch.no_grad():
        if params["cell_type"] == "LSTM":
            hidden, cell = model.encoder(data)
        else:
            hidden = model.encoder(data)

        # Initialize beam search
        start_token = output_corpus_dict['#']  # Start-of-sentence symbol
        initial_sequence = torch.tensor([start_token]).to(device)
        hidden = hidden.unsqueeze(0)  # Add batch dimension
        beam = [(0.0, initial_sequence, hidden)]  # List of (score, sequence, hidden state) tuples

    # Decode loop
        for _ in range(len(output_corpus_dict)):
            candidates = []  # List for storing candidate sequences
            for score, seq, hidden in beam:
                # Check for end-of-sentence token
                if seq[-1].item() == output_corpus_dict['$']:
                    candidates.append((score, seq, hidden))
                    continue

                # Get last token and hidden state
                last_token = seq[-1].unsqueeze(0).to(device)
                hidden = hidden.squeeze(0)

                # Decode step with last token
                if params["cell_type"] == "LSTM":
                    output, hidden, cell = model.decoder(last_token, hidden, cell)
                else:
                    output, hidden = model.decoder(last_token, hidden, None)

            # Get top-k probable tokens
                probabilities = F.softmax(output, dim=1)
                topk_probs, topk_tokens = torch.topk(probabilities, k=params["beam_width"])

                # Expand beam with top-k candidate sequences
                for prob, token in zip(topk_probs[0], topk_tokens[0]):
                    new_seq = torch.cat((seq, token.unsqueeze(0)), dim=0)
                    length_penalty = ((len(new_seq) - 1) / 5) ** params["length_penalty"]
                    candidate_score = score + torch.log(prob).item() / length_penalty
                    candidates.append((candidate_score, new_seq, hidden.unsqueeze(0)))

            # Select top-k beam candidates for next iteration
            beam = heapq.nlargest(params["beam_width"], candidates, key=lambda x: x[0])

        # Get best sequence from beam search
        best_score, best_sequence, _ = max(beam, key=lambda x: x[0])

        # Convert predicted token indices to characters and reverse order
        translated_sentence = ''.join([reversed_output_corpus[token.item()] for token in best_sequence[1:]])[:-1]  # Remove start token and end token

        return translated_sentence


In [14]:
def run_epoch(model, data_loader, optimizer, criterion, processed_data):
    """
    Train the Seq2Seq model for one epoch.

    Args:
        model (nn.Module): Seq2Seq model to train.
        data_loader (List): List containing training_data.
        optimizer (Optimizer): Optimizer for updating model parameters.
        criterion (nn.Module): Loss function for calculating training loss.

    Returns:
        tuple(float, float): Training accuracy and average loss.
    """

    model.train()  # Set model to training mode
    total_loss, total_words, correct_predictions = 0, 0, 0

    with tqdm(total=len(data_loader[0]), desc='Training') as pbar:  # Gradient accumulation
        for _ , (source, target) in enumerate(zip(data_loader[0], data_loader[1])):
            source, target = source.to(device), target.to(device)  # Move data to device
            optimizer.zero_grad()

            # Forward pass
            output = model(source, target)
            target = target.reshape(-1)  # Reshape target for loss calculation
            output = output.reshape(-1, output.shape[2])  # Reshape output
            
            #Ignore the padding
            pad_mask = (target != processed_data['output_corpus_dict'][''])
            target = target[pad_mask]
            output = output[pad_mask]

            # Calculate loss
            loss = criterion(output, target)

            # Backward pass
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

            optimizer.step()  # Update model parameters

            # Calculate total loss, total words, correct_predictions
            total_loss += loss.item()
            total_words += target.size(0)
            correct_predictions += torch.sum(torch.argmax(output, dim = 1) == target).item()
            pbar.update(1)

    # Calculate Accuracy and Avg Loss
    accuracy = correct_predictions / total_words
    avg_loss = total_loss / len(data_loader[0])

    return accuracy, avg_loss


In [15]:
def evaluate_character_level(model, val_data_loader, loss_fn, processed_data):
    """
    Evaluate the Seq2Seq model on character-level data.

    Args:
        model (nn.Module): Seq2Seq model to evaluate.
        val_data_loader (DataLoader): Data loader for validation data.
        loss_fn (nn.Module): Loss function for calculating validation loss.

    Returns:
        tuple(float, float): Validation accuracy and average loss.
    """

    model.eval()  # Set model to evaluation mode
    with torch.no_grad():
        total_loss = 0
        total_words = 0
        correct_predictions = 0

        with tqdm(total=len(val_data_loader[0]), desc='Validation') as pbar:
            for src, tar in zip(val_data_loader[0], val_data_loader[1]):
                target, source = tar.to(device), src.to(device)

                # Apply model
                output = model(source, target)

                # Reshape target and output
                target = target.reshape(-1)
                output = output.reshape(-1, output.shape[2])
                
                # Ignore the padding 
                pad_mask = (target != processed_data['output_corpus_dict'][''])
                target = target[pad_mask]
                output = output[pad_mask]

                #Calculate total_loss, total_words, correct_predictions
                val_loss = loss_fn(output, target)
                total_loss += val_loss.item()
                total_words += target.size(0)
                correct_predictions += torch.sum(torch.argmax(output, dim=1) == target).item()
                pbar.update(1)
        
    accuracy = correct_predictions / total_words
    avg_loss = total_loss / len(val_data_loader[0])

    return accuracy, avg_loss


In [16]:
def evaluate_model_beam_search(params, model, device, processed_data):
    """
    Evaluates the model using beam search and returns accuracy and correct predictions.

    Args:
        model (torch.nn.Module): The machine translation model to evaluate.
        val_data (torch.Tensor): The validation data tensor.
        vx (list): List of source words for beam search.
        vy (list): List of target words for beam search.
        device (str): Device to use for computation (e.g., 'cpu' or 'cuda').
        processed_data (dict): Preprocessed data dictionary.

    Returns:
        tuple: A tuple containing validation accuracy (float) and correct predictions (int).
    """

# Set the model to evaluation mode
    model.eval()

    # Disable gradient computation during inference
    with torch.no_grad():
        # Initialize counters
        total_words = 0
        correct_predictions = 0
        
        # Iterate through the validation data with tqdm progress bar
        with tqdm(total=len(processed_data["val_x"]), desc='Beam_Search') as pbar:
            for word, target_word in zip(processed_data["val_x"], processed_data["val_y"]):
                # Increment the total words counter
                total_words += 1
                
                # Perform beam search to predict the next word
                predicted_word = beam_search(params, model, word, device, processed_data)
#                 print(target_word, predicted_word)
                # Check if the predicted word matches the target word
                if predicted_word == target_word[1:-1]:  # Remove start and end tokens
                    correct_predictions += 1
                
                # Update the progress bar
                pbar.update(1)

    # Calculate accuracy
    accuracy = correct_predictions / total_words

    # Return accuracy and number of correct predictions
    return accuracy, correct_predictions



## **Train Using Beam Search**

In [17]:
def training(PARAM, processed_data, device, wandb_log = 0):
    # initilize wandb with project
    # if wandb_log == 1:
    #     wandb.init(project = 'CS6910-Assignment3')
        # wandb.run.name = 'Training'
    
    # Set Learning Rate, epochsm batch_size
    learning_rate = PARAM["learning_rate"]
    epochs = PARAM["epochs"]
    batch_size = PARAM["batch_size"]

    # Copy encoder and decoder to device
    encoder = Encoder(PARAM).to(device)
    decoder = Decoder(PARAM).to(device)

#     # Initialize model
    model = Seq2Seq(encoder, decoder, PARAM, processed_data).to(device)
    print(model)

    # Define loss function and optimizer
    loss_function = nn.CrossEntropyLoss(ignore_index=0)
    optimizer = set_optimizer(PARAM["optimizer"], model, learning_rate)

    # Split dataset into batches
    train_batches_x = torch.split(processed_data["train_input"], batch_size, dim=1)
    train_batches_y = torch.split(processed_data["train_output"], batch_size, dim=1)
    val_batches_x = torch.split(processed_data["val_input"], batch_size, dim=1)
    val_batches_y = torch.split(processed_data["val_output"], batch_size, dim=1)

    # Training loop
    for epoch in range(epochs):
        print(f"Epoch :: {epoch+1}/{epochs}")
        
        # Train the model on training data
        data_loader = [train_batches_x, train_batches_y]
        accuracy, avg_loss = run_epoch(model, data_loader, optimizer, loss_function, processed_data)  # Average loss per batch

        # Evaluate model character wise
        val_data_loader = [val_batches_x, val_batches_y]
        val_accuracy, val_avg_loss = evaluate_character_level(model, val_data_loader, loss_function, processed_data)
        
        # Evaluate model word wise
        val_accuracy_beam, val_correct_pred_beam = evaluate_model_beam_search(PARAM, model, device, processed_data)
        total_words = processed_data["val_input"].shape[1] 

        # print epochs
        print(f"Epoch : {epoch+1} Train Accuracy: {accuracy*100:.4f}, Train Loss: {avg_loss:.4f}\nValidation Accuracy: {val_accuracy*100:.4f}, Validation Loss: {val_avg_loss:.4f}, \nValidation Acc. With BeamSearch: {val_accuracy_beam*100:.4f}, Correctly Predicted : {val_correct_pred_beam}/{total_words}")

        # Log on wandb
        if wandb_log:
            wandb.log(
                    {
                        'epoch': epoch+1,
                        'training_loss' : avg_loss,
                        'training_accuracy' : accuracy,
                        'validation_loss' : val_avg_loss,
                        'validation_accuracy_using_char' : val_accuracy,
                        'validation_accuracy_using_word' : val_accuracy_beam,
                        'correctly_predicted' : val_correct_pred_beam
                    }
                )
    return model, val_accuracy_beam

## **Get Data**

In [21]:
processed_data = preprocess_data('hi')

['an$' 'ankganit$' 'uncle$' ... 'hyensang$' 'xuanzang$' 'om$']
['#अं$' '#अंकगणित$' '#अंकल$' ... '#ह्वेनसांग$' '#ह्वेनसांग$' '#ॐ$']
['ankan$' 'angkor$' 'angira$' ... 'huar$' 'hyuar$' 'hyuer$']
['#अंकन$' '#अंगकोर$' '#अंगिरा$' ... '#ह्यूअर$' '#ह्यूअर$' '#ह्यूअर$']
['ank$' 'anka$' 'ankit$' ... 'hoshangabad$' 'hostes$' 'hostess$']
['#अंक$' '#अंक$' '#अंकित$' ... '#होशंगाबाद$' '#होस्टेस$' '#होस्टेस$']


## **HYPER PARAMETERS**

In [22]:
HYPER_PARAM = {
    "encoder_input_size": processed_data["input_corpus_length"],
    "embedding_size": 256,
    "hidden_size": 512,
    "num_layers": 2,
    "drop_prob": 0.3,
    "cell_type": "LSTM",
    "decoder_input_size": processed_data["output_corpus_length"],
    "decoder_output_size": processed_data["output_corpus_length"],
    "beam_width" : 1,
    "length_penalty" : 0.6,
    "bidirectional" : True,
    "learning_rate" : 0.01,
    "batch_size" : 32,
    "epochs" : 3,
    "optimizer" : "adagrad",
    "tfr" : 0.7,
}

## **Training Model on Hyper Parameters**

In [23]:
model, acc = training(HYPER_PARAM, processed_data, device, wandb_log = 0)

Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (cell): LSTM(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (cell): LSTM(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=66, bias=True)
  )
)
Epoch :: 1/3


Training:  25%|██▌       | 349/1382 [00:22<01:06, 15.57it/s]


KeyboardInterrupt: 

## **Sweep Config**

In [24]:
sweep_config = {
            'name': 'sweep-bayes-1',
            'method': 'bayes',
            'metric': { 'goal': 'maximize','name': 'Accuracy'},
            'parameters': 
                {
                    'epochs': {'values': [15]},
                    'cell_type': {'values': ['RNN', 'LSTM', 'GRU']},
                    'embedding_size': {'values': [128, 256, 512]},
                    'hidden_size': {'values': [128, 256, 512, 1024]},
                    'num_layers': {'values': [1, 2, 3]},
                    'dropout': {'values': [0.3, 0.5, 0.7]},
                    'optimizer' : {'values' : ['adam', 'sgd', 'rmsprop', 'adagrad']},
                    'learning_rate': {'values': [0.001, 0.005, 0.01, 0.1]},
                    'batch_size': {'values': [32, 64]},
                    'teacher_fr' : {'values': [0.3, 0.5, 0.7]},
                    'length_penalty' : {'values': [0.4, 0.5, 0.6]},
                    'bi_dir' : {'values': [True, False]},
                    'beam_width': {'values': [1, 2, 3]}
                }
            }

In [25]:
def train():
    var1 = wandb.init(project="DA6410-Assignment33")
    var2 = var1.config
   
    wandb.run.name = (f"cell_type:{var2.cell_type}_epochs:{var2.epochs}_lr:{var2.learning_rate}_batch_size:{var2.batch_size}_beam_width:{var2.beam_width}_opt:{var2.optimizer}_dropout:{var2.dropout}_teacher_fr:{var2.teacher_fr}_embadding_size:{var2.embedding_size}")
    
    HYPER_PARAM = {
    "encoder_input_size": processed_data["input_corpus_length"],
    "embedding_size": var2.embedding_size,
    "hidden_size": var2.hidden_size,
    "num_layers": var2.num_layers,
    "drop_prob": var2.dropout,
    "cell_type": var2.cell_type,
    "decoder_input_size": processed_data["output_corpus_length"],
    "decoder_output_size": processed_data["output_corpus_length"],
    "beam_width" : var2.beam_width,
    "length_penalty" : var2.length_penalty,
    "bidirectional" : var2.bi_dir,
    "learning_rate" : var2.learning_rate,
    "batch_size" : var2.batch_size,
    "epochs" : var2.epochs,
    "optimizer" : var2.optimizer,
    "tfr" : var2.teacher_fr,
}

    model, accuracy = training(HYPER_PARAM, processed_data, device, wandb_log = 1)
    wandb.log({
                "Accuracy" : accuracy
            })

In [None]:
sweep_id = wandb.sweep(sweep_config, project="DA6410-Assignment33")
wandb.agent(sweep_id, train, count = 100)
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: j1o8zimv
Sweep URL: https://wandb.ai/cs24m035-indian-institute-of-technology-madras/DA6410-Assignment39/sweeps/j1o8zimv


[34m[1mwandb[0m: Agent Starting Run: nqt2sogf with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bi_dir: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_fr: 0.7
[34m[1mwandb[0m: Currently logged in as: [33mcs24m035[0m ([33mcs24m035-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 128)
    (cell): RNN(128, 512, num_layers=3, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 128)
    (cell): RNN(128, 512, num_layers=3, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=66, bias=True)
  )
)
Epoch :: 1/15


Training: 100%|██████████| 1382/1382 [01:11<00:00, 19.23it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 69.62it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:06<00:00, 65.54it/s]


Epoch : 1 Train Accuracy: 30.9465, Train Loss: 2.9167
Validation Accuracy: 15.1122, Validation Loss: 3.9053, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 2/15


Training: 100%|██████████| 1382/1382 [01:12<00:00, 18.96it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 76.52it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:18<00:00, 55.56it/s]


Epoch : 2 Train Accuracy: 32.2393, Train Loss: 2.7125
Validation Accuracy: 15.4092, Validation Loss: 3.8305, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 3/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.93it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 75.40it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:13<00:00, 59.26it/s]


Epoch : 3 Train Accuracy: 32.6645, Train Loss: 2.6666
Validation Accuracy: 16.9060, Validation Loss: 3.6206, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 4/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.83it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 75.54it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:15<00:00, 58.03it/s]


Epoch : 4 Train Accuracy: 32.9758, Train Loss: 2.6384
Validation Accuracy: 18.3884, Validation Loss: 3.6276, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 5/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.90it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 76.07it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:15<00:00, 57.94it/s]


Epoch : 5 Train Accuracy: 33.5706, Train Loss: 2.5984
Validation Accuracy: 18.6941, Validation Loss: 3.6978, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 6/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.86it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 75.58it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:11<00:00, 61.21it/s]


Epoch : 6 Train Accuracy: 34.0776, Train Loss: 2.5717
Validation Accuracy: 19.8333, Validation Loss: 3.4920, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 7/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.90it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 76.86it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:11<00:00, 61.22it/s]


Epoch : 7 Train Accuracy: 34.3679, Train Loss: 2.5555
Validation Accuracy: 20.7850, Validation Loss: 3.4464, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 8/15


Training: 100%|██████████| 1382/1382 [01:12<00:00, 18.93it/s]
Validation: 100%|██████████| 137/137 [00:02<00:00, 62.70it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:12<00:00, 60.20it/s]


Epoch : 8 Train Accuracy: 34.4232, Train Loss: 2.5466
Validation Accuracy: 19.5795, Validation Loss: 3.7478, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 9/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.84it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 76.36it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:15<00:00, 57.65it/s]


Epoch : 9 Train Accuracy: 34.9624, Train Loss: 2.5222
Validation Accuracy: 19.9198, Validation Loss: 3.6403, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 10/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.87it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 74.49it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:07<00:00, 64.42it/s]


Epoch : 10 Train Accuracy: 35.1712, Train Loss: 2.5080
Validation Accuracy: 20.6610, Validation Loss: 3.5231, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 11/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.78it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 71.72it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:09<00:00, 62.56it/s]


Epoch : 11 Train Accuracy: 35.0981, Train Loss: 2.4984
Validation Accuracy: 20.1765, Validation Loss: 3.4586, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 12/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.83it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 76.58it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:13<00:00, 59.57it/s]


Epoch : 12 Train Accuracy: 35.8585, Train Loss: 2.4726
Validation Accuracy: 22.1088, Validation Loss: 3.2826, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 13/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.88it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 77.05it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:21<00:00, 53.69it/s]


Epoch : 13 Train Accuracy: 37.0265, Train Loss: 2.4356
Validation Accuracy: 22.5818, Validation Loss: 3.1838, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 14/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.93it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 76.08it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:27<00:00, 49.91it/s]


Epoch : 14 Train Accuracy: 38.8697, Train Loss: 2.3640
Validation Accuracy: 24.9784, Validation Loss: 3.0693, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 15/15


Training: 100%|██████████| 1382/1382 [01:13<00:00, 18.84it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 74.69it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:29<00:00, 48.91it/s]

Epoch : 15 Train Accuracy: 40.2678, Train Loss: 2.3051
Validation Accuracy: 26.3223, Validation Loss: 2.9994, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358





0,1
Accuracy,▁
correctly_predicted,▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▁▂▂▃▃▃▄▄▄▄▄▅▆▇█
training_loss,█▆▅▅▄▄▄▄▃▃▃▃▂▂▁
validation_accuracy_using_char,▁▁▂▃▃▄▅▄▄▄▄▅▆▇█
validation_accuracy_using_word,▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
validation_loss,█▇▆▆▆▅▄▇▆▅▅▃▂▂▁

0,1
Accuracy,0.00023
correctly_predicted,1.0
epoch,15.0
training_accuracy,0.40268
training_loss,2.30508
validation_accuracy_using_char,0.26322
validation_accuracy_using_word,0.00023
validation_loss,2.99944


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sithhzab with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	length_penalty: 0.4
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	teacher_fr: 0.7


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(29, 512)
    (cell): GRU(512, 512, num_layers=2, dropout=0.5)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(66, 512)
    (cell): GRU(512, 512, num_layers=2, dropout=0.5)
    (fc): Linear(in_features=512, out_features=66, bias=True)
  )
)
Epoch :: 1/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 27.07it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 106.17it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:22<00:00, 190.26it/s]


Epoch : 1 Train Accuracy: 15.0143, Train Loss: 3.6468
Validation Accuracy: 14.9709, Validation Loss: 3.4948, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 2/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.77it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 103.84it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 172.91it/s]


Epoch : 2 Train Accuracy: 17.5276, Train Loss: 3.3703
Validation Accuracy: 15.3602, Validation Loss: 3.4458, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 3/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.78it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 107.08it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 173.81it/s]


Epoch : 3 Train Accuracy: 17.8721, Train Loss: 3.3310
Validation Accuracy: 15.5707, Validation Loss: 3.4223, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 4/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.73it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 103.44it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 174.17it/s]


Epoch : 4 Train Accuracy: 18.0384, Train Loss: 3.3115
Validation Accuracy: 15.5448, Validation Loss: 3.4124, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 5/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.80it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 107.86it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 173.25it/s]


Epoch : 5 Train Accuracy: 18.1842, Train Loss: 3.2985
Validation Accuracy: 15.6890, Validation Loss: 3.4028, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 6/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.78it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 107.15it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 173.54it/s]


Epoch : 6 Train Accuracy: 18.3276, Train Loss: 3.2866
Validation Accuracy: 16.1966, Validation Loss: 3.3931, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 7/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.79it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 105.10it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 174.02it/s]


Epoch : 7 Train Accuracy: 18.2647, Train Loss: 3.2777
Validation Accuracy: 16.1418, Validation Loss: 3.3845, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 8/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.72it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 101.52it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 174.26it/s]


Epoch : 8 Train Accuracy: 18.5881, Train Loss: 3.2663
Validation Accuracy: 16.4504, Validation Loss: 3.3772, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 9/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.69it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 101.47it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 173.83it/s]


Epoch : 9 Train Accuracy: 18.6595, Train Loss: 3.2583
Validation Accuracy: 16.5080, Validation Loss: 3.3679, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 10/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.74it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 98.85it/s] 
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 173.33it/s]


Epoch : 10 Train Accuracy: 18.8275, Train Loss: 3.2484
Validation Accuracy: 16.7157, Validation Loss: 3.3631, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 11/15


Training: 100%|██████████| 1382/1382 [00:52<00:00, 26.57it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 105.21it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 173.28it/s]


Epoch : 11 Train Accuracy: 18.9287, Train Loss: 3.2391
Validation Accuracy: 16.8166, Validation Loss: 3.3494, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 12/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.77it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 83.32it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 173.85it/s]


Epoch : 12 Train Accuracy: 19.1312, Train Loss: 3.2290
Validation Accuracy: 17.0762, Validation Loss: 3.3403, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 13/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.59it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 105.68it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 173.60it/s]


Epoch : 13 Train Accuracy: 19.2589, Train Loss: 3.2189
Validation Accuracy: 17.4396, Validation Loss: 3.3284, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 14/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.75it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 101.71it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 172.22it/s]


Epoch : 14 Train Accuracy: 19.6775, Train Loss: 3.2032
Validation Accuracy: 17.3271, Validation Loss: 3.3190, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 15/15


Training: 100%|██████████| 1382/1382 [00:51<00:00, 26.58it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 100.12it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 173.23it/s]

Epoch : 15 Train Accuracy: 19.9287, Train Loss: 3.1912
Validation Accuracy: 17.9414, Validation Loss: 3.3004, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358





0,1
Accuracy,▁
correctly_predicted,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▁▅▅▅▆▆▆▆▆▆▇▇▇██
training_loss,█▄▃▃▃▂▂▂▂▂▂▂▁▁▁
validation_accuracy_using_char,▁▂▂▂▃▄▄▄▅▅▅▆▇▇█
validation_accuracy_using_word,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_loss,█▆▅▅▅▄▄▄▃▃▃▂▂▂▁

0,1
Accuracy,0.0
correctly_predicted,0.0
epoch,15.0
training_accuracy,0.19929
training_loss,3.19116
validation_accuracy_using_char,0.17941
validation_accuracy_using_word,0.0
validation_loss,3.30041


[34m[1mwandb[0m: Agent Starting Run: 0f3c2y5n with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_fr: 0.5


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 512)
    (cell): RNN(512, 512, num_layers=2, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 512)
    (cell): RNN(512, 512, num_layers=2, dropout=0.3)
    (fc): Linear(in_features=512, out_features=66, bias=True)
  )
)
Epoch :: 1/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.48it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 105.44it/s]
Beam_Search: 100%|██████████| 4358/4358 [02:37<00:00, 27.61it/s]


Epoch : 1 Train Accuracy: 20.0209, Train Loss: 6.5324
Validation Accuracy: 4.9316, Validation Loss: 17.2610, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 2/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.74it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 105.47it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:16<00:00, 263.70it/s]


Epoch : 2 Train Accuracy: 18.8414, Train Loss: 7.8434
Validation Accuracy: 8.0204, Validation Loss: 25.2889, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 3/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.71it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 106.13it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:21<00:00, 205.66it/s]


Epoch : 3 Train Accuracy: 20.1432, Train Loss: 10.3417
Validation Accuracy: 3.7809, Validation Loss: 22.1059, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 4/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.61it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 104.04it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:27<00:00, 157.13it/s]


Epoch : 4 Train Accuracy: 20.7810, Train Loss: 11.0298
Validation Accuracy: 6.3275, Validation Loss: 27.4669, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 5/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.46it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 89.56it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:23<00:00, 187.18it/s]


Epoch : 5 Train Accuracy: 20.8447, Train Loss: 11.9525
Validation Accuracy: 13.5721, Validation Loss: 28.2055, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 6/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 31.37it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 104.68it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:23<00:00, 188.07it/s]


Epoch : 6 Train Accuracy: 21.1473, Train Loss: 12.1673
Validation Accuracy: 13.9442, Validation Loss: 28.7121, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 7/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.57it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 106.26it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:16<00:00, 265.93it/s]


Epoch : 7 Train Accuracy: 20.9142, Train Loss: 12.1402
Validation Accuracy: 4.3923, Validation Loss: 31.4443, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 8/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 31.33it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 106.61it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 170.19it/s]


Epoch : 8 Train Accuracy: 21.1112, Train Loss: 12.4981
Validation Accuracy: 10.3507, Validation Loss: 30.2531, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 9/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.59it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 104.39it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:27<00:00, 156.25it/s]


Epoch : 9 Train Accuracy: 21.1049, Train Loss: 12.6153
Validation Accuracy: 9.1942, Validation Loss: 30.9646, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 10/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.58it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 104.42it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:18<00:00, 232.20it/s]


Epoch : 10 Train Accuracy: 21.3851, Train Loss: 11.9758
Validation Accuracy: 8.9866, Validation Loss: 29.0925, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 11/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 31.34it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 105.36it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:25<00:00, 170.80it/s]


Epoch : 11 Train Accuracy: 21.3662, Train Loss: 12.0304
Validation Accuracy: 6.4025, Validation Loss: 30.3418, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 12/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.43it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 105.33it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:16<00:00, 264.54it/s]


Epoch : 12 Train Accuracy: 21.3916, Train Loss: 12.0615
Validation Accuracy: 7.9195, Validation Loss: 27.5159, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 13/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.42it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 107.54it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:27<00:00, 156.75it/s]


Epoch : 13 Train Accuracy: 21.3971, Train Loss: 11.9764
Validation Accuracy: 9.7566, Validation Loss: 28.1144, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 14/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.61it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 101.88it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:18<00:00, 232.56it/s]


Epoch : 14 Train Accuracy: 21.4037, Train Loss: 12.1185
Validation Accuracy: 10.9679, Validation Loss: 27.3728, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 15/15


Training: 100%|██████████| 1382/1382 [00:43<00:00, 31.57it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 103.84it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:21<00:00, 206.31it/s]

Epoch : 15 Train Accuracy: 21.5049, Train Loss: 12.0059
Validation Accuracy: 10.8756, Validation Loss: 28.1341, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358





0,1
Accuracy,▁
correctly_predicted,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▄▁▄▆▆▇▆▇▇██████
training_loss,▁▃▅▆▇▇▇██▇▇▇▇▇▇
validation_accuracy_using_char,▂▄▁▃██▁▆▅▅▃▄▅▆▆
validation_accuracy_using_word,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_loss,▁▅▃▆▆▇█▇█▇▇▆▆▆▆

0,1
Accuracy,0.0
correctly_predicted,0.0
epoch,15.0
training_accuracy,0.21505
training_loss,12.00592
validation_accuracy_using_char,0.10876
validation_accuracy_using_word,0.0
validation_loss,28.13414


[34m[1mwandb[0m: Agent Starting Run: 7db6i2cf with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	teacher_fr: 0.7


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (cell): RNN(256, 512, num_layers=3, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (cell): RNN(256, 512, num_layers=3, dropout=0.3)
    (fc): Linear(in_features=512, out_features=66, bias=True)
  )
)
Epoch :: 1/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.22it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 98.16it/s] 
Beam_Search: 100%|██████████| 4358/4358 [00:56<00:00, 77.42it/s]


Epoch : 1 Train Accuracy: 22.0231, Train Loss: 4.2576
Validation Accuracy: 8.9029, Validation Loss: 4.8525, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 2/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.12it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 98.59it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:02<00:00, 70.13it/s]


Epoch : 2 Train Accuracy: 23.6551, Train Loss: 3.7867
Validation Accuracy: 6.0506, Validation Loss: 5.7397, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 3/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.32it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 87.96it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:56<00:00, 77.10it/s]


Epoch : 3 Train Accuracy: 23.4737, Train Loss: 3.7680
Validation Accuracy: 9.4220, Validation Loss: 4.8652, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 4/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.05it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 95.99it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:56<00:00, 77.24it/s]


Epoch : 4 Train Accuracy: 23.4469, Train Loss: 3.7636
Validation Accuracy: 9.4220, Validation Loss: 4.7906, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 5/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.08it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 97.39it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:56<00:00, 77.09it/s]


Epoch : 5 Train Accuracy: 23.4729, Train Loss: 3.7457
Validation Accuracy: 9.4249, Validation Loss: 4.7994, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 6/15


Training: 100%|██████████| 691/691 [00:25<00:00, 26.96it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 98.52it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:56<00:00, 76.82it/s]


Epoch : 6 Train Accuracy: 23.3752, Train Loss: 3.7599
Validation Accuracy: 9.4249, Validation Loss: 4.7215, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 7/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.02it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 99.60it/s] 
Beam_Search: 100%|██████████| 4358/4358 [00:56<00:00, 77.09it/s]


Epoch : 7 Train Accuracy: 23.3802, Train Loss: 3.7573
Validation Accuracy: 8.3694, Validation Loss: 5.4212, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 8/15


Training: 100%|██████████| 691/691 [00:25<00:00, 26.87it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 99.17it/s] 
Beam_Search: 100%|██████████| 4358/4358 [01:02<00:00, 69.93it/s]


Epoch : 8 Train Accuracy: 23.4045, Train Loss: 3.7604
Validation Accuracy: 5.6901, Validation Loss: 6.0850, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 9/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.24it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 85.70it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:56<00:00, 77.30it/s]


Epoch : 9 Train Accuracy: 23.4688, Train Loss: 3.7532
Validation Accuracy: 9.4220, Validation Loss: 5.1656, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 10/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.04it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 98.39it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 81.62it/s]


Epoch : 10 Train Accuracy: 23.4461, Train Loss: 3.7506
Validation Accuracy: 9.4249, Validation Loss: 4.7660, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 11/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.06it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 100.54it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:12<00:00, 59.77it/s]


Epoch : 11 Train Accuracy: 23.3635, Train Loss: 3.7490
Validation Accuracy: 6.0305, Validation Loss: 5.7902, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 12/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.14it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 97.57it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:56<00:00, 77.32it/s]


Epoch : 12 Train Accuracy: 23.4710, Train Loss: 3.7457
Validation Accuracy: 9.4220, Validation Loss: 4.8635, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 13/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.23it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 100.26it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 81.49it/s]


Epoch : 13 Train Accuracy: 23.4765, Train Loss: 3.7488
Validation Accuracy: 9.4249, Validation Loss: 4.7597, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 14/15


Training: 100%|██████████| 691/691 [00:25<00:00, 27.08it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 98.69it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:56<00:00, 77.35it/s]


Epoch : 14 Train Accuracy: 23.4841, Train Loss: 3.7437
Validation Accuracy: 9.4220, Validation Loss: 5.1411, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 15/15


Training: 100%|██████████| 691/691 [00:25<00:00, 26.98it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 99.05it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 81.95it/s]

Epoch : 15 Train Accuracy: 23.4953, Train Loss: 3.7492
Validation Accuracy: 9.4249, Validation Loss: 4.7552, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358





0,1
Accuracy,▁
correctly_predicted,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▁█▇▇▇▇▇▇▇▇▇▇▇▇▇
training_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_accuracy_using_char,▇▂████▆▁██▂████
validation_accuracy_using_word,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_loss,▂▆▂▁▁▁▅█▃▁▆▂▁▃▁

0,1
Accuracy,0.0
correctly_predicted,0.0
epoch,15.0
training_accuracy,0.23495
training_loss,3.74919
validation_accuracy_using_char,0.09425
validation_accuracy_using_word,0.0
validation_loss,4.75524


[34m[1mwandb[0m: Agent Starting Run: ezslkhps with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	length_penalty: 0.4
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_fr: 0.5


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(29, 256)
    (cell): RNN(256, 512, num_layers=2, dropout=0.5)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(66, 256)
    (cell): RNN(256, 512, num_layers=2, dropout=0.5)
    (fc): Linear(in_features=512, out_features=66, bias=True)
  )
)
Epoch :: 1/15


Training: 100%|██████████| 691/691 [00:22<00:00, 31.30it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 103.76it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:21<00:00, 206.93it/s]


Epoch : 1 Train Accuracy: 21.1878, Train Loss: 4.0271
Validation Accuracy: 13.1568, Validation Loss: 5.0140, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 2/15


Training: 100%|██████████| 691/691 [00:22<00:00, 31.36it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 108.38it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:18<00:00, 231.31it/s]


Epoch : 2 Train Accuracy: 19.8392, Train Loss: 4.1245
Validation Accuracy: 13.5779, Validation Loss: 5.6689, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 3/15


Training: 100%|██████████| 691/691 [00:21<00:00, 31.53it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 107.49it/s]
Beam_Search: 100%|██████████| 4358/4358 [02:39<00:00, 27.32it/s]


Epoch : 3 Train Accuracy: 19.0584, Train Loss: 4.0867
Validation Accuracy: 3.0340, Validation Loss: 5.0241, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 4/15


Training: 100%|██████████| 691/691 [00:22<00:00, 31.29it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 108.05it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:16<00:00, 263.33it/s]


Epoch : 4 Train Accuracy: 17.3738, Train Loss: 4.0273
Validation Accuracy: 13.4337, Validation Loss: 5.3334, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 5/15


Training: 100%|██████████| 691/691 [00:21<00:00, 31.51it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 105.67it/s]
Beam_Search: 100%|██████████| 4358/4358 [02:40<00:00, 27.21it/s]


Epoch : 5 Train Accuracy: 17.7287, Train Loss: 4.1489
Validation Accuracy: 2.5985, Validation Loss: 5.2669, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 6/15


Training: 100%|██████████| 691/691 [00:22<00:00, 31.41it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 104.59it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:16<00:00, 262.84it/s]


Epoch : 6 Train Accuracy: 18.0214, Train Loss: 4.1339
Validation Accuracy: 13.5000, Validation Loss: 5.1670, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 7/15


Training: 100%|██████████| 691/691 [00:21<00:00, 31.50it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 104.07it/s]
Beam_Search: 100%|██████████| 4358/4358 [02:40<00:00, 27.23it/s]


Epoch : 7 Train Accuracy: 17.8945, Train Loss: 4.1562
Validation Accuracy: 2.4802, Validation Loss: 5.5363, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 8/15


Training: 100%|██████████| 691/691 [00:22<00:00, 31.26it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 100.72it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:16<00:00, 263.12it/s]


Epoch : 8 Train Accuracy: 17.9500, Train Loss: 4.2167
Validation Accuracy: 13.3818, Validation Loss: 5.5617, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 9/15


Training: 100%|██████████| 691/691 [00:21<00:00, 31.65it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 106.25it/s]
Beam_Search: 100%|██████████| 4358/4358 [02:40<00:00, 27.19it/s]


Epoch : 9 Train Accuracy: 17.8015, Train Loss: 4.2478
Validation Accuracy: 2.5985, Validation Loss: 5.7240, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 10/15


Training: 100%|██████████| 691/691 [00:21<00:00, 31.52it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 106.02it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:16<00:00, 263.47it/s]


Epoch : 10 Train Accuracy: 17.7976, Train Loss: 4.2837
Validation Accuracy: 13.3818, Validation Loss: 5.7529, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 11/15


Training: 100%|██████████| 691/691 [00:21<00:00, 31.52it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 98.84it/s] 
Beam_Search: 100%|██████████| 4358/4358 [00:16<00:00, 263.80it/s]


Epoch : 11 Train Accuracy: 17.7768, Train Loss: 4.3748
Validation Accuracy: 13.3818, Validation Loss: 5.7395, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 12/15


Training: 100%|██████████| 691/691 [00:22<00:00, 31.39it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 104.89it/s]
Beam_Search: 100%|██████████| 4358/4358 [02:40<00:00, 27.21it/s]


Epoch : 12 Train Accuracy: 17.2422, Train Loss: 4.3602
Validation Accuracy: 3.7290, Validation Loss: 5.6163, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 13/15


Training: 100%|██████████| 691/691 [00:22<00:00, 31.01it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 108.00it/s]
Beam_Search: 100%|██████████| 4358/4358 [02:40<00:00, 27.24it/s]


Epoch : 13 Train Accuracy: 17.4718, Train Loss: 4.2643
Validation Accuracy: 3.7290, Validation Loss: 5.4721, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 14/15


Training: 100%|██████████| 691/691 [00:21<00:00, 31.65it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 105.76it/s]
Beam_Search: 100%|██████████| 4358/4358 [02:40<00:00, 27.17it/s]


Epoch : 14 Train Accuracy: 16.6570, Train Loss: 4.1130
Validation Accuracy: 3.7290, Validation Loss: 5.1750, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 15/15


Training: 100%|██████████| 691/691 [00:21<00:00, 31.63it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 104.72it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:14<00:00, 305.29it/s]

Epoch : 15 Train Accuracy: 16.1073, Train Loss: 4.1207
Validation Accuracy: 14.0826, Validation Loss: 5.3528, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358





0,1
Accuracy,▁
correctly_predicted,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,█▆▅▃▃▄▃▄▃▃▃▃▃▂▁
training_loss,▁▃▂▁▃▃▄▅▅▆██▆▃▃
validation_accuracy_using_char,▇█▁█▁█▁█▁██▂▂▂█
validation_accuracy_using_word,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_loss,▁▇▁▄▃▂▆▆███▇▅▃▄

0,1
Accuracy,0.0
correctly_predicted,0.0
epoch,15.0
training_accuracy,0.16107
training_loss,4.12071
validation_accuracy_using_char,0.14083
validation_accuracy_using_word,0.0
validation_loss,5.35276


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zffilrq4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 2
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	length_penalty: 0.4
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	teacher_fr: 0.5


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.7, inplace=False)
    (embedding): Embedding(29, 128)
    (cell): GRU(128, 512, num_layers=2, dropout=0.7)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.7, inplace=False)
    (embedding): Embedding(66, 128)
    (cell): GRU(128, 512, num_layers=2, dropout=0.7)
    (fc): Linear(in_features=512, out_features=66, bias=True)
  )
)
Epoch :: 1/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.71it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 104.32it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:35<00:00, 121.87it/s]


Epoch : 1 Train Accuracy: 22.9520, Train Loss: 3.1797
Validation Accuracy: 14.0249, Validation Loss: 3.8105, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 2/15


Training: 100%|██████████| 691/691 [00:27<00:00, 25.57it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 103.19it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:35<00:00, 121.09it/s]


Epoch : 2 Train Accuracy: 24.1848, Train Loss: 3.0868
Validation Accuracy: 16.0351, Validation Loss: 3.9574, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 3/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.64it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 96.91it/s] 
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 112.51it/s]


Epoch : 3 Train Accuracy: 25.3856, Train Loss: 3.0142
Validation Accuracy: 16.3985, Validation Loss: 3.8435, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 4/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.72it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 102.54it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 113.01it/s]


Epoch : 4 Train Accuracy: 26.0592, Train Loss: 2.9642
Validation Accuracy: 16.3754, Validation Loss: 3.8388, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 5/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.69it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 102.56it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 113.24it/s]


Epoch : 5 Train Accuracy: 26.2984, Train Loss: 2.9396
Validation Accuracy: 16.6205, Validation Loss: 3.8161, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 6/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.61it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 101.28it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:48<00:00, 90.30it/s] 


Epoch : 6 Train Accuracy: 27.0084, Train Loss: 2.9060
Validation Accuracy: 17.7193, Validation Loss: 3.7549, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 7/15


Training: 100%|██████████| 691/691 [00:27<00:00, 25.55it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 101.76it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:49<00:00, 88.88it/s] 


Epoch : 7 Train Accuracy: 27.7008, Train Loss: 2.8800
Validation Accuracy: 17.9241, Validation Loss: 3.7299, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 8/15


Training: 100%|██████████| 691/691 [00:27<00:00, 25.51it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 76.79it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:50<00:00, 85.55it/s]


Epoch : 8 Train Accuracy: 28.2833, Train Loss: 2.8483
Validation Accuracy: 16.9839, Validation Loss: 3.7313, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 9/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.79it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 95.72it/s] 
Beam_Search: 100%|██████████| 4358/4358 [00:49<00:00, 88.00it/s] 


Epoch : 9 Train Accuracy: 28.6420, Train Loss: 2.8275
Validation Accuracy: 18.8527, Validation Loss: 3.6753, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 10/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.73it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 99.80it/s] 
Beam_Search: 100%|██████████| 4358/4358 [00:51<00:00, 85.05it/s]


Epoch : 10 Train Accuracy: 29.3665, Train Loss: 2.8019
Validation Accuracy: 17.7712, Validation Loss: 3.7308, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 11/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.62it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 103.16it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:50<00:00, 85.60it/s]


Epoch : 11 Train Accuracy: 29.9277, Train Loss: 2.7779
Validation Accuracy: 19.3574, Validation Loss: 3.6854, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 12/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.65it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 92.03it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:50<00:00, 86.45it/s]


Epoch : 12 Train Accuracy: 30.2034, Train Loss: 2.7618
Validation Accuracy: 19.9112, Validation Loss: 3.6660, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 13/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.60it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 103.69it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:52<00:00, 83.77it/s]


Epoch : 13 Train Accuracy: 30.5479, Train Loss: 2.7422
Validation Accuracy: 18.6076, Validation Loss: 3.7199, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 14/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.68it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 78.34it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:51<00:00, 84.82it/s]


Epoch : 14 Train Accuracy: 30.7928, Train Loss: 2.7256
Validation Accuracy: 19.6112, Validation Loss: 3.6909, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 15/15


Training: 100%|██████████| 691/691 [00:26<00:00, 25.63it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 103.21it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:50<00:00, 85.73it/s]

Epoch : 15 Train Accuracy: 31.3195, Train Loss: 2.6981
Validation Accuracy: 20.6408, Validation Loss: 3.6673, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358





0,1
Accuracy,▁
correctly_predicted,▁▁▁▁▁██████████
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▁▂▃▄▄▄▅▅▆▆▇▇▇██
training_loss,█▇▆▅▅▄▄▃▃▃▂▂▂▁▁
validation_accuracy_using_char,▁▃▄▃▄▅▅▄▆▅▇▇▆▇█
validation_accuracy_using_word,▁▁▁▁▁██████████
validation_loss,▄█▅▅▅▃▃▃▁▃▁▁▂▂▁

0,1
Accuracy,0.00023
correctly_predicted,1.0
epoch,15.0
training_accuracy,0.31319
training_loss,2.69809
validation_accuracy_using_char,0.20641
validation_accuracy_using_word,0.00023
validation_loss,3.66728


[34m[1mwandb[0m: Agent Starting Run: api44m14 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	length_penalty: 0.5
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_fr: 0.5


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.7, inplace=False)
    (embedding): Embedding(29, 256)
    (cell): RNN(256, 128, num_layers=3, dropout=0.7)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.7, inplace=False)
    (embedding): Embedding(66, 256)
    (cell): RNN(256, 128, num_layers=3, dropout=0.7)
    (fc): Linear(in_features=128, out_features=66, bias=True)
  )
)
Epoch :: 1/15


Training: 100%|██████████| 1382/1382 [00:45<00:00, 30.66it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 124.56it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:47<00:00, 91.16it/s]


Epoch : 1 Train Accuracy: 23.3008, Train Loss: 3.1347
Validation Accuracy: 14.5239, Validation Loss: 3.7481, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 2/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.75it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 124.52it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 81.25it/s]


Epoch : 2 Train Accuracy: 23.9525, Train Loss: 3.0832
Validation Accuracy: 15.6400, Validation Loss: 3.7602, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 3/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.86it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 124.88it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 81.91it/s]


Epoch : 3 Train Accuracy: 24.4491, Train Loss: 3.0455
Validation Accuracy: 16.2341, Validation Loss: 3.7048, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 4/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.96it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 127.08it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:50<00:00, 86.25it/s]


Epoch : 4 Train Accuracy: 24.7350, Train Loss: 3.0345
Validation Accuracy: 16.6292, Validation Loss: 3.6767, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 5/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.81it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 124.73it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 81.89it/s]


Epoch : 5 Train Accuracy: 24.9205, Train Loss: 3.0210
Validation Accuracy: 16.1735, Validation Loss: 3.7093, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 6/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 31.01it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 126.95it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 82.03it/s]


Epoch : 6 Train Accuracy: 24.9840, Train Loss: 3.0142
Validation Accuracy: 16.2860, Validation Loss: 3.7568, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 7/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.85it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 123.17it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:47<00:00, 91.41it/s]


Epoch : 7 Train Accuracy: 24.9733, Train Loss: 3.0057
Validation Accuracy: 16.3437, Validation Loss: 3.7597, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 8/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.97it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 123.41it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 81.60it/s]


Epoch : 8 Train Accuracy: 25.1025, Train Loss: 2.9948
Validation Accuracy: 15.9428, Validation Loss: 3.8076, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 9/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.84it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 120.85it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 81.86it/s]


Epoch : 9 Train Accuracy: 25.1670, Train Loss: 2.9940
Validation Accuracy: 17.0618, Validation Loss: 3.7744, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 10/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.72it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 127.27it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:50<00:00, 86.22it/s]


Epoch : 10 Train Accuracy: 24.9131, Train Loss: 2.9943
Validation Accuracy: 16.8282, Validation Loss: 3.8048, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 11/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.74it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 121.67it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:55<00:00, 78.16it/s]


Epoch : 11 Train Accuracy: 24.9555, Train Loss: 2.9951
Validation Accuracy: 15.7092, Validation Loss: 3.7658, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 12/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.75it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 126.18it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:55<00:00, 78.60it/s]


Epoch : 12 Train Accuracy: 24.5181, Train Loss: 3.0314
Validation Accuracy: 16.4504, Validation Loss: 3.7148, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 13/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.85it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 123.43it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:52<00:00, 83.15it/s]


Epoch : 13 Train Accuracy: 24.6655, Train Loss: 3.0270
Validation Accuracy: 16.3985, Validation Loss: 3.6635, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 14/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.74it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 127.15it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:52<00:00, 83.37it/s]


Epoch : 14 Train Accuracy: 24.5678, Train Loss: 3.0151
Validation Accuracy: 16.7359, Validation Loss: 3.6365, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 15/15


Training: 100%|██████████| 1382/1382 [00:44<00:00, 30.91it/s]
Validation: 100%|██████████| 137/137 [00:01<00:00, 119.56it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:53<00:00, 81.13it/s]

Epoch : 15 Train Accuracy: 24.5542, Train Loss: 3.0248
Validation Accuracy: 16.4706, Validation Loss: 3.6152, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358





0,1
Accuracy,▁
correctly_predicted,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▁▃▅▆▇▇▇██▇▇▆▆▆▆
training_loss,█▅▄▃▂▂▂▁▁▁▁▃▃▂▃
validation_accuracy_using_char,▁▄▆▇▆▆▆▅█▇▄▆▆▇▆
validation_accuracy_using_word,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_loss,▆▆▄▃▄▆▆█▇█▆▅▃▂▁

0,1
Accuracy,0.0
correctly_predicted,0.0
epoch,15.0
training_accuracy,0.24554
training_loss,3.02482
validation_accuracy_using_char,0.16471
validation_accuracy_using_word,0.0
validation_loss,3.61519


[34m[1mwandb[0m: Agent Starting Run: nep9ceqq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_size: 512
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 1024
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	length_penalty: 0.4
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	teacher_fr: 0.7


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.7, inplace=False)
    (embedding): Embedding(29, 512)
    (cell): LSTM(512, 1024, num_layers=2, dropout=0.7)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.7, inplace=False)
    (embedding): Embedding(66, 512)
    (cell): LSTM(512, 1024, num_layers=2, dropout=0.7)
    (fc): Linear(in_features=1024, out_features=66, bias=True)
  )
)
Epoch :: 1/15


Training: 100%|██████████| 691/691 [01:20<00:00,  8.62it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.78it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 111.80it/s]


Epoch : 1 Train Accuracy: 5.8178, Train Loss: 4.1776
Validation Accuracy: 13.1511, Validation Loss: 4.1608, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 2/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.80it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.61it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 112.05it/s]


Epoch : 2 Train Accuracy: 12.1965, Train Loss: 4.1479
Validation Accuracy: 12.5685, Validation Loss: 4.1260, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 3/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.78it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 29.00it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 111.84it/s]


Epoch : 3 Train Accuracy: 12.1267, Train Loss: 4.1162
Validation Accuracy: 12.5685, Validation Loss: 4.0896, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 4/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.80it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.45it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 111.90it/s]


Epoch : 4 Train Accuracy: 12.0977, Train Loss: 4.0829
Validation Accuracy: 12.5685, Validation Loss: 4.0524, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 5/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.79it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.81it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:39<00:00, 111.58it/s]


Epoch : 5 Train Accuracy: 12.0942, Train Loss: 4.0467
Validation Accuracy: 12.5685, Validation Loss: 4.0060, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 6/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.79it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.73it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:39<00:00, 111.56it/s]


Epoch : 6 Train Accuracy: 12.0942, Train Loss: 4.0058
Validation Accuracy: 12.5685, Validation Loss: 3.9600, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 7/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.80it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.48it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:39<00:00, 111.72it/s]


Epoch : 7 Train Accuracy: 12.0942, Train Loss: 3.9579
Validation Accuracy: 12.5685, Validation Loss: 3.8933, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 8/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.79it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.75it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 111.81it/s]


Epoch : 8 Train Accuracy: 12.0942, Train Loss: 3.9049
Validation Accuracy: 12.5685, Validation Loss: 3.8292, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 9/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.79it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.67it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 111.76it/s]


Epoch : 9 Train Accuracy: 12.0942, Train Loss: 3.8411
Validation Accuracy: 12.5685, Validation Loss: 3.7612, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 10/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.80it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.93it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:38<00:00, 111.80it/s]


Epoch : 10 Train Accuracy: 12.0942, Train Loss: 3.7785
Validation Accuracy: 12.5685, Validation Loss: 3.6931, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 11/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.80it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.53it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:39<00:00, 111.51it/s]


Epoch : 11 Train Accuracy: 12.0942, Train Loss: 3.7173
Validation Accuracy: 12.5685, Validation Loss: 3.6651, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 12/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.78it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 29.10it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:41<00:00, 103.80it/s]


Epoch : 12 Train Accuracy: 12.0947, Train Loss: 3.6710
Validation Accuracy: 12.5685, Validation Loss: 3.6205, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 13/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.81it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.49it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:41<00:00, 103.88it/s]


Epoch : 13 Train Accuracy: 12.2365, Train Loss: 3.6299
Validation Accuracy: 13.7711, Validation Loss: 3.5874, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 14/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.80it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 28.97it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:44<00:00, 97.22it/s]


Epoch : 14 Train Accuracy: 13.1752, Train Loss: 3.5929
Validation Accuracy: 14.2297, Validation Loss: 3.5579, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358
Epoch :: 15/15


Training: 100%|██████████| 691/691 [01:18<00:00,  8.81it/s]
Validation: 100%|██████████| 69/69 [00:02<00:00, 29.24it/s]
Beam_Search: 100%|██████████| 4358/4358 [00:47<00:00, 91.38it/s]

Epoch : 15 Train Accuracy: 13.7248, Train Loss: 3.5607
Validation Accuracy: 14.4431, Validation Loss: 3.5326, 
Validation Acc. With BeamSearch: 0.0000, Correctly Predicted : 0/4358





0,1
Accuracy,▁
correctly_predicted,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
training_accuracy,▁▇▇▇▇▇▇▇▇▇▇▇▇██
training_loss,██▇▇▇▆▆▅▄▃▃▂▂▁▁
validation_accuracy_using_char,▃▁▁▁▁▁▁▁▁▁▁▁▅▇█
validation_accuracy_using_word,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation_loss,██▇▇▆▆▅▄▄▃▂▂▂▁▁

0,1
Accuracy,0.0
correctly_predicted,0.0
epoch,15.0
training_accuracy,0.13725
training_loss,3.56067
validation_accuracy_using_char,0.14443
validation_accuracy_using_word,0.0
validation_loss,3.53263


[34m[1mwandb[0m: Agent Starting Run: 5ly9ctj4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bi_dir: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.4
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_fr: 0.7


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.7, inplace=False)
    (embedding): Embedding(29, 256)
    (cell): GRU(256, 256, num_layers=3, dropout=0.7)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.7, inplace=False)
    (embedding): Embedding(66, 256)
    (cell): GRU(256, 256, num_layers=3, dropout=0.7)
    (fc): Linear(in_features=256, out_features=66, bias=True)
  )
)
Epoch :: 1/15


Training: 100%|██████████| 691/691 [00:30<00:00, 22.32it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 101.66it/s]
Beam_Search: 100%|██████████| 4358/4358 [01:10<00:00, 61.76it/s]


Epoch : 1 Train Accuracy: 31.0374, Train Loss: 2.7287
Validation Accuracy: 24.6034, Validation Loss: 3.1618, 
Validation Acc. With BeamSearch: 0.0229, Correctly Predicted : 1/4358
Epoch :: 2/15


Training: 100%|██████████| 691/691 [00:30<00:00, 22.37it/s]
Validation: 100%|██████████| 69/69 [00:00<00:00, 98.84it/s]
Beam_Search:  38%|███▊      | 1677/4358 [00:29<00:45, 59.44it/s]

## **Predictions on Test Data in CSV File**

In [None]:
def store_prediction_in_csv_file(HYPER_PARAM, model, device, processed_data):
    # Initialize counters for correct and incorrect predictions
    total_correct, total_incorrect = 0, 0
    
    # Initialize lists to store results
    result, decoded_output, correct_output, input_word = [], [], [], []
    
    # Iterate over each word and its correct transliteration
    for word, correct_transliteration in zip(processed_data["test_x"], processed_data["test_y"]):
        # Generate output sequence using beam search
        output_sequence = beam_search(HYPER_PARAM, model, word[:-1], device, processed_data)
        
        # Check if the output sequence matches the correct transliteration
        if output_sequence != correct_transliteration[1:-1]:
            total_incorrect += 1
            result.append("Incorrect")
        else:
            total_correct += 1
            result.append("Correct")
        
        # Append data to lists
        decoded_output.append(output_sequence)
        correct_output.append(correct_transliteration[1:-1])
        input_word.append(word[:-1])
    
    # Print total correct and incorrect predictions
    print(total_correct, total_incorrect)
    
    # Create a dictionary to store data
    grid = {'Input_Word': input_word, 'Decoded_Output': decoded_output, 'True_Output': correct_output, "Match Result": result}
    
    # Define the path to save the CSV file
    _path = '/kaggle/working/predictions_vanilla.csv'
    
    # Create a DataFrame from the dictionary
    df = pd.DataFrame(grid)
    
    # Save the DataFrame to a CSV file
    df.to_csv(_path, index=False, header=True)


In [None]:
store_prediction_in_csv_file(HYPER_PARAM, model, device, processed_data)