In [8]:
# Import Lib

import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import torch
from torch.autograd import Variable 
import copy
import os
import torch.nn.functional as F
from tqdm import tqdm
from torch.utils.data import DataLoader, TensorDataset
import random
import heapq
import wandb
# Set device (CUDA if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [None]:
# !wandb login 6a66920f640c7001ec17ad4aa7a5da8b378aee61

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


# Preprocessing

In [10]:
def encode(x, max_length, char_to_idx):
    """
    Encode a string into a tensor.

    Args:
    - x (str): Input string to encode.
    - max_length (int): Maximum length for the encoded tensor.
    - char_to_idx (dict): Character to index mapping.

    Returns:
    - encoded (torch.Tensor): Encoded tensor.
    - length (int): Actual length of the encoded sequence.
    """
    encoded = np.zeros(max_length, dtype=int)
    encoder = np.array([char_to_idx[char] for char in x])
    length = min(max_length, len(encoder))
    encoded[:length] = encoder[:length]

    return torch.tensor(encoded, dtype=torch.int64), length

def get_tensor_object(df, max_input_length, max_output_length, char_to_idx_input, char_to_idx_output):
    """
    Create tensor objects from a DataFrame.

    Args:
    - df (pd.DataFrame): Input DataFrame containing input and output sequences.
    - max_input_length (int): Maximum length for input sequences.
    - max_output_length (int): Maximum length for output sequences.
    - char_to_idx_input (dict): Character to index mapping for input sequences.
    - char_to_idx_output (dict): Character to index mapping for output sequences.

    Returns:
    - tensor_inputs (torch.Tensor): Tensor containing encoded input sequences.
    - tensor_outputs (torch.Tensor): Tensor containing encoded output sequences.
    """
    
    # Encode unique inputs and outputs into tensors
    encoded_inputs = []
    encoded_outputs = []

    # Encode the input column
    for input_str in df[0]:
        encoded_input, input_length = encode(input_str, max_input_length, char_to_idx_input)
        encoded_inputs.append(encoded_input)

    # Encode the output column
    for output_str in df[1]:
        encoded_output, output_length = encode(output_str, max_output_length, char_to_idx_output)
        encoded_outputs.append(encoded_output)

    # Stack tensors column-wise
    
#     tensor_inputs = torch.stack(encoded_inputs, dim=1)
#     tensor_outputs = torch.stack(encoded_outputs, dim=1)
    tensor_inputs = torch.stack(encoded_inputs)
    tensor_outputs = torch.stack(encoded_outputs)

    return tensor_inputs, tensor_outputs

def load_dataset(path):
    """
    Load a dataset from a TSV file.
    Args:
    - path (str): Path to the TSV file.
    Returns:
    - df (pd.DataFrame): Loaded DataFrame.
    - max_input_length (int): Maximum length for input sequences.
    - max_output_length (int): Maximum length for output sequences.
    """
    df = pd.read_csv(path, header=None, encoding='utf-8', sep='\t')  # Changed separator to tab
    
    # Convert values to strings before adding special characters
    df[0] = df[0].astype(str).apply(lambda x: x + '$')
    df[1] = df[1].astype(str).apply(lambda x: '^' + x + '$')
    
    # Determine maximum length for input and output sequences
    max_input_length = max(len(x) for x in df[0].unique())
    max_output_length = max(len(x) for x in df[1].unique())
    return df, max_input_length, max_output_length

def look_up_table(vocab1, vocab2, vocab3):
    """
    Create lookup tables for vocabulary mapping.

    Args:
    - vocab1 (list): First list of vocabulary items.
    - vocab2 (list): Second list of vocabulary items.
    - vocab3 (list): Third list of vocabulary items.

    Returns:
    - vocab_to_int (dict): Mapping from vocabulary items to integers.
    - int_to_vocab (dict): Mapping from integers to vocabulary items.
    """
    
    # Combine all vocabularies into one set
    vocab = set(''.join(vocab1) + ''.join(vocab2) + ''.join(vocab3))
    vocab.discard('^')  
    vocab.discard('$')  
    vocab_to_int = {"": 0, '^':1, '$':2}
    for v_i, v in enumerate(sorted(vocab), len(vocab_to_int)):
        vocab_to_int[v] = v_i
    int_to_vocab = {v_i: v for v, v_i in vocab_to_int.items()}
    return vocab_to_int, int_to_vocab




# # Load Train, Val, Test
# df_train, train_input_len, train_out_len = load_dataset('/kaggle/input/dataset/aksharantar_sampled/hin/hin_train.csv')
# df_val, val_input_len, val_out_len = load_dataset('/kaggle/input/dataset/aksharantar_sampled/hin/hin_valid.csv')
# df_test, test_input_len, test_out_len = load_dataset('/kaggle/input/dataset/aksharantar_sampled/hin/hin_test.csv')

# input_max_len = max(train_input_len, val_input_len, test_input_len)
# output_max_len = max(train_out_len, val_out_len, test_out_len)


# # Create Look Up Table
# input_char_to_int, input_int_to_char = look_up_table(df_train[0], df_val[0], df_test[0])
# output_char_to_int, output_int_to_char = look_up_table(df_train[1], df_val[1], df_test[1])

# print("Input Lookup Table:", input_char_to_int)
# print("\n\n Output Lookup Table", output_char_to_int)

# # Data Embedding and Converting them into Tensor
# train_inputs, train_outputs = get_tensor_object(df_train, input_max_len, input_max_len, input_char_to_int, output_char_to_int)
# val_inputs, val_outputs = get_tensor_object(df_val, input_max_len, input_max_len, input_char_to_int, output_char_to_int)
# test_inputs, test_outputs = get_tensor_object(df_test, input_max_len, input_max_len, input_char_to_int, output_char_to_int)

# # Transpose column wise
# train_inputs, train_outputs = torch.transpose(train_inputs, 0, 1), torch.transpose(train_outputs, 0, 1)
# val_inputs, val_outputs = torch.transpose(val_inputs, 0, 1), torch.transpose(val_outputs, 0, 1)
# test_inputs, test_outputs = torch.transpose(test_inputs, 0, 1), torch.transpose(test_outputs, 0, 1)


# print("\n", train_inputs[:,0],train_outputs[:,0])
# print("Training:", train_inputs.shape, train_outputs.shape)

# print("Validation", val_inputs.shape, val_inputs.shape)
# print(df_train.head())

# Create Seq2Seq Model

## encoder and decoder

In [11]:
class Encoder(nn.Module): 
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, dropout, bidirectional, cell_type):
        super(Encoder, self).__init__()
        self.bidirectional = bidirectional
        self.dropout = nn.Dropout(dropout)
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.cell_type = cell_type
        
        # Define embedding layer
        self.embedding = nn.Embedding(input_size, embedding_size)
        
        # Define RNN layer with specific cell type
        if cell_type == 'LSTM':
            self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=dropout, bidirectional=bidirectional)
        elif cell_type == 'GRU':
            self.rnn = nn.GRU(embedding_size, hidden_size, num_layers, dropout=dropout, bidirectional=bidirectional)
        elif cell_type == 'RNN':
            self.rnn = nn.RNN(embedding_size, hidden_size, num_layers, dropout=dropout, bidirectional=bidirectional)
        else:
            raise ValueError("Invalid RNN type. Choose from 'LSTM', 'GRU', or 'RNN'.")
        
        
    def forward(self, x): # x shape: (seq_length, N) where N is batch size
        # Perform dropout on the input
        embedding = self.embedding(x)
        embedding = self.dropout(embedding) # embedding shape: (seq_length, N, embedding_size)
        
        if self.cell_type == "LSTM":
            # Pass through the LSTM layer
            outputs, (hidden, cell) = self.rnn(embedding) # outputs shape: (seq_length, N, hidden_size)
            if self.bidirectional:
                # Sum the bidirectional outputs
                outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
                hidden = torch.cat((hidden[: self.num_layers], hidden[self.num_layers:]), dim=0)
            # Return hidden state and cell state   
            return hidden, cell
        elif self.cell_type == "GRU" or self.cell_type == "RNN":
            # Pass through the RNN/GRU layer
            outputs, hidden = self.rnn(embedding) # outputs shape: (seq_length, N, hidden_size)
            if self.bidirectional:
                # Sum the bidirectional outputs
                outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
                hidden = torch.cat((hidden[: self.num_layers], hidden[self.num_layers:]), dim=0)

            # Return hidden state and cell state
            return hidden
        else:
            print("Invalid cell_type specified for Encoder.")
            return None


class Decoder(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, output_size, num_layers, dropout, bidirectional, cell_type):
        super(Decoder, self).__init__()
        self.bidirectional = bidirectional
        self.dropout = nn.Dropout(dropout)  
        self.num_layers = num_layers 
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.cell_type = cell_type
        
        # Define embedding layer
        self.embedding = nn.Embedding(input_size, embedding_size)
        
        # Define RNN layer with specific cell type
        if cell_type == 'LSTM':
            self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=dropout, bidirectional=bidirectional)
        elif cell_type == 'GRU':
            self.rnn = nn.GRU(embedding_size, hidden_size, num_layers, dropout=dropout, bidirectional=bidirectional)
        elif cell_type == 'RNN':
            self.rnn = nn.RNN(embedding_size, hidden_size, num_layers, dropout=dropout, bidirectional=bidirectional)
        else:
            raise ValueError("Invalid RNN type. Choose from 'LSTM', 'GRU', or 'RNN'.")
            
            
        # Define fully connected layer
        self.fc = nn.Linear(hidden_size * 2 if bidirectional else hidden_size, output_size)  # Adjust input size for bidirectional decoder
        # Softmax layer
        self.log_softmax = nn.LogSoftmax(dim=1)
    
    def forward(self, x, hidden, cell): # x shape: (N) where N is for batch size, we want it to be (1, N), seq_length
        
        # Ensure x has the shape (1, N)
        x = x.unsqueeze(0)
        
        # Perform dropout on the input
        embedding = self.embedding(x)
        embedding = self.dropout(embedding)  # embedding shape: (1, N, embedding_size)
        
        if self.cell_type == "LSTM":
            # Pass through the LSTM layer
            outputs, (hidden, cell) = self.rnn(embedding, (hidden, cell))  # outputs shape: (1, N, hidden_size * num_directions)

            # Pass through fully connected layer
            out = self.fc(outputs).squeeze(0)
            predictions = self.log_softmax(out)

            return predictions, hidden, cell
        elif self.cell_type == "GRU" or self.cell_type == "RNN":
            # Pass through the RNN/GRU layer
            outputs, hidden = self.rnn(embedding, hidden)  # outputs shape: (1, N, hidden_size * num_directions)

            # Pass through fully connected layer
            out = self.fc(outputs).squeeze(0)
            predictions = self.log_softmax(out)

            return predictions, hidden

        else:
            print("Invalid cell_type specified for Decoder.")
            return None


## Seq2Seq Class

In [12]:
class Seq2Seq(nn.Module):

    def __init__(self, encoder, decoder, output_char_to_int, teacher_forcing, cell_type):

        super(Seq2Seq, self).__init__()  
        # Initialize encoder and decoder
        self.decoder = decoder
        self.encoder = encoder
        self.cell_type = cell_type
        self.target_vocab_size = len(output_char_to_int)
        self.teacher_force_ratio = teacher_forcing
        
    def forward(self, source, target):
        # Get batch size, target length, and target vocabulary size
        batch_size = source.shape[1]
        target_len = target.shape[0]
        target_vocab_size = self.target_vocab_size
        teacher_force_ratio = self.teacher_force_ratio
        
        # Initialize outputs tensor
        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(source.device)
        # Grab the first input to the Decoder which will be <SOS> token i.e '^'
        x = target[0]
        # Get hidden state and cell state from encoder
        if self.cell_type == 'LSTM':
            hidden, cell = self.encoder(source)
        else:
            hidden = self.encoder(source)
        
        for t in range(1, target_len):
            # Use previous hidden and cell states as context from encoder at start
            if self.cell_type == 'LSTM':
                output, hidden, cell = self.decoder(x, hidden, cell)
            else:
                output, hidden = self.decoder(x, hidden, None)
                
            # Store next output prediction
            outputs[t] = output
            # Get the best word the Decoder predicted (index in the vocabulary)
            best_guess = output.argmax(1)
            # Update input for next time step based on teacher forcing ratio
            x = best_guess if random.random() >= teacher_force_ratio else target[t]

        return outputs

# TRAINING

In [13]:
# BEAM SEARCH FUNCTION
def beam_search(model, input_seq, max_length, input_char_index, output_char_index, reverse_target_char_index, beam_width, length_penalty, cell_type):
    """
    Perform beam search to generate a sequence using the provided model.

    Args:
    - model (nn.Module): The Seq2Seq model.
    - input_seq (str): The input sequence.
    - max_length (int): Maximum length of the input sequence.
    - input_char_index (dict): Mapping from characters to integers for the input vocabulary.
    - output_char_index (dict): Mapping from characters to integers for the output vocabulary.
    - reverse_target_char_index (dict): Reverse mapping from integers to characters for the output vocabulary.
    - beam_width (int): Beam width for beam search.
    - length_penalty (float): Length penalty for beam search.
    - cell_type (str): Type of RNN cell used in the model ('LSTM', 'GRU', or 'RNN').

    Returns:
    - str: The generated output sequence.
    """
    if len(input_seq) > max_length:
        print("Input Length is exceeding max length!!!!")
        return ""

    # Create np array of zeros of length input
    input_data = np.zeros((max_length, 1), dtype=int)  # (N,1)

    # Encode the input
    for idx, char in enumerate(input_seq):
        input_data[idx, 0] = input_char_index[char]
    input_data[idx + 1, 0] = input_char_index["$"]  # EOS

    # Convert to tensor
    input_tensor = torch.tensor(input_data, dtype=torch.int64).to(device)  # N,1

    with torch.no_grad():
        if cell_type == 'LSTM':
            hidden, cell = model.encoder(input_tensor)

        else:
            hidden = model.encoder(input_tensor)

    # Initialize beam
    out_t = output_char_index['^']
    out_reshape = np.array(out_t).reshape(1,)
    hidden_par = hidden.unsqueeze(0)
    initial_sequence = torch.tensor(out_reshape).to(device)
    beam = [(0.0, initial_sequence, hidden_par)]  # [(score, sequence, hidden)]

    for _ in range(len(output_char_index)):
        candidates = []
        for score, seq, hidden in beam:
            if seq[-1].item() == output_char_index['$']:
                # If the sequence ends with the end token, add it to the candidates
                candidates.append((score, seq, hidden))
                continue

            last_token = np.array(seq[-1].item()).reshape(1,)
            x = torch.tensor(last_token).to(device)

            if cell_type == 'LSTM':
                output, hidden, cell,  = model.decoder(x, hidden.squeeze(0), cell)
            else:
                output, hidden,  = model.decoder(x, hidden.squeeze(0), None)

            probabilities = F.softmax(output, dim=1)
            topk_probs, topk_tokens = torch.topk(probabilities, k=beam_width)

            for prob, token in zip(topk_probs[0], topk_tokens[0]):
                new_seq = torch.cat((seq, token.unsqueeze(0)), dim=0)
                seq_length_norm_factor = (len(new_seq) - 1) / 5
                candidate_score = score + torch.log(prob).item() / (seq_length_norm_factor ** length_penalty)
                candidates.append((candidate_score, new_seq, hidden.unsqueeze(0)))

        # Select top-k candidates based on the accumulated scores
        beam = heapq.nlargest(beam_width, candidates, key=lambda x: x[0])

    best_score, best_sequence, _ = max(beam, key=lambda x: x[0])  # Select the best sequence from the beam as the output

    # Convert the best sequence indices to characters
    return ''.join([reverse_target_char_index[token.item()] for token in best_sequence[1:]])


# TRAINING FUNCTION
def train(model, num_epochs, criterion, optimizer, train_batch_x, train_batch_y, val_batch_x, val_batch_y, df_val, input_char_to_int, output_char_to_int, output_int_to_char, beam_width, length_penalty, cell_type, max_length, wandb_log):
    """
    Train the Seq2Seq model.

    Args:
    - model (nn.Module): The Seq2Seq model.
    - num_epochs (int): Number of training epochs.
    - criterion: Loss criterion for training.
    - optimizer: Optimizer for training.
    - train_batch_x: Training input data.
    - train_batch_y: Training target data.
    - val_batch_x: Validation input data.
    - val_batch_y: Validation target data.
    - df_val: DataFrame for validation data.
    - input_char_to_int (dict): Mapping from characters to integers for the input vocabulary.
    - output_char_to_int (dict): Mapping from characters to integers for the output vocabulary.
    - output_int_to_char (dict): Reverse mapping from integers to characters for the output vocabulary.
    - beam_width (int): Beam width for beam search.
    - length_penalty (float): Length penalty for beam search.
    - cell_type (str): Type of RNN cell used in the model ('LSTM', 'GRU', or 'RNN').
    - max_length (int): Maximum length of sequences.
    - wandb_log (int): Whether to log to wandb (1 or 0).
    Returns:
    - nn.Module: The trained model.
    - float: Validation accuracy.
    """
    for epoch in range(num_epochs):
        total_words = 0
        correct_pred = 0
        total_loss = 0
        accuracy = 0
        model.train()
        
        # Use tqdm for progress tracking
        train_data_iterator = tqdm(zip(train_batch_x, train_batch_y), total=len(train_batch_x))
        
        for (x, y) in train_data_iterator:
            # Get input and targets and move to device
            target, inp_data = y.to(device), x.to(device)
            
            # Forward propagation
            optimizer.zero_grad()
            output = model(inp_data, target)
            
            target = target.reshape(-1)
            output = output.reshape(-1, output.shape[2])
            
            pad_mask = (target != 0)  
            target = target[pad_mask] # Select non-padding elements
            output = output[pad_mask] 
            
            # Calculate loss
            loss = criterion(output, target)
            
            # Backpropagation
            loss.backward()
            
            # Clip gradients to avoid exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            
            # Update parameters
            optimizer.step()
            
            # Accumulate total loss
            total_loss += loss.item()
            # Update total words processed
            total_words += target.size(0)
            # Calculate number of correct predictions
            correct_pred += torch.sum(torch.argmax(output, dim=1) == target).item()
            
        # Calculate average loss per batch
        avg_loss = total_loss / len(train_batch_x)
        # Calculate accuracy
        accuracy = 100*correct_pred / total_words
        
        
        # Validation
        model.eval()
        with torch.no_grad():
            val_total_loss = 0
            val_total_words = 0
            val_correct_pred = 0

            val_data_iterator = tqdm(zip(val_batch_x, val_batch_y), total=len(val_batch_x))
            for x_val, y_val in val_data_iterator:
                target_val, inp_data_val = y_val.to(device), x_val.to(device)
                output_val = model(inp_data_val, target_val)
                
                
                target_val = target_val.reshape(-1)
                output_val = output_val.reshape(-1, output_val.shape[2])
                
                pad_mask = (target_val != 0)  
                target_val = target_val[pad_mask] # Select non-padding elements
                output_val = output_val[pad_mask] 
            
                val_loss = criterion(output_val, target_val)
                val_total_loss += val_loss.item()
                val_total_words += target_val.size(0)
                val_correct_pred += torch.sum(torch.argmax(output_val, dim=1) == target_val).item()

            # Calculate validation statistics
            val_accuracy = 100*val_correct_pred / val_total_words
            val_avg_loss = val_total_loss / len(val_batch_x)

            
            
        # Total word predict correct over training
        beam_val_pred = 0
        beam_val = 0
        for i in tqdm(range(df_val.shape[0])):
            input_seq = df_val.iloc[i, 0][:-1] 
            true_seq = df_val.iloc[i, 1][1:-1]
            predicted_output = beam_search(model, input_seq, max_length, input_char_to_int, output_char_to_int, output_int_to_char, beam_width, length_penalty, cell_type)
            if true_seq == predicted_output[:-1]:
                beam_val_pred+=1
        beam_val = 100*beam_val_pred/df_val.shape[0]



        # Print statistics
        print(f"Epoch {epoch + 1} / {num_epochs} ===========================>")
        print(f"Train Accuracy Char: {accuracy:.4f}, Train Average Loss: {avg_loss:.4f}")
        print(f"Validation Accuracy Char: {val_accuracy:.4f}, Validation Average Loss: {val_avg_loss:.4f}")
        print(f"Beam Val Word Accuracy: {beam_val:.4f} Correct Prediction : {beam_val_pred}/{df_val.shape[0]}")    
        
        if wandb_log == 1:
            wandb.log({
                "train_accuracy_char": accuracy,
                "train_loss": avg_loss,
                "val_accuracy_char": val_accuracy,
                "val_loss": val_avg_loss,
                "beam_val_accuracy_word" : beam_val,
            })
        
    
    return model, beam_val


In [None]:
def main():
    wandb.init(project='DL_Assignment_3')
    config = wandb.config
    wandb.run.name = 'cell_' + config.cell_type + '_bs_' + str(config.batch_size) + '_ep_' + str(config.num_epochs) + '_op_' + str(config.optimizer) + '_drop_' + str(config.dropout) + '_bsw_' + str(config.beam_search_width) +'_emb_' + str(config.embedding_size) + '_hs_' + str(config.hidden_size) + '_elayer_' + str(config.num_layers) + '_dlayer_' + str(config.num_layers)
    
    # Load Dataset
    df_train, train_input_len, train_out_len = load_dataset('/kaggle/input/dakshina-dataset-ass-3/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv')
    df_val, val_input_len, val_out_len = load_dataset('/kaggle/input/dakshina-dataset-ass-3/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv')
    df_test, test_input_len, test_out_len = load_dataset('/kaggle/input/dakshina-dataset-ass-3/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv')

    input_max_len = max(train_input_len, val_input_len, test_input_len)
    output_max_len = max(train_out_len, val_out_len, test_out_len)
    
    max_length = max(input_max_len, output_max_len)

    # Create Look Up Table
    input_char_to_int, input_int_to_char = look_up_table(df_train[0], df_val[0], df_test[0])
    output_char_to_int, output_int_to_char = look_up_table(df_train[1], df_val[1], df_test[1])

    # Data Embedding and Converting them into Tensor
    train_inputs, train_outputs = get_tensor_object(df_train, max_length, max_length, input_char_to_int, output_char_to_int)
    val_inputs, val_outputs = get_tensor_object(df_val, max_length, max_length, input_char_to_int, output_char_to_int)
    test_inputs, test_outputs = get_tensor_object(df_test, max_length, max_length, input_char_to_int, output_char_to_int)

    # Transpose column wise
    train_inputs, train_outputs = torch.transpose(train_inputs, 0, 1), torch.transpose(train_outputs, 0, 1)
    val_inputs, val_outputs = torch.transpose(val_inputs, 0, 1), torch.transpose(val_outputs, 0, 1)
    test_inputs, test_outputs = torch.transpose(test_inputs, 0, 1), torch.transpose(test_outputs, 0, 1)


    # Initialize Hyperparameters
    input_size = len(input_char_to_int)
    output_size = len(output_char_to_int)
    embedding_size = config.embedding_size
    hidden_size = config.hidden_size
    enc_num_layers = config.num_layers
    dec_num_layers = config.num_layers
    cell_type = config.cell_type
    dropout = config.dropout
    learning_rate = config.learning_rate
    batch_size = config.batch_size
    num_epochs = config.num_epochs  
    optimizer = config.optimizer  
    beam_width = config.beam_search_width
    bidirectional = config.bidirectional
    length_penalty = config.length_penalty
    teacher_forcing = config.teacher_forcing
    learning_rate = config.learning_rate

    # Create train data batch
    train_batch_x, train_batch_y = torch.split(train_inputs, batch_size, dim=1), torch.split(train_outputs, batch_size, dim=1)
    # Validation data batch
    val_batch_x, val_batch_y = torch.split(val_inputs, batch_size, dim=1), torch.split(val_outputs, batch_size, dim=1)


    # Intialize encoder, decoder and seq2seq model
    encoder = Encoder(input_size, embedding_size, hidden_size, enc_num_layers, dropout, bidirectional, cell_type).to(device)
    decoder = Decoder(output_size, embedding_size, hidden_size, output_size, dec_num_layers, dropout, bidirectional, cell_type).to(device)  
    model = Seq2Seq(encoder, decoder, output_char_to_int, teacher_forcing, cell_type).to(device)

    # Print total number of parameters in the model
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(model)
    print(f'Total Trainable Parameters: {total_params}')


    # Loss function and Optimizer
    criterion = nn.CrossEntropyLoss()
    if optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    elif optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
    elif optimizer == 'nadam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)
    else:
        print("Incorrect Optmizer !!!!")

    # TRAINING
    model, acc = train(model, num_epochs, criterion, optimizer, train_batch_x, train_batch_y, val_batch_x, val_batch_y, df_val, input_char_to_int, output_char_to_int, output_int_to_char, beam_width, length_penalty, cell_type, max_length, 1)
    wandb.log({
            "accuracy": acc,
        })
    
# SWEEP CONFIG
sweep_config = {
    'name': 'sweep_2',
    'method': 'bayes',  
    'metric': {'name': 'accuracy', 'goal': 'maximize'},
    'parameters': {
        'embedding_size': {'values': [256]},  
        'hidden_size': {'values': [256, 512, 1024]},
        'num_layers': {'values': [1, 2]},  
        'cell_type': {'values':['LSTM', "GRU", "RNN"]}, # RNN, LSTM, GRU
        'dropout': {'values': [0.3]},
        'learning_rate': {'values': [0.01, 0.001]},
        'batch_size': {'values': [32]},
        'num_epochs': {'values': [10]},
        'optimizer': {'values': ['adagrad']}, # ['sgd', 'rmsprop', 'adam', 'nadam']
        'beam_search_width': {'values': [1, 4]},
        'length_penalty' : {'values': [0.6]},
        'bidirectional': {'values': [True]},
        'teacher_forcing': {'values': [0.7]}
    }
}

# RUN SWEEP ID with agent
# sweep_id = wandb.sweep(sweep_config, project = 'DL_Assignment_3')
# wandb.agent(sweep_id, main, count = 30)
# wandb.finish()

sweep_id = wandb.sweep(sweep_config, project = 'DL_Assignment_3')
wandb.agent('az4eniz8', main, count = 30)
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: 6cm8l4vd
Sweep URL: https://wandb.ai/cs24m019-iitm/DL_Assignment_3/sweeps/6cm8l4vd


[34m[1mwandb[0m: Agent Starting Run: 0u8scpf2 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7
[34m[1mwandb[0m: Currently logged in as: [33mcs24m019[0m ([33mcs24m019-iitm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): RNN(256, 512, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): RNN(256, 512, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 1631005


100%|██████████| 1382/1382 [00:44<00:00, 31.19it/s]
100%|██████████| 137/137 [00:01<00:00, 98.95it/s]
100%|██████████| 4358/4358 [01:23<00:00, 52.29it/s]


Train Accuracy Char: 36.7620, Train Average Loss: 2.2407
Validation Accuracy Char: 19.2913, Validation Average Loss: 3.0902
Beam Val Word Accuracy: 0.0000 Correct Prediction : 0/4358


100%|██████████| 1382/1382 [00:43<00:00, 31.54it/s]
100%|██████████| 137/137 [00:01<00:00, 98.11it/s]
100%|██████████| 4358/4358 [01:26<00:00, 50.40it/s]


Train Accuracy Char: 36.7735, Train Average Loss: 2.1892
Validation Accuracy Char: 22.0663, Validation Average Loss: 2.8646
Beam Val Word Accuracy: 0.0000 Correct Prediction : 0/4358


100%|██████████| 1382/1382 [00:44<00:00, 31.40it/s]
100%|██████████| 137/137 [00:01<00:00, 98.11it/s]
100%|██████████| 4358/4358 [01:31<00:00, 47.72it/s]


Train Accuracy Char: 38.4229, Train Average Loss: 2.1337
Validation Accuracy Char: 23.4153, Validation Average Loss: 2.7004
Beam Val Word Accuracy: 0.0229 Correct Prediction : 1/4358


100%|██████████| 1382/1382 [00:44<00:00, 31.12it/s]
100%|██████████| 137/137 [00:01<00:00, 97.13it/s]
100%|██████████| 4358/4358 [01:41<00:00, 42.92it/s]


Train Accuracy Char: 41.3787, Train Average Loss: 2.0257
Validation Accuracy Char: 29.3533, Validation Average Loss: 2.4737
Beam Val Word Accuracy: 0.0229 Correct Prediction : 1/4358


100%|██████████| 1382/1382 [00:44<00:00, 31.24it/s]
100%|██████████| 137/137 [00:01<00:00, 94.32it/s]
100%|██████████| 4358/4358 [01:48<00:00, 40.05it/s]


Train Accuracy Char: 44.1747, Train Average Loss: 1.9210
Validation Accuracy Char: 34.3251, Validation Average Loss: 2.2975
Beam Val Word Accuracy: 0.1147 Correct Prediction : 5/4358


100%|██████████| 1382/1382 [00:44<00:00, 31.36it/s]
100%|██████████| 137/137 [00:01<00:00, 96.97it/s]
100%|██████████| 4358/4358 [01:50<00:00, 39.29it/s]


Train Accuracy Char: 46.1464, Train Average Loss: 1.8477
Validation Accuracy Char: 38.1408, Validation Average Loss: 2.1245
Beam Val Word Accuracy: 0.2524 Correct Prediction : 11/4358


100%|██████████| 1382/1382 [00:44<00:00, 31.25it/s]
100%|██████████| 137/137 [00:01<00:00, 97.60it/s]
100%|██████████| 4358/4358 [01:51<00:00, 39.20it/s]


Train Accuracy Char: 47.8504, Train Average Loss: 1.7811
Validation Accuracy Char: 40.4764, Validation Average Loss: 2.0208
Beam Val Word Accuracy: 0.4819 Correct Prediction : 21/4358


100%|██████████| 1382/1382 [00:44<00:00, 31.34it/s]
100%|██████████| 137/137 [00:01<00:00, 98.64it/s]
100%|██████████| 4358/4358 [01:48<00:00, 40.05it/s]


Train Accuracy Char: 49.2946, Train Average Loss: 1.7189
Validation Accuracy Char: 43.0253, Validation Average Loss: 1.9285
Beam Val Word Accuracy: 0.8031 Correct Prediction : 35/4358


100%|██████████| 1382/1382 [00:44<00:00, 31.32it/s]
100%|██████████| 137/137 [00:01<00:00, 98.89it/s]
100%|██████████| 4358/4358 [01:49<00:00, 39.65it/s]


Train Accuracy Char: 50.8237, Train Average Loss: 1.6673
Validation Accuracy Char: 44.7519, Validation Average Loss: 1.8630
Beam Val Word Accuracy: 1.3768 Correct Prediction : 60/4358


100%|██████████| 1382/1382 [00:44<00:00, 31.30it/s]
100%|██████████| 137/137 [00:01<00:00, 98.75it/s]
100%|██████████| 4358/4358 [01:51<00:00, 39.15it/s]

Train Accuracy Char: 52.2182, Train Average Loss: 1.6134
Validation Accuracy Char: 46.5659, Validation Average Loss: 1.7907
Beam Val Word Accuracy: 2.1340 Correct Prediction : 93/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▁▁▁▁▂▃▄▆█
train_accuracy_char,▁▁▂▃▄▅▆▇▇█
train_loss,█▇▇▆▄▄▃▂▂▁
val_accuracy_char,▁▂▂▄▅▆▆▇██
val_loss,█▇▆▅▄▃▂▂▁▁

0,1
accuracy,2.13401
beam_val_accuracy_word,2.13401
train_accuracy_char,52.21824
train_loss,1.61342
val_accuracy_char,46.56594
val_loss,1.79066


[34m[1mwandb[0m: Agent Starting Run: 1qg3e780 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7




Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): LSTM(256, 256, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=512, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): LSTM(256, 256, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 2144541


100%|██████████| 1382/1382 [00:54<00:00, 25.44it/s]
100%|██████████| 137/137 [00:01<00:00, 98.99it/s]
100%|██████████| 4358/4358 [01:08<00:00, 63.93it/s]


Train Accuracy Char: 54.3535, Train Average Loss: 1.5309
Validation Accuracy Char: 50.5614, Validation Average Loss: 1.6066
Beam Val Word Accuracy: 0.3212 Correct Prediction : 14/4358


100%|██████████| 1382/1382 [00:54<00:00, 25.39it/s]
100%|██████████| 137/137 [00:01<00:00, 97.20it/s]
100%|██████████| 4358/4358 [01:11<00:00, 60.94it/s]


Train Accuracy Char: 65.7220, Train Average Loss: 1.1381
Validation Accuracy Char: 60.4615, Validation Average Loss: 1.3044
Beam Val Word Accuracy: 1.1932 Correct Prediction : 52/4358


100%|██████████| 1382/1382 [00:54<00:00, 25.43it/s]
100%|██████████| 137/137 [00:01<00:00, 98.90it/s]
100%|██████████| 4358/4358 [01:13<00:00, 58.98it/s]


Train Accuracy Char: 69.1544, Train Average Loss: 1.0258
Validation Accuracy Char: 64.8860, Validation Average Loss: 1.1712
Beam Val Word Accuracy: 1.3309 Correct Prediction : 58/4358


100%|██████████| 1382/1382 [00:54<00:00, 25.37it/s]
100%|██████████| 137/137 [00:01<00:00, 98.28it/s]
100%|██████████| 4358/4358 [01:14<00:00, 58.45it/s]


Train Accuracy Char: 71.0204, Train Average Loss: 0.9653
Validation Accuracy Char: 67.8589, Validation Average Loss: 1.0711
Beam Val Word Accuracy: 1.4915 Correct Prediction : 65/4358


100%|██████████| 1382/1382 [00:54<00:00, 25.50it/s]
100%|██████████| 137/137 [00:01<00:00, 98.33it/s]
100%|██████████| 4358/4358 [01:15<00:00, 57.45it/s]


Train Accuracy Char: 72.2538, Train Average Loss: 0.9238
Validation Accuracy Char: 68.4139, Validation Average Loss: 1.0507
Beam Val Word Accuracy: 1.6980 Correct Prediction : 74/4358


100%|██████████| 1382/1382 [00:54<00:00, 25.37it/s]
100%|██████████| 137/137 [00:01<00:00, 99.82it/s] 
100%|██████████| 4358/4358 [01:16<00:00, 56.88it/s]


Train Accuracy Char: 73.1369, Train Average Loss: 0.8950
Validation Accuracy Char: 69.6832, Validation Average Loss: 1.0064
Beam Val Word Accuracy: 1.6751 Correct Prediction : 73/4358


100%|██████████| 1382/1382 [00:54<00:00, 25.54it/s]
100%|██████████| 137/137 [00:01<00:00, 99.00it/s]
100%|██████████| 4358/4358 [01:17<00:00, 56.57it/s]


Train Accuracy Char: 73.9206, Train Average Loss: 0.8683
Validation Accuracy Char: 70.3872, Validation Average Loss: 0.9850
Beam Val Word Accuracy: 1.9734 Correct Prediction : 86/4358


100%|██████████| 1382/1382 [00:54<00:00, 25.40it/s]
100%|██████████| 137/137 [00:01<00:00, 96.77it/s]
100%|██████████| 4358/4358 [01:18<00:00, 55.80it/s]


Train Accuracy Char: 74.4951, Train Average Loss: 0.8504
Validation Accuracy Char: 71.4304, Validation Average Loss: 0.9514
Beam Val Word Accuracy: 2.1111 Correct Prediction : 92/4358


100%|██████████| 1382/1382 [00:54<00:00, 25.34it/s]
100%|██████████| 137/137 [00:01<00:00, 97.55it/s]
100%|██████████| 4358/4358 [01:18<00:00, 55.67it/s]


Train Accuracy Char: 74.8113, Train Average Loss: 0.8378
Validation Accuracy Char: 71.5409, Validation Average Loss: 0.9439
Beam Val Word Accuracy: 2.1111 Correct Prediction : 92/4358


100%|██████████| 1382/1382 [00:54<00:00, 25.35it/s]
100%|██████████| 137/137 [00:01<00:00, 96.65it/s]
100%|██████████| 4358/4358 [01:18<00:00, 55.54it/s]

Train Accuracy Char: 75.2554, Train Average Loss: 0.8226
Validation Accuracy Char: 72.3220, Validation Average Loss: 0.9193
Beam Val Word Accuracy: 2.0422 Correct Prediction : 89/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▄▅▆▆▆▇███
train_accuracy_char,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy_char,▁▄▆▇▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
accuracy,2.04222
beam_val_accuracy_word,2.04222
train_accuracy_char,75.25544
train_loss,0.82263
val_accuracy_char,72.322
val_loss,0.91931


[34m[1mwandb[0m: Agent Starting Run: nsjjt25e with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 1024
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7


Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): GRU(256, 1024, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=2048, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): GRU(256, 1024, num_layers=2, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 53610269


100%|██████████| 1382/1382 [03:37<00:00,  6.36it/s]
100%|██████████| 137/137 [00:05<00:00, 22.93it/s]
100%|██████████| 4358/4358 [00:52<00:00, 83.52it/s]


Train Accuracy Char: 41.9042, Train Average Loss: 2.1511
Validation Accuracy Char: 34.6951, Validation Average Loss: 2.3853
Beam Val Word Accuracy: 0.1377 Correct Prediction : 6/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.40it/s]
100%|██████████| 137/137 [00:05<00:00, 23.17it/s]
100%|██████████| 4358/4358 [00:53<00:00, 81.77it/s]


Train Accuracy Char: 63.2247, Train Average Loss: 1.2060
Validation Accuracy Char: 56.2013, Validation Average Loss: 1.5369
Beam Val Word Accuracy: 4.1533 Correct Prediction : 181/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.10it/s]
100%|██████████| 4358/4358 [00:54<00:00, 79.69it/s]


Train Accuracy Char: 72.4813, Train Average Loss: 0.9129
Validation Accuracy Char: 65.2972, Validation Average Loss: 1.1417
Beam Val Word Accuracy: 13.1941 Correct Prediction : 575/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.32it/s]
100%|██████████| 4358/4358 [00:54<00:00, 79.44it/s]


Train Accuracy Char: 75.5225, Train Average Loss: 0.8076
Validation Accuracy Char: 70.5260, Validation Average Loss: 0.9596
Beam Val Word Accuracy: 25.7687 Correct Prediction : 1123/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.31it/s]
100%|██████████| 4358/4358 [00:55<00:00, 79.13it/s]


Train Accuracy Char: 77.3331, Train Average Loss: 0.7439
Validation Accuracy Char: 73.9048, Validation Average Loss: 0.8648
Beam Val Word Accuracy: 34.6948 Correct Prediction : 1512/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.42it/s]
100%|██████████| 137/137 [00:05<00:00, 23.16it/s]
100%|██████████| 4358/4358 [00:55<00:00, 79.21it/s]


Train Accuracy Char: 78.6810, Train Average Loss: 0.6938
Validation Accuracy Char: 75.0585, Validation Average Loss: 0.8317
Beam Val Word Accuracy: 37.6778 Correct Prediction : 1642/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.34it/s]
100%|██████████| 4358/4358 [00:54<00:00, 79.46it/s]


Train Accuracy Char: 79.3855, Train Average Loss: 0.6643
Validation Accuracy Char: 76.2378, Validation Average Loss: 0.8036
Beam Val Word Accuracy: 39.9954 Correct Prediction : 1743/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 22.88it/s]
100%|██████████| 4358/4358 [00:55<00:00, 78.98it/s]


Train Accuracy Char: 79.9472, Train Average Loss: 0.6420
Validation Accuracy Char: 76.2687, Validation Average Loss: 0.7957
Beam Val Word Accuracy: 39.8118 Correct Prediction : 1735/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.40it/s]
100%|██████████| 137/137 [00:05<00:00, 23.05it/s]
100%|██████████| 4358/4358 [00:55<00:00, 78.53it/s]


Train Accuracy Char: 80.3033, Train Average Loss: 0.6250
Validation Accuracy Char: 76.4357, Validation Average Loss: 0.7900
Beam Val Word Accuracy: 41.5099 Correct Prediction : 1809/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.43it/s]
100%|██████████| 137/137 [00:05<00:00, 23.26it/s]
100%|██████████| 4358/4358 [00:55<00:00, 79.09it/s]

Train Accuracy Char: 80.7572, Train Average Loss: 0.6073
Validation Accuracy Char: 76.9085, Validation Average Loss: 0.7842
Beam Val Word Accuracy: 41.9917 Correct Prediction : 1830/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▂▃▅▇▇████
train_accuracy_char,▁▅▇▇▇█████
train_loss,█▄▂▂▂▁▁▁▁▁
val_accuracy_char,▁▅▆▇██████
val_loss,█▄▃▂▁▁▁▁▁▁

0,1
accuracy,41.99174
beam_val_accuracy_word,41.99174
train_accuracy_char,80.75721
train_loss,0.60725
val_accuracy_char,76.90845
val_loss,0.78421


[34m[1mwandb[0m: Agent Starting Run: oxbru4ur with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 1024
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7


Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): RNN(256, 1024, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=2048, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): RNN(256, 1024, num_layers=2, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 17925917


100%|██████████| 1382/1382 [01:23<00:00, 16.54it/s]
100%|██████████| 137/137 [00:02<00:00, 65.17it/s]
100%|██████████| 4358/4358 [02:00<00:00, 36.14it/s]


Train Accuracy Char: 45.3558, Train Average Loss: 1.9007
Validation Accuracy Char: 36.0492, Validation Average Loss: 2.2580
Beam Val Word Accuracy: 0.2065 Correct Prediction : 9/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.52it/s]
100%|██████████| 137/137 [00:02<00:00, 64.20it/s]
100%|██████████| 4358/4358 [02:02<00:00, 35.51it/s]


Train Accuracy Char: 53.7444, Train Average Loss: 1.5543
Validation Accuracy Char: 44.7545, Validation Average Loss: 1.8654
Beam Val Word Accuracy: 1.3768 Correct Prediction : 60/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.54it/s]
100%|██████████| 137/137 [00:02<00:00, 64.20it/s]
100%|██████████| 4358/4358 [02:04<00:00, 35.02it/s]


Train Accuracy Char: 57.4756, Train Average Loss: 1.4165
Validation Accuracy Char: 49.3538, Validation Average Loss: 1.6480
Beam Val Word Accuracy: 2.9142 Correct Prediction : 127/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.53it/s]
100%|██████████| 137/137 [00:02<00:00, 63.95it/s]
100%|██████████| 4358/4358 [02:06<00:00, 34.51it/s]


Train Accuracy Char: 59.6079, Train Average Loss: 1.3406
Validation Accuracy Char: 51.5352, Validation Average Loss: 1.5638
Beam Val Word Accuracy: 4.1762 Correct Prediction : 182/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.53it/s]
100%|██████████| 137/137 [00:02<00:00, 64.37it/s]
100%|██████████| 4358/4358 [02:09<00:00, 33.70it/s]


Train Accuracy Char: 61.5447, Train Average Loss: 1.2787
Validation Accuracy Char: 55.8802, Validation Average Loss: 1.4360
Beam Val Word Accuracy: 8.1918 Correct Prediction : 357/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.52it/s]
100%|██████████| 137/137 [00:02<00:00, 63.72it/s]
100%|██████████| 4358/4358 [02:08<00:00, 33.93it/s]


Train Accuracy Char: 62.6168, Train Average Loss: 1.2405
Validation Accuracy Char: 57.1597, Validation Average Loss: 1.3934
Beam Val Word Accuracy: 9.8669 Correct Prediction : 430/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.50it/s]
100%|██████████| 137/137 [00:02<00:00, 64.13it/s]
100%|██████████| 4358/4358 [02:09<00:00, 33.72it/s]


Train Accuracy Char: 63.1962, Train Average Loss: 1.2168
Validation Accuracy Char: 59.8654, Validation Average Loss: 1.3085
Beam Val Word Accuracy: 14.2267 Correct Prediction : 620/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.51it/s]
100%|██████████| 137/137 [00:02<00:00, 64.15it/s]
100%|██████████| 4358/4358 [02:10<00:00, 33.40it/s]


Train Accuracy Char: 64.1194, Train Average Loss: 1.1872
Validation Accuracy Char: 61.1501, Validation Average Loss: 1.2707
Beam Val Word Accuracy: 16.5902 Correct Prediction : 723/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.55it/s]
100%|██████████| 137/137 [00:02<00:00, 63.86it/s]
100%|██████████| 4358/4358 [02:09<00:00, 33.54it/s]


Train Accuracy Char: 64.7491, Train Average Loss: 1.1653
Validation Accuracy Char: 62.7277, Validation Average Loss: 1.2285
Beam Val Word Accuracy: 18.5636 Correct Prediction : 809/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.56it/s]
100%|██████████| 137/137 [00:02<00:00, 63.95it/s]
100%|██████████| 4358/4358 [02:11<00:00, 33.22it/s]

Train Accuracy Char: 65.3693, Train Average Loss: 1.1436
Validation Accuracy Char: 63.1080, Validation Average Loss: 1.2147
Beam Val Word Accuracy: 20.4911 Correct Prediction : 893/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▁▂▂▄▄▆▇▇█
train_accuracy_char,▁▄▅▆▇▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy_char,▁▃▄▅▆▆▇▇██
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
accuracy,20.49105
beam_val_accuracy_word,20.49105
train_accuracy_char,65.36932
train_loss,1.14358
val_accuracy_char,63.10799
val_loss,1.21469


[34m[1mwandb[0m: Agent Starting Run: 3wuki3aj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7


Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): RNN(256, 256, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=512, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): RNN(256, 256, num_layers=2, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 1354013


100%|██████████| 1382/1382 [00:51<00:00, 26.63it/s]
100%|██████████| 137/137 [00:01<00:00, 95.81it/s]
100%|██████████| 4358/4358 [00:25<00:00, 170.50it/s]


Train Accuracy Char: 33.2528, Train Average Loss: 2.3369
Validation Accuracy Char: 27.3799, Validation Average Loss: 2.5377
Beam Val Word Accuracy: 0.0000 Correct Prediction : 0/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.65it/s]
100%|██████████| 137/137 [00:01<00:00, 92.81it/s]
100%|██████████| 4358/4358 [00:26<00:00, 164.78it/s]


Train Accuracy Char: 37.7747, Train Average Loss: 2.1955
Validation Accuracy Char: 31.1647, Validation Average Loss: 2.3515
Beam Val Word Accuracy: 0.0229 Correct Prediction : 1/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.73it/s]
100%|██████████| 137/137 [00:01<00:00, 93.77it/s]
100%|██████████| 4358/4358 [00:26<00:00, 162.72it/s]


Train Accuracy Char: 40.4133, Train Average Loss: 2.1158
Validation Accuracy Char: 34.1838, Validation Average Loss: 2.2482
Beam Val Word Accuracy: 0.0459 Correct Prediction : 2/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.66it/s]
100%|██████████| 137/137 [00:01<00:00, 93.58it/s]
100%|██████████| 4358/4358 [00:26<00:00, 161.54it/s]


Train Accuracy Char: 41.9242, Train Average Loss: 2.0574
Validation Accuracy Char: 35.3683, Validation Average Loss: 2.1939
Beam Val Word Accuracy: 0.0459 Correct Prediction : 2/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.60it/s]
100%|██████████| 137/137 [00:01<00:00, 93.63it/s]
100%|██████████| 4358/4358 [00:26<00:00, 161.82it/s]


Train Accuracy Char: 43.3463, Train Average Loss: 2.0020
Validation Accuracy Char: 36.1854, Validation Average Loss: 2.1545
Beam Val Word Accuracy: 0.1147 Correct Prediction : 5/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.63it/s]
100%|██████████| 137/137 [00:01<00:00, 95.48it/s]
100%|██████████| 4358/4358 [00:27<00:00, 160.93it/s]


Train Accuracy Char: 44.1514, Train Average Loss: 1.9699
Validation Accuracy Char: 37.4778, Validation Average Loss: 2.1216
Beam Val Word Accuracy: 0.1377 Correct Prediction : 6/4358


100%|██████████| 1382/1382 [00:52<00:00, 26.57it/s]
100%|██████████| 137/137 [00:01<00:00, 95.30it/s]
100%|██████████| 4358/4358 [00:26<00:00, 164.03it/s]


Train Accuracy Char: 45.0499, Train Average Loss: 1.9382
Validation Accuracy Char: 37.4753, Validation Average Loss: 2.1294
Beam Val Word Accuracy: 0.1606 Correct Prediction : 7/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.78it/s]
100%|██████████| 137/137 [00:01<00:00, 94.38it/s]
100%|██████████| 4358/4358 [00:26<00:00, 162.30it/s]


Train Accuracy Char: 45.6959, Train Average Loss: 1.9130
Validation Accuracy Char: 38.3771, Validation Average Loss: 2.0985
Beam Val Word Accuracy: 0.2065 Correct Prediction : 9/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.63it/s]
100%|██████████| 137/137 [00:01<00:00, 95.52it/s]
100%|██████████| 4358/4358 [00:27<00:00, 161.30it/s]


Train Accuracy Char: 46.2556, Train Average Loss: 1.8864
Validation Accuracy Char: 39.1300, Validation Average Loss: 2.0651
Beam Val Word Accuracy: 0.2295 Correct Prediction : 10/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.75it/s]
100%|██████████| 137/137 [00:01<00:00, 95.22it/s]
100%|██████████| 4358/4358 [00:27<00:00, 160.80it/s]

Train Accuracy Char: 46.7996, Train Average Loss: 1.8656
Validation Accuracy Char: 40.0164, Validation Average Loss: 2.0365
Beam Val Word Accuracy: 0.2983 Correct Prediction : 13/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▂▂▂▄▄▅▆▆█
train_accuracy_char,▁▃▅▅▆▇▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy_char,▁▃▅▅▆▇▇▇██
val_loss,█▅▄▃▃▂▂▂▁▁

0,1
accuracy,0.2983
beam_val_accuracy_word,0.2983
train_accuracy_char,46.79959
train_loss,1.86557
val_accuracy_char,40.01644
val_loss,2.03651


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ly756fpn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7




Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): GRU(256, 512, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): GRU(256, 512, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 4784925


100%|██████████| 1382/1382 [00:53<00:00, 25.84it/s]
100%|██████████| 137/137 [00:01<00:00, 90.83it/s]
100%|██████████| 4358/4358 [00:30<00:00, 140.59it/s]


Train Accuracy Char: 58.3505, Train Average Loss: 1.3790
Validation Accuracy Char: 55.8673, Validation Average Loss: 1.5293
Beam Val Word Accuracy: 6.8839 Correct Prediction : 300/4358


100%|██████████| 1382/1382 [00:53<00:00, 25.71it/s]
100%|██████████| 137/137 [00:01<00:00, 88.87it/s]
100%|██████████| 4358/4358 [00:31<00:00, 138.65it/s]


Train Accuracy Char: 70.3657, Train Average Loss: 0.9840
Validation Accuracy Char: 62.6635, Validation Average Loss: 1.2695
Beam Val Word Accuracy: 14.4332 Correct Prediction : 629/4358


100%|██████████| 1382/1382 [00:53<00:00, 25.66it/s]
100%|██████████| 137/137 [00:01<00:00, 90.07it/s]
100%|██████████| 4358/4358 [00:31<00:00, 136.36it/s]


Train Accuracy Char: 73.9494, Train Average Loss: 0.8698
Validation Accuracy Char: 67.0881, Validation Average Loss: 1.0942
Beam Val Word Accuracy: 20.4681 Correct Prediction : 892/4358


100%|██████████| 1382/1382 [00:53<00:00, 25.77it/s]
100%|██████████| 137/137 [00:01<00:00, 90.38it/s]
100%|██████████| 4358/4358 [00:32<00:00, 134.88it/s]


Train Accuracy Char: 75.5776, Train Average Loss: 0.8142
Validation Accuracy Char: 69.5804, Validation Average Loss: 1.0142
Beam Val Word Accuracy: 25.1033 Correct Prediction : 1094/4358


100%|██████████| 1382/1382 [00:53<00:00, 25.77it/s]
100%|██████████| 137/137 [00:01<00:00, 89.62it/s]
100%|██████████| 4358/4358 [00:32<00:00, 134.82it/s]


Train Accuracy Char: 76.6284, Train Average Loss: 0.7753
Validation Accuracy Char: 70.3358, Validation Average Loss: 0.9876
Beam Val Word Accuracy: 28.4075 Correct Prediction : 1238/4358


100%|██████████| 1382/1382 [00:53<00:00, 25.68it/s]
100%|██████████| 137/137 [00:01<00:00, 91.52it/s]
100%|██████████| 4358/4358 [00:32<00:00, 135.30it/s]


Train Accuracy Char: 77.3612, Train Average Loss: 0.7489
Validation Accuracy Char: 72.0548, Validation Average Loss: 0.9320
Beam Val Word Accuracy: 30.2432 Correct Prediction : 1318/4358


100%|██████████| 1382/1382 [00:53<00:00, 25.79it/s]
100%|██████████| 137/137 [00:01<00:00, 89.54it/s]
100%|██████████| 4358/4358 [00:32<00:00, 135.07it/s]


Train Accuracy Char: 77.9676, Train Average Loss: 0.7259
Validation Accuracy Char: 72.4993, Validation Average Loss: 0.9152
Beam Val Word Accuracy: 32.3313 Correct Prediction : 1409/4358


100%|██████████| 1382/1382 [00:53<00:00, 25.73it/s]
100%|██████████| 137/137 [00:01<00:00, 87.12it/s]
100%|██████████| 4358/4358 [00:32<00:00, 134.82it/s]


Train Accuracy Char: 78.3883, Train Average Loss: 0.7102
Validation Accuracy Char: 73.5862, Validation Average Loss: 0.8721
Beam Val Word Accuracy: 33.5704 Correct Prediction : 1463/4358


100%|██████████| 1382/1382 [00:53<00:00, 25.71it/s]
100%|██████████| 137/137 [00:01<00:00, 90.29it/s]
100%|██████████| 4358/4358 [00:32<00:00, 134.33it/s]


Train Accuracy Char: 78.7356, Train Average Loss: 0.6974
Validation Accuracy Char: 73.9664, Validation Average Loss: 0.8662
Beam Val Word Accuracy: 35.4291 Correct Prediction : 1544/4358


100%|██████████| 1382/1382 [00:53<00:00, 25.65it/s]
100%|██████████| 137/137 [00:01<00:00, 88.50it/s]
100%|██████████| 4358/4358 [00:32<00:00, 135.01it/s]

Train Accuracy Char: 79.0120, Train Average Loss: 0.6869
Validation Accuracy Char: 74.4007, Validation Average Loss: 0.8541
Beam Val Word Accuracy: 36.2552 Correct Prediction : 1580/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▃▄▅▆▇▇▇██
train_accuracy_char,▁▅▆▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy_char,▁▄▅▆▆▇▇███
val_loss,█▅▃▃▂▂▂▁▁▁

0,1
accuracy,36.25516
beam_val_accuracy_word,36.25516
train_accuracy_char,79.01198
train_loss,0.68693
val_accuracy_char,74.40068
val_loss,0.85413


[34m[1mwandb[0m: Agent Starting Run: flc5y841 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 1024
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7


Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): RNN(256, 1024, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=2048, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): RNN(256, 1024, num_layers=2, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 17925917


100%|██████████| 1382/1382 [01:23<00:00, 16.57it/s]
100%|██████████| 137/137 [00:02<00:00, 65.56it/s]
100%|██████████| 4358/4358 [02:00<00:00, 36.15it/s]


Train Accuracy Char: 45.4323, Train Average Loss: 1.8956
Validation Accuracy Char: 36.5323, Validation Average Loss: 2.2659
Beam Val Word Accuracy: 0.1836 Correct Prediction : 8/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.52it/s]
100%|██████████| 137/137 [00:02<00:00, 64.14it/s]
100%|██████████| 4358/4358 [02:04<00:00, 34.87it/s]


Train Accuracy Char: 53.9298, Train Average Loss: 1.5506
Validation Accuracy Char: 44.5078, Validation Average Loss: 1.8367
Beam Val Word Accuracy: 0.6884 Correct Prediction : 30/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.52it/s]
100%|██████████| 137/137 [00:02<00:00, 63.94it/s]
100%|██████████| 4358/4358 [02:07<00:00, 34.26it/s]


Train Accuracy Char: 58.0267, Train Average Loss: 1.4016
Validation Accuracy Char: 49.8060, Validation Average Loss: 1.6158
Beam Val Word Accuracy: 3.5796 Correct Prediction : 156/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.53it/s]
100%|██████████| 137/137 [00:02<00:00, 64.49it/s]
100%|██████████| 4358/4358 [02:09<00:00, 33.58it/s]


Train Accuracy Char: 60.1058, Train Average Loss: 1.3261
Validation Accuracy Char: 52.9690, Validation Average Loss: 1.5389
Beam Val Word Accuracy: 4.5893 Correct Prediction : 200/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.54it/s]
100%|██████████| 137/137 [00:02<00:00, 63.97it/s]
100%|██████████| 4358/4358 [02:10<00:00, 33.39it/s]


Train Accuracy Char: 61.4588, Train Average Loss: 1.2743
Validation Accuracy Char: 55.2712, Validation Average Loss: 1.4632
Beam Val Word Accuracy: 7.0216 Correct Prediction : 306/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.49it/s]
100%|██████████| 137/137 [00:02<00:00, 64.09it/s]
100%|██████████| 4358/4358 [02:11<00:00, 33.21it/s]


Train Accuracy Char: 62.5625, Train Average Loss: 1.2357
Validation Accuracy Char: 55.8673, Validation Average Loss: 1.4258
Beam Val Word Accuracy: 8.0083 Correct Prediction : 349/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.52it/s]
100%|██████████| 137/137 [00:02<00:00, 64.23it/s]
100%|██████████| 4358/4358 [02:11<00:00, 33.21it/s]


Train Accuracy Char: 63.3442, Train Average Loss: 1.2088
Validation Accuracy Char: 57.7404, Validation Average Loss: 1.3686
Beam Val Word Accuracy: 10.1423 Correct Prediction : 442/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.51it/s]
100%|██████████| 137/137 [00:02<00:00, 64.52it/s]
100%|██████████| 4358/4358 [02:11<00:00, 33.23it/s]


Train Accuracy Char: 64.2229, Train Average Loss: 1.1801
Validation Accuracy Char: 58.9249, Validation Average Loss: 1.3356
Beam Val Word Accuracy: 11.6108 Correct Prediction : 506/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.53it/s]
100%|██████████| 137/137 [00:02<00:00, 64.44it/s]
100%|██████████| 4358/4358 [02:12<00:00, 32.85it/s]


Train Accuracy Char: 64.8113, Train Average Loss: 1.1611
Validation Accuracy Char: 61.2503, Validation Average Loss: 1.2628
Beam Val Word Accuracy: 15.6953 Correct Prediction : 684/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.52it/s]
100%|██████████| 137/137 [00:02<00:00, 64.03it/s]
100%|██████████| 4358/4358 [02:14<00:00, 32.51it/s]

Train Accuracy Char: 65.5717, Train Average Loss: 1.1351
Validation Accuracy Char: 62.2190, Validation Average Loss: 1.2490
Beam Val Word Accuracy: 18.1735 Correct Prediction : 792/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▁▂▃▄▄▅▅▇█
train_accuracy_char,▁▄▅▆▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy_char,▁▃▅▅▆▆▇▇██
val_loss,█▅▄▃▂▂▂▂▁▁

0,1
accuracy,18.17347
beam_val_accuracy_word,18.17347
train_accuracy_char,65.57171
train_loss,1.13508
val_accuracy_char,62.21897
val_loss,1.249


[34m[1mwandb[0m: Agent Starting Run: y8abin59 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 1024
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7


Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): GRU(256, 1024, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=2048, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): GRU(256, 1024, num_layers=2, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 53610269


100%|██████████| 1382/1382 [03:36<00:00,  6.37it/s]
100%|██████████| 137/137 [00:05<00:00, 23.27it/s]
100%|██████████| 4358/4358 [00:51<00:00, 84.15it/s]


Train Accuracy Char: 43.2292, Train Average Loss: 2.1351
Validation Accuracy Char: 38.9681, Validation Average Loss: 2.3466
Beam Val Word Accuracy: 0.4130 Correct Prediction : 18/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.35it/s]
100%|██████████| 4358/4358 [00:54<00:00, 80.10it/s]


Train Accuracy Char: 64.3658, Train Average Loss: 1.1716
Validation Accuracy Char: 58.0976, Validation Average Loss: 1.4828
Beam Val Word Accuracy: 3.8091 Correct Prediction : 166/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.10it/s]
100%|██████████| 4358/4358 [00:54<00:00, 79.74it/s]


Train Accuracy Char: 72.2357, Train Average Loss: 0.9250
Validation Accuracy Char: 64.3285, Validation Average Loss: 1.2263
Beam Val Word Accuracy: 11.7944 Correct Prediction : 514/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.40it/s]
100%|██████████| 137/137 [00:05<00:00, 23.08it/s]
100%|██████████| 4358/4358 [00:55<00:00, 78.94it/s]


Train Accuracy Char: 75.5424, Train Average Loss: 0.8124
Validation Accuracy Char: 67.3424, Validation Average Loss: 1.0844
Beam Val Word Accuracy: 17.3015 Correct Prediction : 754/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.27it/s]
100%|██████████| 4358/4358 [00:55<00:00, 78.90it/s]


Train Accuracy Char: 77.0007, Train Average Loss: 0.7580
Validation Accuracy Char: 69.3158, Validation Average Loss: 1.0147
Beam Val Word Accuracy: 21.8219 Correct Prediction : 951/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.16it/s]
100%|██████████| 4358/4358 [00:55<00:00, 78.93it/s]


Train Accuracy Char: 78.0800, Train Average Loss: 0.7162
Validation Accuracy Char: 72.2346, Validation Average Loss: 0.9232
Beam Val Word Accuracy: 29.2336 Correct Prediction : 1274/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.23it/s]
100%|██████████| 4358/4358 [00:55<00:00, 78.87it/s]


Train Accuracy Char: 78.8310, Train Average Loss: 0.6889
Validation Accuracy Char: 73.5168, Validation Average Loss: 0.8748
Beam Val Word Accuracy: 32.2855 Correct Prediction : 1407/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.42it/s]
100%|██████████| 137/137 [00:05<00:00, 23.33it/s]
100%|██████████| 4358/4358 [00:55<00:00, 78.98it/s]


Train Accuracy Char: 79.4588, Train Average Loss: 0.6620
Validation Accuracy Char: 74.5677, Validation Average Loss: 0.8392
Beam Val Word Accuracy: 34.8554 Correct Prediction : 1519/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.42it/s]
100%|██████████| 137/137 [00:05<00:00, 23.14it/s]
100%|██████████| 4358/4358 [00:55<00:00, 79.04it/s]


Train Accuracy Char: 80.0013, Train Average Loss: 0.6402
Validation Accuracy Char: 75.2203, Validation Average Loss: 0.8343
Beam Val Word Accuracy: 38.3433 Correct Prediction : 1671/4358


100%|██████████| 1382/1382 [03:35<00:00,  6.41it/s]
100%|██████████| 137/137 [00:05<00:00, 23.28it/s]
100%|██████████| 4358/4358 [00:55<00:00, 78.91it/s]


Train Accuracy Char: 80.2019, Train Average Loss: 0.6310
Validation Accuracy Char: 76.1582, Validation Average Loss: 0.8001
Beam Val Word Accuracy: 39.7430 Correct Prediction : 1732/4358


0,1
accuracy,▁
beam_val_accuracy_word,▁▂▃▄▅▆▇▇██
train_accuracy_char,▁▅▆▇▇█████
train_loss,█▄▂▂▂▁▁▁▁▁
val_accuracy_char,▁▅▆▆▇▇████
val_loss,█▄▃▂▂▂▁▁▁▁

0,1
accuracy,39.743
beam_val_accuracy_word,39.743
train_accuracy_char,80.20194
train_loss,0.63102
val_accuracy_char,76.15817
val_loss,0.80015


[34m[1mwandb[0m: Agent Starting Run: xg8m3aqe with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7


Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): RNN(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): RNN(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 4780829


100%|██████████| 1382/1382 [01:01<00:00, 22.38it/s]
100%|██████████| 137/137 [00:01<00:00, 76.47it/s]
100%|██████████| 4358/4358 [00:30<00:00, 145.26it/s]


Train Accuracy Char: 36.0528, Train Average Loss: 2.2909
Validation Accuracy Char: 19.5277, Validation Average Loss: 2.9653
Beam Val Word Accuracy: 0.0000 Correct Prediction : 0/4358


100%|██████████| 1382/1382 [01:01<00:00, 22.52it/s]
100%|██████████| 137/137 [00:01<00:00, 77.51it/s]
100%|██████████| 4358/4358 [00:32<00:00, 136.15it/s]


Train Accuracy Char: 37.9426, Train Average Loss: 2.1698
Validation Accuracy Char: 22.6573, Validation Average Loss: 2.8637
Beam Val Word Accuracy: 0.0229 Correct Prediction : 1/4358


100%|██████████| 1382/1382 [01:01<00:00, 22.49it/s]
100%|██████████| 137/137 [00:01<00:00, 75.92it/s]
100%|██████████| 4358/4358 [00:32<00:00, 134.89it/s]


Train Accuracy Char: 39.4764, Train Average Loss: 2.0944
Validation Accuracy Char: 23.8084, Validation Average Loss: 2.8129
Beam Val Word Accuracy: 0.0229 Correct Prediction : 1/4358


100%|██████████| 1382/1382 [01:01<00:00, 22.42it/s]
100%|██████████| 137/137 [00:01<00:00, 76.24it/s]
100%|██████████| 4358/4358 [00:32<00:00, 134.43it/s]


Train Accuracy Char: 40.9875, Train Average Loss: 2.0389
Validation Accuracy Char: 27.1461, Validation Average Loss: 2.6865
Beam Val Word Accuracy: 0.0459 Correct Prediction : 2/4358


100%|██████████| 1382/1382 [01:01<00:00, 22.42it/s]
100%|██████████| 137/137 [00:01<00:00, 74.60it/s]
100%|██████████| 4358/4358 [00:33<00:00, 132.04it/s]


Train Accuracy Char: 42.3201, Train Average Loss: 1.9890
Validation Accuracy Char: 29.3635, Validation Average Loss: 2.5084
Beam Val Word Accuracy: 0.0229 Correct Prediction : 1/4358


100%|██████████| 1382/1382 [01:01<00:00, 22.44it/s]
100%|██████████| 137/137 [00:01<00:00, 75.87it/s]
100%|██████████| 4358/4358 [00:33<00:00, 128.63it/s]


Train Accuracy Char: 43.7626, Train Average Loss: 1.9428
Validation Accuracy Char: 31.8739, Validation Average Loss: 2.3876
Beam Val Word Accuracy: 0.0459 Correct Prediction : 2/4358


100%|██████████| 1382/1382 [01:01<00:00, 22.48it/s]
100%|██████████| 137/137 [00:01<00:00, 77.97it/s]
100%|██████████| 4358/4358 [00:33<00:00, 130.69it/s]


Train Accuracy Char: 45.8661, Train Average Loss: 1.8746
Validation Accuracy Char: 34.6617, Validation Average Loss: 2.2838
Beam Val Word Accuracy: 0.0688 Correct Prediction : 3/4358


100%|██████████| 1382/1382 [01:01<00:00, 22.49it/s]
100%|██████████| 137/137 [00:01<00:00, 76.12it/s]
100%|██████████| 4358/4358 [00:33<00:00, 131.03it/s]


Train Accuracy Char: 47.3802, Train Average Loss: 1.8151
Validation Accuracy Char: 37.1335, Validation Average Loss: 2.1726
Beam Val Word Accuracy: 0.2295 Correct Prediction : 10/4358


100%|██████████| 1382/1382 [01:01<00:00, 22.51it/s]
100%|██████████| 137/137 [00:01<00:00, 75.04it/s]
100%|██████████| 4358/4358 [00:33<00:00, 129.52it/s]


Train Accuracy Char: 48.6715, Train Average Loss: 1.7553
Validation Accuracy Char: 38.5981, Validation Average Loss: 2.1064
Beam Val Word Accuracy: 0.2295 Correct Prediction : 10/4358


100%|██████████| 1382/1382 [01:01<00:00, 22.45it/s]
100%|██████████| 137/137 [00:01<00:00, 76.77it/s]
100%|██████████| 4358/4358 [00:34<00:00, 127.71it/s]

Train Accuracy Char: 50.0655, Train Average Loss: 1.7036
Validation Accuracy Char: 41.5710, Validation Average Loss: 1.9949
Beam Val Word Accuracy: 0.4360 Correct Prediction : 19/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▁▁▂▁▂▂▅▅█
train_accuracy_char,▁▂▃▃▄▅▆▇▇█
train_loss,█▇▆▅▄▄▃▂▂▁
val_accuracy_char,▁▂▂▃▄▅▆▇▇█
val_loss,█▇▇▆▅▄▃▂▂▁

0,1
accuracy,0.43598
beam_val_accuracy_word,0.43598
train_accuracy_char,50.06554
train_loss,1.70356
val_accuracy_char,41.57096
val_loss,1.99493


[34m[1mwandb[0m: Agent Starting Run: db99sekn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7


Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): GRU(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): GRU(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 14234397


100%|██████████| 1382/1382 [01:22<00:00, 16.75it/s]
100%|██████████| 137/137 [00:01<00:00, 71.64it/s]
100%|██████████| 4358/4358 [01:59<00:00, 36.35it/s]


Train Accuracy Char: 42.0385, Train Average Loss: 1.9965
Validation Accuracy Char: 34.9546, Validation Average Loss: 2.1596
Beam Val Word Accuracy: 0.3442 Correct Prediction : 15/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.78it/s]
100%|██████████| 137/137 [00:01<00:00, 72.45it/s]
100%|██████████| 4358/4358 [01:59<00:00, 36.44it/s]


Train Accuracy Char: 50.6882, Train Average Loss: 1.6230
Validation Accuracy Char: 42.6296, Validation Average Loss: 1.8514
Beam Val Word Accuracy: 2.2946 Correct Prediction : 100/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.74it/s]
100%|██████████| 137/137 [00:01<00:00, 72.23it/s]
100%|██████████| 4358/4358 [02:04<00:00, 35.09it/s]


Train Accuracy Char: 55.2860, Train Average Loss: 1.4689
Validation Accuracy Char: 47.4241, Validation Average Loss: 1.6883
Beam Val Word Accuracy: 5.6448 Correct Prediction : 246/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.77it/s]
100%|██████████| 137/137 [00:01<00:00, 72.66it/s]
100%|██████████| 4358/4358 [02:05<00:00, 34.66it/s]


Train Accuracy Char: 58.0933, Train Average Loss: 1.3740
Validation Accuracy Char: 51.1318, Validation Average Loss: 1.5647
Beam Val Word Accuracy: 7.8017 Correct Prediction : 340/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.81it/s]
100%|██████████| 137/137 [00:01<00:00, 72.56it/s]
100%|██████████| 4358/4358 [02:05<00:00, 34.73it/s]


Train Accuracy Char: 60.3117, Train Average Loss: 1.3067
Validation Accuracy Char: 53.3416, Validation Average Loss: 1.4968
Beam Val Word Accuracy: 9.5227 Correct Prediction : 415/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.80it/s]
100%|██████████| 137/137 [00:01<00:00, 72.24it/s]
100%|██████████| 4358/4358 [02:07<00:00, 34.16it/s]


Train Accuracy Char: 61.8390, Train Average Loss: 1.2574
Validation Accuracy Char: 55.1684, Validation Average Loss: 1.4495
Beam Val Word Accuracy: 11.6567 Correct Prediction : 508/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.81it/s]
100%|██████████| 137/137 [00:01<00:00, 72.22it/s]
100%|██████████| 4358/4358 [02:08<00:00, 34.01it/s]


Train Accuracy Char: 62.7167, Train Average Loss: 1.2262
Validation Accuracy Char: 55.8699, Validation Average Loss: 1.4283
Beam Val Word Accuracy: 13.4924 Correct Prediction : 588/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.81it/s]
100%|██████████| 137/137 [00:01<00:00, 72.34it/s]
100%|██████████| 4358/4358 [02:08<00:00, 33.83it/s]


Train Accuracy Char: 63.9180, Train Average Loss: 1.1900
Validation Accuracy Char: 58.2877, Validation Average Loss: 1.3471
Beam Val Word Accuracy: 14.8463 Correct Prediction : 647/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.79it/s]
100%|██████████| 137/137 [00:01<00:00, 72.53it/s]
100%|██████████| 4358/4358 [02:10<00:00, 33.30it/s]


Train Accuracy Char: 64.7882, Train Average Loss: 1.1622
Validation Accuracy Char: 58.4984, Validation Average Loss: 1.3598
Beam Val Word Accuracy: 15.8788 Correct Prediction : 692/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.84it/s]
100%|██████████| 137/137 [00:01<00:00, 71.88it/s]
100%|██████████| 4358/4358 [02:11<00:00, 33.25it/s]

Train Accuracy Char: 65.4286, Train Average Loss: 1.1440
Validation Accuracy Char: 58.5138, Validation Average Loss: 1.3454
Beam Val Word Accuracy: 14.9380 Correct Prediction : 651/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▂▃▄▅▆▇███
train_accuracy_char,▁▄▅▆▆▇▇███
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy_char,▁▃▅▆▆▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
accuracy,14.93804
beam_val_accuracy_word,14.93804
train_accuracy_char,65.42859
train_loss,1.14402
val_accuracy_char,58.51384
val_loss,1.34536


[34m[1mwandb[0m: Agent Starting Run: f8r5vbep with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7




Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): GRU(256, 256, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=512, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): GRU(256, 256, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 1618205


100%|██████████| 1382/1382 [00:51<00:00, 26.72it/s]
100%|██████████| 137/137 [00:01<00:00, 93.63it/s]
100%|██████████| 4358/4358 [00:29<00:00, 148.24it/s]


Train Accuracy Char: 54.5598, Train Average Loss: 1.4969
Validation Accuracy Char: 51.8307, Validation Average Loss: 1.5464
Beam Val Word Accuracy: 6.7462 Correct Prediction : 294/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.72it/s]
100%|██████████| 137/137 [00:01<00:00, 94.35it/s]
100%|██████████| 4358/4358 [00:30<00:00, 145.02it/s]


Train Accuracy Char: 64.2895, Train Average Loss: 1.1668
Validation Accuracy Char: 60.5488, Validation Average Loss: 1.2880
Beam Val Word Accuracy: 14.7545 Correct Prediction : 643/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.68it/s]
100%|██████████| 137/137 [00:01<00:00, 95.28it/s]
100%|██████████| 4358/4358 [00:30<00:00, 142.58it/s]


Train Accuracy Char: 68.1228, Train Average Loss: 1.0512
Validation Accuracy Char: 65.1738, Validation Average Loss: 1.1435
Beam Val Word Accuracy: 19.7109 Correct Prediction : 859/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.61it/s]
100%|██████████| 137/137 [00:01<00:00, 92.35it/s]
100%|██████████| 4358/4358 [00:30<00:00, 141.45it/s]


Train Accuracy Char: 70.2593, Train Average Loss: 0.9822
Validation Accuracy Char: 67.4529, Validation Average Loss: 1.0742
Beam Val Word Accuracy: 23.9559 Correct Prediction : 1044/4358


100%|██████████| 1382/1382 [00:52<00:00, 26.50it/s]
100%|██████████| 137/137 [00:01<00:00, 93.61it/s]
100%|██████████| 4358/4358 [00:31<00:00, 140.51it/s]


Train Accuracy Char: 71.7222, Train Average Loss: 0.9393
Validation Accuracy Char: 69.2644, Validation Average Loss: 1.0181
Beam Val Word Accuracy: 27.1914 Correct Prediction : 1185/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.61it/s]
100%|██████████| 137/137 [00:01<00:00, 93.63it/s]
100%|██████████| 4358/4358 [00:31<00:00, 140.16it/s]


Train Accuracy Char: 72.6608, Train Average Loss: 0.9072
Validation Accuracy Char: 70.3949, Validation Average Loss: 0.9780
Beam Val Word Accuracy: 28.8206 Correct Prediction : 1256/4358


100%|██████████| 1382/1382 [00:52<00:00, 26.54it/s]
100%|██████████| 137/137 [00:01<00:00, 95.53it/s]
100%|██████████| 4358/4358 [00:31<00:00, 139.95it/s]


Train Accuracy Char: 73.3432, Train Average Loss: 0.8853
Validation Accuracy Char: 70.9833, Validation Average Loss: 0.9641
Beam Val Word Accuracy: 30.6563 Correct Prediction : 1336/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.58it/s]
100%|██████████| 137/137 [00:01<00:00, 95.39it/s]
100%|██████████| 4358/4358 [00:31<00:00, 139.64it/s]


Train Accuracy Char: 74.0633, Train Average Loss: 0.8604
Validation Accuracy Char: 71.7285, Validation Average Loss: 0.9383
Beam Val Word Accuracy: 31.4135 Correct Prediction : 1369/4358


100%|██████████| 1382/1382 [00:51<00:00, 26.64it/s]
100%|██████████| 137/137 [00:01<00:00, 94.00it/s]
100%|██████████| 4358/4358 [00:30<00:00, 140.62it/s]


Train Accuracy Char: 74.3065, Train Average Loss: 0.8523
Validation Accuracy Char: 72.2218, Validation Average Loss: 0.9277
Beam Val Word Accuracy: 32.4690 Correct Prediction : 1415/4358


100%|██████████| 1382/1382 [00:52<00:00, 26.55it/s]
100%|██████████| 137/137 [00:01<00:00, 94.66it/s]
100%|██████████| 4358/4358 [00:31<00:00, 139.91it/s]

Train Accuracy Char: 74.9240, Train Average Loss: 0.8313
Validation Accuracy Char: 72.4196, Validation Average Loss: 0.9136
Beam Val Word Accuracy: 33.0656 Correct Prediction : 1441/4358





0,1
accuracy,▁
beam_val_accuracy_word,▁▃▄▆▆▇▇███
train_accuracy_char,▁▄▆▆▇▇▇███
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy_char,▁▄▆▆▇▇████
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
accuracy,33.06563
beam_val_accuracy_word,33.06563
train_accuracy_char,74.92395
train_loss,0.83127
val_accuracy_char,72.41964
val_loss,0.91364


[34m[1mwandb[0m: Agent Starting Run: 767i5hoe with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_search_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	length_penalty: 0.6
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	optimizer: adagrad
[34m[1mwandb[0m: 	teacher_forcing: 0.7


Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 18961181


100%|██████████| 1382/1382 [01:32<00:00, 14.91it/s]
100%|██████████| 137/137 [00:02<00:00, 63.35it/s]
100%|██████████| 4358/4358 [00:37<00:00, 115.68it/s]


Train Accuracy Char: 50.3464, Train Average Loss: 1.7020
Validation Accuracy Char: 50.1092, Validation Average Loss: 1.6934
Beam Val Word Accuracy: 2.8912 Correct Prediction : 126/4358


100%|██████████| 1382/1382 [01:32<00:00, 14.96it/s]
100%|██████████| 137/137 [00:02<00:00, 64.86it/s]
100%|██████████| 4358/4358 [00:38<00:00, 112.53it/s]


Train Accuracy Char: 69.0813, Train Average Loss: 1.0258
Validation Accuracy Char: 61.9749, Validation Average Loss: 1.2535
Beam Val Word Accuracy: 13.5842 Correct Prediction : 592/4358


 53%|█████▎    | 738/1382 [00:49<00:48, 13.32it/s]

## SWEEP CONFIGURATION

In [22]:
# Load Dataset
df_train, train_input_len, train_out_len = load_dataset('/kaggle/input/dakshina-dataset-ass-3/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv')
df_val, val_input_len, val_out_len = load_dataset('/kaggle/input/dakshina-dataset-ass-3/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv')
df_test, test_input_len, test_out_len = load_dataset('/kaggle/input/dakshina-dataset-ass-3/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv')

input_max_len = max(train_input_len, val_input_len, test_input_len)
output_max_len = max(train_out_len, val_out_len, test_out_len)

max_length = max(input_max_len, output_max_len)

# Create Look Up Table
input_char_to_int, input_int_to_char = look_up_table(df_train[0], df_val[0], df_test[0])
output_char_to_int, output_int_to_char = look_up_table(df_train[1], df_val[1], df_test[1])

# NOW define parameters after lookup tables are created
params = {
    "input_size": len(input_char_to_int),
    "output_size": len(output_char_to_int),
    "embedding_size": 256,
    "hidden_size": 512,
    "enc_num_layers": 2,
    "dec_num_layers": 2,
    "cell_type": "GRU", # LSTM, GRU, RNN
    "dropout": 0.3,
    "learning_rate": 0.01,
    "batch_size": 32,
    "num_epochs": 10,
    "optimizer": 'adagrad',  # ['sgd', 'rmsprop', 'adam', 'nadam']
    "beam_search_width" : 4,
    "length_penalty" : 0.6,
    "bidirectional": True,
    "teacher_forcing": 0.7,
}

# Data Embedding and Converting them into Tensor
train_inputs, train_outputs = get_tensor_object(df_train, max_length, max_length, input_char_to_int, output_char_to_int)
val_inputs, val_outputs = get_tensor_object(df_val, max_length, max_length, input_char_to_int, output_char_to_int)
test_inputs, test_outputs = get_tensor_object(df_test, max_length, max_length, input_char_to_int, output_char_to_int)

# Transpose column wise
train_inputs, train_outputs = torch.transpose(train_inputs, 0, 1), torch.transpose(train_outputs, 0, 1)
val_inputs, val_outputs = torch.transpose(val_inputs, 0, 1), torch.transpose(val_outputs, 0, 1)
test_inputs, test_outputs = torch.transpose(test_inputs, 0, 1), torch.transpose(test_outputs, 0, 1)

# Extract parameters from the params dictionary
input_size = params['input_size']
output_size = params['output_size']
embedding_size = params['embedding_size']
hidden_size = params['hidden_size']
enc_num_layers = params['enc_num_layers'] 
dec_num_layers = params['dec_num_layers']  
cell_type = params['cell_type']
dropout = params['dropout']
learning_rate = params['learning_rate']
batch_size = params['batch_size']
num_epochs = params['num_epochs']  
optimizer = params['optimizer']  
beam_width = params['beam_search_width']
bidirectional = params['bidirectional']
length_penalty = params['length_penalty']
teacher_forcing = params['teacher_forcing']

# Create train data batch
train_batch_x, train_batch_y = torch.split(train_inputs, batch_size, dim=1), torch.split(train_outputs, batch_size, dim=1)
# Validation data batch
val_batch_x, val_batch_y = torch.split(val_inputs, batch_size, dim=1), torch.split(val_outputs, batch_size, dim=1)

# Initialize encoder, decoder and seq2seq model
encoder = Encoder(input_size, embedding_size, hidden_size, enc_num_layers, dropout, bidirectional, cell_type).to(device)
decoder = Decoder(output_size, embedding_size, hidden_size, output_size, dec_num_layers, dropout, bidirectional, cell_type).to(device)  
model = Seq2Seq(encoder, decoder, output_char_to_int, teacher_forcing, cell_type).to(device)

# Print total number of parameters in the model
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(model)
print(f'Total Trainable Parameters: {total_params}')

# Loss function and Optimizer
criterion = nn.CrossEntropyLoss()
if optimizer == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
elif optimizer == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
elif optimizer == 'rmsprop':
    optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
elif optimizer == 'nadam':
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
elif optimizer == 'adagrad':
    optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)
else:
    print("Incorrect Optimizer !!!!")

# TRAINING
model, acc = train(model, num_epochs, criterion, optimizer, train_batch_x, train_batch_y, val_batch_x, val_batch_y, df_val, input_char_to_int, output_char_to_int, output_int_to_char, beam_width, length_penalty, cell_type, max_length, 0)

Seq2Seq(
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(29, 256)
    (rnn): GRU(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=29, bias=True)
    (log_softmax): LogSoftmax(dim=1)
  )
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(66, 256)
    (rnn): GRU(256, 512, num_layers=2, dropout=0.3, bidirectional=True)
  )
)
Total Trainable Parameters: 14234397


100%|██████████| 1382/1382 [01:23<00:00, 16.60it/s]
100%|██████████| 137/137 [00:01<00:00, 72.83it/s]
100%|██████████| 4358/4358 [02:10<00:00, 33.44it/s]


Train Accuracy Char: 55.4023, Train Average Loss: 1.5227
Validation Accuracy Char: 56.2219, Validation Average Loss: 1.5318
Beam Val Word Accuracy: 9.3162 Correct Prediction : 406/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.66it/s]
100%|██████████| 137/137 [00:01<00:00, 72.72it/s]
100%|██████████| 4358/4358 [02:15<00:00, 32.20it/s]


Train Accuracy Char: 70.7786, Train Average Loss: 0.9655
Validation Accuracy Char: 66.1065, Validation Average Loss: 1.1300
Beam Val Word Accuracy: 22.1661 Correct Prediction : 966/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.69it/s]
100%|██████████| 137/137 [00:01<00:00, 72.54it/s]
100%|██████████| 4358/4358 [02:17<00:00, 31.71it/s]


Train Accuracy Char: 74.5757, Train Average Loss: 0.8437
Validation Accuracy Char: 71.6077, Validation Average Loss: 0.9478
Beam Val Word Accuracy: 31.9642 Correct Prediction : 1393/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.71it/s]
100%|██████████| 137/137 [00:01<00:00, 72.75it/s]
100%|██████████| 4358/4358 [02:17<00:00, 31.78it/s]


Train Accuracy Char: 76.4771, Train Average Loss: 0.7790
Validation Accuracy Char: 74.0332, Validation Average Loss: 0.8657
Beam Val Word Accuracy: 37.7926 Correct Prediction : 1647/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.62it/s]
100%|██████████| 137/137 [00:01<00:00, 72.98it/s]
100%|██████████| 4358/4358 [02:17<00:00, 31.59it/s]


Train Accuracy Char: 77.5222, Train Average Loss: 0.7421
Validation Accuracy Char: 74.9737, Validation Average Loss: 0.8302
Beam Val Word Accuracy: 39.9036 Correct Prediction : 1739/4358


100%|██████████| 1382/1382 [01:23<00:00, 16.62it/s]
100%|██████████| 137/137 [00:01<00:00, 72.39it/s]
100%|██████████| 4358/4358 [02:18<00:00, 31.45it/s]


Train Accuracy Char: 78.2174, Train Average Loss: 0.7139
Validation Accuracy Char: 75.8653, Validation Average Loss: 0.8052
Beam Val Word Accuracy: 41.1886 Correct Prediction : 1795/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.66it/s]
100%|██████████| 137/137 [00:01<00:00, 72.88it/s]
100%|██████████| 4358/4358 [02:18<00:00, 31.56it/s]


Train Accuracy Char: 78.9606, Train Average Loss: 0.6872
Validation Accuracy Char: 76.3637, Validation Average Loss: 0.7925
Beam Val Word Accuracy: 42.5425 Correct Prediction : 1854/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.68it/s]
100%|██████████| 137/137 [00:01<00:00, 72.70it/s]
100%|██████████| 4358/4358 [02:17<00:00, 31.60it/s]


Train Accuracy Char: 79.3597, Train Average Loss: 0.6714
Validation Accuracy Char: 76.3123, Validation Average Loss: 0.8012
Beam Val Word Accuracy: 42.7490 Correct Prediction : 1863/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.71it/s]
100%|██████████| 137/137 [00:01<00:00, 72.90it/s]
100%|██████████| 4358/4358 [02:18<00:00, 31.35it/s]


Train Accuracy Char: 79.7300, Train Average Loss: 0.6565
Validation Accuracy Char: 76.9444, Validation Average Loss: 0.7762
Beam Val Word Accuracy: 43.5750 Correct Prediction : 1899/4358


100%|██████████| 1382/1382 [01:22<00:00, 16.67it/s]
100%|██████████| 137/137 [00:01<00:00, 73.28it/s]
100%|██████████| 4358/4358 [02:18<00:00, 31.36it/s]

Train Accuracy Char: 79.9206, Train Average Loss: 0.6470
Validation Accuracy Char: 76.5230, Validation Average Loss: 0.7920
Beam Val Word Accuracy: 43.6668 Correct Prediction : 1903/4358





## TEST PREDICTON

In [23]:
def store_results(data_type, words, translations, predictions, results):
    """
    This function saves the evaluation results to a CSV file.

    Args:
        data_type (str): The type of data used for evaluation (e.g., 'val', 'test').
        words (list): List of source words (without start/end tokens).
        translations (list): List of reference translations (without start/end tokens).
        predictions (list): List of predicted translated sequences (without start/end tokens).
        results (list): List of 'Yes' or 'No' indicating correct/incorrect predictions.
    """

    # Create a dictionary to store the results in a structured format
    log = {
        'Word': words,
        'Translation': translations,
        'Prediction': predictions,
        'Result': results  # 'Yes' for correct, 'No' for incorrect
    }
    
    # Construct the file path for the CSV file
    path = '/kaggle/working/predictions.csv'

    # Create a Pandas DataFrame from the dictionary
    data_frame = pd.DataFrame(log)

    # Save the DataFrame to a CSV file (header=True includes column names, index=False excludes row index)
    data_frame.to_csv(path, header=True, index=False)
    
    # Log to wandb
    wandb.init(project='DL_Assignment_3', name='Prediction_Store')

    wandb.log({'Prediction_table': wandb.Table(dataframe= data_frame)})

    wandb.finish()

In [24]:

test_acc = 0
correct_pred = 0
words_test = [] 
translations_test = [] 
predictions_test = []
results_test = []

for i in tqdm(range(df_test.shape[0])):
    input_seq = df_test.iloc[i, 0][:-1] 
    true_seq = df_test.iloc[i, 1][1:-1]
    predicted_output = beam_search(model, input_seq, max_length, input_char_to_int, output_char_to_int, output_int_to_char, beam_width, length_penalty, cell_type)
    words_test.append(input_seq)
    translations_test.append(true_seq)
    predictions_test.append(predicted_output[:-1])
    if true_seq == predicted_output[:-1]:
        correct_pred += 1
        results_test.append('Yes')
    else:
        results_test.append('No')

test_acc = 100 * correct_pred / df_test.shape[0]   

print(f'Test Accuracy Word Level: {test_acc}, Correctly Predicted: {correct_pred}')
#store_results('test', words_test, translations_test, predictions_test, results_test)

100%|██████████| 4502/4502 [02:21<00:00, 31.85it/s]

Test Accuracy Word Level: 42.714349178143046, Correctly Predicted: 1923





## Prediction