In [2]:
def load_dakshina_lexicon_pairs(filepath):
    pairs=[]
    with open(filepath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # skip empty lines
            parts = line.split('\t')
            if len(parts) != 3:
                continue  # skip malformed lines
            devanagari_word, latin_word,_ = parts
            pairs.append((latin_word, devanagari_word))  # reverse order
    return pairs

In [3]:
filepath = "/kaggle/input/dakshina-dataset-v1-0/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
pairs = load_dakshina_lexicon_pairs(filepath)

for i in range(5):
    print(pairs[i])

('an', 'अं')
('ankganit', 'अंकगणित')
('uncle', 'अंकल')
('ankur', 'अंकुर')
('ankuran', 'अंकुरण')


In [4]:
def build_vocab(pairs, add_special_tokens=True):
    input_chars = set() # to ensure no repeated characters
    output_chars = set()

    # Collect unique characters from Latin (input) and Devanagari (output)
    for latin_word, devnagari_word in pairs:
        input_chars.update(list(latin_word))
        output_chars.update(list(devnagari_word))

    # Sort to keep it consistent
    input_chars = sorted(list(input_chars))
    output_chars = sorted(list(output_chars))

    # Add special tokens
    special_tokens = ['<pad>', '<sos>', '<eos>', '<unk>'] if add_special_tokens else []

    input_vocab = special_tokens + input_chars
    output_vocab = special_tokens + output_chars

    # Create dictionaries
    input_char2idx = {ch: idx for idx, ch in enumerate(input_vocab)}
    input_idx2char = {idx: ch for ch, idx in input_char2idx.items()}

    output_char2idx = {ch: idx for idx, ch in enumerate(output_vocab)}
    output_idx2char = {idx: ch for ch, idx in output_char2idx.items()}

    return input_char2idx, input_idx2char, output_char2idx, output_idx2char

In [5]:
input_char2idx, input_idx2char, output_char2idx, output_idx2char = build_vocab(pairs)

print("Latin char2idx:", list(input_char2idx.items())[:5])
print("Devanagari idx2char:", list(output_idx2char.items())[:5])

print(len(list(output_char2idx.keys())))

Latin char2idx: [('<pad>', 0), ('<sos>', 1), ('<eos>', 2), ('<unk>', 3), ('a', 4)]
Devanagari idx2char: [(0, '<pad>'), (1, '<sos>'), (2, '<eos>'), (3, '<unk>'), (4, 'ँ')]
67


In [6]:
import torch
import torch.nn as nn
class Encoder(nn.Module):
    def __init__(self, input_vocab_size, embed_size, hidden_size, num_encoder_layers=1, cell_type='lstm', dropout=0.0):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_encoder_layers = num_encoder_layers
        self.cell_type = cell_type.lower()
        
        # Embedding layer
        self.embedding = nn.Embedding(input_vocab_size, embed_size)
        
        # RNN layer
        if self.cell_type == 'lstm':
            self.rnn = nn.LSTM(
                embed_size, hidden_size, num_encoder_layers,
                batch_first=True, dropout=dropout if num_encoder_layers > 1 else 0
            )
        elif self.cell_type == 'gru':
            self.rnn = nn.GRU(
                embed_size, hidden_size, num_encoder_layers,
                batch_first=True, dropout=dropout if num_encoder_layers > 1 else 0
            )
        else:  # default to RNN
            self.rnn = nn.RNN(
                embed_size, hidden_size, num_encoder_layers,
                batch_first=True, dropout=dropout if num_encoder_layers > 1 else 0
            )
    
    def forward(self, input_seq, lengths):
        """
        Forward pass for encoder
        
        Args:
            input_seq: Input sequence tensor [batch_size, max_seq_len]
            lengths: Actual lengths of input sequences (tensor)
            
        Returns:
            None: Instead of encoder outputs (to avoid DataParallel issues)
            hidden: Hidden state for decoder initialization
        """
        batch_size = input_seq.size(0)
        
        # Important: ensure lengths is on CPU before using it
        if lengths.is_cuda:
            lengths = lengths.cpu()
        
        # Convert input to embeddings
        embedded = self.embedding(input_seq)  # [batch_size, seq_len, embed_size]
        
        # Pack padded sequence
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths, batch_first=True, enforce_sorted=False
        )
        
        # Process with RNN
        if self.cell_type == 'lstm':
            # Don't return outputs to avoid DataParallel gathering issues
            outputs, (hidden, cell) = self.rnn(packed)
            return outputs, (hidden, cell)
        else:
            outputs, hidden = self.rnn(packed)
            return outputs, hidden


In [7]:
class Decoder(nn.Module):
    def __init__(self, output_vocab_size, embed_size, hidden_size, num_decoder_layers=1, cell_type='lstm', dropout=0.0):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_decoder_layers = num_decoder_layers
        self.output_vocab_size = output_vocab_size
        self.cell_type = cell_type.lower()
        
        # Embedding layer
        self.embedding = nn.Embedding(output_vocab_size, embed_size)
        
        # RNN layer
        if self.cell_type == 'lstm':
            self.rnn = nn.LSTM(
                embed_size, hidden_size, num_decoder_layers,
                batch_first=True, dropout=dropout if num_decoder_layers > 1 else 0
            )
        elif self.cell_type == 'gru':
            self.rnn = nn.GRU(
                embed_size, hidden_size, num_decoder_layers,
                batch_first=True, dropout=dropout if num_decoder_layers > 1 else 0
            )
        else:  # default to RNN
            self.rnn = nn.RNN(
                embed_size, hidden_size, num_decoder_layers,
                batch_first=True, dropout=dropout if num_decoder_layers > 1 else 0
            )
        
        # Output layer
        self.out = nn.Linear(hidden_size, output_vocab_size)
    
    def _convert_encoder_hidden(self, encoder_hidden):
        """
        Convert encoder hidden state to fit decoder dimensions
        
        Args:
            encoder_hidden: Encoder's hidden state
                           For RNN/GRU: tensor of shape (num_encoder_layers, batch_size, hidden_size)
                           For LSTM: tuple of two tensors with that shape
        
        Returns:
            Hidden state with shape compatible with decoder
        """
        if self.cell_type == 'lstm':
            # For LSTM, encoder_hidden is a tuple (hidden, cell)
            hidden, cell = encoder_hidden
            
            # Get shapes
            num_encoder_layers, batch_size, hidden_size = hidden.shape
            
            # Return as is if dimensions already match
            if num_encoder_layers == self.num_decoder_layers:
                return encoder_hidden
            
            # Initialize decoder hidden state
            decoder_hidden = torch.zeros(self.num_decoder_layers, batch_size, hidden_size, device=hidden.device)
            decoder_cell = torch.zeros(self.num_decoder_layers, batch_size, hidden_size, device=cell.device)
            
            # Fill decoder hidden state
            if num_encoder_layers >= self.num_decoder_layers:
                # Take the last layers from encoder
                decoder_hidden = hidden[-self.num_decoder_layers:]
                decoder_cell = cell[-self.num_decoder_layers:]
            else:
                # Copy all available layers from encoder
                decoder_hidden[:num_encoder_layers] = hidden
                decoder_cell[:num_encoder_layers] = cell
                
                # Fill remaining layers with the last encoder layer
                for i in range(num_encoder_layers, self.num_decoder_layers):
                    decoder_hidden[i] = hidden[-1]
                    decoder_cell[i] = cell[-1]
            
            return (decoder_hidden, decoder_cell)
        
        else:  # RNN or GRU
            # Get shapes
            num_encoder_layers, batch_size, hidden_size = encoder_hidden.shape
            
            # Return as is if dimensions already match
            if num_encoder_layers == self.num_decoder_layers:
                return encoder_hidden
            
            # Initialize decoder hidden state
            decoder_hidden = torch.zeros(self.num_decoder_layers, batch_size, hidden_size, device=encoder_hidden.device)
            
            # Fill decoder hidden state
            if num_encoder_layers >= self.num_decoder_layers:
                # Take the last layers from encoder
                decoder_hidden = encoder_hidden[-self.num_decoder_layers:]
            else:
                # Copy all available layers from encoder
                decoder_hidden[:num_encoder_layers] = encoder_hidden
                
                # Fill remaining layers with the last encoder layer
                for i in range(num_encoder_layers, self.num_decoder_layers):
                    decoder_hidden[i] = encoder_hidden[-1]
            
            return decoder_hidden
    
    def forward(self, input_seq, hidden):
        """
        Forward pass for decoder
        
        Args:
            input_seq: Input sequence tensor [batch_size, 1]
            hidden: Hidden state from encoder or previous decoder step
                   Will be automatically converted to match decoder dimensions
        
        Returns:
            output: Output logits
            hidden: Updated hidden state
        """
        # Convert encoder hidden state if this is the first decoder step
        hidden = self._convert_encoder_hidden(hidden)
        
        # Convert input to embeddings
        embedded = self.embedding(input_seq)  # [batch_size, 1, embed_size]
        
        # Process with RNN
        if self.cell_type == 'lstm':
            output, (hidden, cell) = self.rnn(embedded, hidden)
            output = self.out(output)  # [batch_size, 1, output_vocab_size]
            return output, (hidden, cell)
        else:
            output, hidden = self.rnn(embedded, hidden)
            output = self.out(output)  # [batch_size, 1, output_vocab_size]
            return output, hidden

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class TransliterationDataset(Dataset):
    def __init__(self, pairs, input_char2idx, output_char2idx):
        
        '''
        pairs: list of (latin_word, devnagari_word) tuples.
        input_char2idx: dictionary mapping each Latin character to an index.
        output_char2idx: dictionary mapping each Devanagari character to an index.
        '''
            
        self.pairs = pairs
        self.input_char2idx = input_char2idx
        self.output_char2idx = output_char2idx

    # This converts a word into a list of token indices, e.g., India -> [8,13,3,8,0]
    def encode_word(self, word, char2idx, add_sos_eos=False):
        tokens = [char2idx.get(c, char2idx['<unk>']) for c in word]
        if add_sos_eos:
            tokens = [char2idx['<sos>']] + tokens + [char2idx['<eos>']]
        return tokens

    #  Give the total number of latin, devnagri pairs in the dataset
    def __len__(self): 
        return len(self.pairs)

    # This takes the index of the word in latin and gets the latin, devnagri pair. 
        # Then, it converts each word to list of indices and gives the pair of list of indices
    def __getitem__(self, idx):
        latin, devnagari = self.pairs[idx]
        input_ids = self.encode_word(latin, self.input_char2idx)
        target_ids = self.encode_word(devnagari, self.output_char2idx, add_sos_eos=True)
        return input_ids, target_ids

In [10]:
#  Adds pad tokens, given the sequnece, maximum length and pad-token
def pad_seq(seq, max_len, pad_token):
    return seq + [pad_token] * (max_len - len(seq))

def collate_fn(batch):
    '''
    batch: List of tuples [(input1, target1), (input2, target2), ...]

    '''
    input_seqs, target_seqs = zip(*batch)

    input_max_len = max(len(seq) for seq in input_seqs)
    target_max_len = max(len(seq) for seq in target_seqs)

    # Adds padding for seqeuces so that sequence length = maximum sequence length in the batch. 
    # Now all sequenes in the batch are of same length 
    input_padded = [pad_seq(seq, input_max_len, pad_token=input_char2idx['<pad>']) for seq in input_seqs]
    target_padded = [pad_seq(seq, target_max_len, pad_token=output_char2idx['<pad>']) for seq in target_seqs]

    input_tensor = torch.tensor(input_padded, dtype=torch.long)
    target_tensor = torch.tensor(target_padded, dtype=torch.long)

    input_lengths = torch.tensor([len(seq) for seq in input_seqs])
    target_lengths = torch.tensor([len(seq) for seq in target_seqs])

    return input_tensor, input_lengths, target_tensor, target_lengths

In [9]:
sweep_config = {
    'method': 'bayes',  # Could also be 'random' or 'grid'
    'metric': {
        'name': 'token_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'embed_size': {'values': [16, 32, 64]},
        'num_encoder_layers': {'values': [1, 2, 3]},
        'num_decoder_layers': {'values': [1, 2, 3]},
        'hidden_size': {'values': [16, 32, 64]},
        'cell_type': {'values': ['RNN', 'GRU', 'LSTM']},
        'dropout': {'values': [0.3, 0.4, 0.5]},
        'batch_size': {'values': [128, 256, 512]},
        'learning_rate': {'values': [5e-3, 1e-3, 5e-4]},
        'beam_size': {'values': [3, 4, 5]}
    },
    'early_terminate': {
        'type': 'hyperband',
        'min_iter': 7
    }
}

In [10]:
import wandb
wandb.login(key="70a00ae1607c730fb9cd50b1268b191bec7a2901")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mishita49[0m ([33mishita49-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [11]:
filepath_val = "/kaggle/input/dakshina-dataset-v1-0/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv"
pairs_val = load_dakshina_lexicon_pairs(filepath_val)

dataset = TransliterationDataset(pairs, input_char2idx, output_char2idx)
dataset_val = TransliterationDataset(pairs_val, input_char2idx, output_char2idx)

In [12]:
from tqdm import tqdm
import torch
import torch.nn as nn
import wandb
from torch.utils.data import DataLoader
import torch.nn.functional as F

def train():
    wandb.init()
    config = wandb.config
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize encoder and decoder
    encoder = Encoder(
        input_vocab_size=len(input_char2idx),
        embed_size=config.embed_size,
        hidden_size=config.hidden_size,
        num_encoder_layers=config.num_encoder_layers,
        cell_type=config.cell_type,
        dropout=config.dropout
    ).to(device)

    decoder = Decoder(
        output_vocab_size=len(output_char2idx),
        embed_size=config.embed_size,
        hidden_size=config.hidden_size,
        num_decoder_layers=config.num_decoder_layers,
        cell_type=config.cell_type,
        dropout=config.dropout
    ).to(device)

    encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=config.learning_rate, weight_decay=1e-5)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=config.learning_rate, weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss(ignore_index=output_char2idx['<pad>'])

    dataloader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True, collate_fn=collate_fn)

    # Load validation set
    filepath_val = "/kaggle/input/dakshina-dataset-v1-0/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv"
    pairs_val = load_dakshina_lexicon_pairs(filepath_val)
    dataset_val = TransliterationDataset(pairs_val, input_char2idx, output_char2idx)

    dataloader_val = DataLoader(dataset_val, batch_size=1, shuffle=False, collate_fn=collate_fn)

    num_epochs = 20
    for epoch in range(num_epochs):
        # ======== TRAINING ========
        encoder.train()
        decoder.train()
        total_loss = 0

        with tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}") as pbar:
            for input_tensor, input_lengths, target_tensor, target_lengths in pbar:
                input_tensor = input_tensor.to(device)
                target_tensor = target_tensor.to(device)

                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()

                encoder_outputs, encoder_hidden = encoder(input_tensor, input_lengths)
                decoder_input = target_tensor[:, 0].unsqueeze(1)  # <sos>
                decoder_hidden = encoder_hidden

                loss = 0
                max_target_len = target_tensor.size(1)

                for t in range(1, max_target_len):
                    decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                    output = decoder_output.squeeze(1)
                    # print(f'output = {output}')
                    # print(f'target tensor = {target_tensor[:,t]}')
                    loss += criterion(output, target_tensor[:, t])
                    decoder_input = target_tensor[:, t].unsqueeze(1)  # Teacher forcing

                loss.backward()
                torch.nn.utils.clip_grad_norm_(encoder.parameters(), max_norm=1.0)
                torch.nn.utils.clip_grad_norm_(decoder.parameters(), max_norm=1.0)

                encoder_optimizer.step()
                decoder_optimizer.step()

                total_loss += loss.item() / (max_target_len - 1)

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch+1}/{num_epochs} Train Loss: {avg_loss:.4f}")

        # ======== VALIDATION ========
        encoder.eval()
        decoder.eval()
        correct_sequences = 0
        total_sequences = 0
        correct_tokens = 0
        total_tokens = 0
        beam_width = config.beam_size  # You can change this
        
        with torch.no_grad():
            for input_tensor, input_lengths, target_tensor, target_lengths in dataloader_val:
                input_tensor = input_tensor.to(device)
                target_tensor = target_tensor.to(device)
        
                encoder_outputs, encoder_hidden = encoder(input_tensor, input_lengths)
                max_target_len = target_tensor.size(1)
                total_sequences += 1
        
                # Beam is a list of tuples: (sequence_so_far, cumulative_log_prob, decoder_hidden)
                beam = [([output_char2idx['<sos>']], 0.0, encoder_hidden)]
        
                completed_sequences = []
        
                for _ in range(1, max_target_len):
                    new_beam = []
                    for seq, score, hidden in beam:
                        decoder_input = torch.tensor([[seq[-1]]], device=device)
                        decoder_output, hidden_next = decoder(decoder_input, hidden)
                        log_probs = F.log_softmax(decoder_output.squeeze(1), dim=1)
        
                        topk_log_probs, topk_indices = log_probs.topk(beam_width)
        
                        for k in range(beam_width):
                            next_token = topk_indices[0][k].item()
                            next_score = score + topk_log_probs[0][k].item()
                            new_seq = seq + [next_token]
                            new_beam.append((new_seq, next_score, hidden_next))
        
                    # Keep top `beam_width` beams with highest scores
                    beam = sorted(new_beam, key=lambda x: x[1], reverse=True)[:beam_width]
        
                    # Move completed sequences out
                    beam, completed = [], []
                    for seq, score, hidden in new_beam:
                        if seq[-1] == output_char2idx['<eos>']:
                            completed_sequences.append((seq, score))
                        else:
                            beam.append((seq, score, hidden))
                    beam = sorted(beam, key=lambda x: x[1], reverse=True)[:beam_width]
        
                # Choose best completed or best incomplete beam
                if completed_sequences:
                    best_seq = max(completed_sequences, key=lambda x: x[1])[0]
                else:
                    best_seq = max(beam, key=lambda x: x[1])[0]
        
                # Remove <sos> if present
                if best_seq[0] == output_char2idx['<sos>']:
                    best_seq = best_seq[1:]
        
                # Compare prediction with target
                target_seq = target_tensor[0, 1:].tolist()
                pad_idx = output_char2idx['<pad>']
        
                # Token accuracy
                for pred_token, tgt_token in zip(best_seq, target_seq):
                    if tgt_token == pad_idx:
                        break
                    if pred_token == tgt_token:
                        correct_tokens += 1
                    total_tokens += 1
        
                # Sequence accuracy
                target_trimmed = [t for t in target_seq if t != pad_idx]
                best_seq_trimmed = best_seq[:len(target_trimmed)]
                if best_seq_trimmed == target_trimmed:
                    correct_sequences += 1
        
                # Optional print
                # predicted_word = indices_to_words([best_seq], output_idx2char)[0]
                # actual_word = indices_to_words([target_trimmed], output_idx2char)[0]
                # # print(f"Predicted: {predicted_word.ljust(20)} | Actual: {actual_word}")
        
        sequence_accuracy = correct_sequences / total_sequences if total_sequences > 0 else 0
        token_accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0
        
        print(f"Token Accuracy: {token_accuracy:.4f}")
        print(f"Sequence Accuracy: {sequence_accuracy:.4f}")
        
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": avg_loss,
            "token_accuracy": token_accuracy,
            "sequence_accuracy": sequence_accuracy
        })

In [13]:
sweep_id = wandb.sweep(sweep_config, project="DA6401 Assign3")
wandb.agent(sweep_id, function=train, count=20)
wandb.finish()

Create sweep with ID: 2qor0304
Sweep URL: https://wandb.ai/ishita49-indian-institute-of-technology-madras/DA6401%20Assign3/sweeps/2qor0304


[34m[1mwandb[0m: Agent Starting Run: oid6xp4v with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Epoch 1/20: 100%|██████████| 87/87 [00:05<00:00, 16.17it/s]


Epoch 1/20 Train Loss: 2.9719
Token Accuracy: 0.0576
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 26.50it/s]


Epoch 2/20 Train Loss: 2.4952
Token Accuracy: 0.0938
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:03<00:00, 26.61it/s]


Epoch 3/20 Train Loss: 2.3141
Token Accuracy: 0.1399
Sequence Accuracy: 0.0005


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 26.47it/s]


Epoch 4/20 Train Loss: 2.1973
Token Accuracy: 0.1475
Sequence Accuracy: 0.0009


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 24.79it/s]


Epoch 5/20 Train Loss: 2.0997
Token Accuracy: 0.1496
Sequence Accuracy: 0.0007


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 26.92it/s]


Epoch 6/20 Train Loss: 1.9857
Token Accuracy: 0.1693
Sequence Accuracy: 0.0007


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 26.69it/s]


Epoch 7/20 Train Loss: 1.8890
Token Accuracy: 0.1750
Sequence Accuracy: 0.0007


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 25.26it/s]


Epoch 8/20 Train Loss: 1.7920
Token Accuracy: 0.1844
Sequence Accuracy: 0.0025


Epoch 9/20: 100%|██████████| 87/87 [00:03<00:00, 26.57it/s]


Epoch 9/20 Train Loss: 1.7016
Token Accuracy: 0.2008
Sequence Accuracy: 0.0034


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 25.31it/s]


Epoch 10/20 Train Loss: 1.6147
Token Accuracy: 0.2265
Sequence Accuracy: 0.0044


Epoch 11/20: 100%|██████████| 87/87 [00:03<00:00, 26.62it/s]


Epoch 11/20 Train Loss: 1.5057
Token Accuracy: 0.2469
Sequence Accuracy: 0.0064


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 26.31it/s]


Epoch 12/20 Train Loss: 1.4225
Token Accuracy: 0.2594
Sequence Accuracy: 0.0092


Epoch 13/20: 100%|██████████| 87/87 [00:03<00:00, 25.38it/s]


Epoch 13/20 Train Loss: 1.3571
Token Accuracy: 0.2619
Sequence Accuracy: 0.0101


Epoch 14/20: 100%|██████████| 87/87 [00:03<00:00, 26.54it/s]


Epoch 14/20 Train Loss: 1.3055
Token Accuracy: 0.2807
Sequence Accuracy: 0.0115


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 25.27it/s]


Epoch 15/20 Train Loss: 1.2539
Token Accuracy: 0.2876
Sequence Accuracy: 0.0140


Epoch 16/20: 100%|██████████| 87/87 [00:03<00:00, 26.38it/s]


Epoch 16/20 Train Loss: 1.2078
Token Accuracy: 0.2949
Sequence Accuracy: 0.0165


Epoch 17/20: 100%|██████████| 87/87 [00:03<00:00, 25.12it/s]


Epoch 17/20 Train Loss: 1.1648
Token Accuracy: 0.3182
Sequence Accuracy: 0.0246


Epoch 18/20: 100%|██████████| 87/87 [00:03<00:00, 26.76it/s]


Epoch 18/20 Train Loss: 1.1259
Token Accuracy: 0.3373
Sequence Accuracy: 0.0298


Epoch 19/20: 100%|██████████| 87/87 [00:03<00:00, 26.40it/s]


Epoch 19/20 Train Loss: 1.0934
Token Accuracy: 0.3483
Sequence Accuracy: 0.0353


Epoch 20/20: 100%|██████████| 87/87 [00:03<00:00, 25.06it/s]


Epoch 20/20 Train Loss: 1.0524
Token Accuracy: 0.3554
Sequence Accuracy: 0.0397


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▁▁▁▁▂▂▂▃▃▃▃▄▅▆▇█
token_accuracy,▁▂▃▃▃▄▄▄▄▅▅▆▆▆▆▇▇███
train_loss,█▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.0397
token_accuracy,0.35542
train_loss,1.05242


[34m[1mwandb[0m: Agent Starting Run: pkiwqr9r with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Epoch 1/20: 100%|██████████| 173/173 [00:05<00:00, 34.59it/s]


Epoch 1/20 Train Loss: 2.8008
Token Accuracy: 0.1044
Sequence Accuracy: 0.0002


Epoch 2/20: 100%|██████████| 173/173 [00:04<00:00, 34.67it/s]


Epoch 2/20 Train Loss: 2.1534
Token Accuracy: 0.1909
Sequence Accuracy: 0.0007


Epoch 3/20: 100%|██████████| 173/173 [00:04<00:00, 34.64it/s]


Epoch 3/20 Train Loss: 1.8503
Token Accuracy: 0.2021
Sequence Accuracy: 0.0011


Epoch 4/20: 100%|██████████| 173/173 [00:05<00:00, 33.70it/s]


Epoch 4/20 Train Loss: 1.6700
Token Accuracy: 0.2311
Sequence Accuracy: 0.0016


Epoch 5/20: 100%|██████████| 173/173 [00:04<00:00, 34.62it/s]


Epoch 5/20 Train Loss: 1.5076
Token Accuracy: 0.2602
Sequence Accuracy: 0.0057


Epoch 6/20: 100%|██████████| 173/173 [00:05<00:00, 33.29it/s]


Epoch 6/20 Train Loss: 1.3674
Token Accuracy: 0.2821
Sequence Accuracy: 0.0117


Epoch 7/20: 100%|██████████| 173/173 [00:05<00:00, 34.42it/s]


Epoch 7/20 Train Loss: 1.2354
Token Accuracy: 0.3091
Sequence Accuracy: 0.0163


Epoch 8/20: 100%|██████████| 173/173 [00:04<00:00, 34.80it/s]


Epoch 8/20 Train Loss: 1.1402
Token Accuracy: 0.3164
Sequence Accuracy: 0.0239


Epoch 9/20: 100%|██████████| 173/173 [00:04<00:00, 34.71it/s]


Epoch 9/20 Train Loss: 1.0641
Token Accuracy: 0.3481
Sequence Accuracy: 0.0374


Epoch 10/20: 100%|██████████| 173/173 [00:05<00:00, 32.33it/s]


Epoch 10/20 Train Loss: 0.9879
Token Accuracy: 0.3759
Sequence Accuracy: 0.0555


Epoch 11/20: 100%|██████████| 173/173 [00:05<00:00, 34.17it/s]


Epoch 11/20 Train Loss: 0.9300
Token Accuracy: 0.4034
Sequence Accuracy: 0.0686


Epoch 12/20: 100%|██████████| 173/173 [00:05<00:00, 34.45it/s]


Epoch 12/20 Train Loss: 0.8685
Token Accuracy: 0.4381
Sequence Accuracy: 0.0932


Epoch 13/20: 100%|██████████| 173/173 [00:05<00:00, 34.50it/s]


Epoch 13/20 Train Loss: 0.8255
Token Accuracy: 0.4560
Sequence Accuracy: 0.1021


Epoch 14/20: 100%|██████████| 173/173 [00:05<00:00, 33.06it/s]


Epoch 14/20 Train Loss: 0.7773
Token Accuracy: 0.4766
Sequence Accuracy: 0.1207


Epoch 15/20: 100%|██████████| 173/173 [00:05<00:00, 34.20it/s]


Epoch 15/20 Train Loss: 0.7414
Token Accuracy: 0.4904
Sequence Accuracy: 0.1303


Epoch 16/20: 100%|██████████| 173/173 [00:05<00:00, 33.60it/s]


Epoch 16/20 Train Loss: 0.7102
Token Accuracy: 0.5054
Sequence Accuracy: 0.1439


Epoch 17/20: 100%|██████████| 173/173 [00:05<00:00, 33.30it/s]


Epoch 17/20 Train Loss: 0.6858
Token Accuracy: 0.5184
Sequence Accuracy: 0.1560


Epoch 18/20: 100%|██████████| 173/173 [00:05<00:00, 34.42it/s]


Epoch 18/20 Train Loss: 0.6521
Token Accuracy: 0.5319
Sequence Accuracy: 0.1673


Epoch 19/20: 100%|██████████| 173/173 [00:05<00:00, 34.32it/s]


Epoch 19/20 Train Loss: 0.6361
Token Accuracy: 0.5539
Sequence Accuracy: 0.1831


Epoch 20/20: 100%|██████████| 173/173 [00:05<00:00, 34.55it/s]


Epoch 20/20 Train Loss: 0.6117
Token Accuracy: 0.5516
Sequence Accuracy: 0.1852


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▁▁▂▂▂▃▄▅▅▆▆▆▇▇██
token_accuracy,▁▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇▇███
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.18518
token_accuracy,0.55162
train_loss,0.61174


[34m[1mwandb[0m: Agent Starting Run: q4p85buf with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 173/173 [00:05<00:00, 29.82it/s]


Epoch 1/20 Train Loss: 3.0467
Token Accuracy: 0.1017
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 173/173 [00:05<00:00, 31.32it/s]


Epoch 2/20 Train Loss: 2.5928
Token Accuracy: 0.1164
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 173/173 [00:05<00:00, 31.33it/s]


Epoch 3/20 Train Loss: 2.5047
Token Accuracy: 0.1374
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 173/173 [00:05<00:00, 30.52it/s]


Epoch 4/20 Train Loss: 2.3641
Token Accuracy: 0.1584
Sequence Accuracy: 0.0000


Epoch 5/20: 100%|██████████| 173/173 [00:05<00:00, 31.60it/s]


Epoch 5/20 Train Loss: 2.1860
Token Accuracy: 0.1666
Sequence Accuracy: 0.0000


Epoch 6/20: 100%|██████████| 173/173 [00:05<00:00, 30.90it/s]


Epoch 6/20 Train Loss: 2.0381
Token Accuracy: 0.1834
Sequence Accuracy: 0.0000


Epoch 7/20: 100%|██████████| 173/173 [00:05<00:00, 31.28it/s]


Epoch 7/20 Train Loss: 1.9398
Token Accuracy: 0.1980
Sequence Accuracy: 0.0000


Epoch 8/20: 100%|██████████| 173/173 [00:05<00:00, 30.32it/s]


Epoch 8/20 Train Loss: 1.8470
Token Accuracy: 0.2063
Sequence Accuracy: 0.0005


Epoch 9/20: 100%|██████████| 173/173 [00:05<00:00, 31.13it/s]


Epoch 9/20 Train Loss: 1.7814
Token Accuracy: 0.2194
Sequence Accuracy: 0.0005


Epoch 10/20: 100%|██████████| 173/173 [00:05<00:00, 30.85it/s]


Epoch 10/20 Train Loss: 1.7178
Token Accuracy: 0.2331
Sequence Accuracy: 0.0011


Epoch 11/20: 100%|██████████| 173/173 [00:05<00:00, 30.35it/s]


Epoch 11/20 Train Loss: 1.6564
Token Accuracy: 0.2455
Sequence Accuracy: 0.0018


Epoch 12/20: 100%|██████████| 173/173 [00:05<00:00, 31.02it/s]


Epoch 12/20 Train Loss: 1.5963
Token Accuracy: 0.2601
Sequence Accuracy: 0.0018


Epoch 13/20: 100%|██████████| 173/173 [00:05<00:00, 31.38it/s]


Epoch 13/20 Train Loss: 1.5524
Token Accuracy: 0.2809
Sequence Accuracy: 0.0032


Epoch 14/20: 100%|██████████| 173/173 [00:05<00:00, 29.51it/s]


Epoch 14/20 Train Loss: 1.4999
Token Accuracy: 0.2882
Sequence Accuracy: 0.0039


Epoch 15/20: 100%|██████████| 173/173 [00:05<00:00, 31.32it/s]


Epoch 15/20 Train Loss: 1.4575
Token Accuracy: 0.3051
Sequence Accuracy: 0.0101


Epoch 16/20: 100%|██████████| 173/173 [00:05<00:00, 29.93it/s]


Epoch 16/20 Train Loss: 1.4115
Token Accuracy: 0.3192
Sequence Accuracy: 0.0101


Epoch 17/20: 100%|██████████| 173/173 [00:05<00:00, 31.04it/s]


Epoch 17/20 Train Loss: 1.3653
Token Accuracy: 0.3283
Sequence Accuracy: 0.0145


Epoch 18/20: 100%|██████████| 173/173 [00:05<00:00, 31.01it/s]


Epoch 18/20 Train Loss: 1.3369
Token Accuracy: 0.3420
Sequence Accuracy: 0.0161


Epoch 19/20: 100%|██████████| 173/173 [00:05<00:00, 30.88it/s]


Epoch 19/20 Train Loss: 1.2940
Token Accuracy: 0.3515
Sequence Accuracy: 0.0232


Epoch 20/20: 100%|██████████| 173/173 [00:05<00:00, 31.08it/s]


Epoch 20/20 Train Loss: 1.2521
Token Accuracy: 0.3607
Sequence Accuracy: 0.0241


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▄▄▅▆██
token_accuracy,▁▁▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
train_loss,█▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.02409
token_accuracy,0.36068
train_loss,1.25206


[34m[1mwandb[0m: Agent Starting Run: bsdkjnvf with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 26.81it/s]


Epoch 1/20 Train Loss: 3.0463
Token Accuracy: 0.0481
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 25.95it/s]


Epoch 2/20 Train Loss: 2.5561
Token Accuracy: 0.0837
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:03<00:00, 27.06it/s]


Epoch 3/20 Train Loss: 2.4281
Token Accuracy: 0.0988
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 26.34it/s]


Epoch 4/20 Train Loss: 2.2513
Token Accuracy: 0.1340
Sequence Accuracy: 0.0000


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 26.10it/s]


Epoch 5/20 Train Loss: 2.0796
Token Accuracy: 0.1684
Sequence Accuracy: 0.0000


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 27.39it/s]


Epoch 6/20 Train Loss: 1.9395
Token Accuracy: 0.1978
Sequence Accuracy: 0.0002


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 25.44it/s]


Epoch 7/20 Train Loss: 1.8180
Token Accuracy: 0.2218
Sequence Accuracy: 0.0005


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 27.59it/s]


Epoch 8/20 Train Loss: 1.7005
Token Accuracy: 0.2416
Sequence Accuracy: 0.0007


Epoch 9/20: 100%|██████████| 87/87 [00:03<00:00, 26.01it/s]


Epoch 9/20 Train Loss: 1.6127
Token Accuracy: 0.2640
Sequence Accuracy: 0.0023


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 27.24it/s]


Epoch 10/20 Train Loss: 1.5224
Token Accuracy: 0.2735
Sequence Accuracy: 0.0041


Epoch 11/20: 100%|██████████| 87/87 [00:03<00:00, 27.48it/s]


Epoch 11/20 Train Loss: 1.4342
Token Accuracy: 0.3052
Sequence Accuracy: 0.0087


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 25.54it/s]


Epoch 12/20 Train Loss: 1.3427
Token Accuracy: 0.3351
Sequence Accuracy: 0.0147


Epoch 13/20: 100%|██████████| 87/87 [00:03<00:00, 27.50it/s]


Epoch 13/20 Train Loss: 1.2778
Token Accuracy: 0.3517
Sequence Accuracy: 0.0186


Epoch 14/20: 100%|██████████| 87/87 [00:03<00:00, 25.94it/s]


Epoch 14/20 Train Loss: 1.2076
Token Accuracy: 0.3760
Sequence Accuracy: 0.0296


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 27.59it/s]


Epoch 15/20 Train Loss: 1.1401
Token Accuracy: 0.3972
Sequence Accuracy: 0.0367


Epoch 16/20: 100%|██████████| 87/87 [00:03<00:00, 27.73it/s]


Epoch 16/20 Train Loss: 1.0886
Token Accuracy: 0.4136
Sequence Accuracy: 0.0503


Epoch 17/20: 100%|██████████| 87/87 [00:03<00:00, 25.97it/s]


Epoch 17/20 Train Loss: 1.0357
Token Accuracy: 0.4346
Sequence Accuracy: 0.0620


Epoch 18/20: 100%|██████████| 87/87 [00:03<00:00, 27.27it/s]


Epoch 18/20 Train Loss: 0.9843
Token Accuracy: 0.4494
Sequence Accuracy: 0.0709


Epoch 19/20: 100%|██████████| 87/87 [00:03<00:00, 27.27it/s]


Epoch 19/20 Train Loss: 0.9483
Token Accuracy: 0.4631
Sequence Accuracy: 0.0844


Epoch 20/20: 100%|██████████| 87/87 [00:03<00:00, 25.16it/s]


Epoch 20/20 Train Loss: 0.9003
Token Accuracy: 0.4861
Sequence Accuracy: 0.1033


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▅▆▇█
token_accuracy,▁▂▂▂▃▃▄▄▄▅▅▆▆▆▇▇▇▇██
train_loss,█▆▆▅▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.10326
token_accuracy,0.48607
train_loss,0.90034


[34m[1mwandb[0m: Agent Starting Run: bjqgil0d with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 27.35it/s]


Epoch 1/20 Train Loss: 3.2740
Token Accuracy: 0.0537
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:02<00:00, 29.60it/s]


Epoch 2/20 Train Loss: 2.6351
Token Accuracy: 0.0800
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:02<00:00, 29.66it/s]


Epoch 3/20 Train Loss: 2.4458
Token Accuracy: 0.1147
Sequence Accuracy: 0.0002


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 28.23it/s]


Epoch 4/20 Train Loss: 2.3334
Token Accuracy: 0.1302
Sequence Accuracy: 0.0002


Epoch 5/20: 100%|██████████| 87/87 [00:02<00:00, 30.17it/s]


Epoch 5/20 Train Loss: 2.2538
Token Accuracy: 0.1453
Sequence Accuracy: 0.0007


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 28.59it/s]


Epoch 6/20 Train Loss: 2.1373
Token Accuracy: 0.1729
Sequence Accuracy: 0.0005


Epoch 7/20: 100%|██████████| 87/87 [00:02<00:00, 30.05it/s]


Epoch 7/20 Train Loss: 2.0272
Token Accuracy: 0.1802
Sequence Accuracy: 0.0005


Epoch 8/20: 100%|██████████| 87/87 [00:02<00:00, 30.21it/s]


Epoch 8/20 Train Loss: 1.9755


0,1
epoch,▁▂▃▅▆▇█
sequence_accuracy,▁▁▃▃█▆▆
token_accuracy,▁▂▄▅▆██
train_loss,█▄▃▃▂▂▁

0,1
epoch,7.0
sequence_accuracy,0.00046
token_accuracy,0.18016
train_loss,2.0272


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lnie27as with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Epoch 1/20: 100%|██████████| 87/87 [00:02<00:00, 32.01it/s]


Epoch 1/20 Train Loss: 3.6316
Token Accuracy: 0.0352
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:02<00:00, 30.29it/s]


Epoch 2/20 Train Loss: 2.7728
Token Accuracy: 0.0352
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:02<00:00, 32.36it/s]


Epoch 3/20 Train Loss: 2.5066
Token Accuracy: 0.0722
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 87/87 [00:02<00:00, 32.17it/s]


Epoch 4/20 Train Loss: 2.3598
Token Accuracy: 0.0836
Sequence Accuracy: 0.0000


Epoch 5/20: 100%|██████████| 87/87 [00:02<00:00, 30.53it/s]


Epoch 5/20 Train Loss: 2.2947
Token Accuracy: 0.0881
Sequence Accuracy: 0.0000


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 28.74it/s]


Epoch 6/20 Train Loss: 2.2371
Token Accuracy: 0.0996
Sequence Accuracy: 0.0000


Epoch 7/20: 100%|██████████| 87/87 [00:02<00:00, 32.55it/s]


Epoch 7/20 Train Loss: 2.1820
Token Accuracy: 0.1037
Sequence Accuracy: 0.0000


Epoch 8/20: 100%|██████████| 87/87 [00:02<00:00, 30.56it/s]


Epoch 8/20 Train Loss: 2.1692


0,1
epoch,▁▂▃▅▆▇█
sequence_accuracy,▁▁▁▁▁▁▁
token_accuracy,▁▁▅▆▆██
train_loss,█▄▃▂▂▁▁

0,1
epoch,7.0
sequence_accuracy,0.0
token_accuracy,0.10375
train_loss,2.18199


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 07h11e5p with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 28.13it/s]


Epoch 1/20 Train Loss: 3.6180
Token Accuracy: 0.0236
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:02<00:00, 32.06it/s]


Epoch 2/20 Train Loss: 2.7752
Token Accuracy: 0.0447
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:02<00:00, 31.78it/s]


Epoch 3/20 Train Loss: 2.5209
Token Accuracy: 0.0820
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 87/87 [00:02<00:00, 30.02it/s]


Epoch 4/20 Train Loss: 2.3586
Token Accuracy: 0.0712
Sequence Accuracy: 0.0000


Epoch 5/20: 100%|██████████| 87/87 [00:02<00:00, 31.92it/s]


Epoch 5/20 Train Loss: 2.2627
Token Accuracy: 0.0712
Sequence Accuracy: 0.0000


Epoch 6/20: 100%|██████████| 87/87 [00:02<00:00, 32.07it/s]


Epoch 6/20 Train Loss: 2.2105
Token Accuracy: 0.1007
Sequence Accuracy: 0.0000


Epoch 7/20: 100%|██████████| 87/87 [00:02<00:00, 30.03it/s]


Epoch 7/20 Train Loss: 2.1733
Token Accuracy: 0.1029
Sequence Accuracy: 0.0000


Epoch 8/20: 100%|██████████| 87/87 [00:02<00:00, 32.52it/s]


Epoch 8/20 Train Loss: 2.1414


0,1
epoch,▁▂▃▅▆▇█
sequence_accuracy,▁▁▁▁▁▁▁
token_accuracy,▁▃▆▅▅██
train_loss,█▄▃▂▁▁▁

0,1
epoch,7.0
sequence_accuracy,0.0
token_accuracy,0.1029
train_loss,2.17333


[34m[1mwandb[0m: Agent Starting Run: nr0mhr2j with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 4
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 1


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 28.10it/s]


Epoch 1/20 Train Loss: 2.9982
Token Accuracy: 0.0784
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:02<00:00, 29.88it/s]


Epoch 2/20 Train Loss: 2.4414
Token Accuracy: 0.0983
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:02<00:00, 30.51it/s]


Epoch 3/20 Train Loss: 2.1588
Token Accuracy: 0.1734
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 87/87 [00:02<00:00, 29.91it/s]


Epoch 4/20 Train Loss: 1.9542
Token Accuracy: 0.1893
Sequence Accuracy: 0.0005


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 28.19it/s]


Epoch 5/20 Train Loss: 1.7987
Token Accuracy: 0.2046
Sequence Accuracy: 0.0009


Epoch 6/20: 100%|██████████| 87/87 [00:02<00:00, 30.01it/s]


Epoch 6/20 Train Loss: 1.6713
Token Accuracy: 0.2133
Sequence Accuracy: 0.0011


Epoch 7/20: 100%|██████████| 87/87 [00:02<00:00, 29.95it/s]


Epoch 7/20 Train Loss: 1.5828
Token Accuracy: 0.2158
Sequence Accuracy: 0.0014


Epoch 8/20: 100%|██████████| 87/87 [00:02<00:00, 29.63it/s]


Epoch 8/20 Train Loss: 1.5115
Token Accuracy: 0.2374
Sequence Accuracy: 0.0016


Epoch 9/20: 100%|██████████| 87/87 [00:02<00:00, 30.04it/s]


Epoch 9/20 Train Loss: 1.4415
Token Accuracy: 0.2445
Sequence Accuracy: 0.0032


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 28.46it/s]


Epoch 10/20 Train Loss: 1.3771
Token Accuracy: 0.2535
Sequence Accuracy: 0.0046


Epoch 11/20: 100%|██████████| 87/87 [00:02<00:00, 30.26it/s]


Epoch 11/20 Train Loss: 1.3069
Token Accuracy: 0.2682
Sequence Accuracy: 0.0083


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 27.97it/s]


Epoch 12/20 Train Loss: 1.2386
Token Accuracy: 0.2889
Sequence Accuracy: 0.0119


Epoch 13/20: 100%|██████████| 87/87 [00:02<00:00, 29.89it/s]


Epoch 13/20 Train Loss: 1.1834
Token Accuracy: 0.3101
Sequence Accuracy: 0.0193


Epoch 14/20: 100%|██████████| 87/87 [00:02<00:00, 29.80it/s]


Epoch 14/20 Train Loss: 1.1315
Token Accuracy: 0.3267
Sequence Accuracy: 0.0266


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 28.61it/s]


Epoch 15/20 Train Loss: 1.0924
Token Accuracy: 0.3346
Sequence Accuracy: 0.0301


Epoch 16/20: 100%|██████████| 87/87 [00:02<00:00, 30.15it/s]


Epoch 16/20 Train Loss: 1.0414
Token Accuracy: 0.3484
Sequence Accuracy: 0.0346


Epoch 17/20: 100%|██████████| 87/87 [00:02<00:00, 30.32it/s]


Epoch 17/20 Train Loss: 0.9927
Token Accuracy: 0.3740
Sequence Accuracy: 0.0491


Epoch 18/20: 100%|██████████| 87/87 [00:02<00:00, 30.18it/s]


Epoch 18/20 Train Loss: 0.9632
Token Accuracy: 0.3897
Sequence Accuracy: 0.0521


Epoch 19/20: 100%|██████████| 87/87 [00:03<00:00, 27.27it/s]


Epoch 19/20 Train Loss: 0.9209
Token Accuracy: 0.3982
Sequence Accuracy: 0.0560


Epoch 20/20: 100%|██████████| 87/87 [00:02<00:00, 30.01it/s]


Epoch 20/20 Train Loss: 0.8891
Token Accuracy: 0.4109
Sequence Accuracy: 0.0654


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▃▄▄▅▆▇▇█
token_accuracy,▁▁▃▃▄▄▄▄▄▅▅▅▆▆▆▇▇███
train_loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.0654
token_accuracy,0.41092
train_loss,0.88905


[34m[1mwandb[0m: Agent Starting Run: vm0vzjxg with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 16
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_decoder_layers: 1
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 87/87 [00:02<00:00, 35.40it/s]


Epoch 1/20 Train Loss: 3.7816
Token Accuracy: 0.0002
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:02<00:00, 33.98it/s]


Epoch 2/20 Train Loss: 2.8506
Token Accuracy: 0.0366
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:02<00:00, 36.66it/s]


Epoch 3/20 Train Loss: 2.6110
Token Accuracy: 0.0487
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 87/87 [00:02<00:00, 35.82it/s]


Epoch 4/20 Train Loss: 2.4451
Token Accuracy: 0.0801
Sequence Accuracy: 0.0000


Epoch 5/20: 100%|██████████| 87/87 [00:02<00:00, 30.47it/s]


Epoch 5/20 Train Loss: 2.3546
Token Accuracy: 0.1152
Sequence Accuracy: 0.0000


Epoch 6/20: 100%|██████████| 87/87 [00:02<00:00, 36.51it/s]


Epoch 6/20 Train Loss: 2.2889
Token Accuracy: 0.1196
Sequence Accuracy: 0.0000


Epoch 7/20: 100%|██████████| 87/87 [00:02<00:00, 33.83it/s]


Epoch 7/20 Train Loss: 2.2563
Token Accuracy: 0.1169
Sequence Accuracy: 0.0000


Epoch 8/20: 100%|██████████| 87/87 [00:02<00:00, 36.32it/s]


Epoch 8/20 Train Loss: 2.2291
Token Accuracy: 0.1135
Sequence Accuracy: 0.0000


Epoch 9/20: 100%|██████████| 87/87 [00:02<00:00, 36.65it/s]


Epoch 9/20 Train Loss: 2.2065


0,1
epoch,▁▂▃▄▅▆▇█
sequence_accuracy,▁▁▁▁▁▁▁▁
token_accuracy,▁▃▄▆████
train_loss,█▄▃▂▂▁▁▁

0,1
epoch,8.0
sequence_accuracy,0.0
token_accuracy,0.11352
train_loss,2.2291


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: upmfw422 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 3
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 346/346 [00:11<00:00, 30.24it/s]


Epoch 1/20 Train Loss: 2.9877
Token Accuracy: 0.1103
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 346/346 [00:11<00:00, 30.86it/s]


Epoch 2/20 Train Loss: 2.6087
Token Accuracy: 0.1260
Sequence Accuracy: 0.0002


Epoch 3/20: 100%|██████████| 346/346 [00:11<00:00, 30.35it/s]


Epoch 3/20 Train Loss: 2.5314
Token Accuracy: 0.1482
Sequence Accuracy: 0.0005


Epoch 4/20: 100%|██████████| 346/346 [00:11<00:00, 30.41it/s]


Epoch 4/20 Train Loss: 2.4499
Token Accuracy: 0.1630
Sequence Accuracy: 0.0000


Epoch 5/20: 100%|██████████| 346/346 [00:11<00:00, 30.88it/s]


Epoch 5/20 Train Loss: 2.3421
Token Accuracy: 0.1751
Sequence Accuracy: 0.0000


Epoch 6/20: 100%|██████████| 346/346 [00:11<00:00, 30.83it/s]


Epoch 6/20 Train Loss: 2.2520
Token Accuracy: 0.1871
Sequence Accuracy: 0.0000


Epoch 7/20: 100%|██████████| 346/346 [00:11<00:00, 30.77it/s]


Epoch 7/20 Train Loss: 2.1428
Token Accuracy: 0.1990
Sequence Accuracy: 0.0002


Epoch 8/20: 100%|██████████| 346/346 [00:11<00:00, 31.15it/s]


Epoch 8/20 Train Loss: 2.0215
Token Accuracy: 0.2251
Sequence Accuracy: 0.0000


Epoch 9/20: 100%|██████████| 346/346 [00:11<00:00, 31.19it/s]


Epoch 9/20 Train Loss: 1.8952
Token Accuracy: 0.2362
Sequence Accuracy: 0.0011


Epoch 10/20: 100%|██████████| 346/346 [00:11<00:00, 31.35it/s]


Epoch 10/20 Train Loss: 1.8286
Token Accuracy: 0.2454
Sequence Accuracy: 0.0016


Epoch 11/20: 100%|██████████| 346/346 [00:11<00:00, 31.16it/s]


Epoch 11/20 Train Loss: 1.7612
Token Accuracy: 0.2468
Sequence Accuracy: 0.0018


Epoch 12/20: 100%|██████████| 346/346 [00:11<00:00, 31.38it/s]


Epoch 12/20 Train Loss: 1.7020
Token Accuracy: 0.2548
Sequence Accuracy: 0.0028


Epoch 13/20: 100%|██████████| 346/346 [00:11<00:00, 31.23it/s]


Epoch 13/20 Train Loss: 1.6500
Token Accuracy: 0.2635
Sequence Accuracy: 0.0028


Epoch 14/20: 100%|██████████| 346/346 [00:11<00:00, 31.26it/s]


Epoch 14/20 Train Loss: 1.5959
Token Accuracy: 0.2685
Sequence Accuracy: 0.0039


Epoch 15/20: 100%|██████████| 346/346 [00:11<00:00, 31.10it/s]


Epoch 15/20 Train Loss: 1.5550
Token Accuracy: 0.2792
Sequence Accuracy: 0.0060


Epoch 16/20: 100%|██████████| 346/346 [00:11<00:00, 31.01it/s]


Epoch 16/20 Train Loss: 1.5206
Token Accuracy: 0.2860
Sequence Accuracy: 0.0076


Epoch 17/20: 100%|██████████| 346/346 [00:11<00:00, 31.09it/s]


Epoch 17/20 Train Loss: 1.4870
Token Accuracy: 0.2899
Sequence Accuracy: 0.0092


Epoch 18/20: 100%|██████████| 346/346 [00:11<00:00, 30.91it/s]


Epoch 18/20 Train Loss: 1.4601
Token Accuracy: 0.2988
Sequence Accuracy: 0.0089


Epoch 19/20: 100%|██████████| 346/346 [00:11<00:00, 31.05it/s]


Epoch 19/20 Train Loss: 1.4338
Token Accuracy: 0.3040
Sequence Accuracy: 0.0110


Epoch 20/20: 100%|██████████| 346/346 [00:11<00:00, 31.11it/s]


Epoch 20/20 Train Loss: 1.4058
Token Accuracy: 0.3098
Sequence Accuracy: 0.0122


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▁▁▁▁▂▂▂▃▃▃▄▅▆▆▇█
token_accuracy,▁▂▂▃▃▄▄▅▅▆▆▆▆▇▇▇▇███
train_loss,█▆▆▆▅▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.01216
token_accuracy,0.30977
train_loss,1.40579


[34m[1mwandb[0m: Agent Starting Run: 5qg7p3a4 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 3


Epoch 1/20: 100%|██████████| 173/173 [00:04<00:00, 36.03it/s]


Epoch 1/20 Train Loss: 2.9512
Token Accuracy: 0.0568
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 173/173 [00:04<00:00, 36.11it/s]


Epoch 2/20 Train Loss: 2.3083
Token Accuracy: 0.0913
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 173/173 [00:05<00:00, 34.59it/s]


Epoch 3/20 Train Loss: 2.2215
Token Accuracy: 0.1123
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 173/173 [00:04<00:00, 35.23it/s]


Epoch 4/20 Train Loss: 2.1693
Token Accuracy: 0.1131
Sequence Accuracy: 0.0000


Epoch 5/20: 100%|██████████| 173/173 [00:04<00:00, 35.73it/s]


Epoch 5/20 Train Loss: 2.1092
Token Accuracy: 0.1297
Sequence Accuracy: 0.0000


Epoch 6/20: 100%|██████████| 173/173 [00:04<00:00, 35.87it/s]


Epoch 6/20 Train Loss: 2.0884
Token Accuracy: 0.1269
Sequence Accuracy: 0.0000


Epoch 7/20: 100%|██████████| 173/173 [00:04<00:00, 35.65it/s]


Epoch 7/20 Train Loss: 2.0500
Token Accuracy: 0.1443
Sequence Accuracy: 0.0000


Epoch 8/20: 100%|██████████| 173/173 [00:04<00:00, 35.95it/s]


Epoch 8/20 Train Loss: 2.0294


0,1
epoch,▁▂▃▅▆▇█
sequence_accuracy,▁▁▁▁▁▁▁
token_accuracy,▁▄▅▆▇▇█
train_loss,█▃▂▂▁▁▁

0,1
epoch,7.0
sequence_accuracy,0.0
token_accuracy,0.1443
train_loss,2.04995


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k2deirs1 with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 26.36it/s]


Epoch 1/20 Train Loss: 2.6387
Token Accuracy: 0.1402
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 25.21it/s]


Epoch 2/20 Train Loss: 1.9941
Token Accuracy: 0.1916
Sequence Accuracy: 0.0007


Epoch 3/20: 100%|██████████| 87/87 [00:03<00:00, 27.05it/s]


Epoch 3/20 Train Loss: 1.6217
Token Accuracy: 0.2830
Sequence Accuracy: 0.0062


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 25.70it/s]


Epoch 4/20 Train Loss: 1.2975
Token Accuracy: 0.3908
Sequence Accuracy: 0.0317


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 27.34it/s]


Epoch 5/20 Train Loss: 1.0784
Token Accuracy: 0.4620
Sequence Accuracy: 0.0753


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 26.64it/s]


Epoch 6/20 Train Loss: 0.9289
Token Accuracy: 0.4914
Sequence Accuracy: 0.1117


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 25.66it/s]


Epoch 7/20 Train Loss: 0.8212
Token Accuracy: 0.5420
Sequence Accuracy: 0.1572


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 27.23it/s]


Epoch 8/20 Train Loss: 0.7364
Token Accuracy: 0.5702
Sequence Accuracy: 0.1964


Epoch 9/20: 100%|██████████| 87/87 [00:03<00:00, 27.22it/s]


Epoch 9/20 Train Loss: 0.6732
Token Accuracy: 0.5975
Sequence Accuracy: 0.2237


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 27.11it/s]


Epoch 10/20 Train Loss: 0.6173
Token Accuracy: 0.6132
Sequence Accuracy: 0.2320


Epoch 11/20: 100%|██████████| 87/87 [00:03<00:00, 25.88it/s]


Epoch 11/20 Train Loss: 0.5861
Token Accuracy: 0.6306
Sequence Accuracy: 0.2637


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 26.37it/s]


Epoch 12/20 Train Loss: 0.5463
Token Accuracy: 0.6527
Sequence Accuracy: 0.2864


Epoch 13/20: 100%|██████████| 87/87 [00:03<00:00, 27.23it/s]


Epoch 13/20 Train Loss: 0.5216
Token Accuracy: 0.6563
Sequence Accuracy: 0.2919


Epoch 14/20: 100%|██████████| 87/87 [00:03<00:00, 25.63it/s]


Epoch 14/20 Train Loss: 0.4914
Token Accuracy: 0.6698
Sequence Accuracy: 0.3128


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 27.37it/s]


Epoch 15/20 Train Loss: 0.4781
Token Accuracy: 0.6603
Sequence Accuracy: 0.3027


Epoch 16/20: 100%|██████████| 87/87 [00:03<00:00, 27.59it/s]


Epoch 16/20 Train Loss: 0.4517
Token Accuracy: 0.6600
Sequence Accuracy: 0.3118


Epoch 17/20: 100%|██████████| 87/87 [00:03<00:00, 25.60it/s]


Epoch 17/20 Train Loss: 0.4384
Token Accuracy: 0.6805
Sequence Accuracy: 0.3258


Epoch 18/20: 100%|██████████| 87/87 [00:03<00:00, 27.37it/s]


Epoch 18/20 Train Loss: 0.4217
Token Accuracy: 0.6868
Sequence Accuracy: 0.3332


Epoch 19/20: 100%|██████████| 87/87 [00:03<00:00, 27.37it/s]


Epoch 19/20 Train Loss: 0.4152
Token Accuracy: 0.6815
Sequence Accuracy: 0.3309


Epoch 20/20: 100%|██████████| 87/87 [00:03<00:00, 27.41it/s]


Epoch 20/20 Train Loss: 0.3989
Token Accuracy: 0.6884
Sequence Accuracy: 0.3334


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▂▃▃▄▅▆▆▇▇▇█▇█████
token_accuracy,▁▂▃▄▅▅▆▆▇▇▇█████████
train_loss,█▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.33341
token_accuracy,0.68839
train_loss,0.39893


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jtyi0hmc with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 21.87it/s]


Epoch 1/20 Train Loss: 3.0080
Token Accuracy: 0.0965
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 23.09it/s]


Epoch 2/20 Train Loss: 2.5641
Token Accuracy: 0.1099
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:04<00:00, 20.66it/s]


Epoch 3/20 Train Loss: 2.4506
Token Accuracy: 0.1323
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 22.81it/s]


Epoch 4/20 Train Loss: 2.3465
Token Accuracy: 0.1520
Sequence Accuracy: 0.0000


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 23.24it/s]


Epoch 5/20 Train Loss: 2.2004
Token Accuracy: 0.1905
Sequence Accuracy: 0.0000


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 23.45it/s]


Epoch 6/20 Train Loss: 2.0850
Token Accuracy: 0.2221
Sequence Accuracy: 0.0000


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 23.52it/s]


Epoch 7/20 Train Loss: 1.9692
Token Accuracy: 0.2430
Sequence Accuracy: 0.0007


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 22.11it/s]


Epoch 8/20 Train Loss: 1.8818
Token Accuracy: 0.2530
Sequence Accuracy: 0.0005


Epoch 9/20: 100%|██████████| 87/87 [00:03<00:00, 23.27it/s]


Epoch 9/20 Train Loss: 1.7812
Token Accuracy: 0.2720
Sequence Accuracy: 0.0009


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 22.45it/s]


Epoch 10/20 Train Loss: 1.6971
Token Accuracy: 0.2880
Sequence Accuracy: 0.0016


Epoch 11/20: 100%|██████████| 87/87 [00:03<00:00, 23.32it/s]


Epoch 11/20 Train Loss: 1.6004
Token Accuracy: 0.3000
Sequence Accuracy: 0.0044


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 23.16it/s]


Epoch 12/20 Train Loss: 1.5031
Token Accuracy: 0.3198
Sequence Accuracy: 0.0073


Epoch 13/20: 100%|██████████| 87/87 [00:03<00:00, 22.29it/s]


Epoch 13/20 Train Loss: 1.4387
Token Accuracy: 0.3315
Sequence Accuracy: 0.0087


Epoch 14/20: 100%|██████████| 87/87 [00:03<00:00, 23.35it/s]


Epoch 14/20 Train Loss: 1.3643
Token Accuracy: 0.3504
Sequence Accuracy: 0.0142


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 23.31it/s]


Epoch 15/20 Train Loss: 1.2953
Token Accuracy: 0.3654
Sequence Accuracy: 0.0163


Epoch 16/20: 100%|██████████| 87/87 [00:03<00:00, 23.52it/s]


Epoch 16/20 Train Loss: 1.2436
Token Accuracy: 0.3776
Sequence Accuracy: 0.0232


Epoch 17/20: 100%|██████████| 87/87 [00:03<00:00, 22.42it/s]


Epoch 17/20 Train Loss: 1.2014
Token Accuracy: 0.3942
Sequence Accuracy: 0.0273


Epoch 18/20: 100%|██████████| 87/87 [00:03<00:00, 23.29it/s]


Epoch 18/20 Train Loss: 1.1557
Token Accuracy: 0.4163
Sequence Accuracy: 0.0390


Epoch 19/20: 100%|██████████| 87/87 [00:03<00:00, 23.42it/s]


Epoch 19/20 Train Loss: 1.1068
Token Accuracy: 0.4185
Sequence Accuracy: 0.0404


Epoch 20/20: 100%|██████████| 87/87 [00:03<00:00, 22.23it/s]


Epoch 20/20 Train Loss: 1.0710
Token Accuracy: 0.4426
Sequence Accuracy: 0.0498


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▅▆▇█
token_accuracy,▁▁▂▂▃▄▄▄▅▅▅▆▆▆▆▇▇▇██
train_loss,█▆▆▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.04979
token_accuracy,0.44262
train_loss,1.07098


[34m[1mwandb[0m: Agent Starting Run: x4f5jf5l with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 26.59it/s]


Epoch 1/20 Train Loss: 2.6511
Token Accuracy: 0.1230
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 26.76it/s]


Epoch 2/20 Train Loss: 1.9707
Token Accuracy: 0.1886
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:03<00:00, 26.89it/s]


Epoch 3/20 Train Loss: 1.6708
Token Accuracy: 0.2708
Sequence Accuracy: 0.0016


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 25.31it/s]


Epoch 4/20 Train Loss: 1.3984
Token Accuracy: 0.3444
Sequence Accuracy: 0.0117


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 25.72it/s]


Epoch 5/20 Train Loss: 1.1789
Token Accuracy: 0.4251
Sequence Accuracy: 0.0427


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 26.90it/s]


Epoch 6/20 Train Loss: 1.0020
Token Accuracy: 0.4879
Sequence Accuracy: 0.0870


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 25.78it/s]


Epoch 7/20 Train Loss: 0.8709
Token Accuracy: 0.5187
Sequence Accuracy: 0.1317


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 27.23it/s]


Epoch 8/20 Train Loss: 0.7803
Token Accuracy: 0.5547
Sequence Accuracy: 0.1652


Epoch 9/20: 100%|██████████| 87/87 [00:03<00:00, 25.68it/s]


Epoch 9/20 Train Loss: 0.7013
Token Accuracy: 0.5749
Sequence Accuracy: 0.1868


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 27.36it/s]


Epoch 10/20 Train Loss: 0.6429
Token Accuracy: 0.6055
Sequence Accuracy: 0.2265


Epoch 11/20: 100%|██████████| 87/87 [00:03<00:00, 26.94it/s]


Epoch 11/20 Train Loss: 0.6048
Token Accuracy: 0.6270
Sequence Accuracy: 0.2611


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 27.31it/s]


Epoch 12/20 Train Loss: 0.5728
Token Accuracy: 0.6270
Sequence Accuracy: 0.2593


Epoch 13/20: 100%|██████████| 87/87 [00:03<00:00, 27.48it/s]


Epoch 13/20 Train Loss: 0.5396
Token Accuracy: 0.6407
Sequence Accuracy: 0.2763


Epoch 14/20: 100%|██████████| 87/87 [00:03<00:00, 25.82it/s]


Epoch 14/20 Train Loss: 0.5163
Token Accuracy: 0.6479
Sequence Accuracy: 0.2887


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 27.21it/s]


Epoch 15/20 Train Loss: 0.4904
Token Accuracy: 0.6601
Sequence Accuracy: 0.3040


Epoch 16/20: 100%|██████████| 87/87 [00:03<00:00, 26.99it/s]


Epoch 16/20 Train Loss: 0.4666
Token Accuracy: 0.6657
Sequence Accuracy: 0.3109


Epoch 17/20: 100%|██████████| 87/87 [00:03<00:00, 25.72it/s]


Epoch 17/20 Train Loss: 0.4590
Token Accuracy: 0.6635
Sequence Accuracy: 0.3077


Epoch 18/20: 100%|██████████| 87/87 [00:03<00:00, 27.07it/s]


Epoch 18/20 Train Loss: 0.4383
Token Accuracy: 0.6793
Sequence Accuracy: 0.3224


Epoch 19/20: 100%|██████████| 87/87 [00:03<00:00, 25.81it/s]


Epoch 19/20 Train Loss: 0.4247
Token Accuracy: 0.6838
Sequence Accuracy: 0.3256


Epoch 20/20: 100%|██████████| 87/87 [00:03<00:00, 27.47it/s]


Epoch 20/20 Train Loss: 0.4121
Token Accuracy: 0.6932
Sequence Accuracy: 0.3437


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▂▃▄▄▅▆▆▆▇▇▇▇▇███
token_accuracy,▁▂▃▄▅▅▆▆▇▇▇▇▇▇██████
train_loss,█▆▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.34374
token_accuracy,0.69321
train_loss,0.41209


[34m[1mwandb[0m: Agent Starting Run: jzyvyu5l with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 23.43it/s]


Epoch 1/20 Train Loss: 2.7051
Token Accuracy: 0.1309
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 22.44it/s]


Epoch 2/20 Train Loss: 2.3974
Token Accuracy: 0.1686
Sequence Accuracy: 0.0002


Epoch 3/20: 100%|██████████| 87/87 [00:03<00:00, 23.04it/s]


Epoch 3/20 Train Loss: 1.9382
Token Accuracy: 0.2197
Sequence Accuracy: 0.0005


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 23.51it/s]


Epoch 4/20 Train Loss: 1.7417
Token Accuracy: 0.2808
Sequence Accuracy: 0.0023


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 22.14it/s]


Epoch 5/20 Train Loss: 1.5589
Token Accuracy: 0.3425
Sequence Accuracy: 0.0071


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 23.26it/s]


Epoch 6/20 Train Loss: 1.3780
Token Accuracy: 0.3988
Sequence Accuracy: 0.0181


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 23.58it/s]


Epoch 7/20 Train Loss: 1.2343
Token Accuracy: 0.4234
Sequence Accuracy: 0.0319


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 23.51it/s]


Epoch 8/20 Train Loss: 1.1177
Token Accuracy: 0.4611
Sequence Accuracy: 0.0548


Epoch 9/20: 100%|██████████| 87/87 [00:03<00:00, 22.16it/s]


Epoch 9/20 Train Loss: 1.0417
Token Accuracy: 0.4768
Sequence Accuracy: 0.0638


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 23.22it/s]


Epoch 10/20 Train Loss: 0.9678
Token Accuracy: 0.4983
Sequence Accuracy: 0.0925


Epoch 11/20: 100%|██████████| 87/87 [00:03<00:00, 23.44it/s]


Epoch 11/20 Train Loss: 0.9007
Token Accuracy: 0.5241
Sequence Accuracy: 0.1237


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 22.49it/s]


Epoch 12/20 Train Loss: 0.8531
Token Accuracy: 0.5466
Sequence Accuracy: 0.1423


Epoch 13/20: 100%|██████████| 87/87 [00:03<00:00, 23.12it/s]


Epoch 13/20 Train Loss: 0.8168
Token Accuracy: 0.5510
Sequence Accuracy: 0.1487


Epoch 14/20: 100%|██████████| 87/87 [00:03<00:00, 22.03it/s]


Epoch 14/20 Train Loss: 0.7733
Token Accuracy: 0.5680
Sequence Accuracy: 0.1625


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 23.54it/s]


Epoch 15/20 Train Loss: 0.7425
Token Accuracy: 0.5886
Sequence Accuracy: 0.1831


Epoch 16/20: 100%|██████████| 87/87 [00:03<00:00, 22.57it/s]


Epoch 16/20 Train Loss: 0.7086
Token Accuracy: 0.5920
Sequence Accuracy: 0.1962


Epoch 17/20: 100%|██████████| 87/87 [00:03<00:00, 22.85it/s]


Epoch 17/20 Train Loss: 0.6861
Token Accuracy: 0.5948
Sequence Accuracy: 0.1946


Epoch 18/20: 100%|██████████| 87/87 [00:03<00:00, 22.29it/s]


Epoch 18/20 Train Loss: 0.6636
Token Accuracy: 0.6051
Sequence Accuracy: 0.2067


Epoch 19/20: 100%|██████████| 87/87 [00:04<00:00, 21.45it/s]


Epoch 19/20 Train Loss: 0.6372
Token Accuracy: 0.6230
Sequence Accuracy: 0.2279


Epoch 20/20: 100%|██████████| 87/87 [00:03<00:00, 23.39it/s]


Epoch 20/20 Train Loss: 0.6186
Token Accuracy: 0.6238
Sequence Accuracy: 0.2302


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▁▂▂▃▃▄▅▅▆▆▇▇▇▇██
token_accuracy,▁▂▂▃▄▅▅▆▆▆▇▇▇▇██████
train_loss,█▇▅▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.23015
token_accuracy,0.62381
train_loss,0.6186


[34m[1mwandb[0m: Agent Starting Run: a0r5yzvb with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 1


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 22.42it/s]


Epoch 1/20 Train Loss: 2.9926
Token Accuracy: 0.0928
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 23.78it/s]


Epoch 2/20 Train Loss: 2.4622
Token Accuracy: 0.1144
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:03<00:00, 23.77it/s]


Epoch 3/20 Train Loss: 2.3083
Token Accuracy: 0.1477
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 24.04it/s]


Epoch 4/20 Train Loss: 2.1802
Token Accuracy: 0.1682
Sequence Accuracy: 0.0007


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 24.09it/s]


Epoch 5/20 Train Loss: 2.0637
Token Accuracy: 0.1852
Sequence Accuracy: 0.0014


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 22.82it/s]


Epoch 6/20 Train Loss: 1.9729
Token Accuracy: 0.1925
Sequence Accuracy: 0.0016


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 22.89it/s]


Epoch 7/20 Train Loss: 1.8942
Token Accuracy: 0.1802
Sequence Accuracy: 0.0018


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 22.21it/s]


Epoch 8/20 Train Loss: 1.8052


0,1
epoch,▁▂▃▅▆▇█
sequence_accuracy,▁▁▁▄▆▇█
token_accuracy,▁▃▅▆▇█▇
train_loss,█▅▄▃▂▂▁

0,1
epoch,7.0
sequence_accuracy,0.00184
token_accuracy,0.18021
train_loss,1.89421


[34m[1mwandb[0m: Agent Starting Run: iujmcbf9 with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 26.05it/s]


Epoch 1/20 Train Loss: 2.6227
Token Accuracy: 0.1527
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 27.05it/s]


Epoch 2/20 Train Loss: 1.8847
Token Accuracy: 0.2065
Sequence Accuracy: 0.0007


Epoch 3/20: 100%|██████████| 87/87 [00:03<00:00, 27.02it/s]


Epoch 3/20 Train Loss: 1.5555
Token Accuracy: 0.3138
Sequence Accuracy: 0.0060


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 26.56it/s]


Epoch 4/20 Train Loss: 1.2553
Token Accuracy: 0.4132
Sequence Accuracy: 0.0390


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 25.78it/s]


Epoch 5/20 Train Loss: 1.0183
Token Accuracy: 0.4590
Sequence Accuracy: 0.0826


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 27.44it/s]


Epoch 6/20 Train Loss: 0.8648
Token Accuracy: 0.5173
Sequence Accuracy: 0.1225


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 27.32it/s]


Epoch 7/20 Train Loss: 0.7564
Token Accuracy: 0.5592
Sequence Accuracy: 0.1737


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 25.83it/s]


Epoch 8/20 Train Loss: 0.6691
Token Accuracy: 0.5863
Sequence Accuracy: 0.2081


Epoch 9/20: 100%|██████████| 87/87 [00:03<00:00, 27.30it/s]


Epoch 9/20 Train Loss: 0.6013
Token Accuracy: 0.6079
Sequence Accuracy: 0.2306


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 25.80it/s]


Epoch 10/20 Train Loss: 0.5618
Token Accuracy: 0.6331
Sequence Accuracy: 0.2646


Epoch 11/20: 100%|██████████| 87/87 [00:03<00:00, 27.28it/s]


Epoch 11/20 Train Loss: 0.5182
Token Accuracy: 0.6320
Sequence Accuracy: 0.2600


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 26.96it/s]


Epoch 12/20 Train Loss: 0.4881
Token Accuracy: 0.6600
Sequence Accuracy: 0.2953


Epoch 13/20: 100%|██████████| 87/87 [00:03<00:00, 27.12it/s]


Epoch 13/20 Train Loss: 0.4731
Token Accuracy: 0.6562
Sequence Accuracy: 0.2955


Epoch 14/20: 100%|██████████| 87/87 [00:03<00:00, 26.98it/s]


Epoch 14/20 Train Loss: 0.4435
Token Accuracy: 0.6762
Sequence Accuracy: 0.3118


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 25.24it/s]


Epoch 15/20 Train Loss: 0.4249
Token Accuracy: 0.6851
Sequence Accuracy: 0.3261


Epoch 16/20: 100%|██████████| 87/87 [00:03<00:00, 26.60it/s]


Epoch 16/20 Train Loss: 0.4068
Token Accuracy: 0.6864
Sequence Accuracy: 0.3277


Epoch 17/20: 100%|██████████| 87/87 [00:03<00:00, 26.76it/s]


Epoch 17/20 Train Loss: 0.3907
Token Accuracy: 0.6950
Sequence Accuracy: 0.3380


Epoch 18/20: 100%|██████████| 87/87 [00:03<00:00, 25.34it/s]


Epoch 18/20 Train Loss: 0.3785
Token Accuracy: 0.6953
Sequence Accuracy: 0.3474


Epoch 19/20: 100%|██████████| 87/87 [00:03<00:00, 26.51it/s]


Epoch 19/20 Train Loss: 0.3704
Token Accuracy: 0.7027
Sequence Accuracy: 0.3481


Epoch 20/20: 100%|██████████| 87/87 [00:03<00:00, 25.43it/s]


Epoch 20/20 Train Loss: 0.3633
Token Accuracy: 0.6998
Sequence Accuracy: 0.3525


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▂▃▃▄▅▆▆▆▇▇▇▇█████
token_accuracy,▁▂▃▄▅▆▆▇▇▇▇▇▇███████
train_loss,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.35246
token_accuracy,0.69977
train_loss,0.36328


[34m[1mwandb[0m: Agent Starting Run: pophi1i8 with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 22.31it/s]


Epoch 1/20 Train Loss: 2.7372
Token Accuracy: 0.1319
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 22.65it/s]


Epoch 2/20 Train Loss: 2.4305
Token Accuracy: 0.1683
Sequence Accuracy: 0.0000


Epoch 3/20: 100%|██████████| 87/87 [00:03<00:00, 22.03it/s]


Epoch 3/20 Train Loss: 2.0882
Token Accuracy: 0.1854
Sequence Accuracy: 0.0000


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 22.76it/s]


Epoch 4/20 Train Loss: 1.8425
Token Accuracy: 0.2552
Sequence Accuracy: 0.0011


Epoch 5/20: 100%|██████████| 87/87 [00:04<00:00, 21.72it/s]


Epoch 5/20 Train Loss: 1.6965
Token Accuracy: 0.2996
Sequence Accuracy: 0.0037


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 22.99it/s]


Epoch 6/20 Train Loss: 1.5629
Token Accuracy: 0.3375
Sequence Accuracy: 0.0048


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 23.05it/s]


Epoch 7/20 Train Loss: 1.4562
Token Accuracy: 0.3584
Sequence Accuracy: 0.0099


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 23.19it/s]


Epoch 8/20 Train Loss: 1.3442
Token Accuracy: 0.3792
Sequence Accuracy: 0.0124


Epoch 9/20: 100%|██████████| 87/87 [00:03<00:00, 22.87it/s]


Epoch 9/20 Train Loss: 1.2309
Token Accuracy: 0.3989
Sequence Accuracy: 0.0172


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 22.14it/s]


Epoch 10/20 Train Loss: 1.1506
Token Accuracy: 0.4464
Sequence Accuracy: 0.0395


Epoch 11/20: 100%|██████████| 87/87 [00:03<00:00, 22.83it/s]


Epoch 11/20 Train Loss: 1.0717
Token Accuracy: 0.4858
Sequence Accuracy: 0.0544


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 22.84it/s]


Epoch 12/20 Train Loss: 0.9947
Token Accuracy: 0.5073
Sequence Accuracy: 0.0776


Epoch 13/20: 100%|██████████| 87/87 [00:03<00:00, 21.91it/s]


Epoch 13/20 Train Loss: 0.9394
Token Accuracy: 0.5329
Sequence Accuracy: 0.0987


Epoch 14/20: 100%|██████████| 87/87 [00:03<00:00, 23.14it/s]


Epoch 14/20 Train Loss: 0.8960
Token Accuracy: 0.5414
Sequence Accuracy: 0.1138


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 21.96it/s]


Epoch 15/20 Train Loss: 0.8561
Token Accuracy: 0.5585
Sequence Accuracy: 0.1322


Epoch 16/20: 100%|██████████| 87/87 [00:03<00:00, 23.19it/s]


Epoch 16/20 Train Loss: 0.8172
Token Accuracy: 0.5604
Sequence Accuracy: 0.1526


Epoch 17/20: 100%|██████████| 87/87 [00:03<00:00, 23.22it/s]


Epoch 17/20 Train Loss: 0.7808
Token Accuracy: 0.5908
Sequence Accuracy: 0.1794


Epoch 18/20: 100%|██████████| 87/87 [00:03<00:00, 22.90it/s]


Epoch 18/20 Train Loss: 0.7510
Token Accuracy: 0.5909
Sequence Accuracy: 0.1838


Epoch 19/20: 100%|██████████| 87/87 [00:03<00:00, 23.10it/s]


Epoch 19/20 Train Loss: 0.7239
Token Accuracy: 0.6069
Sequence Accuracy: 0.2054


Epoch 20/20: 100%|██████████| 87/87 [00:03<00:00, 21.85it/s]


Epoch 20/20 Train Loss: 0.7046
Token Accuracy: 0.6149
Sequence Accuracy: 0.2125


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▁▁▁▁▂▂▃▄▄▅▅▆▇▇██
token_accuracy,▁▂▂▃▃▄▄▅▅▆▆▆▇▇▇▇████
train_loss,█▇▆▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.21248
token_accuracy,0.61493
train_loss,0.70459


[34m[1mwandb[0m: Agent Starting Run: wshn3ems with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	beam_size: 5
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	num_decoder_layers: 2
[34m[1mwandb[0m: 	num_encoder_layers: 2


Epoch 1/20: 100%|██████████| 87/87 [00:03<00:00, 27.19it/s]


Epoch 1/20 Train Loss: 2.6186
Token Accuracy: 0.1540
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 87/87 [00:03<00:00, 25.88it/s]


Epoch 2/20 Train Loss: 1.9867
Token Accuracy: 0.2009
Sequence Accuracy: 0.0002


Epoch 3/20: 100%|██████████| 87/87 [00:03<00:00, 27.12it/s]


Epoch 3/20 Train Loss: 1.6450
Token Accuracy: 0.2769
Sequence Accuracy: 0.0028


Epoch 4/20: 100%|██████████| 87/87 [00:03<00:00, 27.24it/s]


Epoch 4/20 Train Loss: 1.3526
Token Accuracy: 0.3679
Sequence Accuracy: 0.0174


Epoch 5/20: 100%|██████████| 87/87 [00:03<00:00, 27.35it/s]


Epoch 5/20 Train Loss: 1.1227
Token Accuracy: 0.4571
Sequence Accuracy: 0.0672


Epoch 6/20: 100%|██████████| 87/87 [00:03<00:00, 27.05it/s]


Epoch 6/20 Train Loss: 0.9437
Token Accuracy: 0.5084
Sequence Accuracy: 0.1104


Epoch 7/20: 100%|██████████| 87/87 [00:03<00:00, 25.78it/s]


Epoch 7/20 Train Loss: 0.8170
Token Accuracy: 0.5592
Sequence Accuracy: 0.1634


Epoch 8/20: 100%|██████████| 87/87 [00:03<00:00, 27.25it/s]


Epoch 8/20 Train Loss: 0.7217
Token Accuracy: 0.5908
Sequence Accuracy: 0.2047


Epoch 9/20: 100%|██████████| 87/87 [00:03<00:00, 27.20it/s]


Epoch 9/20 Train Loss: 0.6571
Token Accuracy: 0.6008
Sequence Accuracy: 0.2196


Epoch 10/20: 100%|██████████| 87/87 [00:03<00:00, 27.08it/s]


Epoch 10/20 Train Loss: 0.6087
Token Accuracy: 0.6152
Sequence Accuracy: 0.2396


Epoch 11/20: 100%|██████████| 87/87 [00:03<00:00, 23.06it/s]


Epoch 11/20 Train Loss: 0.5680
Token Accuracy: 0.6323
Sequence Accuracy: 0.2568


Epoch 12/20: 100%|██████████| 87/87 [00:03<00:00, 26.95it/s]


Epoch 12/20 Train Loss: 0.5335
Token Accuracy: 0.6525
Sequence Accuracy: 0.2795


Epoch 13/20: 100%|██████████| 87/87 [00:03<00:00, 25.40it/s]


Epoch 13/20 Train Loss: 0.5046
Token Accuracy: 0.6638
Sequence Accuracy: 0.2923


Epoch 14/20: 100%|██████████| 87/87 [00:03<00:00, 27.11it/s]


Epoch 14/20 Train Loss: 0.4790
Token Accuracy: 0.6647
Sequence Accuracy: 0.3027


Epoch 15/20: 100%|██████████| 87/87 [00:03<00:00, 26.87it/s]


Epoch 15/20 Train Loss: 0.4603
Token Accuracy: 0.6740
Sequence Accuracy: 0.3130


Epoch 16/20: 100%|██████████| 87/87 [00:03<00:00, 25.81it/s]


Epoch 16/20 Train Loss: 0.4526
Token Accuracy: 0.6732
Sequence Accuracy: 0.3201


Epoch 17/20: 100%|██████████| 87/87 [00:03<00:00, 27.05it/s]


Epoch 17/20 Train Loss: 0.4350
Token Accuracy: 0.6768
Sequence Accuracy: 0.3169


Epoch 18/20: 100%|██████████| 87/87 [00:03<00:00, 27.01it/s]


Epoch 18/20 Train Loss: 0.4210
Token Accuracy: 0.6851
Sequence Accuracy: 0.3362


Epoch 19/20: 100%|██████████| 87/87 [00:03<00:00, 27.29it/s]


Epoch 19/20 Train Loss: 0.4096
Token Accuracy: 0.6891
Sequence Accuracy: 0.3414


Epoch 20/20: 100%|██████████| 87/87 [00:03<00:00, 27.48it/s]


Epoch 20/20 Train Loss: 0.3977
Token Accuracy: 0.6947
Sequence Accuracy: 0.3447


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▁▁▂▃▄▅▅▆▆▇▇▇▇█▇███
token_accuracy,▁▂▃▄▅▆▆▇▇▇▇▇████████
train_loss,█▆▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.34465
token_accuracy,0.69465
train_loss,0.39775


[34m[1mwandb[0m: Agent Starting Run: 6s5ecnvs with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 4
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	embed_size: 64
[34m[1mwandb[0m: 	hidden_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	num_decoder_layers: 3
[34m[1mwandb[0m: 	num_encoder_layers: 3


Epoch 1/20: 100%|██████████| 346/346 [00:12<00:00, 27.28it/s]


Epoch 1/20 Train Loss: 2.3962
Token Accuracy: 0.1968
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 346/346 [00:12<00:00, 27.68it/s]


Epoch 2/20 Train Loss: 1.7843
Token Accuracy: 0.3300
Sequence Accuracy: 0.0069


Epoch 3/20: 100%|██████████| 346/346 [00:12<00:00, 27.62it/s]


Epoch 3/20 Train Loss: 1.3589
Token Accuracy: 0.4440
Sequence Accuracy: 0.0422


Epoch 4/20: 100%|██████████| 346/346 [00:12<00:00, 27.38it/s]


Epoch 4/20 Train Loss: 1.0817
Token Accuracy: 0.5138
Sequence Accuracy: 0.1035


Epoch 5/20: 100%|██████████| 346/346 [00:12<00:00, 27.69it/s]


Epoch 5/20 Train Loss: 0.9193
Token Accuracy: 0.5644
Sequence Accuracy: 0.1535


Epoch 6/20: 100%|██████████| 346/346 [00:12<00:00, 27.21it/s]


Epoch 6/20 Train Loss: 0.8189
Token Accuracy: 0.5878
Sequence Accuracy: 0.1831


Epoch 7/20: 100%|██████████| 346/346 [00:12<00:00, 27.69it/s]


Epoch 7/20 Train Loss: 0.7466
Token Accuracy: 0.6074
Sequence Accuracy: 0.2194


Epoch 8/20: 100%|██████████| 346/346 [00:12<00:00, 27.66it/s]


Epoch 8/20 Train Loss: 0.6940
Token Accuracy: 0.6250
Sequence Accuracy: 0.2396


Epoch 9/20: 100%|██████████| 346/346 [00:12<00:00, 27.47it/s]


Epoch 9/20 Train Loss: 0.6529
Token Accuracy: 0.6399
Sequence Accuracy: 0.2529


Epoch 10/20: 100%|██████████| 346/346 [00:12<00:00, 27.72it/s]


Epoch 10/20 Train Loss: 0.6244
Token Accuracy: 0.6542
Sequence Accuracy: 0.2760


Epoch 11/20: 100%|██████████| 346/346 [00:12<00:00, 27.41it/s]


Epoch 11/20 Train Loss: 0.5904
Token Accuracy: 0.6548
Sequence Accuracy: 0.2774


Epoch 12/20: 100%|██████████| 346/346 [00:12<00:00, 27.70it/s]


Epoch 12/20 Train Loss: 0.5667
Token Accuracy: 0.6588
Sequence Accuracy: 0.2841


Epoch 13/20: 100%|██████████| 346/346 [00:12<00:00, 27.67it/s]


Epoch 13/20 Train Loss: 0.5484
Token Accuracy: 0.6673
Sequence Accuracy: 0.2981


Epoch 14/20: 100%|██████████| 346/346 [00:12<00:00, 27.52it/s]


Epoch 14/20 Train Loss: 0.5385
Token Accuracy: 0.6811
Sequence Accuracy: 0.3123


Epoch 15/20: 100%|██████████| 346/346 [00:12<00:00, 27.71it/s]


Epoch 15/20 Train Loss: 0.5191
Token Accuracy: 0.6731
Sequence Accuracy: 0.3073


Epoch 16/20: 100%|██████████| 346/346 [00:12<00:00, 27.02it/s]


Epoch 16/20 Train Loss: 0.5090
Token Accuracy: 0.6861
Sequence Accuracy: 0.3229


Epoch 17/20: 100%|██████████| 346/346 [00:12<00:00, 27.61it/s]


Epoch 17/20 Train Loss: 0.4997
Token Accuracy: 0.6867
Sequence Accuracy: 0.3212


Epoch 18/20: 100%|██████████| 346/346 [00:12<00:00, 27.59it/s]


Epoch 18/20 Train Loss: 0.4885
Token Accuracy: 0.6878
Sequence Accuracy: 0.3313


Epoch 19/20: 100%|██████████| 346/346 [00:12<00:00, 27.35it/s]


Epoch 19/20 Train Loss: 0.4783
Token Accuracy: 0.6957
Sequence Accuracy: 0.3421


Epoch 20/20: 100%|██████████| 346/346 [00:12<00:00, 27.60it/s]


Epoch 20/20 Train Loss: 0.4691
Token Accuracy: 0.7002
Sequence Accuracy: 0.3474


0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
sequence_accuracy,▁▁▂▃▄▅▅▆▆▇▇▇▇▇▇█▇███
token_accuracy,▁▃▄▅▆▆▇▇▇▇▇▇████████
train_loss,█▆▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
sequence_accuracy,0.34741
token_accuracy,0.70019
train_loss,0.46909


In [16]:
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F

filepath_test = "/kaggle/input/dakshina-dataset-v1-0/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv"
pairs_test = load_dakshina_lexicon_pairs(filepath_test)
dataset_test = TransliterationDataset(pairs_test, input_char2idx, output_char2idx)
dataloader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, collate_fn=collate_fn)

filepath_val = "/kaggle/input/dakshina-dataset-v1-0/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv"
pairs_val = load_dakshina_lexicon_pairs(filepath_val)
dataset_val = TransliterationDataset(pairs_val, input_char2idx, output_char2idx)
dataloader_val = DataLoader(dataset_val, batch_size=1, shuffle=False, collate_fn=collate_fn)

dataset = TransliterationDataset(pairs, input_char2idx, output_char2idx)
dataloader = DataLoader(dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

#  Best configuration
embed_size=64
num_encoder_layers=3
num_decoder_layers=3
hidden_size=64
cell_type='lstm'
dropout=0.4
batch_size=128
learning_rate=0.005
beam_size=4
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize encoder and decoder
encoder = Encoder(
    input_vocab_size=len(input_char2idx),
    embed_size=embed_size,
    hidden_size=hidden_size,
    num_encoder_layers=num_encoder_layers,
    cell_type=cell_type,
    dropout=dropout
).to(device)

decoder = Decoder(
    output_vocab_size=len(output_char2idx),
    embed_size=embed_size,
    hidden_size=hidden_size,
    num_decoder_layers=num_decoder_layers,
    cell_type=cell_type,
    dropout=dropout
).to(device)

encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=learning_rate, weight_decay=1e-5)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss(ignore_index=output_char2idx['<pad>'])

dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

num_epochs = 20
for epoch in range(num_epochs):
    # ======== TRAINING ========
    encoder.train()
    decoder.train()
    total_loss = 0

    with tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}") as pbar:
        for input_tensor, input_lengths, target_tensor, target_lengths in pbar:
            input_tensor = input_tensor.to(device)
            target_tensor = target_tensor.to(device)

            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            encoder_outputs, encoder_hidden = encoder(input_tensor, input_lengths)
            decoder_input = target_tensor[:, 0].unsqueeze(1)  # <sos>
            decoder_hidden = encoder_hidden

            loss = 0
            max_target_len = target_tensor.size(1)

            for t in range(1, max_target_len):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                output = decoder_output.squeeze(1)
                # print(f'output = {output}')
                # print(f'target tensor = {target_tensor[:,t]}')
                loss += criterion(output, target_tensor[:, t])
                decoder_input = target_tensor[:, t].unsqueeze(1)  # Teacher forcing

            loss.backward()
            torch.nn.utils.clip_grad_norm_(encoder.parameters(), max_norm=1.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), max_norm=1.0)

            encoder_optimizer.step()
            decoder_optimizer.step()

            total_loss += loss.item() / (max_target_len - 1)

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs} Train Loss: {avg_loss:.4f}")

    # ======== VALIDATION ========
    encoder.eval()
    decoder.eval()
    correct_sequences = 0
    total_sequences = 0
    correct_tokens = 0
    total_tokens = 0
    beam_width = beam_size  # You can change this
    with torch.no_grad():
        for input_tensor, input_lengths, target_tensor, target_lengths in dataloader_val:
            input_tensor = input_tensor.to(device)
            target_tensor = target_tensor.to(device)
    
            encoder_outputs, encoder_hidden = encoder(input_tensor, input_lengths)
            max_target_len = target_tensor.size(1)
            total_sequences += 1
    
            # Beam is a list of tuples: (sequence_so_far, cumulative_log_prob, decoder_hidden)
            beam = [([output_char2idx['<sos>']], 0.0, encoder_hidden)]
    
            completed_sequences = []
    
            for _ in range(1, max_target_len):
                new_beam = []
                for seq, score, hidden in beam:
                    decoder_input = torch.tensor([[seq[-1]]], device=device)
                    decoder_output, hidden_next = decoder(decoder_input, hidden)
                    log_probs = F.log_softmax(decoder_output.squeeze(1), dim=1)
    
                    topk_log_probs, topk_indices = log_probs.topk(beam_width)
    
                    for k in range(beam_width):
                        next_token = topk_indices[0][k].item()
                        next_score = score + topk_log_probs[0][k].item()
                        new_seq = seq + [next_token]
                        new_beam.append((new_seq, next_score, hidden_next))
    
                # Keep top `beam_width` beams with highest scores
                beam = sorted(new_beam, key=lambda x: x[1], reverse=True)[:beam_width]
    
                # Move completed sequences out
                beam, completed = [], []
                for seq, score, hidden in new_beam:
                    if seq[-1] == output_char2idx['<eos>']:
                        completed_sequences.append((seq, score))
                    else:
                        beam.append((seq, score, hidden))
                beam = sorted(beam, key=lambda x: x[1], reverse=True)[:beam_width]
    
            # Choose best completed or best incomplete beam
            if completed_sequences:
                best_seq = max(completed_sequences, key=lambda x: x[1])[0]
            else:
                best_seq = max(beam, key=lambda x: x[1])[0]
    
            # Remove <sos> if present
            if best_seq[0] == output_char2idx['<sos>']:
                best_seq = best_seq[1:]
    
            # Compare prediction with target
            target_seq = target_tensor[0, 1:].tolist()
            pad_idx = output_char2idx['<pad>']
    
            # Token accuracy
            for pred_token, tgt_token in zip(best_seq, target_seq):
                if tgt_token == pad_idx:
                    break
                if pred_token == tgt_token:
                    correct_tokens += 1
                total_tokens += 1
    
            # Sequence accuracy
            target_trimmed = [t for t in target_seq if t != pad_idx]
            best_seq_trimmed = best_seq[:len(target_trimmed)]
            if best_seq_trimmed == target_trimmed:
                correct_sequences += 1
    
            # Optional print
            # predicted_word = indices_to_words([best_seq], output_idx2char)[0]
            # actual_word = indices_to_words([target_trimmed], output_idx2char)[0]
            # # print(f"Predicted: {predicted_word.ljust(20)} | Actual: {actual_word}")
    
    sequence_accuracy = correct_sequences / total_sequences if total_sequences > 0 else 0
    token_accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0
    
    print(f"Token Accuracy: {token_accuracy:.4f}")
    print(f"Sequence Accuracy: {sequence_accuracy:.4f}")

Epoch 1/20: 100%|██████████| 346/346 [00:11<00:00, 30.09it/s]


Epoch 1/20 Train Loss: 2.4632
Token Accuracy: 0.1851
Sequence Accuracy: 0.0000


Epoch 2/20: 100%|██████████| 346/346 [00:11<00:00, 29.82it/s]


Epoch 2/20 Train Loss: 1.7781
Token Accuracy: 0.3352
Sequence Accuracy: 0.0071


Epoch 3/20: 100%|██████████| 346/346 [00:11<00:00, 29.69it/s]


Epoch 3/20 Train Loss: 1.3679
Token Accuracy: 0.4372
Sequence Accuracy: 0.0379


Epoch 4/20: 100%|██████████| 346/346 [00:11<00:00, 29.83it/s]


Epoch 4/20 Train Loss: 1.1130
Token Accuracy: 0.5031
Sequence Accuracy: 0.0945


Epoch 5/20: 100%|██████████| 346/346 [00:11<00:00, 29.00it/s]


Epoch 5/20 Train Loss: 0.9587
Token Accuracy: 0.5527
Sequence Accuracy: 0.1374


Epoch 6/20: 100%|██████████| 346/346 [00:11<00:00, 29.73it/s]


Epoch 6/20 Train Loss: 0.8397
Token Accuracy: 0.5839
Sequence Accuracy: 0.1827


Epoch 7/20: 100%|██████████| 346/346 [00:11<00:00, 29.54it/s]


Epoch 7/20 Train Loss: 0.7599
Token Accuracy: 0.6088
Sequence Accuracy: 0.2129


Epoch 8/20: 100%|██████████| 346/346 [00:11<00:00, 29.65it/s]


Epoch 8/20 Train Loss: 0.7014
Token Accuracy: 0.6251
Sequence Accuracy: 0.2329


Epoch 9/20: 100%|██████████| 346/346 [00:11<00:00, 29.35it/s]


Epoch 9/20 Train Loss: 0.6558
Token Accuracy: 0.6215
Sequence Accuracy: 0.2377


Epoch 10/20: 100%|██████████| 346/346 [00:11<00:00, 29.69it/s]


Epoch 10/20 Train Loss: 0.6214
Token Accuracy: 0.6454
Sequence Accuracy: 0.2708


Epoch 11/20: 100%|██████████| 346/346 [00:11<00:00, 29.55it/s]


Epoch 11/20 Train Loss: 0.5930
Token Accuracy: 0.6623
Sequence Accuracy: 0.2861


Epoch 12/20: 100%|██████████| 346/346 [00:11<00:00, 29.51it/s]


Epoch 12/20 Train Loss: 0.5677
Token Accuracy: 0.6696
Sequence Accuracy: 0.3001


Epoch 13/20: 100%|██████████| 346/346 [00:11<00:00, 29.47it/s]


Epoch 13/20 Train Loss: 0.5445
Token Accuracy: 0.6721
Sequence Accuracy: 0.3061


Epoch 14/20: 100%|██████████| 346/346 [00:11<00:00, 29.70it/s]


Epoch 14/20 Train Loss: 0.5339
Token Accuracy: 0.6792
Sequence Accuracy: 0.3139


Epoch 15/20: 100%|██████████| 346/346 [00:11<00:00, 29.41it/s]


Epoch 15/20 Train Loss: 0.5139
Token Accuracy: 0.6819
Sequence Accuracy: 0.3242


Epoch 16/20: 100%|██████████| 346/346 [00:11<00:00, 29.72it/s]


Epoch 16/20 Train Loss: 0.4998
Token Accuracy: 0.6885
Sequence Accuracy: 0.3327


Epoch 17/20: 100%|██████████| 346/346 [00:11<00:00, 29.58it/s]


Epoch 17/20 Train Loss: 0.4880
Token Accuracy: 0.6903
Sequence Accuracy: 0.3437


Epoch 18/20: 100%|██████████| 346/346 [00:11<00:00, 29.57it/s]


Epoch 18/20 Train Loss: 0.4762
Token Accuracy: 0.6977
Sequence Accuracy: 0.3465


Epoch 19/20: 100%|██████████| 346/346 [00:11<00:00, 29.80it/s]


Epoch 19/20 Train Loss: 0.4795
Token Accuracy: 0.6980
Sequence Accuracy: 0.3437


Epoch 20/20: 100%|██████████| 346/346 [00:11<00:00, 29.56it/s]


Epoch 20/20 Train Loss: 0.4595
Token Accuracy: 0.7008
Sequence Accuracy: 0.3508


In [28]:
# ======== TEST ========
encoder.eval()
decoder.eval()
correct_sequences = 0
total_sequences = 0
correct_tokens = 0
total_tokens = 0
beam_width = beam_size  # You can change this
result=[]
with torch.no_grad():
    for input_tensor, input_lengths, target_tensor, target_lengths in dataloader_test:
        input_tensor = input_tensor.to(device)
        target_tensor = target_tensor.to(device)

        encoder_outputs, encoder_hidden = encoder(input_tensor, input_lengths)
        max_target_len = target_tensor.size(1)
        total_sequences += 1

        # Beam is a list of tuples: (sequence_so_far, cumulative_log_prob, decoder_hidden)
        beam = [([output_char2idx['<sos>']], 0.0, encoder_hidden)]

        completed_sequences = []

        for _ in range(1, max_target_len):
            new_beam = []
            for seq, score, hidden in beam:
                decoder_input = torch.tensor([[seq[-1]]], device=device)
                decoder_output, hidden_next = decoder(decoder_input, hidden)
                log_probs = F.log_softmax(decoder_output.squeeze(1), dim=1)

                topk_log_probs, topk_indices = log_probs.topk(beam_width)

                for k in range(beam_width):
                    next_token = topk_indices[0][k].item()
                    next_score = score + topk_log_probs[0][k].item()
                    new_seq = seq + [next_token]
                    new_beam.append((new_seq, next_score, hidden_next))

            # Keep top `beam_width` beams with highest scores
            beam = sorted(new_beam, key=lambda x: x[1], reverse=True)[:beam_width]

            # Move completed sequences out
            beam, completed = [], []
            for seq, score, hidden in new_beam:
                if seq[-1] == output_char2idx['<eos>']:
                    completed_sequences.append((seq, score))
                else:
                    beam.append((seq, score, hidden))
            beam = sorted(beam, key=lambda x: x[1], reverse=True)[:beam_width]

        # Choose best completed or best incomplete beam
        if completed_sequences:
            best_seq = max(completed_sequences, key=lambda x: x[1])[0]
        else:
            best_seq = max(beam, key=lambda x: x[1])[0]

        # Remove <sos> if present
        if best_seq[0] == output_char2idx['<sos>']:
            best_seq = best_seq[1:]
        
        # Compare prediction with target
        target_seq = target_tensor[0, 1:].tolist()
        pad_idx = output_char2idx['<pad>']

        # Token accuracy
        for pred_token, tgt_token in zip(best_seq, target_seq):
            if tgt_token == pad_idx:
                break
            if pred_token == tgt_token:
                correct_tokens += 1
            total_tokens += 1

        # Sequence accuracy
        target_trimmed = [t for t in target_seq if t != pad_idx]
        best_seq_trimmed = best_seq[:len(target_trimmed)]
        if best_seq_trimmed == target_trimmed:
            correct_sequences += 1
        
        if best_seq[-1] == output_char2idx['<eos>']:
            best_seq = best_seq[:best_seq.index(output_char2idx['<eos>'])]
        predicted_word=''.join(output_idx2char[i] for i in best_seq)
        target_seq = target_tensor.tolist() if hasattr(target_tensor, 'tolist') else target_tensor
        if isinstance(target_seq[0], list):
            target_seq = target_seq[0]
        # Remove <sos> and truncate at <eos> if present
        if target_seq[0] == output_char2idx['<sos>']:
            target_seq = target_seq[1:]
        if output_char2idx.get('<eos>') in target_seq:
            target_seq = target_seq[:target_seq.index(output_char2idx['<eos>'])]
        
        target_word = ''.join(output_idx2char[i] for i in target_seq)
        
        input_seq = input_tensor.tolist() if hasattr(input_tensor, 'tolist') else input_tensor
        if isinstance(input_seq[0], list):
            input_seq = input_seq[0]

        if input_seq[0] == input_char2idx['<sos>']:
            input_seq = input_seq[1:]
        if input_char2idx.get('<eos>') in input_seq:
            input_seq = input_seq[:input_seq.index(input_char2idx['<eos>'])]
        input_word = ''.join(input_idx2char[i] for i in input_seq)

        result.append((input_word, predicted_word, target_word))
        # Optional print
        # predicted_word = indices_to_words([best_seq], output_idx2char)[0]
        # actual_word = indices_to_words([target_trimmed], output_idx2char)[0]
        # # print(f"Predicted: {predicted_word.ljust(20)} | Actual: {actual_word}")

sequence_accuracy = correct_sequences / total_sequences if total_sequences > 0 else 0
token_accuracy = correct_tokens / total_tokens if total_tokens > 0 else 0
print("Test:")
print(f"Token Accuracy: {token_accuracy:.4f}")
print(f"Sequence Accuracy: {sequence_accuracy:.4f}")

Test:
Token Accuracy: 0.7133
Sequence Accuracy: 0.3541


In [29]:
print(result[0])

('ank', 'एनक', 'अंक')


In [30]:
import csv

with open('predictions_vanilla.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Input', 'Predicted', 'Target'])  # Header
    for t, pred, target in result:
        writer.writerow([t, pred, target])