In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
'''
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
'''
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torch.nn as nn
import random
import wandb
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Encoder module............................................................
class InputEncoder(nn.Module):
    def __init__(self, vocab_size, embedding_size, hidden_size, layers, rnn_type='LSTM', dropout_rate=0.2, is_bidirectional=False):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_size, padding_idx=0)
        self.is_bidirectional = is_bidirectional
        self.rnn_type = rnn_type
        self.num_directions = 2 if is_bidirectional else 1
        self.hidden_size = hidden_size
        self.num_layers = layers

        rnn_class = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}[rnn_type]
        self.rnn = rnn_class(
            embedding_size,
            hidden_size // self.num_directions,
            layers,
            dropout=dropout_rate,
            batch_first=True,
            bidirectional=is_bidirectional
        )
# forward pass........................................
    def forward(self, x):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded)
        return hidden

# Decoder module...................................................
class OutputDecoder(nn.Module):
    def __init__(self, vocab_size, embedding_size, hidden_size, layers, rnn_type='LSTM', dropout_rate=0.2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_size, padding_idx=0)
        self.rnn_type = rnn_type
        self.hidden_size = hidden_size
        self.num_layers = layers
        # model defining.....................................
        rnn_class = {'RNN': nn.RNN, 'LSTM': nn.LSTM, 'GRU': nn.GRU}[rnn_type]
        self.rnn = rnn_class(
            embedding_size,
            hidden_size,
            layers,
            dropout=dropout_rate,
            batch_first=True
        )
        # output layer................................
        self.output_layer = nn.Linear(hidden_size, vocab_size)
        
# forward pass.........................    
    def forward(self, token, hidden):
        token = token.unsqueeze(1)
        embedded = self.embedding(token)
        output, hidden = self.rnn(embedded, hidden)
        output = self.output_layer(output.squeeze(1))
        return output, hidden

# Main module..................................................
class TransliterationModel(nn.Module):
    def __init__(self, input_vocab_size, output_vocab_size, embedding_size, hidden_size, enc_layers, dec_layers,
                 rnn_type='LSTM', dropout_rate=0.2, is_bidirectional=False):
        super().__init__()
        self.encoder = InputEncoder(input_vocab_size, embedding_size, hidden_size, enc_layers, rnn_type, dropout_rate, is_bidirectional)
        self.decoder = OutputDecoder(output_vocab_size, embedding_size, hidden_size, dec_layers, rnn_type, dropout_rate)
        self.rnn_type = rnn_type
        self.hidden_size = hidden_size
        self.enc_layers = enc_layers
        self.dec_layers = dec_layers
        self.is_bidirectional = is_bidirectional

    # forward pass.................................................................
    def forward(self, source, target, teacher_forcing_prob=0.5):
        batch_size, target_len = target.size()
        output_vocab_size = self.decoder.output_layer.out_features
        predictions = torch.zeros(batch_size, target_len, output_vocab_size, device=source.device)

        encoder_hidden = self.encoder(source)
        
# bidirectional.....................................................
        def merge_bidirectional(state):
            return torch.cat([state[::2], state[1::2]], dim=2)

        def match_layers(state, required_layers):
            actual_layers = state.size(0)
            if actual_layers == required_layers:
                return state
            elif actual_layers < required_layers:
                pad = torch.zeros(required_layers - actual_layers, *state.shape[1:], device=state.device)
                return torch.cat([state, pad], dim=0)
            else:
                return state[:required_layers]

        if self.rnn_type == 'LSTM':
            h, c = encoder_hidden
            if self.is_bidirectional:
                h = merge_bidirectional(h)
                c = merge_bidirectional(c)
            h = match_layers(h, self.dec_layers)
            c = match_layers(c, self.dec_layers)
            decoder_hidden = (h, c)
        else:
            h = encoder_hidden
            if self.is_bidirectional:
                h = merge_bidirectional(h)
            h = match_layers(h, self.dec_layers)
            decoder_hidden = h

        decoder_input = target[:, 0]
        for t in range(1, target_len):
            output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            predictions[:, t] = output
            top1 = output.argmax(1)
            decoder_input = target[:, t] if random.random() < teacher_forcing_prob else top1

        return predictions



In [3]:
# Data processing and vocabulary.............................
def build_vocab_and_prepare_batch(seqs, device):
    special_tokens = {'<pad>': 0, '<sos>': 1, '<eos>': 2, '<unk>': 3}
    unique_chars_latin = sorted(set(ch for seq in seqs for ch in seq[0]))
    unique_chars_dev = sorted(set(ch for seq in seqs for ch in seq[1]))

    src_vocab = {ch: idx+len(special_tokens) for idx, ch in enumerate(unique_chars_latin)}
    src_vocab.update(special_tokens)
    tgt_vocab = {ch: idx+len(special_tokens) for idx, ch in enumerate(unique_chars_dev)}
    tgt_vocab.update(special_tokens)
    idx2src = {idx: ch for ch, idx in src_vocab.items()}
    idx2tgt = {idx: ch for ch, idx in tgt_vocab.items()}

    def encode_text(seq, vocab):
        return [vocab.get(ch, vocab['<unk>']) for ch in seq]

    def create_batch(pairs):
        src = [torch.tensor(encode_text(x, src_vocab) + [src_vocab['<eos>']]) for x, _ in pairs]
        tgt = [torch.tensor([tgt_vocab['<sos>']] + encode_text(y, tgt_vocab) + [tgt_vocab['<eos>']]) for _, y in pairs]
        src = pad_sequence(src, batch_first=True, padding_value=src_vocab['<pad>'])
        tgt = pad_sequence(tgt, batch_first=True, padding_value=tgt_vocab['<pad>'])
        return src.to(device), tgt.to(device)

    return src_vocab, idx2src, tgt_vocab, idx2tgt, create_batch, unique_chars_latin, unique_chars_dev

# input.................................
def read_pairs(file_path):
    with open(file_path, encoding='utf-8') as f:
        return [(line.split('\t')[1], line.split('\t')[0]) for line in f.read().strip().split('\n') if '\t' in line]

# word level accuracy..................................
def compute_word_level_accuracy(preds, targets, vocab):
    sos, eos, pad = vocab['<sos>'], vocab['<eos>'], vocab['<pad>']
    preds = preds.tolist()
    targets = targets.tolist()
    correct = 0
    for p, t in zip(preds, targets):
        p = [x for x in p if x != pad and x != eos]
        t = [x for x in t if x != pad and x != eos]
        if p == t:
            correct += 1
    return correct / len(preds) * 100

# Training funtion..................................................
def run_training():
    wandb.init(config={
        "embedding_size": 128,
        "hidden_size": 256,
        "enc_layers": 2,
        "dec_layers": 2,
        "rnn_type": "LSTM",
        "dropout_rate": 0.2,
        "epochs": 10,
        "batch_size": 64,
        "is_bidirectional": False,
        "learning_rate": 0.001,
        "optimizer": "adam",
        "teacher_forcing_prob": 0.5
    })
    cfg = wandb.config
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_path = "/kaggle/input/dakshina-data/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
    dev_path = "/kaggle/input/dakshina-data/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv"
    train_set = read_pairs(train_path)
    dev_set = read_pairs(dev_path)

    # src_vocab, idx2src, tgt_vocab, idx2tgt, create_batch = build_vocab_and_prepare_batch(train_set, device)
    src_vocab, idx2src, tgt_vocab, idx2tgt, create_batch,unique_chars_latin, unique_chars_dev = build_vocab_and_prepare_batch(train_set, device)
    model = TransliterationModel(len(src_vocab), len(tgt_vocab), cfg.embedding_size, cfg.hidden_size,
                                 cfg.enc_layers, cfg.dec_layers, cfg.rnn_type, cfg.dropout_rate, cfg.is_bidirectional).to(device)

    optimizer = optim.Adam(model.parameters(), lr=cfg.learning_rate)
    criterion = nn.CrossEntropyLoss(ignore_index=tgt_vocab['<pad>'])

    epochs = cfg.epochs if isinstance(cfg.epochs, int) else cfg.epochs[0]
    for epoch in range(epochs):
        model.train()
        total_loss, total_acc = 0, 0
        random.shuffle(train_set)

        for i in range(0, len(train_set), cfg.batch_size):
            batch = train_set[i:i+cfg.batch_size]
            src, tgt = create_batch(batch)

            optimizer.zero_grad()
            outputs = model(src, tgt, cfg.teacher_forcing_prob)
            loss = criterion(outputs[:, 1:].reshape(-1, outputs.size(-1)), tgt[:, 1:].reshape(-1))

            preds = outputs.argmax(-1)
            acc = compute_word_level_accuracy(preds[:, 1:], tgt[:, 1:], tgt_vocab)

            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_acc += acc

        avg_train_loss = total_loss / (len(train_set) // cfg.batch_size)
        avg_train_acc = total_acc / (len(train_set) // cfg.batch_size)

        model.eval()
        dev_loss, dev_acc = 0, 0
        printed = 0
        with torch.no_grad():
            for i in range(0, len(dev_set), cfg.batch_size):
                batch = dev_set[i:i+cfg.batch_size]
                src, tgt = create_batch(batch)
                outputs = model(src, tgt, 0)
                loss = criterion(outputs[:, 1:].reshape(-1, outputs.size(-1)), tgt[:, 1:].reshape(-1))

                preds = outputs.argmax(-1)
                acc = compute_word_level_accuracy(preds[:, 1:], tgt[:, 1:], tgt_vocab)

                dev_loss += loss.item()
                dev_acc += acc

        avg_dev_loss = dev_loss / (len(dev_set) // cfg.batch_size)
        avg_dev_acc = dev_acc / (len(dev_set) // cfg.batch_size)

        wandb.log({
            "Train Loss": avg_train_loss,
            "Train Accuracy": avg_train_acc,
            "Validation Loss": avg_dev_loss,
            "Validation Accuracy": avg_dev_acc,
            "Epoch": epoch + 1
        })

        print(f"Epoch {epoch + 1}/{cfg.epochs} | Train Loss: {avg_train_loss:.4f}, Train Acc: {avg_train_acc:.2f}% | Val Loss: {avg_dev_loss:.4f}, Val Acc: {avg_dev_acc:.2f}%")
    
    wandb.finish()
    return model


In [7]:
# sweep configuration..............................................
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'Validation Accuracy', 'goal': 'maximize'},
    'parameters': {
        'embed_dim': {
            'values': [32, 64, 128, 256]
        }, 
        'hidden_dim': {
            'values': [64, 128, 256]
        },
        'enc_layers': {
            'values': [1,2,3]
        }, 
        'dec_layers': {
            'values': [1,2,3]
        }, 
        'cell_type': {
            'values': ['GRU','LSTM','RNN']
        }, 
        'dropout': {
            'values': [0.2, 0.3]
        }, 
        'batch_size': {
            'values': [32, 64]
        }, 
        'epochs': {
            'values': [5,10]

        },
        'bidirectional': {
            'values': [False, True]
        }, 
        'learning_rate': {
            'values': [0.001, 0.002, 0.001]
        }, 
        'optimizer': {
            'values': ['adam', 'nadam']
        }, 
        'teacher_forcing_ratio': {
            'values': [0.2, 0.5, 0.7]
        }, 
        'beam_width': {
            'values': [1, 3, 5]
        }
    }
}

sweep_id = wandb.sweep(sweep_config, project="MA23M021_A3_seq2seq")
wandb.agent(sweep_id, function=run_training, count = 50)

Create sweep with ID: uk1jwuvp
Sweep URL: https://wandb.ai/ma23m021-iit-madras/MA23M021_A3_seq2seq/sweeps/uk1jwuvp


[34m[1mwandb[0m: Agent Starting Run: g945s026 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7
[34m[1mwandb[0m: Currently logged in as: [33mma23m021[0m ([33mma23m021-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.




Epoch 1/10 | Train Loss: 2.0565, Train Acc: 3.89% | Val Loss: 1.5326, Val Acc: 12.73%
Epoch 2/10 | Train Loss: 1.0196, Train Acc: 19.56% | Val Loss: 1.2909, Val Acc: 22.42%
Epoch 3/10 | Train Loss: 0.7838, Train Acc: 29.73% | Val Loss: 1.2113, Val Acc: 26.27%
Epoch 4/10 | Train Loss: 0.6647, Train Acc: 35.98% | Val Loss: 1.1571, Val Acc: 29.18%
Epoch 5/10 | Train Loss: 0.5731, Train Acc: 41.77% | Val Loss: 1.1575, Val Acc: 31.17%
Epoch 6/10 | Train Loss: 0.5040, Train Acc: 46.25% | Val Loss: 1.1499, Val Acc: 31.54%
Epoch 7/10 | Train Loss: 0.4514, Train Acc: 50.28% | Val Loss: 1.1565, Val Acc: 33.33%
Epoch 8/10 | Train Loss: 0.4053, Train Acc: 53.98% | Val Loss: 1.2182, Val Acc: 32.34%
Epoch 9/10 | Train Loss: 0.3628, Train Acc: 57.43% | Val Loss: 1.2029, Val Acc: 32.73%
Epoch 10/10 | Train Loss: 0.3267, Train Acc: 60.87% | Val Loss: 1.2341, Val Acc: 32.58%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▄▃▂▂▂▂▁▁▁
Validation Accuracy,▁▄▆▇▇▇████
Validation Loss,█▄▂▁▁▁▁▂▂▃

0,1
Epoch,10.0
Train Accuracy,60.86689
Train Loss,0.32668
Validation Accuracy,32.57506
Validation Loss,1.23408


[34m[1mwandb[0m: Agent Starting Run: ndqixoki with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2


Epoch 1/10 | Train Loss: 2.6383, Train Acc: 0.31% | Val Loss: 1.9821, Val Acc: 2.92%
Epoch 2/10 | Train Loss: 1.2140, Train Acc: 12.99% | Val Loss: 1.2286, Val Acc: 24.32%
Epoch 3/10 | Train Loss: 0.8232, Train Acc: 23.88% | Val Loss: 1.1314, Val Acc: 31.24%
Epoch 4/10 | Train Loss: 0.6626, Train Acc: 29.85% | Val Loss: 1.0726, Val Acc: 32.12%
Epoch 5/10 | Train Loss: 0.5733, Train Acc: 37.55% | Val Loss: 1.0676, Val Acc: 33.93%
Epoch 6/10 | Train Loss: 0.5024, Train Acc: 40.98% | Val Loss: 1.0344, Val Acc: 36.30%
Epoch 7/10 | Train Loss: 0.4436, Train Acc: 42.88% | Val Loss: 1.0859, Val Acc: 37.73%
Epoch 8/10 | Train Loss: 0.4007, Train Acc: 43.10% | Val Loss: 1.1006, Val Acc: 37.49%
Epoch 9/10 | Train Loss: 0.3635, Train Acc: 47.13% | Val Loss: 1.0730, Val Acc: 37.58%
Epoch 10/10 | Train Loss: 0.3300, Train Acc: 50.38% | Val Loss: 1.1024, Val Acc: 39.10%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▇▇▇██
Train Loss,█▄▂▂▂▂▁▁▁▁
Validation Accuracy,▁▅▆▇▇▇████
Validation Loss,█▂▂▁▁▁▁▁▁▂

0,1
Epoch,10.0
Train Accuracy,50.37694
Train Loss,0.32999
Validation Accuracy,39.1008
Validation Loss,1.1024


[34m[1mwandb[0m: Agent Starting Run: f409k0gv with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5




Epoch 1/5 | Train Loss: 2.4064, Train Acc: 1.65% | Val Loss: 1.6482, Val Acc: 9.57%
Epoch 2/5 | Train Loss: 1.0938, Train Acc: 16.86% | Val Loss: 1.2286, Val Acc: 24.51%
Epoch 3/5 | Train Loss: 0.8206, Train Acc: 27.12% | Val Loss: 1.1296, Val Acc: 28.29%
Epoch 4/5 | Train Loss: 0.6914, Train Acc: 32.87% | Val Loss: 1.1002, Val Acc: 33.27%
Epoch 5/5 | Train Loss: 0.6022, Train Acc: 36.92% | Val Loss: 1.0566, Val Acc: 34.17%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▆██
Validation Loss,█▃▂▂▁

0,1
Epoch,5.0
Train Accuracy,36.91885
Train Loss,0.60219
Validation Accuracy,34.1682
Validation Loss,1.05657


[34m[1mwandb[0m: Agent Starting Run: guczfg2n with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.5360, Train Acc: 0.71% | Val Loss: 1.8381, Val Acc: 6.14%
Epoch 2/10 | Train Loss: 1.1804, Train Acc: 13.79% | Val Loss: 1.3038, Val Acc: 22.89%
Epoch 3/10 | Train Loss: 0.8359, Train Acc: 24.55% | Val Loss: 1.1604, Val Acc: 27.48%
Epoch 4/10 | Train Loss: 0.6992, Train Acc: 28.41% | Val Loss: 1.1183, Val Acc: 31.92%
Epoch 5/10 | Train Loss: 0.6129, Train Acc: 34.66% | Val Loss: 1.0837, Val Acc: 34.83%
Epoch 6/10 | Train Loss: 0.5587, Train Acc: 36.77% | Val Loss: 1.0897, Val Acc: 34.51%
Epoch 7/10 | Train Loss: 0.5144, Train Acc: 40.00% | Val Loss: 1.0735, Val Acc: 35.07%
Epoch 8/10 | Train Loss: 0.4687, Train Acc: 41.12% | Val Loss: 1.1038, Val Acc: 33.39%
Epoch 9/10 | Train Loss: 0.4356, Train Acc: 40.84% | Val Loss: 1.0951, Val Acc: 36.37%
Epoch 10/10 | Train Loss: 0.4136, Train Acc: 38.49% | Val Loss: 1.0804, Val Acc: 36.51%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▆▇▇████
Train Loss,█▄▂▂▂▁▁▁▁▁
Validation Accuracy,▁▅▆▇███▇██
Validation Loss,█▃▂▁▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,38.48773
Train Loss,0.41359
Validation Accuracy,36.51195
Validation Loss,1.08038


[34m[1mwandb[0m: Agent Starting Run: bd1q79ij with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.1067, Train Acc: 4.07% | Val Loss: 1.4065, Val Acc: 16.41%
Epoch 2/10 | Train Loss: 0.9161, Train Acc: 22.42% | Val Loss: 1.1416, Val Acc: 29.30%
Epoch 3/10 | Train Loss: 0.7002, Train Acc: 31.60% | Val Loss: 1.0922, Val Acc: 32.83%
Epoch 4/10 | Train Loss: 0.5862, Train Acc: 35.74% | Val Loss: 1.0527, Val Acc: 34.73%
Epoch 5/10 | Train Loss: 0.5120, Train Acc: 40.84% | Val Loss: 1.0200, Val Acc: 36.44%
Epoch 6/10 | Train Loss: 0.4493, Train Acc: 44.70% | Val Loss: 1.0603, Val Acc: 38.76%
Epoch 7/10 | Train Loss: 0.4031, Train Acc: 46.71% | Val Loss: 1.0336, Val Acc: 38.14%
Epoch 8/10 | Train Loss: 0.3687, Train Acc: 50.46% | Val Loss: 1.0915, Val Acc: 37.22%
Epoch 9/10 | Train Loss: 0.3355, Train Acc: 52.05% | Val Loss: 1.0873, Val Acc: 38.01%
Epoch 10/10 | Train Loss: 0.3097, Train Acc: 53.98% | Val Loss: 1.1463, Val Acc: 37.56%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▄▅▅▆▇▇███
Train Loss,█▃▃▂▂▂▁▁▁▁
Validation Accuracy,▁▅▆▇▇█████
Validation Loss,█▃▂▂▁▂▁▂▂▃

0,1
Epoch,10.0
Train Accuracy,53.97885
Train Loss,0.30967
Validation Accuracy,37.56127
Validation Loss,1.14634


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e5e6a1tq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/5 | Train Loss: 1.8528, Train Acc: 6.86% | Val Loss: 1.3956, Val Acc: 16.93%
Epoch 2/5 | Train Loss: 0.8876, Train Acc: 24.60% | Val Loss: 1.1852, Val Acc: 26.03%
Epoch 3/5 | Train Loss: 0.7025, Train Acc: 33.03% | Val Loss: 1.1999, Val Acc: 28.62%
Epoch 4/5 | Train Loss: 0.6036, Train Acc: 39.68% | Val Loss: 1.1631, Val Acc: 30.66%
Epoch 5/5 | Train Loss: 0.5272, Train Acc: 44.56% | Val Loss: 1.1615, Val Acc: 31.25%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇██
Validation Loss,█▂▂▁▁

0,1
Epoch,5.0
Train Accuracy,44.56089
Train Loss,0.52717
Validation Accuracy,31.25
Validation Loss,1.16146


[34m[1mwandb[0m: Agent Starting Run: rs768606 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/5 | Train Loss: 1.8193, Train Acc: 7.47% | Val Loss: 1.4157, Val Acc: 17.32%
Epoch 2/5 | Train Loss: 0.8785, Train Acc: 25.27% | Val Loss: 1.2386, Val Acc: 26.06%
Epoch 3/5 | Train Loss: 0.6960, Train Acc: 33.90% | Val Loss: 1.1651, Val Acc: 27.93%
Epoch 4/5 | Train Loss: 0.5958, Train Acc: 38.99% | Val Loss: 1.1572, Val Acc: 30.37%
Epoch 5/5 | Train Loss: 0.5297, Train Acc: 43.50% | Val Loss: 1.1473, Val Acc: 30.69%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▆▇██
Validation Loss,█▃▁▁▁

0,1
Epoch,5.0
Train Accuracy,43.50482
Train Loss,0.52968
Validation Accuracy,30.69087
Validation Loss,1.14732


[34m[1mwandb[0m: Agent Starting Run: 9rgnj36l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2


Epoch 1/10 | Train Loss: 1.8971, Train Acc: 6.37% | Val Loss: 1.4293, Val Acc: 15.91%
Epoch 2/10 | Train Loss: 0.9112, Train Acc: 23.59% | Val Loss: 1.2623, Val Acc: 23.45%
Epoch 3/10 | Train Loss: 0.7269, Train Acc: 32.01% | Val Loss: 1.1799, Val Acc: 27.83%
Epoch 4/10 | Train Loss: 0.6130, Train Acc: 38.25% | Val Loss: 1.1673, Val Acc: 30.05%
Epoch 5/10 | Train Loss: 0.5581, Train Acc: 42.56% | Val Loss: 1.1900, Val Acc: 31.29%
Epoch 6/10 | Train Loss: 0.4916, Train Acc: 47.04% | Val Loss: 1.1788, Val Acc: 32.39%
Epoch 7/10 | Train Loss: 0.4434, Train Acc: 49.24% | Val Loss: 1.2005, Val Acc: 32.18%
Epoch 8/10 | Train Loss: 0.4007, Train Acc: 53.18% | Val Loss: 1.2211, Val Acc: 28.99%
Epoch 9/10 | Train Loss: 0.3741, Train Acc: 54.79% | Val Loss: 1.2426, Val Acc: 30.76%
Epoch 10/10 | Train Loss: 0.3440, Train Acc: 56.81% | Val Loss: 1.2944, Val Acc: 30.74%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▅▆▇▇▇██
Train Loss,█▄▃▂▂▂▁▁▁▁
Validation Accuracy,▁▄▆▇███▇▇▇
Validation Loss,█▄▁▁▂▁▂▂▃▄

0,1
Epoch,10.0
Train Accuracy,56.81427
Train Loss,0.34403
Validation Accuracy,30.73683
Validation Loss,1.29439


[34m[1mwandb[0m: Agent Starting Run: z98xyhc5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/5 | Train Loss: 2.0443, Train Acc: 5.15% | Val Loss: 1.3596, Val Acc: 17.62%
Epoch 2/5 | Train Loss: 0.8818, Train Acc: 24.47% | Val Loss: 1.1423, Val Acc: 27.95%
Epoch 3/5 | Train Loss: 0.6839, Train Acc: 30.81% | Val Loss: 1.0708, Val Acc: 32.74%
Epoch 4/5 | Train Loss: 0.5696, Train Acc: 37.03% | Val Loss: 1.0491, Val Acc: 35.01%
Epoch 5/5 | Train Loss: 0.4977, Train Acc: 40.13% | Val Loss: 1.0645, Val Acc: 35.32%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇██
Validation Loss,█▃▁▁▁

0,1
Epoch,5.0
Train Accuracy,40.13155
Train Loss,0.49774
Validation Accuracy,35.32475
Validation Loss,1.06452


[34m[1mwandb[0m: Agent Starting Run: sz69stz8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/5 | Train Loss: 2.1649, Train Acc: 2.99% | Val Loss: 1.4991, Val Acc: 13.10%
Epoch 2/5 | Train Loss: 0.9714, Train Acc: 20.23% | Val Loss: 1.1425, Val Acc: 27.11%
Epoch 3/5 | Train Loss: 0.7263, Train Acc: 28.23% | Val Loss: 1.1071, Val Acc: 31.77%
Epoch 4/5 | Train Loss: 0.6118, Train Acc: 33.23% | Val Loss: 1.0667, Val Acc: 34.18%
Epoch 5/5 | Train Loss: 0.5319, Train Acc: 36.66% | Val Loss: 1.0815, Val Acc: 34.32%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▆▇██
Validation Loss,█▂▂▁▁

0,1
Epoch,5.0
Train Accuracy,36.66124
Train Loss,0.53189
Validation Accuracy,34.32138
Validation Loss,1.08155


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tgnbdlt8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 2.2428, Train Acc: 2.83% | Val Loss: 1.4913, Val Acc: 12.77%
Epoch 2/10 | Train Loss: 0.9672, Train Acc: 21.04% | Val Loss: 1.1812, Val Acc: 27.12%
Epoch 3/10 | Train Loss: 0.7345, Train Acc: 30.55% | Val Loss: 1.1091, Val Acc: 30.89%
Epoch 4/10 | Train Loss: 0.5999, Train Acc: 36.89% | Val Loss: 1.0872, Val Acc: 33.01%
Epoch 5/10 | Train Loss: 0.5119, Train Acc: 43.19% | Val Loss: 1.0605, Val Acc: 36.29%
Epoch 6/10 | Train Loss: 0.4541, Train Acc: 47.57% | Val Loss: 1.0627, Val Acc: 36.29%
Epoch 7/10 | Train Loss: 0.3974, Train Acc: 52.54% | Val Loss: 1.0438, Val Acc: 37.19%
Epoch 8/10 | Train Loss: 0.3536, Train Acc: 56.64% | Val Loss: 1.1072, Val Acc: 36.68%
Epoch 9/10 | Train Loss: 0.3142, Train Acc: 59.92% | Val Loss: 1.1058, Val Acc: 35.80%
Epoch 10/10 | Train Loss: 0.2835, Train Acc: 62.47% | Val Loss: 1.1715, Val Acc: 37.44%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▃▃▂▂▂▁▁▁▁
Validation Accuracy,▁▅▆▇██████
Validation Loss,█▃▂▂▁▁▁▂▂▃

0,1
Epoch,10.0
Train Accuracy,62.46624
Train Loss,0.28346
Validation Accuracy,37.43873
Validation Loss,1.1715


[34m[1mwandb[0m: Agent Starting Run: uh7nzdbd with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 1.7892, Train Acc: 7.43% | Val Loss: 1.4175, Val Acc: 18.28%
Epoch 2/10 | Train Loss: 0.8741, Train Acc: 25.53% | Val Loss: 1.2382, Val Acc: 24.98%
Epoch 3/10 | Train Loss: 0.7138, Train Acc: 33.27% | Val Loss: 1.1690, Val Acc: 28.85%
Epoch 4/10 | Train Loss: 0.5976, Train Acc: 39.83% | Val Loss: 1.1802, Val Acc: 30.19%
Epoch 5/10 | Train Loss: 0.5252, Train Acc: 44.74% | Val Loss: 1.1651, Val Acc: 31.19%
Epoch 6/10 | Train Loss: 0.4692, Train Acc: 49.20% | Val Loss: 1.1863, Val Acc: 31.72%
Epoch 7/10 | Train Loss: 0.4167, Train Acc: 52.28% | Val Loss: 1.2044, Val Acc: 32.34%
Epoch 8/10 | Train Loss: 0.3833, Train Acc: 54.32% | Val Loss: 1.2287, Val Acc: 31.13%
Epoch 9/10 | Train Loss: 0.3497, Train Acc: 56.74% | Val Loss: 1.2268, Val Acc: 31.59%
Epoch 10/10 | Train Loss: 0.3218, Train Acc: 58.43% | Val Loss: 1.2767, Val Acc: 31.56%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▅▆▇▇▇██
Train Loss,█▄▃▂▂▂▁▁▁▁
Validation Accuracy,▁▄▆▇▇██▇██
Validation Loss,█▃▁▁▁▂▂▃▃▄

0,1
Epoch,10.0
Train Accuracy,58.42844
Train Loss,0.32183
Validation Accuracy,31.56403
Validation Loss,1.27666


[34m[1mwandb[0m: Agent Starting Run: m9aql5du with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/5 | Train Loss: 2.2670, Train Acc: 2.87% | Val Loss: 1.5106, Val Acc: 12.52%
Epoch 2/5 | Train Loss: 0.9793, Train Acc: 20.65% | Val Loss: 1.1917, Val Acc: 26.58%
Epoch 3/5 | Train Loss: 0.7298, Train Acc: 30.35% | Val Loss: 1.1111, Val Acc: 29.63%
Epoch 4/5 | Train Loss: 0.6037, Train Acc: 37.15% | Val Loss: 1.0743, Val Acc: 34.05%
Epoch 5/5 | Train Loss: 0.5297, Train Acc: 40.98% | Val Loss: 1.0427, Val Acc: 33.86%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▆▇██
Validation Loss,█▃▂▁▁

0,1
Epoch,5.0
Train Accuracy,40.97888
Train Loss,0.52972
Validation Accuracy,33.86183
Validation Loss,1.04267


[34m[1mwandb[0m: Agent Starting Run: 5vflffbx with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 1.6904, Train Acc: 8.19% | Val Loss: 1.3601, Val Acc: 18.93%
Epoch 2/10 | Train Loss: 0.8664, Train Acc: 26.04% | Val Loss: 1.1969, Val Acc: 26.78%
Epoch 3/10 | Train Loss: 0.6814, Train Acc: 34.14% | Val Loss: 1.1219, Val Acc: 31.05%
Epoch 4/10 | Train Loss: 0.5753, Train Acc: 40.42% | Val Loss: 1.1297, Val Acc: 31.10%
Epoch 5/10 | Train Loss: 0.5018, Train Acc: 45.62% | Val Loss: 1.1304, Val Acc: 30.70%
Epoch 6/10 | Train Loss: 0.4509, Train Acc: 49.83% | Val Loss: 1.1351, Val Acc: 33.36%
Epoch 7/10 | Train Loss: 0.3989, Train Acc: 52.79% | Val Loss: 1.1416, Val Acc: 32.61%
Epoch 8/10 | Train Loss: 0.3598, Train Acc: 54.61% | Val Loss: 1.1532, Val Acc: 33.71%
Epoch 9/10 | Train Loss: 0.3279, Train Acc: 56.26% | Val Loss: 1.2289, Val Acc: 31.92%
Epoch 10/10 | Train Loss: 0.2955, Train Acc: 59.67% | Val Loss: 1.2907, Val Acc: 31.56%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▅▆▇▇▇██
Train Loss,█▄▃▂▂▂▂▁▁▁
Validation Accuracy,▁▅▇▇▇█▇█▇▇
Validation Loss,█▃▁▁▁▁▂▂▄▆

0,1
Epoch,10.0
Train Accuracy,59.66691
Train Loss,0.29553
Validation Accuracy,31.55637
Validation Loss,1.29065


[34m[1mwandb[0m: Agent Starting Run: 2e9bqaak with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.5153, Train Acc: 0.63% | Val Loss: 1.7549, Val Acc: 6.92%
Epoch 2/10 | Train Loss: 1.1724, Train Acc: 13.52% | Val Loss: 1.2390, Val Acc: 22.12%
Epoch 3/10 | Train Loss: 0.8406, Train Acc: 24.17% | Val Loss: 1.1278, Val Acc: 30.49%
Epoch 4/10 | Train Loss: 0.7128, Train Acc: 29.50% | Val Loss: 1.0939, Val Acc: 32.85%
Epoch 5/10 | Train Loss: 0.6281, Train Acc: 33.25% | Val Loss: 1.0882, Val Acc: 34.53%
Epoch 6/10 | Train Loss: 0.5717, Train Acc: 37.70% | Val Loss: 1.0731, Val Acc: 35.55%
Epoch 7/10 | Train Loss: 0.5213, Train Acc: 39.13% | Val Loss: 1.1134, Val Acc: 35.91%
Epoch 8/10 | Train Loss: 0.4842, Train Acc: 40.37% | Val Loss: 1.0602, Val Acc: 35.96%
Epoch 9/10 | Train Loss: 0.4449, Train Acc: 44.33% | Val Loss: 1.0770, Val Acc: 36.80%
Epoch 10/10 | Train Loss: 0.4170, Train Acc: 47.01% | Val Loss: 1.1119, Val Acc: 37.08%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▅▆▇▇▇██
Train Loss,█▄▂▂▂▂▁▁▁▁
Validation Accuracy,▁▅▆▇▇█████
Validation Loss,█▃▂▁▁▁▂▁▁▂

0,1
Epoch,10.0
Train Accuracy,47.007
Train Loss,0.41698
Validation Accuracy,37.07874
Validation Loss,1.11189


[34m[1mwandb[0m: Agent Starting Run: okivbffn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/5 | Train Loss: 2.2165, Train Acc: 3.14% | Val Loss: 1.4582, Val Acc: 15.68%
Epoch 2/5 | Train Loss: 0.9520, Train Acc: 20.89% | Val Loss: 1.1534, Val Acc: 26.88%
Epoch 3/5 | Train Loss: 0.7236, Train Acc: 27.44% | Val Loss: 1.0828, Val Acc: 32.18%
Epoch 4/5 | Train Loss: 0.6148, Train Acc: 31.56% | Val Loss: 1.0114, Val Acc: 34.46%
Epoch 5/5 | Train Loss: 0.5371, Train Acc: 30.30% | Val Loss: 1.0091, Val Acc: 36.67%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▇██
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇▇█
Validation Loss,█▃▂▁▁

0,1
Epoch,5.0
Train Accuracy,30.30488
Train Loss,0.53711
Validation Accuracy,36.67279
Validation Loss,1.00911


[34m[1mwandb[0m: Agent Starting Run: gii4td4w with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2


Epoch 1/10 | Train Loss: 2.0982, Train Acc: 3.65% | Val Loss: 1.5638, Val Acc: 12.36%
Epoch 2/10 | Train Loss: 1.0108, Train Acc: 19.84% | Val Loss: 1.2595, Val Acc: 23.40%
Epoch 3/10 | Train Loss: 0.7732, Train Acc: 30.23% | Val Loss: 1.1877, Val Acc: 28.89%
Epoch 4/10 | Train Loss: 0.6572, Train Acc: 36.96% | Val Loss: 1.1476, Val Acc: 30.21%
Epoch 5/10 | Train Loss: 0.5747, Train Acc: 42.05% | Val Loss: 1.1286, Val Acc: 31.92%
Epoch 6/10 | Train Loss: 0.5073, Train Acc: 46.85% | Val Loss: 1.1063, Val Acc: 31.95%
Epoch 7/10 | Train Loss: 0.4539, Train Acc: 50.86% | Val Loss: 1.1184, Val Acc: 32.89%
Epoch 8/10 | Train Loss: 0.4063, Train Acc: 54.35% | Val Loss: 1.1515, Val Acc: 32.14%
Epoch 9/10 | Train Loss: 0.3677, Train Acc: 58.15% | Val Loss: 1.1727, Val Acc: 31.86%
Epoch 10/10 | Train Loss: 0.3339, Train Acc: 61.44% | Val Loss: 1.2199, Val Acc: 32.94%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▄▃▂▂▂▁▁▁▁
Validation Accuracy,▁▅▇▇██████
Validation Loss,█▃▂▂▁▁▁▂▂▃

0,1
Epoch,10.0
Train Accuracy,61.43651
Train Loss,0.33387
Validation Accuracy,32.93505
Validation Loss,1.2199


[34m[1mwandb[0m: Agent Starting Run: jv2shzfm with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/5 | Train Loss: 1.5558, Train Acc: 10.72% | Val Loss: 1.3172, Val Acc: 19.85%
Epoch 2/5 | Train Loss: 0.8284, Train Acc: 27.61% | Val Loss: 1.2104, Val Acc: 26.71%
Epoch 3/5 | Train Loss: 0.6737, Train Acc: 34.57% | Val Loss: 1.1820, Val Acc: 27.37%
Epoch 4/5 | Train Loss: 0.5883, Train Acc: 40.14% | Val Loss: 1.1621, Val Acc: 29.94%
Epoch 5/5 | Train Loss: 0.5244, Train Acc: 43.87% | Val Loss: 1.1377, Val Acc: 31.89%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▅▇█
Validation Loss,█▄▃▂▁

0,1
Epoch,5.0
Train Accuracy,43.87219
Train Loss,0.52444
Validation Accuracy,31.88572
Validation Loss,1.13767


[34m[1mwandb[0m: Agent Starting Run: f5we3i4n with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.7344, Train Acc: 0.16% | Val Loss: 2.1522, Val Acc: 1.22%
Epoch 2/10 | Train Loss: 1.3260, Train Acc: 11.04% | Val Loss: 1.3035, Val Acc: 22.07%
Epoch 3/10 | Train Loss: 0.8772, Train Acc: 23.83% | Val Loss: 1.1568, Val Acc: 29.53%
Epoch 4/10 | Train Loss: 0.7293, Train Acc: 30.26% | Val Loss: 1.0694, Val Acc: 31.54%
Epoch 5/10 | Train Loss: 0.6303, Train Acc: 33.76% | Val Loss: 1.0316, Val Acc: 34.12%
Epoch 6/10 | Train Loss: 0.5559, Train Acc: 34.57% | Val Loss: 1.0572, Val Acc: 34.75%
Epoch 7/10 | Train Loss: 0.5043, Train Acc: 36.74% | Val Loss: 1.0458, Val Acc: 36.67%
Epoch 8/10 | Train Loss: 0.4564, Train Acc: 40.50% | Val Loss: 1.0756, Val Acc: 37.15%
Epoch 9/10 | Train Loss: 0.4182, Train Acc: 39.59% | Val Loss: 1.0755, Val Acc: 38.31%
Epoch 10/10 | Train Loss: 0.3885, Train Acc: 44.40% | Val Loss: 1.0367, Val Acc: 37.89%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▆▆▆▇▇▇█
Train Loss,█▄▂▂▂▁▁▁▁▁
Validation Accuracy,▁▅▆▇▇▇████
Validation Loss,█▃▂▁▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,44.40114
Train Loss,0.38845
Validation Accuracy,37.89062
Validation Loss,1.03666


[34m[1mwandb[0m: Agent Starting Run: p8a7mmyl with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.0341, Train Acc: 3.79% | Val Loss: 1.3872, Val Acc: 16.36%
Epoch 2/10 | Train Loss: 0.9530, Train Acc: 15.15% | Val Loss: 1.1628, Val Acc: 27.69%
Epoch 3/10 | Train Loss: 0.7623, Train Acc: 19.74% | Val Loss: 1.0877, Val Acc: 30.28%
Epoch 4/10 | Train Loss: 0.6691, Train Acc: 25.84% | Val Loss: 1.1005, Val Acc: 31.83%
Epoch 5/10 | Train Loss: 0.6039, Train Acc: 31.10% | Val Loss: 1.0943, Val Acc: 33.43%
Epoch 6/10 | Train Loss: 0.5614, Train Acc: 34.94% | Val Loss: 1.0703, Val Acc: 33.85%
Epoch 7/10 | Train Loss: 0.5232, Train Acc: 35.59% | Val Loss: 1.0760, Val Acc: 34.49%
Epoch 8/10 | Train Loss: 0.5008, Train Acc: 35.67% | Val Loss: 1.0702, Val Acc: 33.36%
Epoch 9/10 | Train Loss: 0.4772, Train Acc: 37.66% | Val Loss: 1.0885, Val Acc: 34.41%
Epoch 10/10 | Train Loss: 0.4564, Train Acc: 38.99% | Val Loss: 1.1018, Val Acc: 33.96%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▇▇▇██
Train Loss,█▃▂▂▂▁▁▁▁▁
Validation Accuracy,▁▅▆▇██████
Validation Loss,█▃▁▂▂▁▁▁▁▂

0,1
Epoch,10.0
Train Accuracy,38.98896
Train Loss,0.45641
Validation Accuracy,33.9614
Validation Loss,1.10183


[34m[1mwandb[0m: Agent Starting Run: zd9fgrkb with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 2.2682, Train Acc: 2.44% | Val Loss: 1.4826, Val Acc: 11.98%
Epoch 2/10 | Train Loss: 1.0002, Train Acc: 17.18% | Val Loss: 1.1510, Val Acc: 26.37%
Epoch 3/10 | Train Loss: 0.7480, Train Acc: 25.61% | Val Loss: 1.1091, Val Acc: 31.07%
Epoch 4/10 | Train Loss: 0.6381, Train Acc: 29.40% | Val Loss: 1.0800, Val Acc: 34.15%
Epoch 5/10 | Train Loss: 0.5571, Train Acc: 34.82% | Val Loss: 1.0519, Val Acc: 34.76%
Epoch 6/10 | Train Loss: 0.4967, Train Acc: 34.76% | Val Loss: 1.0349, Val Acc: 37.15%
Epoch 7/10 | Train Loss: 0.4470, Train Acc: 37.61% | Val Loss: 1.0477, Val Acc: 36.47%
Epoch 8/10 | Train Loss: 0.4133, Train Acc: 40.36% | Val Loss: 1.0361, Val Acc: 37.88%
Epoch 9/10 | Train Loss: 0.3805, Train Acc: 40.69% | Val Loss: 1.1015, Val Acc: 36.47%
Epoch 10/10 | Train Loss: 0.3586, Train Acc: 47.65% | Val Loss: 1.1068, Val Acc: 37.54%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▅▆▆▆▇▇█
Train Loss,█▃▂▂▂▂▁▁▁▁
Validation Accuracy,▁▅▆▇▇█████
Validation Loss,█▃▂▂▁▁▁▁▂▂

0,1
Epoch,10.0
Train Accuracy,47.6504
Train Loss,0.35863
Validation Accuracy,37.5383
Validation Loss,1.10676


[34m[1mwandb[0m: Agent Starting Run: 3d4wjwba with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 1.6930, Train Acc: 8.35% | Val Loss: 1.3454, Val Acc: 17.98%
Epoch 2/10 | Train Loss: 0.8688, Train Acc: 25.99% | Val Loss: 1.1868, Val Acc: 25.77%
Epoch 3/10 | Train Loss: 0.6884, Train Acc: 34.23% | Val Loss: 1.1398, Val Acc: 29.61%
Epoch 4/10 | Train Loss: 0.5896, Train Acc: 40.64% | Val Loss: 1.1314, Val Acc: 30.85%
Epoch 5/10 | Train Loss: 0.5134, Train Acc: 45.55% | Val Loss: 1.1211, Val Acc: 30.60%
Epoch 6/10 | Train Loss: 0.4515, Train Acc: 50.01% | Val Loss: 1.1618, Val Acc: 31.56%
Epoch 7/10 | Train Loss: 0.4077, Train Acc: 53.62% | Val Loss: 1.1609, Val Acc: 33.47%
Epoch 8/10 | Train Loss: 0.3668, Train Acc: 56.38% | Val Loss: 1.1735, Val Acc: 32.64%
Epoch 9/10 | Train Loss: 0.3339, Train Acc: 60.16% | Val Loss: 1.2215, Val Acc: 31.17%
Epoch 10/10 | Train Loss: 0.3033, Train Acc: 62.36% | Val Loss: 1.2265, Val Acc: 33.04%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▄▃▂▂▂▂▁▁▁
Validation Accuracy,▁▅▆▇▇▇██▇█
Validation Loss,█▃▂▁▁▂▂▃▄▄

0,1
Epoch,10.0
Train Accuracy,62.36121
Train Loss,0.30327
Validation Accuracy,33.04228
Validation Loss,1.2265


[34m[1mwandb[0m: Agent Starting Run: 3paf8d5g with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 1.7303, Train Acc: 7.98% | Val Loss: 1.3853, Val Acc: 17.98%
Epoch 2/10 | Train Loss: 0.8689, Train Acc: 25.59% | Val Loss: 1.1773, Val Acc: 26.73%
Epoch 3/10 | Train Loss: 0.6933, Train Acc: 34.14% | Val Loss: 1.1228, Val Acc: 28.62%
Epoch 4/10 | Train Loss: 0.5863, Train Acc: 40.15% | Val Loss: 1.1232, Val Acc: 30.80%
Epoch 5/10 | Train Loss: 0.5125, Train Acc: 44.92% | Val Loss: 1.1171, Val Acc: 31.10%
Epoch 6/10 | Train Loss: 0.4517, Train Acc: 49.60% | Val Loss: 1.1424, Val Acc: 32.18%
Epoch 7/10 | Train Loss: 0.4071, Train Acc: 53.17% | Val Loss: 1.1510, Val Acc: 31.29%
Epoch 8/10 | Train Loss: 0.3646, Train Acc: 56.12% | Val Loss: 1.1740, Val Acc: 32.12%
Epoch 9/10 | Train Loss: 0.3323, Train Acc: 59.47% | Val Loss: 1.2245, Val Acc: 31.63%
Epoch 10/10 | Train Loss: 0.2992, Train Acc: 62.26% | Val Loss: 1.2757, Val Acc: 32.44%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▄▃▂▂▂▂▁▁▁
Validation Accuracy,▁▅▆▇▇█▇███
Validation Loss,█▃▁▁▁▂▂▂▄▅

0,1
Epoch,10.0
Train Accuracy,62.26014
Train Loss,0.2992
Validation Accuracy,32.43719
Validation Loss,1.27566


[34m[1mwandb[0m: Agent Starting Run: s3klo773 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 1.5760, Train Acc: 10.15% | Val Loss: 1.3488, Val Acc: 19.10%
Epoch 2/10 | Train Loss: 0.8299, Train Acc: 26.65% | Val Loss: 1.2067, Val Acc: 24.92%
Epoch 3/10 | Train Loss: 0.6742, Train Acc: 34.41% | Val Loss: 1.1732, Val Acc: 28.37%
Epoch 4/10 | Train Loss: 0.5827, Train Acc: 40.71% | Val Loss: 1.1730, Val Acc: 29.04%
Epoch 5/10 | Train Loss: 0.5198, Train Acc: 45.24% | Val Loss: 1.1581, Val Acc: 30.70%
Epoch 6/10 | Train Loss: 0.4634, Train Acc: 48.38% | Val Loss: 1.2262, Val Acc: 31.14%
Epoch 7/10 | Train Loss: 0.4291, Train Acc: 50.20% | Val Loss: 1.2221, Val Acc: 31.36%
Epoch 8/10 | Train Loss: 0.3973, Train Acc: 52.41% | Val Loss: 1.2331, Val Acc: 29.30%
Epoch 9/10 | Train Loss: 0.3698, Train Acc: 53.56% | Val Loss: 1.2420, Val Acc: 31.24%
Epoch 10/10 | Train Loss: 0.3408, Train Acc: 57.08% | Val Loss: 1.2736, Val Acc: 29.63%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▆▆▇▇▇▇█
Train Loss,█▄▃▂▂▂▂▁▁▁
Validation Accuracy,▁▄▆▇███▇█▇
Validation Loss,█▃▂▂▁▄▃▄▄▅

0,1
Epoch,10.0
Train Accuracy,57.08499
Train Loss,0.3408
Validation Accuracy,29.63388
Validation Loss,1.27361


[34m[1mwandb[0m: Agent Starting Run: 9k8mwx54 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 1.5466, Train Acc: 10.60% | Val Loss: 1.2953, Val Acc: 21.05%
Epoch 2/10 | Train Loss: 0.8205, Train Acc: 26.89% | Val Loss: 1.1975, Val Acc: 26.62%
Epoch 3/10 | Train Loss: 0.6771, Train Acc: 34.41% | Val Loss: 1.1891, Val Acc: 28.43%
Epoch 4/10 | Train Loss: 0.5903, Train Acc: 39.25% | Val Loss: 1.1967, Val Acc: 28.72%
Epoch 5/10 | Train Loss: 0.5255, Train Acc: 43.90% | Val Loss: 1.1738, Val Acc: 32.17%
Epoch 6/10 | Train Loss: 0.4761, Train Acc: 46.08% | Val Loss: 1.1772, Val Acc: 31.13%
Epoch 7/10 | Train Loss: 0.4368, Train Acc: 49.18% | Val Loss: 1.2075, Val Acc: 28.90%
Epoch 8/10 | Train Loss: 0.4005, Train Acc: 52.50% | Val Loss: 1.2011, Val Acc: 30.41%
Epoch 9/10 | Train Loss: 0.3774, Train Acc: 54.68% | Val Loss: 1.2856, Val Acc: 30.83%
Epoch 10/10 | Train Loss: 0.3464, Train Acc: 56.79% | Val Loss: 1.2860, Val Acc: 30.92%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▅▆▆▇▇██
Train Loss,█▄▃▂▂▂▂▁▁▁
Validation Accuracy,▁▅▆▆█▇▆▇▇▇
Validation Loss,█▂▂▂▁▁▃▃▇▇

0,1
Epoch,10.0
Train Accuracy,56.79233
Train Loss,0.34643
Validation Accuracy,30.92065
Validation Loss,1.28602


[34m[1mwandb[0m: Agent Starting Run: c3zp997v with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.2014, Train Acc: 3.66% | Val Loss: 1.4625, Val Acc: 12.78%
Epoch 2/10 | Train Loss: 0.9403, Train Acc: 21.95% | Val Loss: 1.2087, Val Acc: 26.07%
Epoch 3/10 | Train Loss: 0.7157, Train Acc: 32.15% | Val Loss: 1.1095, Val Acc: 31.16%
Epoch 4/10 | Train Loss: 0.5790, Train Acc: 39.62% | Val Loss: 1.0873, Val Acc: 35.48%
Epoch 5/10 | Train Loss: 0.5117, Train Acc: 45.01% | Val Loss: 1.0433, Val Acc: 36.31%
Epoch 6/10 | Train Loss: 0.4488, Train Acc: 48.48% | Val Loss: 1.0577, Val Acc: 35.83%
Epoch 7/10 | Train Loss: 0.3915, Train Acc: 52.75% | Val Loss: 1.0820, Val Acc: 37.98%
Epoch 8/10 | Train Loss: 0.3526, Train Acc: 57.12% | Val Loss: 1.0959, Val Acc: 36.15%
Epoch 9/10 | Train Loss: 0.3101, Train Acc: 59.69% | Val Loss: 1.1156, Val Acc: 36.68%
Epoch 10/10 | Train Loss: 0.2843, Train Acc: 62.82% | Val Loss: 1.1335, Val Acc: 38.01%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▃▃▂▂▂▁▁▁▁
Validation Accuracy,▁▅▆▇█▇█▇██
Validation Loss,█▄▂▂▁▁▂▂▂▃

0,1
Epoch,10.0
Train Accuracy,62.82032
Train Loss,0.2843
Validation Accuracy,38.01317
Validation Loss,1.13348


[34m[1mwandb[0m: Agent Starting Run: 5zh2at4d with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/5 | Train Loss: 2.8961, Train Acc: 0.01% | Val Loss: 2.6680, Val Acc: 0.14%
Epoch 2/5 | Train Loss: 1.7224, Train Acc: 4.93% | Val Loss: 1.4334, Val Acc: 15.11%
Epoch 3/5 | Train Loss: 1.0255, Train Acc: 17.50% | Val Loss: 1.2413, Val Acc: 24.23%
Epoch 4/5 | Train Loss: 0.8106, Train Acc: 24.14% | Val Loss: 1.1235, Val Acc: 27.79%
Epoch 5/5 | Train Loss: 0.6993, Train Acc: 30.77% | Val Loss: 1.1348, Val Acc: 32.34%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▂▅▆█
Train Loss,█▄▂▁▁
Validation Accuracy,▁▄▆▇█
Validation Loss,█▂▂▁▁

0,1
Epoch,5.0
Train Accuracy,30.77157
Train Loss,0.69933
Validation Accuracy,32.33762
Validation Loss,1.13485


[34m[1mwandb[0m: Agent Starting Run: x9a5pgm8 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 1.6981, Train Acc: 8.50% | Val Loss: 1.3595, Val Acc: 18.10%
Epoch 2/10 | Train Loss: 0.8598, Train Acc: 26.12% | Val Loss: 1.1810, Val Acc: 26.27%
Epoch 3/10 | Train Loss: 0.6770, Train Acc: 35.12% | Val Loss: 1.1684, Val Acc: 28.71%
Epoch 4/10 | Train Loss: 0.5773, Train Acc: 41.26% | Val Loss: 1.1301, Val Acc: 31.29%
Epoch 5/10 | Train Loss: 0.5054, Train Acc: 46.14% | Val Loss: 1.1514, Val Acc: 31.53%
Epoch 6/10 | Train Loss: 0.4445, Train Acc: 50.71% | Val Loss: 1.1525, Val Acc: 32.25%
Epoch 7/10 | Train Loss: 0.3965, Train Acc: 54.29% | Val Loss: 1.1891, Val Acc: 32.28%
Epoch 8/10 | Train Loss: 0.3616, Train Acc: 58.17% | Val Loss: 1.1801, Val Acc: 32.23%
Epoch 9/10 | Train Loss: 0.3267, Train Acc: 60.63% | Val Loss: 1.1921, Val Acc: 31.56%
Epoch 10/10 | Train Loss: 0.2990, Train Acc: 63.10% | Val Loss: 1.2548, Val Acc: 31.91%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▄▃▂▂▂▁▁▁▁
Validation Accuracy,▁▅▆███████
Validation Loss,█▃▂▁▂▂▃▃▃▅

0,1
Epoch,10.0
Train Accuracy,63.10192
Train Loss,0.29897
Validation Accuracy,31.9087
Validation Loss,1.25477


[34m[1mwandb[0m: Agent Starting Run: bzilhtxu with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/5 | Train Loss: 1.6033, Train Acc: 9.89% | Val Loss: 1.3715, Val Acc: 19.13%
Epoch 2/5 | Train Loss: 0.8365, Train Acc: 26.26% | Val Loss: 1.2210, Val Acc: 25.29%
Epoch 3/5 | Train Loss: 0.6787, Train Acc: 33.58% | Val Loss: 1.1882, Val Acc: 28.62%
Epoch 4/5 | Train Loss: 0.5871, Train Acc: 39.12% | Val Loss: 1.1882, Val Acc: 29.60%
Epoch 5/5 | Train Loss: 0.5254, Train Acc: 43.20% | Val Loss: 1.1755, Val Acc: 29.41%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇██
Validation Loss,█▃▁▁▁

0,1
Epoch,5.0
Train Accuracy,43.20164
Train Loss,0.52536
Validation Accuracy,29.41176
Validation Loss,1.17546


[34m[1mwandb[0m: Agent Starting Run: jwdz0usu with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/5 | Train Loss: 2.2111, Train Acc: 2.15% | Val Loss: 1.4955, Val Acc: 12.48%
Epoch 2/5 | Train Loss: 1.0015, Train Acc: 17.85% | Val Loss: 1.1927, Val Acc: 27.20%
Epoch 3/5 | Train Loss: 0.7418, Train Acc: 28.04% | Val Loss: 1.0850, Val Acc: 31.44%
Epoch 4/5 | Train Loss: 0.6275, Train Acc: 33.48% | Val Loss: 1.0453, Val Acc: 36.04%
Epoch 5/5 | Train Loss: 0.5442, Train Acc: 38.34% | Val Loss: 1.0447, Val Acc: 35.45%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇██
Validation Loss,█▃▂▁▁

0,1
Epoch,5.0
Train Accuracy,38.34027
Train Loss,0.54417
Validation Accuracy,35.4473
Validation Loss,1.04471


[34m[1mwandb[0m: Agent Starting Run: nu2bs21g with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2


Epoch 1/5 | Train Loss: 1.7244, Train Acc: 7.85% | Val Loss: 1.3674, Val Acc: 17.29%
Epoch 2/5 | Train Loss: 0.8762, Train Acc: 25.27% | Val Loss: 1.1725, Val Acc: 26.65%
Epoch 3/5 | Train Loss: 0.6932, Train Acc: 33.83% | Val Loss: 1.1111, Val Acc: 30.58%
Epoch 4/5 | Train Loss: 0.5877, Train Acc: 39.10% | Val Loss: 1.1093, Val Acc: 32.36%
Epoch 5/5 | Train Loss: 0.5120, Train Acc: 41.90% | Val Loss: 1.1405, Val Acc: 31.42%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇██
Validation Loss,█▃▁▁▂

0,1
Epoch,5.0
Train Accuracy,41.90427
Train Loss,0.51197
Validation Accuracy,31.4185
Validation Loss,1.14048


[34m[1mwandb[0m: Agent Starting Run: pdme897t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/5 | Train Loss: 2.5727, Train Acc: 0.43% | Val Loss: 1.8865, Val Acc: 3.47%
Epoch 2/5 | Train Loss: 1.1950, Train Acc: 14.26% | Val Loss: 1.2462, Val Acc: 24.50%
Epoch 3/5 | Train Loss: 0.8140, Train Acc: 25.45% | Val Loss: 1.0887, Val Acc: 30.73%
Epoch 4/5 | Train Loss: 0.6595, Train Acc: 32.88% | Val Loss: 1.0706, Val Acc: 32.10%
Epoch 5/5 | Train Loss: 0.5730, Train Acc: 36.71% | Val Loss: 1.0414, Val Acc: 36.70%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇▇█
Validation Loss,█▃▁▁▁

0,1
Epoch,5.0
Train Accuracy,36.70557
Train Loss,0.57304
Validation Accuracy,36.70343
Validation Loss,1.04143


[34m[1mwandb[0m: Agent Starting Run: aklbgaua with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2


Epoch 1/10 | Train Loss: 2.2310, Train Acc: 2.83% | Val Loss: 1.4473, Val Acc: 13.99%
Epoch 2/10 | Train Loss: 0.9661, Train Acc: 19.90% | Val Loss: 1.1461, Val Acc: 27.04%
Epoch 3/10 | Train Loss: 0.7395, Train Acc: 27.95% | Val Loss: 1.1058, Val Acc: 32.39%
Epoch 4/10 | Train Loss: 0.6210, Train Acc: 34.98% | Val Loss: 1.0697, Val Acc: 34.08%
Epoch 5/10 | Train Loss: 0.5435, Train Acc: 39.43% | Val Loss: 1.0142, Val Acc: 35.47%
Epoch 6/10 | Train Loss: 0.4902, Train Acc: 40.93% | Val Loss: 1.0084, Val Acc: 37.78%
Epoch 7/10 | Train Loss: 0.4391, Train Acc: 45.54% | Val Loss: 1.0430, Val Acc: 36.42%
Epoch 8/10 | Train Loss: 0.4041, Train Acc: 47.70% | Val Loss: 1.0783, Val Acc: 37.71%
Epoch 9/10 | Train Loss: 0.3740, Train Acc: 50.12% | Val Loss: 1.0986, Val Acc: 37.79%
Epoch 10/10 | Train Loss: 0.3494, Train Acc: 53.84% | Val Loss: 1.0740, Val Acc: 38.78%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇▇█
Train Loss,█▃▂▂▂▂▁▁▁▁
Validation Accuracy,▁▅▆▇▇█▇███
Validation Loss,█▃▃▂▁▁▂▂▂▂

0,1
Epoch,10.0
Train Accuracy,53.8378
Train Loss,0.34937
Validation Accuracy,38.77911
Validation Loss,1.07397


[34m[1mwandb[0m: Agent Starting Run: gx0nqhyr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 2.3143, Train Acc: 1.65% | Val Loss: 1.5940, Val Acc: 8.95%
Epoch 2/10 | Train Loss: 1.1590, Train Acc: 13.52% | Val Loss: 1.2700, Val Acc: 21.72%
Epoch 3/10 | Train Loss: 0.9034, Train Acc: 21.49% | Val Loss: 1.1795, Val Acc: 25.59%
Epoch 4/10 | Train Loss: 0.7787, Train Acc: 26.62% | Val Loss: 1.1311, Val Acc: 28.64%
Epoch 5/10 | Train Loss: 0.7060, Train Acc: 30.18% | Val Loss: 1.1438, Val Acc: 29.30%
Epoch 6/10 | Train Loss: 0.6504, Train Acc: 33.37% | Val Loss: 1.1333, Val Acc: 30.10%
Epoch 7/10 | Train Loss: 0.6097, Train Acc: 34.66% | Val Loss: 1.0995, Val Acc: 31.24%
Epoch 8/10 | Train Loss: 0.5787, Train Acc: 36.17% | Val Loss: 1.1433, Val Acc: 32.44%
Epoch 9/10 | Train Loss: 0.5472, Train Acc: 37.80% | Val Loss: 1.1458, Val Acc: 32.44%
Epoch 10/10 | Train Loss: 0.5229, Train Acc: 36.16% | Val Loss: 1.1320, Val Acc: 31.89%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▆▇▇▇███
Train Loss,█▃▂▂▂▁▁▁▁▁
Validation Accuracy,▁▅▆▇▇▇████
Validation Loss,█▃▂▁▂▁▁▂▂▁

0,1
Epoch,10.0
Train Accuracy,36.16416
Train Loss,0.52287
Validation Accuracy,31.89338
Validation Loss,1.13196


[34m[1mwandb[0m: Agent Starting Run: o3nqx78m with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2


Epoch 1/10 | Train Loss: 2.0148, Train Acc: 6.06% | Val Loss: 1.3543, Val Acc: 18.50%
Epoch 2/10 | Train Loss: 0.8496, Train Acc: 24.39% | Val Loss: 1.1477, Val Acc: 28.79%
Epoch 3/10 | Train Loss: 0.6457, Train Acc: 33.90% | Val Loss: 1.0654, Val Acc: 31.67%
Epoch 4/10 | Train Loss: 0.5442, Train Acc: 39.38% | Val Loss: 1.0816, Val Acc: 35.01%
Epoch 5/10 | Train Loss: 0.4783, Train Acc: 43.17% | Val Loss: 1.0904, Val Acc: 34.36%
Epoch 6/10 | Train Loss: 0.4325, Train Acc: 45.63% | Val Loss: 1.1111, Val Acc: 34.61%
Epoch 7/10 | Train Loss: 0.3856, Train Acc: 46.75% | Val Loss: 1.1269, Val Acc: 35.46%
Epoch 8/10 | Train Loss: 0.3509, Train Acc: 48.57% | Val Loss: 1.1368, Val Acc: 34.34%
Epoch 9/10 | Train Loss: 0.3296, Train Acc: 53.26% | Val Loss: 1.1416, Val Acc: 35.53%
Epoch 10/10 | Train Loss: 0.3115, Train Acc: 56.49% | Val Loss: 1.2169, Val Acc: 34.87%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▄▅▆▆▆▇▇██
Train Loss,█▃▂▂▂▁▁▁▁▁
Validation Accuracy,▁▅▆███████
Validation Loss,█▃▁▁▂▂▂▃▃▅

0,1
Epoch,10.0
Train Accuracy,56.49415
Train Loss,0.31149
Validation Accuracy,34.87286
Validation Loss,1.21687


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kssbzt75 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2


Epoch 1/5 | Train Loss: 1.8124, Train Acc: 7.92% | Val Loss: 1.2738, Val Acc: 20.24%
Epoch 2/5 | Train Loss: 0.8192, Train Acc: 26.80% | Val Loss: 1.0916, Val Acc: 29.97%
Epoch 3/5 | Train Loss: 0.6333, Train Acc: 36.47% | Val Loss: 1.0434, Val Acc: 33.34%
Epoch 4/5 | Train Loss: 0.5327, Train Acc: 43.02% | Val Loss: 1.0215, Val Acc: 35.01%
Epoch 5/5 | Train Loss: 0.4575, Train Acc: 48.09% | Val Loss: 1.0475, Val Acc: 35.81%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇██
Validation Loss,█▃▂▁▂

0,1
Epoch,5.0
Train Accuracy,48.08638
Train Loss,0.45755
Validation Accuracy,35.80729
Validation Loss,1.04754


[34m[1mwandb[0m: Agent Starting Run: mzsw3ava with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 2.0744, Train Acc: 3.88% | Val Loss: 1.5419, Val Acc: 12.77%
Epoch 2/10 | Train Loss: 1.0044, Train Acc: 20.22% | Val Loss: 1.2631, Val Acc: 22.07%
Epoch 3/10 | Train Loss: 0.7749, Train Acc: 30.02% | Val Loss: 1.1821, Val Acc: 27.67%
Epoch 4/10 | Train Loss: 0.6522, Train Acc: 36.43% | Val Loss: 1.1804, Val Acc: 30.31%
Epoch 5/10 | Train Loss: 0.5661, Train Acc: 42.31% | Val Loss: 1.1519, Val Acc: 32.42%
Epoch 6/10 | Train Loss: 0.5018, Train Acc: 45.84% | Val Loss: 1.1540, Val Acc: 33.67%
Epoch 7/10 | Train Loss: 0.4446, Train Acc: 50.24% | Val Loss: 1.1761, Val Acc: 30.84%
Epoch 8/10 | Train Loss: 0.4021, Train Acc: 54.21% | Val Loss: 1.1708, Val Acc: 31.43%
Epoch 9/10 | Train Loss: 0.3627, Train Acc: 56.99% | Val Loss: 1.1892, Val Acc: 33.35%
Epoch 10/10 | Train Loss: 0.3330, Train Acc: 59.24% | Val Loss: 1.2425, Val Acc: 32.94%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▄▃▂▂▂▁▁▁▁
Validation Accuracy,▁▄▆▇██▇▇██
Validation Loss,█▃▂▂▁▁▁▁▂▃

0,1
Epoch,10.0
Train Accuracy,59.24037
Train Loss,0.33299
Validation Accuracy,32.93505
Validation Loss,1.24252


[34m[1mwandb[0m: Agent Starting Run: p4n360m9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/5 | Train Loss: 2.0777, Train Acc: 4.11% | Val Loss: 1.4273, Val Acc: 15.21%
Epoch 2/5 | Train Loss: 0.9738, Train Acc: 19.68% | Val Loss: 1.1816, Val Acc: 26.40%
Epoch 3/5 | Train Loss: 0.7750, Train Acc: 26.44% | Val Loss: 1.1412, Val Acc: 29.68%
Epoch 4/5 | Train Loss: 0.6765, Train Acc: 31.03% | Val Loss: 1.0716, Val Acc: 32.15%
Epoch 5/5 | Train Loss: 0.6022, Train Acc: 33.02% | Val Loss: 1.0764, Val Acc: 34.03%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▆██
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▆▇█
Validation Loss,█▃▂▁▁

0,1
Epoch,5.0
Train Accuracy,33.02106
Train Loss,0.60215
Validation Accuracy,34.03033
Validation Loss,1.0764


[34m[1mwandb[0m: Agent Starting Run: p5p8zg2p with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.1289, Train Acc: 3.24% | Val Loss: 1.4109, Val Acc: 14.10%
Epoch 2/10 | Train Loss: 0.9429, Train Acc: 17.25% | Val Loss: 1.1375, Val Acc: 27.60%
Epoch 3/10 | Train Loss: 0.7119, Train Acc: 27.27% | Val Loss: 1.0707, Val Acc: 32.31%
Epoch 4/10 | Train Loss: 0.5932, Train Acc: 31.31% | Val Loss: 1.0737, Val Acc: 34.73%
Epoch 5/10 | Train Loss: 0.5145, Train Acc: 34.27% | Val Loss: 1.0712, Val Acc: 34.86%
Epoch 6/10 | Train Loss: 0.4564, Train Acc: 38.51% | Val Loss: 1.0330, Val Acc: 36.69%
Epoch 7/10 | Train Loss: 0.4146, Train Acc: 42.94% | Val Loss: 1.0444, Val Acc: 37.98%
Epoch 8/10 | Train Loss: 0.3754, Train Acc: 44.93% | Val Loss: 1.0639, Val Acc: 38.07%
Epoch 9/10 | Train Loss: 0.3402, Train Acc: 47.31% | Val Loss: 1.0638, Val Acc: 37.08%
Epoch 10/10 | Train Loss: 0.3182, Train Acc: 49.48% | Val Loss: 1.1153, Val Acc: 37.59%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▅▆▆▇▇██
Train Loss,█▃▃▂▂▂▁▁▁▁
Validation Accuracy,▁▅▆▇▇█████
Validation Loss,█▃▂▂▂▁▁▂▂▃

0,1
Epoch,10.0
Train Accuracy,49.48482
Train Loss,0.31822
Validation Accuracy,37.59191
Validation Loss,1.11532


[34m[1mwandb[0m: Agent Starting Run: bqujxo1o with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/5 | Train Loss: 1.7180, Train Acc: 7.94% | Val Loss: 1.3655, Val Acc: 18.41%
Epoch 2/5 | Train Loss: 0.8614, Train Acc: 25.91% | Val Loss: 1.1811, Val Acc: 25.95%
Epoch 3/5 | Train Loss: 0.6853, Train Acc: 34.74% | Val Loss: 1.1144, Val Acc: 27.12%
Epoch 4/5 | Train Loss: 0.5792, Train Acc: 40.45% | Val Loss: 1.1411, Val Acc: 29.98%
Epoch 5/5 | Train Loss: 0.5101, Train Acc: 45.56% | Val Loss: 1.1046, Val Acc: 31.10%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▆▇█
Validation Loss,█▃▁▂▁

0,1
Epoch,5.0
Train Accuracy,45.56255
Train Loss,0.51011
Validation Accuracy,31.09681
Validation Loss,1.10456


[34m[1mwandb[0m: Agent Starting Run: 15pyu2q3 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2


Epoch 1/5 | Train Loss: 2.4045, Train Acc: 1.37% | Val Loss: 1.6528, Val Acc: 10.75%
Epoch 2/5 | Train Loss: 1.1135, Train Acc: 14.74% | Val Loss: 1.2274, Val Acc: 23.85%
Epoch 3/5 | Train Loss: 0.8285, Train Acc: 25.13% | Val Loss: 1.1427, Val Acc: 28.99%
Epoch 4/5 | Train Loss: 0.6973, Train Acc: 30.34% | Val Loss: 1.0927, Val Acc: 31.94%
Epoch 5/5 | Train Loss: 0.6222, Train Acc: 34.66% | Val Loss: 1.0463, Val Acc: 33.25%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇██
Validation Loss,█▃▂▂▁

0,1
Epoch,5.0
Train Accuracy,34.66087
Train Loss,0.62216
Validation Accuracy,33.24908
Validation Loss,1.0463


[34m[1mwandb[0m: Agent Starting Run: no64773m with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 256
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.7624, Train Acc: 0.04% | Val Loss: 2.5288, Val Acc: 0.21%
Epoch 2/10 | Train Loss: 1.7901, Train Acc: 3.04% | Val Loss: 1.6914, Val Acc: 9.11%
Epoch 3/10 | Train Loss: 1.1910, Train Acc: 13.10% | Val Loss: 1.3669, Val Acc: 17.57%
Epoch 4/10 | Train Loss: 0.9228, Train Acc: 20.03% | Val Loss: 1.2554, Val Acc: 25.17%
Epoch 5/10 | Train Loss: 0.7898, Train Acc: 25.19% | Val Loss: 1.2232, Val Acc: 26.78%
Epoch 6/10 | Train Loss: 0.7002, Train Acc: 28.72% | Val Loss: 1.1881, Val Acc: 27.83%
Epoch 7/10 | Train Loss: 0.6353, Train Acc: 33.60% | Val Loss: 1.1837, Val Acc: 28.27%
Epoch 8/10 | Train Loss: 0.5769, Train Acc: 35.89% | Val Loss: 1.1829, Val Acc: 30.78%
Epoch 9/10 | Train Loss: 0.5323, Train Acc: 40.27% | Val Loss: 1.2099, Val Acc: 29.96%
Epoch 10/10 | Train Loss: 0.5093, Train Acc: 41.82% | Val Loss: 1.2027, Val Acc: 31.11%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▂▃▄▅▆▇▇██
Train Loss,█▅▃▂▂▂▁▁▁▁
Validation Accuracy,▁▃▅▇▇▇▇███
Validation Loss,█▄▂▁▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,41.82003
Train Loss,0.50934
Validation Accuracy,31.11213
Validation Loss,1.20273


[34m[1mwandb[0m: Agent Starting Run: c3b5z3fc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/5 | Train Loss: 2.6706, Train Acc: 0.14% | Val Loss: 2.0658, Val Acc: 1.40%
Epoch 2/5 | Train Loss: 1.3882, Train Acc: 9.26% | Val Loss: 1.4204, Val Acc: 18.07%
Epoch 3/5 | Train Loss: 0.9839, Train Acc: 19.47% | Val Loss: 1.2340, Val Acc: 25.15%
Epoch 4/5 | Train Loss: 0.8253, Train Acc: 25.39% | Val Loss: 1.1491, Val Acc: 28.39%
Epoch 5/5 | Train Loss: 0.7189, Train Acc: 30.98% | Val Loss: 1.1154, Val Acc: 30.34%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▃▅▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇██
Validation Loss,█▃▂▁▁

0,1
Epoch,5.0
Train Accuracy,30.983
Train Loss,0.71894
Validation Accuracy,30.33854
Validation Loss,1.11536


[34m[1mwandb[0m: Agent Starting Run: fdrzqi3n with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 32
[34m[1mwandb[0m: 	enc_layers: 3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.4177, Train Acc: 1.20% | Val Loss: 1.6822, Val Acc: 8.17%
Epoch 2/10 | Train Loss: 1.1001, Train Acc: 14.79% | Val Loss: 1.2351, Val Acc: 24.33%
Epoch 3/10 | Train Loss: 0.7979, Train Acc: 22.91% | Val Loss: 1.1368, Val Acc: 29.83%
Epoch 4/10 | Train Loss: 0.6679, Train Acc: 23.64% | Val Loss: 1.1203, Val Acc: 31.53%
Epoch 5/10 | Train Loss: 0.5886, Train Acc: 25.72% | Val Loss: 1.0934, Val Acc: 34.68%
Epoch 6/10 | Train Loss: 0.5205, Train Acc: 31.20% | Val Loss: 1.0639, Val Acc: 34.32%
Epoch 7/10 | Train Loss: 0.4772, Train Acc: 35.23% | Val Loss: 1.1041, Val Acc: 35.46%
Epoch 8/10 | Train Loss: 0.4388, Train Acc: 42.05% | Val Loss: 1.0904, Val Acc: 36.70%
Epoch 9/10 | Train Loss: 0.4088, Train Acc: 44.04% | Val Loss: 1.1109, Val Acc: 36.21%
Epoch 10/10 | Train Loss: 0.3830, Train Acc: 45.43% | Val Loss: 1.1412, Val Acc: 36.63%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▅▆▆▇██
Train Loss,█▃▂▂▂▁▁▁▁▁
Validation Accuracy,▁▅▆▇█▇████
Validation Loss,█▃▂▂▁▁▁▁▂▂

0,1
Epoch,10.0
Train Accuracy,45.42593
Train Loss,0.38301
Validation Accuracy,36.62684
Validation Loss,1.14125


[34m[1mwandb[0m: Agent Starting Run: za9kw2s8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 3
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.2


Epoch 1/5 | Train Loss: 2.0526, Train Acc: 5.15% | Val Loss: 1.3686, Val Acc: 18.67%
Epoch 2/5 | Train Loss: 0.8586, Train Acc: 24.27% | Val Loss: 1.1456, Val Acc: 28.17%
Epoch 3/5 | Train Loss: 0.6606, Train Acc: 33.58% | Val Loss: 1.0980, Val Acc: 30.59%
Epoch 4/5 | Train Loss: 0.5487, Train Acc: 39.38% | Val Loss: 1.0685, Val Acc: 34.20%
Epoch 5/5 | Train Loss: 0.4793, Train Acc: 43.04% | Val Loss: 1.0877, Val Acc: 34.42%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▅▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▆██
Validation Loss,█▃▂▁▁

0,1
Epoch,5.0
Train Accuracy,43.04121
Train Loss,0.4793
Validation Accuracy,34.42096
Validation Loss,1.08773


[34m[1mwandb[0m: Agent Starting Run: c21mfjvr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: GRU
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/10 | Train Loss: 2.1779, Train Acc: 2.70% | Val Loss: 1.6143, Val Acc: 10.81%
Epoch 2/10 | Train Loss: 1.1102, Train Acc: 14.94% | Val Loss: 1.2491, Val Acc: 21.57%
Epoch 3/10 | Train Loss: 0.8657, Train Acc: 21.40% | Val Loss: 1.1953, Val Acc: 26.53%
Epoch 4/10 | Train Loss: 0.7587, Train Acc: 26.96% | Val Loss: 1.1314, Val Acc: 31.27%
Epoch 5/10 | Train Loss: 0.6794, Train Acc: 29.06% | Val Loss: 1.1149, Val Acc: 31.11%
Epoch 6/10 | Train Loss: 0.6312, Train Acc: 28.79% | Val Loss: 1.1225, Val Acc: 32.95%
Epoch 7/10 | Train Loss: 0.5872, Train Acc: 32.12% | Val Loss: 1.1226, Val Acc: 33.57%
Epoch 8/10 | Train Loss: 0.5611, Train Acc: 33.93% | Val Loss: 1.1342, Val Acc: 33.39%
Epoch 9/10 | Train Loss: 0.5316, Train Acc: 34.89% | Val Loss: 1.1317, Val Acc: 33.71%
Epoch 10/10 | Train Loss: 0.5101, Train Acc: 38.11% | Val Loss: 1.1307, Val Acc: 34.15%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▆▆▆▇▇▇█
Train Loss,█▄▂▂▂▂▁▁▁▁
Validation Accuracy,▁▄▆▇▇█████
Validation Loss,█▃▂▁▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,38.11097
Train Loss,0.51014
Validation Accuracy,34.14522
Validation Loss,1.13075


[34m[1mwandb[0m: Agent Starting Run: bkz6f4dc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.7283, Train Acc: 0.12% | Val Loss: 2.2085, Val Acc: 1.70%
Epoch 2/10 | Train Loss: 1.3631, Train Acc: 9.47% | Val Loss: 1.3326, Val Acc: 18.96%
Epoch 3/10 | Train Loss: 0.9299, Train Acc: 20.67% | Val Loss: 1.1923, Val Acc: 27.72%
Epoch 4/10 | Train Loss: 0.7730, Train Acc: 21.90% | Val Loss: 1.1429, Val Acc: 30.13%
Epoch 5/10 | Train Loss: 0.6719, Train Acc: 24.95% | Val Loss: 1.1542, Val Acc: 31.07%
Epoch 6/10 | Train Loss: 0.6093, Train Acc: 30.14% | Val Loss: 1.1047, Val Acc: 34.15%
Epoch 7/10 | Train Loss: 0.5586, Train Acc: 31.99% | Val Loss: 1.1132, Val Acc: 34.32%
Epoch 8/10 | Train Loss: 0.5163, Train Acc: 33.05% | Val Loss: 1.1179, Val Acc: 34.32%
Epoch 9/10 | Train Loss: 0.4833, Train Acc: 36.32% | Val Loss: 1.0958, Val Acc: 35.66%
Epoch 10/10 | Train Loss: 0.4586, Train Acc: 40.04% | Val Loss: 1.1327, Val Acc: 34.24%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▅▅▅▆▇▇▇█
Train Loss,█▄▂▂▂▁▁▁▁▁
Validation Accuracy,▁▅▆▇▇█████
Validation Loss,█▂▂▁▁▁▁▁▁▁

0,1
Epoch,10.0
Train Accuracy,40.04241
Train Loss,0.45861
Validation Accuracy,34.24479
Validation Loss,1.13272


[34m[1mwandb[0m: Agent Starting Run: zhk7f8s5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: LSTM
[34m[1mwandb[0m: 	dec_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/10 | Train Loss: 2.0285, Train Acc: 4.17% | Val Loss: 1.5248, Val Acc: 12.58%
Epoch 2/10 | Train Loss: 0.9934, Train Acc: 20.57% | Val Loss: 1.2642, Val Acc: 22.76%
Epoch 3/10 | Train Loss: 0.7767, Train Acc: 30.21% | Val Loss: 1.1812, Val Acc: 28.32%
Epoch 4/10 | Train Loss: 0.6595, Train Acc: 35.94% | Val Loss: 1.1379, Val Acc: 31.07%
Epoch 5/10 | Train Loss: 0.5728, Train Acc: 39.89% | Val Loss: 1.1401, Val Acc: 30.66%
Epoch 6/10 | Train Loss: 0.5047, Train Acc: 44.40% | Val Loss: 1.1573, Val Acc: 33.00%
Epoch 7/10 | Train Loss: 0.4525, Train Acc: 49.38% | Val Loss: 1.1262, Val Acc: 33.43%
Epoch 8/10 | Train Loss: 0.4054, Train Acc: 53.12% | Val Loss: 1.1576, Val Acc: 34.26%
Epoch 9/10 | Train Loss: 0.3686, Train Acc: 56.55% | Val Loss: 1.1899, Val Acc: 32.89%
Epoch 10/10 | Train Loss: 0.3318, Train Acc: 58.10% | Val Loss: 1.2104, Val Acc: 33.20%


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train Accuracy,▁▃▄▅▆▆▇▇██
Train Loss,█▄▃▂▂▂▁▁▁▁
Validation Accuracy,▁▄▆▇▇█████
Validation Loss,█▃▂▁▁▂▁▂▂▂

0,1
Epoch,10.0
Train Accuracy,58.10297
Train Loss,0.33177
Validation Accuracy,33.20312
Validation Loss,1.21037


[34m[1mwandb[0m: Agent Starting Run: awphueja with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embed_dim: 64
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.002
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5


Epoch 1/5 | Train Loss: 2.2285, Train Acc: 3.08% | Val Loss: 1.5245, Val Acc: 13.21%
Epoch 2/5 | Train Loss: 1.0345, Train Acc: 17.86% | Val Loss: 1.2223, Val Acc: 23.14%
Epoch 3/5 | Train Loss: 0.8026, Train Acc: 24.73% | Val Loss: 1.1807, Val Acc: 29.54%
Epoch 4/5 | Train Loss: 0.6822, Train Acc: 26.95% | Val Loss: 1.1147, Val Acc: 31.15%
Epoch 5/5 | Train Loss: 0.6065, Train Acc: 33.09% | Val Loss: 1.0726, Val Acc: 33.67%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▄▇▇█
Validation Loss,█▃▃▂▁

0,1
Epoch,5.0
Train Accuracy,33.0863
Train Loss,0.60645
Validation Accuracy,33.67034
Validation Loss,1.07263


[34m[1mwandb[0m: Agent Starting Run: y5ciift5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_width: 5
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: RNN
[34m[1mwandb[0m: 	dec_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embed_dim: 128
[34m[1mwandb[0m: 	enc_layers: 1
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7


Epoch 1/5 | Train Loss: 2.3704, Train Acc: 1.82% | Val Loss: 1.6356, Val Acc: 9.29%
Epoch 2/5 | Train Loss: 1.0821, Train Acc: 16.83% | Val Loss: 1.2568, Val Acc: 23.31%
Epoch 3/5 | Train Loss: 0.8214, Train Acc: 26.63% | Val Loss: 1.1438, Val Acc: 29.88%
Epoch 4/5 | Train Loss: 0.6857, Train Acc: 33.35% | Val Loss: 1.1218, Val Acc: 32.48%
Epoch 5/5 | Train Loss: 0.5957, Train Acc: 38.85% | Val Loss: 1.0890, Val Acc: 34.12%


0,1
Epoch,▁▃▅▆█
Train Accuracy,▁▄▆▇█
Train Loss,█▃▂▁▁
Validation Accuracy,▁▅▇██
Validation Loss,█▃▂▁▁

0,1
Epoch,5.0
Train Accuracy,38.85231
Train Loss,0.59565
Validation Accuracy,34.12224
Validation Loss,1.08902


In [3]:
# best configuration............................
best_config = {
    "embed_dim": 128,
    "hidden_dim": 256,
    "enc_layers": 3,
    "dec_layers": 3,
    "cell_type": "LSTM",
    "dropout": 0.2,
    "batch_size": 64,
    "bidirectional": False,
    "learning_rate": 0.001,
    "epochs": 10,
    "teacher_forcing_ratio": 0.5
}

# printing the test accuracy after training the best model...............................
def training_test(best_config):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


    train_path = "/kaggle/input/dakshina-data/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
    test_path = "/kaggle/input/dakshina-data/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv"
    train_set = read_pairs(train_path)
    test_set = read_pairs(test_path)

    # src_vocab, idx2src, tgt_vocab, idx2tgt, create_batch = build_vocab_and_prepare_batch(train_set, device)
    src_vocab, idx2src, tgt_vocab, idx2tgt, create_batch,unique_chars_latin, unique_chars_dev = build_vocab_and_prepare_batch(train_set, device)
    model = TransliterationModel(len(src_vocab), len(tgt_vocab), best_config["embed_dim"],best_config["hidden_dim"],
                                 best_config["enc_layers"], best_config["dec_layers"], best_config["cell_type"], best_config["dropout"], best_config["bidirectional"]).to(device)

    optimizer = optim.Adam(model.parameters(), lr=best_config["learning_rate"])
    criterion = nn.CrossEntropyLoss(ignore_index=tgt_vocab['<pad>'])

    for epoch in range(best_config["epochs"]):
        model.train()
        total_loss, total_acc = 0, 0
        random.shuffle(train_set)

        for i in range(0, len(train_set), best_config["batch_size"]):
            batch = train_set[i:i+best_config["batch_size"]]
            src, tgt = create_batch(batch)


            optimizer.zero_grad()
            outputs = model(src, tgt, best_config["teacher_forcing_ratio"])
            loss = criterion(outputs[:, 1:].reshape(-1, outputs.size(-1)), tgt[:, 1:].reshape(-1))

            preds = outputs.argmax(-1)
            acc = compute_word_level_accuracy(preds[:, 1:], tgt[:, 1:], tgt_vocab)

            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_acc += acc

        avg_train_loss = total_loss / (len(train_set) // best_config["batch_size"])
        avg_train_acc = total_acc / (len(train_set) // best_config["batch_size"])
        
# evaluating for test data........................................................
        model.eval()
        test_loss, test_acc = 0, 0
        printed = 0
        with torch.no_grad():
            for i in range(0, len(test_set), best_config["batch_size"]):
                batch = test_set[i:i+ best_config["batch_size"]]
                src, tgt = create_batch(batch)
                outputs = model(src, tgt, 0)
                loss = criterion(outputs[:, 1:].reshape(-1, outputs.size(-1)), tgt[:, 1:].reshape(-1))


                preds = outputs.argmax(-1)
                acc = compute_word_level_accuracy(preds[:, 1:], tgt[:, 1:], tgt_vocab)

                test_loss += loss.item()
                test_acc += acc

                if printed < 5:
                    for j in range(min(3, src.size(0))):
                        input_seq = ''.join([idx2src.get(idx.item(), '<unk>') for idx in src[j] if idx.item() not in [src_vocab['<pad>'], src_vocab['<eos>']]])
                        target_seq = ''.join([idx2tgt.get(idx.item(), '<unk>') for idx in tgt[j][1:] if idx.item() not in [tgt_vocab['<pad>'], tgt_vocab['<eos>']]])
                        pred_seq = ''.join([idx2tgt.get(idx.item(), '<unk>') for idx in preds[j][1:] if idx.item() not in [tgt_vocab['<pad>'], tgt_vocab['<eos>']]])
                        print(f"\n Input:{input_seq} | Target:{target_seq} | Predicted:{pred_seq}")
                        print("-" * 40)
                        printed += 1

        avg_test_loss = test_loss / (len(test_set) // best_config["batch_size"])
        avg_test_acc = test_acc / (len(test_set) // best_config["batch_size"])


    print(f" test Loss: {avg_test_loss:.4f}, test Acc: {avg_test_acc:.2f}%")

# calling the function..........................
training_test(best_config)    


 Input:ank | Target:अंक | Predicted:अन्क
----------------------------------------

 Input:anka | Target:अंक | Predicted:अनाा
----------------------------------------

 Input:ankit | Target:अंकित | Predicted:अन्ति
----------------------------------------

 Input:atthas | Target:अट्टहास | Predicted:अत्त
----------------------------------------

 Input:addon | Target:अड्डों | Predicted:अद्दों
----------------------------------------

 Input:athak | Target:अथक | Predicted:अतात
----------------------------------------

 Input:ank | Target:अंक | Predicted:अंक
----------------------------------------

 Input:anka | Target:अंक | Predicted:अंका
----------------------------------------

 Input:ankit | Target:अंकित | Predicted:अंकितत
----------------------------------------

 Input:atthas | Target:अट्टहास | Predicted:अत्थास
----------------------------------------

 Input:addon | Target:अड्डों | Predicted:अददों
----------------------------------------

 Input:athak | Target:अथक | Predicted:अठाक


In [8]:
# plotting the grid and saving to folder.................................

import csv
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

best_config = {
    "embed_dim": 128,
    "hidden_dim": 256,
    "enc_layers": 3,
    "dec_layers": 3,
    "cell_type": "LSTM",
    "dropout": 0.2,
    "batch_size": 64,
    "bidirectional": False,
    "learning_rate": 0.001,
    "epochs": 10,
    "teacher_forcing_ratio": 0.5
}

def training_test(best_config):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_path = "/kaggle/input/dakshina-data/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
    test_path = "/kaggle/input/dakshina-data/dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv"
    train_set = read_pairs(train_path)
    test_set = read_pairs(test_path)

    src_vocab, idx2src, tgt_vocab, idx2tgt, create_batch, unique_chars_latin, unique_chars_dev = build_vocab_and_prepare_batch(train_set, device)

    model = TransliterationModel(len(src_vocab), len(tgt_vocab), best_config["embed_dim"], best_config["hidden_dim"],
                                 best_config["enc_layers"], best_config["dec_layers"], best_config["cell_type"],
                                 best_config["dropout"], best_config["bidirectional"]).to(device)

    optimizer = optim.Adam(model.parameters(), lr=best_config["learning_rate"])
    criterion = nn.CrossEntropyLoss(ignore_index=tgt_vocab['<pad>'])
    
    # training for number of epochs...........................
    for epoch in range(best_config["epochs"]):
        model.train()
        total_loss, total_acc = 0, 0
        random.shuffle(train_set)

        for i in range(0, len(train_set), best_config["batch_size"]):
            batch = train_set[i:i+best_config["batch_size"]]
            src, tgt = create_batch(batch)

            optimizer.zero_grad()
            outputs = model(src, tgt, best_config["teacher_forcing_ratio"])
            loss = criterion(outputs[:, 1:].reshape(-1, outputs.size(-1)), tgt[:, 1:].reshape(-1))

            preds = outputs.argmax(-1)
            acc = compute_word_level_accuracy(preds[:, 1:], tgt[:, 1:], tgt_vocab)

            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_acc += acc
            
# evaluating test data.......................................
    model.eval()
    test_loss, test_acc = 0, 0
    all_predictions = []

    with torch.no_grad():
        for i in range(0, len(test_set), best_config["batch_size"]):
            batch = test_set[i:i + best_config["batch_size"]]
            src, tgt = create_batch(batch)
            outputs = model(src, tgt, 0)
            preds = outputs.argmax(-1)

            for j in range(src.size(0)):
                input_seq = ''.join([idx2src.get(idx.item(), '') for idx in src[j] if idx.item() not in [src_vocab['<pad>'], src_vocab['<eos>']]])
                target_seq = ''.join([idx2tgt.get(idx.item(), '') for idx in tgt[j][1:] if idx.item() not in [tgt_vocab['<pad>'], tgt_vocab['<eos>']]])
                pred_seq = ''.join([idx2tgt.get(idx.item(), '') for idx in preds[j][1:] if idx.item() not in [tgt_vocab['<pad>'], tgt_vocab['<eos>']]])
                all_predictions.append({'Input': input_seq, 'Target': target_seq, 'Predicted': pred_seq})

    # Save all predictions to CSV................................
    os.makedirs("predictions_vanilla", exist_ok=True)
    with open("predictions_vanilla/test_predictions.csv", "w", newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=['Input', 'Target', 'Predicted'])
        writer.writeheader()
        writer.writerows(all_predictions)

    

    # Print 3x3 sample grid....................
    sample_df = pd.DataFrame(all_predictions[:9])
    print("\nSample Prediction Grid:\n")
    for i, row in sample_df.iterrows():
        print(f"{i+1}. Input: {row['Input']} | Target: {row['Target']} | Predicted: {row['Predicted']}")

    

# Call the function
training_test(best_config)



Sample Prediction Grid:

1. Input: ank | Target: अंक | Predicted: अंक
2. Input: anka | Target: अंक | Predicted: अंका
3. Input: ankit | Target: अंकित | Predicted: अंकित
4. Input: anakon | Target: अंकों | Predicted: अनकों
5. Input: ankhon | Target: अंकों | Predicted: आंखों
6. Input: ankon | Target: अंकों | Predicted: अंकों
7. Input: angkor | Target: अंकोर | Predicted: अंककर
8. Input: ankor | Target: अंकोर | Predicted: अंकोर
9. Input: angaarak | Target: अंगारक | Predicted: अंगारक
