In [1]:
import numpy as np 
import pandas as pd 


In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import pandas as pd

def load_data(path,batch_size = 32):
    df = pd.read_csv(path)
    df.columns = ['input_word','target_word']
    
    # Define maximum sequence lengths for letters
    max_input_len = max(len(word) for word in df['input_word'])
    max_target_len = max(len(word) for word in df['target_word'])

    # Define vocabulary mappings for letters
    input_letter_vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}  # Add special tokens
    target_letter_vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}  # Add special tokens
    letter_idx = 3

    input_str = ''
    target_str = ''
    # Preprocess the data and update vocabulary mappings for letters
    for input_word, target_word in zip(df['input_word'], df['target_word']):
        input_str += input_word
        target_str += target_word


    # Update vocabulary mappings for input letters
    for letter in sorted(set(input_str)):
        input_letter_vocab[letter] = letter_idx
        letter_idx += 1
    letter_idx = 3
    # Update vocabulary mappings for target letters
    for letter in sorted(set(target_str)):
        if letter not in target_letter_vocab:
            target_letter_vocab[letter] = letter_idx
            letter_idx += 1

    # Tokenize function at the letter level
    def tokenize_input_letters(word, vocab, max_len):
        token_ids = [vocab[char] for char in word if char in vocab]
        padded = token_ids[:max_len] + [vocab['<pad>']] * (max_len - len(token_ids))
        return torch.tensor(padded)

    def tokenize_target_letters(word, vocab, max_len):
        token_ids = [vocab[char] for char in word if char in vocab]
        padded =  [vocab['<pad>']]+ token_ids[:max_len] +[vocab['<pad>']] * (max_len - len(token_ids))
        return torch.tensor(padded)
    

    # Custom Dataset class for letter-level tokenization
    class CustomDataset(Dataset):
        def __init__(self, input_data, target_data, input_vocab, target_vocab, max_input_len, max_target_len):
            self.input_data = input_data
            self.target_data = target_data
            self.input_vocab = input_vocab
            self.target_vocab = target_vocab
            self.max_input_len = max_input_len
            self.max_target_len = max_target_len

        def __len__(self):
            return len(self.input_data)

        def __getitem__(self, idx):
            input_word = self.input_data[idx]
            target_word = self.target_data[idx]

            # Tokenize input and target words at the letter level
            input_letters = tokenize_input_letters(input_word, self.input_vocab, self.max_input_len)
            target_letters = tokenize_target_letters(target_word, self.target_vocab, self.max_target_len)

            return input_letters, target_letters

    # Create DataLoader
    custom_dataset = CustomDataset(df['input_word'], df['target_word'], input_letter_vocab, target_letter_vocab, max_input_len, max_target_len)
    data_loader1 = DataLoader(custom_dataset, batch_size=batch_size, shuffle = False )
    
    return custom_dataset,data_loader1, input_letter_vocab, target_letter_vocab, max_input_len, max_target_len



In [3]:
path1 = '/kaggle/input/aksharantar/aksharantar_sampled/ben/ben_train.csv'
custom_dataset1,train_loader_ben,a,b,_,_ = load_data(path1,batch_size = 64)
path2 = '/kaggle/input/aksharantar/aksharantar_sampled/ben/ben_valid.csv'
custom_dataset,val_loader_ben,_,_,_,_ = load_data(path2,batch_size = 64)
print(a,b)

{'<pad>': 0, '<sos>': 1, '<eos>': 2, 'a': 3, 'b': 4, 'c': 5, 'd': 6, 'e': 7, 'f': 8, 'g': 9, 'h': 10, 'i': 11, 'j': 12, 'k': 13, 'l': 14, 'm': 15, 'n': 16, 'o': 17, 'p': 18, 'q': 19, 'r': 20, 's': 21, 't': 22, 'u': 23, 'v': 24, 'w': 25, 'x': 26, 'y': 27, 'z': 28} {'<pad>': 0, '<sos>': 1, '<eos>': 2, 'ঁ': 3, 'ং': 4, 'ঃ': 5, 'অ': 6, 'আ': 7, 'ই': 8, 'ঈ': 9, 'উ': 10, 'ঊ': 11, 'ঋ': 12, 'এ': 13, 'ঐ': 14, 'ও': 15, 'ঔ': 16, 'ক': 17, 'খ': 18, 'গ': 19, 'ঘ': 20, 'ঙ': 21, 'চ': 22, 'ছ': 23, 'জ': 24, 'ঝ': 25, 'ঞ': 26, 'ট': 27, 'ঠ': 28, 'ড': 29, 'ঢ': 30, 'ণ': 31, 'ত': 32, 'থ': 33, 'দ': 34, 'ধ': 35, 'ন': 36, 'প': 37, 'ফ': 38, 'ব': 39, 'ভ': 40, 'ম': 41, 'য': 42, 'র': 43, 'ল': 44, 'শ': 45, 'ষ': 46, 'স': 47, 'হ': 48, '়': 49, 'া': 50, 'ি': 51, 'ী': 52, 'ু': 53, 'ূ': 54, 'ৃ': 55, 'ে': 56, 'ৈ': 57, 'ো': 58, 'ৌ': 59, '্': 60, 'ৎ': 61, '২': 62}


# **Seq2Seq Bidirectional Model**

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm


# Encoder class
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, embed_size, encoder_layers=1, drop_prob=0.5, cell_type='gru', bidirectional=False):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.encoder_layers = encoder_layers
        self.cell_type = cell_type
        self.bidirectional = bidirectional
        self.dropout = nn.Dropout(drop_prob)
        self.embedding = nn.Embedding(input_size, embed_size)
        
        if cell_type == 'lstm':
            self.rnn = nn.LSTM(embed_size, hidden_size, encoder_layers, dropout=drop_prob, bidirectional=bidirectional, batch_first=True)
        elif cell_type == 'gru':
            self.rnn = nn.GRU(embed_size, hidden_size, encoder_layers, dropout=drop_prob, bidirectional=bidirectional, batch_first=True)
        else:
            self.rnn = nn.RNN(embed_size, hidden_size, encoder_layers, dropout=drop_prob, bidirectional=bidirectional, batch_first=True)
    
    def forward(self, x):
        embedded = self.dropout(self.embedding(x))
        output, hidden = self.rnn(embedded)
        
        if self.cell_type == 'lstm':
            hidden_states, cell_states = hidden
            if self.bidirectional:
                return torch.sum(hidden_states[:-2],dim= 0, keepdim = True), torch.sum(cell_states[-2:],dim= 0, keepdim = True)
            else:
                return hidden_states[-1].unsqueeze(0), cell_states[-1].unsqueeze(0)
        else:
            if self.bidirectional:
                return torch.sum(hidden[:-2],dim= 0, keepdim = True)
            else:
                return hidden[-1].unsqueeze(0)
        
# Decoder class
class Decoder(nn.Module):
    def __init__(self, hidden_size, embed_size, output_size, decoder_layers=1, drop_prob=0.5, cell_type='gru'):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.decoder_layers = decoder_layers
        self.cell_type = cell_type
        self.dropout = nn.Dropout(drop_prob)
        self.embedding = nn.Embedding(output_size, embed_size)
        if cell_type == 'lstm':
            self.rnn = nn.LSTM(embed_size, hidden_size, decoder_layers, dropout=drop_prob, batch_first=True)
        elif cell_type == 'gru':
            self.rnn = nn.GRU(embed_size, hidden_size, decoder_layers, dropout=drop_prob, batch_first=True)
        else:
            self.rnn = nn.RNN(embed_size, hidden_size, decoder_layers, dropout=drop_prob, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden):
        x = x.unsqueeze(1)
        embedded = self.dropout(self.embedding(x))
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output)
        return output.squeeze(1), hidden

# Sequence to sequence class
class Seq2Seq(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, embed_size, encoder_layers=1, decoder_layers=1, drop_prob=0.3, cell_type='gru', bidirectional= True):
        super(Seq2Seq, self).__init__()
        self.encoder = Encoder(input_size, hidden_size, embed_size, encoder_layers, drop_prob, cell_type, bidirectional)
        self.decoder = Decoder(hidden_size, embed_size, output_size, decoder_layers, drop_prob, cell_type)

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        batch_size = source.size(0)
        target_len = target.size(1)
        output_vocab_size = output_size #self.decoder.embedding.num_embeddings

        outputs = torch.zeros(batch_size, target_len, output_vocab_size).to(source.device)

        encoder_hidden = self.encoder(source)  # , encoder_cell
        decoder_hidden = self._init_decoder_hidden(encoder_hidden)  # Initialize decoder hidden state
        decoder_input = target[:, 0]  # start token

        for t in range(1, target_len):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            outputs[:, t] = decoder_output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            t1 = decoder_output.argmax(1)
            decoder_input = target[:, t] if teacher_force else t1

        return outputs

    def _init_decoder_hidden(self, encoder_hidden):
        decoder_layers = self.decoder.decoder_layers
        encoder_layers = encoder_hidden[0].shape[0] if isinstance(encoder_hidden, tuple) else encoder_hidden.shape[0]

        if self.decoder.cell_type == 'lstm':
            if encoder_layers < decoder_layers:
                # Pad the encoder hidden state with zeros to match decoder_layers
                encoder_hidden = (
                    torch.cat(
                        [encoder_hidden[0], torch.zeros(decoder_layers - encoder_layers, *encoder_hidden[0].shape[1:], device=encoder_hidden[0].device)],
                        dim=0),torch.cat(
                        [encoder_hidden[1], torch.zeros(decoder_layers - encoder_layers, *encoder_hidden[1].shape[1:], device=encoder_hidden[1].device)],
                        dim=0))
            if encoder_hidden[0].shape[0] != decoder_layers:
                # If encoder layers and decoder layers are different, adjust the hidden state
                encoder_hidden = (encoder_hidden[0][:decoder_layers], encoder_hidden[1][:decoder_layers])

        else:
            if encoder_layers < decoder_layers:
                # Pad the encoder hidden state with zeros to match decoder_layers
                encoder_hidden = torch.cat(
                    [encoder_hidden, torch.zeros(decoder_layers - encoder_layers, *encoder_hidden.shape[1:], device=encoder_hidden.device)],
                    dim=0
                )
            if encoder_hidden.shape[0] != decoder_layers:
                # If encoder layers and decoder layers are different, adjust the hidden state
                encoder_hidden = encoder_hidden[:decoder_layers]

        return encoder_hidden


# **Train and evaluate**

In [5]:
    
# Training function
def train(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    
    for latin, devanagari in dataloader:  
        latin = latin.to(device)
        devanagari = devanagari.to(device)
        
        optimizer.zero_grad()
        
        output = model(latin, devanagari)
        output_dim = output.shape[-1]
        output = output.view(-1, output_dim)
        devanagari = devanagari.view(-1)
        
        loss = criterion(output, devanagari)
        total_loss += loss.item()
        
        loss.backward()
        optimizer.step()
    return model, total_loss / len(dataloader)

# Evaluation function
def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0
    
    with torch.no_grad():
        for latin, devanagari in dataloader:  #tqdm(dataloader, desc='Evaluating', unit='batch'):
            latin = latin.to(device)
            devanagari = devanagari.to(device)
            
            output = model(latin, devanagari,teacher_forcing_ratio=0.0)
                                           
            output_dim = output.shape[-1]                                                   
           
            loss = criterion(output.view(-1, output_dim), devanagari.view(-1))
            total_loss += loss.item()
            
            max_values ,max_index = torch.max(output, 2) #output.argmax(dim=1)
            ind = max_index > 9
            max_index[ind] -= 2
#             print(f"prediction:{max_index} actual:{devanagari}")
            correct1=(max_index == devanagari).all(dim=1).sum().item()  # Calculate word accuracy
#             
            total_correct += correct1
            total_samples += devanagari.size(0)
    
    avg_loss = total_loss / len(dataloader)
    accuracy = total_correct / total_samples
    
    return avg_loss, accuracy*100

# Example usage
input_size = 30  # Number of Latin characters
output_size = 70  # Number of Devanagari characters
embed_size = 128
hidden_size = 128
encoder_layers = 3
decoder_layers = 2
cell_type = 'lstm'
batch_size = 64
num_epochs = 20
drop_prob = 0.2
learning_rate = 0.001
bidirectional = True


# Initialize the model, criterion, and optimizer
model = Seq2Seq(input_size, output_size, hidden_size,embed_size, encoder_layers,decoder_layers,drop_prob, cell_type,bidirectional)
print(model)

# model = Attention_model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
# for epoch in range(num_epochs):
#     trained_model, train_loss = train(model, train_loader_ben, criterion, optimizer, device)
#     val_loss, val_accuracy = evaluate(trained_model, val_loader_ben, criterion, device)
#     model = trained_model
#     print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): LSTM(128, 128, num_layers=3, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): LSTM(128, 128, num_layers=2, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=128, out_features=70, bias=True)
  )
)


# **Wandb Setup**

In [5]:
# !pip install wandb
import wandb
import numpy as np
from types import SimpleNamespace
import random

In [6]:
wandb.login(key='bb3c7761be2856a8335d16d1483149380482ae9e')#bb3c7761be2856a8335d16d1483149380482ae9e


[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [7]:
sweep_config = {
    'method': 'bayes',
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'embedding_size':{
            'values': [16,32,64,128,256]
        },
        'dropout': {
            'values': [0.3, 0.2,0.5]
        },
        'encoder_layers': {
            'values': [1,2,3]
        },
        'decoder_layers':{
            'values': [1,2,3]
        },
        'hidden_layer_size':{
            'values': [16,32,64,128,256]
        },
        'cell_type': {
            'values': [ 'lstm', 'rnn', 'gru']
        },
        'bidirectional': {
            'values': [True, False]
        },
        'batch_size': {
            'values': [32,64]
        },
        'num_epochs': {
            'values': [10,12]
        },
        'learning_rate': {
            'values': [0.01,0.001]
        }
    }
}

sweep_id = wandb.sweep(sweep=sweep_config, project='DL_assignment_3')


Create sweep with ID: 9hpnde6s
Sweep URL: https://wandb.ai/abanisingha1997/DL_assignment_3/sweeps/9hpnde6s


In [8]:
def main():
    '''
    WandB calls main function each time with differnet combination.

    We can retrive the same and use the same values for our hypermeters.

    '''

    with wandb.init() as run:
        run_name="ct-"+str(wandb.config.cell_type)+"_el-"+str(wandb.config.encoder_layers)+"_dl-"+str(wandb.config.decoder_layers)+"_drop-"+str(wandb.config.dropout)+"_es-"+str(wandb.config.embedding_size)+"_hs-"+str(wandb.config.hidden_layer_size)+"_bs-"+str(wandb.config.batch_size)+"_ep-"+str(wandb.config.num_epochs)+"lr"+str(wandb.config.learning_rate)
        wandb.run.name=run_name

        
        model = Seq2Seq(input_size=30, output_size=70, hidden_size=wandb.config.hidden_layer_size,embed_size=wandb.config.embedding_size,encoder_layers=wandb.config.encoder_layers,
                        decoder_layers=wandb.config.decoder_layers,drop_prob=wandb.config.dropout, cell_type=wandb.config.cell_type, bidirectional=wandb.config.bidirectional)
        print(model)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=wandb.config.learning_rate)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        path1 = '/kaggle/input/aksharantar/aksharantar_sampled/ben/ben_train.csv'
        custom_dataset1,train_loader_ben,a,b,_,_ = load_data(path1,batch_size = wandb.config.batch_size)
        path2 = '/kaggle/input/aksharantar/aksharantar_sampled/ben/ben_valid.csv'
        custom_dataset,val_loader_ben,_,_,_,_ = load_data(path2,batch_size = wandb.config.batch_size)

        # Training loop
        for epoch in range(wandb.config.num_epochs):
            trained_model, train_loss = train(model, train_loader_ben, criterion, optimizer, device)
            val_loss, val_accuracy = evaluate(trained_model, val_loader_ben, criterion, device)
            model = trained_model
            wandb.log({'Epoch': epoch, 'train_loss': train_loss , ' val_loss': val_loss, 'val_accuracy':val_accuracy})
            print(f'Epoch {epoch+1}/{wandb.config.num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        
wandb.agent(sweep_id, function= main,count= 30) # calls main function for count number of times.
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: c3fl6c58 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 12
[34m[1mwandb[0m: Currently logged in as: [33mabanisingha1997[0m. Use [1m`wandb login --relogin`[0m to force relogin




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): LSTM(64, 64, num_layers=2, batch_first=True, dropout=0.5, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): LSTM(64, 64, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.0375, Val Loss: 2.5188, Val Accuracy: 0.8059
Epoch 2/12, Train Loss: 0.8383, Val Loss: 2.7877, Val Accuracy: 1.7827
Epoch 3/12, Train Loss: 0.7780, Val Loss: 2.9903, Val Accuracy: 3.4432
Epoch 4/12, Train Loss: 0.7457, Val Loss: 3.0975, Val Accuracy: 4.0293
Epoch 5/12, Train Loss: 0.7274, Val Loss: 3.1876, Val Accuracy: 4.7375
Epoch 6/12, Train Loss: 0.7164, Val Loss: 3.2424, Val Accuracy: 4.4444
Epoch 7/12, Train Loss: 0.7074, Val Loss: 3.2720, Val Accuracy: 4.9573
Epoch 8/12, Train Loss: 0.6893, Val Loss: 3.3848, Val Accuracy: 5.1526
Epoc

VBox(children=(Label(value='0.742 MB of 0.742 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▅▅▆▇▇█████
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▃▂▂▂▂▁▁▁▁▁
val_accuracy,▁▂▄▅▅▅▆▆▇▇▇█

0,1
val_loss,3.4324
Epoch,11.0
train_loss,0.66855
val_accuracy,6.95971


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: njhqmcd4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): LSTM(64, 16, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): LSTM(64, 16, num_layers=3, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=16, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.6505, Val Loss: 2.0821, Val Accuracy: 0.0000
Epoch 2/10, Train Loss: 1.3051, Val Loss: 2.0845, Val Accuracy: 0.0000
Epoch 3/10, Train Loss: 1.2823, Val Loss: 2.0501, Val Accuracy: 0.0000
Epoch 4/10, Train Loss: 1.2665, Val Loss: 2.0719, Val Accuracy: 0.0000
Epoch 5/10, Train Loss: 1.2551, Val Loss: 2.0661, Val Accuracy: 0.0000
Epoch 6/10, Train Loss: 1.2404, Val Loss: 2.0789, Val Accuracy: 0.0000
Epoch 7/10, Train Loss: 1.2232, Val Loss: 2.0514, Val Accuracy: 0.0000
Epoch 8/10, Train Loss: 1.1953, Val Loss: 2.0298, Val Accurac

VBox(children=(Label(value='0.748 MB of 0.748 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,██▅▇▆▇▅▃▃▁
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▃▃▂▂▂▂▁▁
val_accuracy,▁▁▁▁▁▁▁██▆

0,1
val_loss,2.00638
Epoch,9.0
train_loss,1.15944
val_accuracy,0.04884


[34m[1mwandb[0m: Agent Starting Run: o1y3w59n with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112702644444047, max=1.0…



Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): GRU(128, 64, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): GRU(128, 64, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.3174, Val Loss: 2.0759, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.0937, Val Loss: 2.1687, Val Accuracy: 0.1954
Epoch 3/12, Train Loss: 1.0041, Val Loss: 2.3215, Val Accuracy: 0.4884
Epoch 4/12, Train Loss: 0.9274, Val Loss: 2.4320, Val Accuracy: 1.2698
Epoch 5/12, Train Loss: 0.8696, Val Loss: 2.5306, Val Accuracy: 2.1490
Epoch 6/12, Train Loss: 0.8215, Val Loss: 2.6093, Val Accuracy: 3.3455
Epoch 7/12, Train Loss: 0.7844, Val Loss: 2.7373, Val Accuracy: 3.1990
Epoch 8/12, Train Loss: 0.7580, Val Loss: 2.7877, Val Accuracy: 5.2747
Epoch 9/12, Train Loss: 0.7355, Val 

VBox(children=(Label(value='0.755 MB of 0.755 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▂▃▄▄▅▆▆▇▇██
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▆▅▄▃▃▂▂▂▁▁▁
val_accuracy,▁▁▁▂▃▄▄▆▇▇▇█

0,1
val_loss,3.03175
Epoch,11.0
train_loss,0.68415
val_accuracy,6.91087


[34m[1mwandb[0m: Agent Starting Run: hysytsa6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): RNN(64, 128, num_layers=3, batch_first=True, dropout=0.5)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): RNN(64, 128, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=128, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.3106, Val Loss: 2.2378, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.2771, Val Loss: 2.2343, Val Accuracy: 0.0000
Epoch 3/12, Train Loss: 1.2774, Val Loss: 2.2140, Val Accuracy: 0.0000
Epoch 4/12, Train Loss: 1.2741, Val Loss: 2.2632, Val Accuracy: 0.0000
Epoch 5/12, Train Loss: 1.2705, Val Loss: 2.2348, Val Accuracy: 0.0000
Epoch 6/12, Train Loss: 1.2765, Val Loss: 2.1938, Val Accuracy: 0.0000
Epoch 7/12, Train Loss: 1.2731, Val Loss: 2.2322, Val Accuracy: 0.0000
Epoch 8/12, Train Loss: 1.2790, Val Loss: 2.1844, Val Accuracy: 0.0000
Epoch 9/12, Train Loss:

VBox(children=(Label(value='0.761 MB of 0.761 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▆▅▄█▅▂▅▁▄▇▃▁
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▂▂▂▁▂▁▂▂▃▂▃
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁

0,1
val_loss,2.18664
Epoch,11.0
train_loss,1.27984
val_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1rj3rgru with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 16)
    (rnn): GRU(16, 16, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 16)
    (rnn): GRU(16, 16, num_layers=2, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=16, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.5041, Val Loss: 2.0169, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.2522, Val Loss: 2.0665, Val Accuracy: 0.0000
Epoch 3/12, Train Loss: 1.2176, Val Loss: 2.0632, Val Accuracy: 0.0000
Epoch 4/12, Train Loss: 1.2043, Val Loss: 2.0809, Val Accuracy: 0.0000
Epoch 5/12, Train Loss: 1.1907, Val Loss: 2.0489, Val Accuracy: 0.0000
Epoch 6/12, Train Loss: 1.1752, Val Loss: 2.0899, Val Accuracy: 0.0000
Epoch 7/12, Train Loss: 1.1651, Val Loss: 2.1015, Val Accuracy: 0.0000
Epoch 8/12, Train Loss: 1.1562, Val Loss: 2.1059, Val Accuracy: 0.0000
Epoch 9/12, Train Loss: 1.

VBox(children=(Label(value='0.768 MB of 0.768 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▃▃▂▄▄▄▅▅▆█
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▃▃▂▂▂▂▂▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁

0,1
val_loss,2.22054
Epoch,11.0
train_loss,1.11788
val_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: th9q4iv2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): LSTM(256, 128, num_layers=3, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): LSTM(256, 128, num_layers=2, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=128, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.3041, Val Loss: 2.0884, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.1588, Val Loss: 2.0380, Val Accuracy: 0.0244
Epoch 3/12, Train Loss: 1.0297, Val Loss: 2.2408, Val Accuracy: 0.7082
Epoch 4/12, Train Loss: 0.8327, Val Loss: 2.5816, Val Accuracy: 5.6166
Epoch 5/12, Train Loss: 0.7071, Val Loss: 2.7790, Val Accuracy: 9.1819
Epoch 6/12, Train Loss: 0.6356, Val Loss: 2.9939, Val Accuracy: 13.0403
Epoch 7/12, Train Loss: 0.5903, Val Loss: 3.1537, Val Accuracy: 14.7497
Epoch 8/12, Train Loss: 0.5554, Val Loss: 3.2580, Val Accuracy: 17.3871


VBox(children=(Label(value='0.774 MB of 0.774 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▁▂▃▄▅▆▆▇▇██
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▇▆▄▃▂▂▂▁▁▁▁
val_accuracy,▁▁▁▃▄▅▆▆▇▇██

0,1
val_loss,3.6216
Epoch,11.0
train_loss,0.47711
val_accuracy,22.39316


[34m[1mwandb[0m: Agent Starting Run: 0nwaf9pt with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): GRU(64, 32, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): GRU(64, 32, num_layers=3, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=32, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.4870, Val Loss: 2.2459, Val Accuracy: 0.0000
Epoch 2/10, Train Loss: 1.2607, Val Loss: 2.2445, Val Accuracy: 0.0000
Epoch 3/10, Train Loss: 1.2086, Val Loss: 2.1885, Val Accuracy: 0.0000
Epoch 4/10, Train Loss: 1.1948, Val Loss: 2.1667, Val Accuracy: 0.0000
Epoch 5/10, Train Loss: 1.1794, Val Loss: 2.1709, Val Accuracy: 0.0000
Epoch 6/10, Train Loss: 1.1676, Val Loss: 2.1585, Val Accuracy: 0.0000
Epoch 7/10, Train Loss: 1.1510, Val Loss: 2.1888, Val Accuracy: 0.0000
Epoch 8/10, Train Loss: 1.1320, Val Loss: 2.2143, Val Accuracy: 0.0244
Epoch 9/10, Train Loss: 1.

VBox(children=(Label(value='0.780 MB of 0.780 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,██▃▂▂▁▃▅▆█
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▃▃▂▂▂▂▁▁
val_accuracy,▁▁▁▁▁▁▁▃▆█

0,1
val_loss,2.24809
Epoch,9.0
train_loss,1.10037
val_accuracy,0.07326


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ohhu1yqi with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 16)
    (rnn): LSTM(16, 256, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 16)
    (rnn): LSTM(16, 256, num_layers=2, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.1764, Val Loss: 2.2686, Val Accuracy: 0.0488
Epoch 2/10, Train Loss: 0.9527, Val Loss: 2.5618, Val Accuracy: 0.9768
Epoch 3/10, Train Loss: 0.8538, Val Loss: 2.7242, Val Accuracy: 1.3431
Epoch 4/10, Train Loss: 0.8074, Val Loss: 2.8114, Val Accuracy: 2.5153
Epoch 5/10, Train Loss: 0.7764, Val Loss: 2.9294, Val Accuracy: 3.3700
Epoch 6/10, Train Loss: 0.7513, Val Loss: 3.0147, Val Accuracy: 3.8828
Epoch 7/10, Train Loss: 0.7330, Val Loss: 3.0928, Val Accuracy: 5.0305
Epoch 8/10, Train Loss: 0.7178, Val Loss: 3.1458, Val Accuracy: 5.2015
Epoch 9/10, Train Los

VBox(children=(Label(value='0.786 MB of 0.786 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▅▆▆▇▇▇█
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▅▃▃▂▂▂▁▁▁
val_accuracy,▁▂▃▄▅▆▇▇▇█

0,1
val_loss,3.2596
Epoch,9.0
train_loss,0.69774
val_accuracy,5.76313


[34m[1mwandb[0m: Agent Starting Run: c7rbgzmr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 32)
    (rnn): GRU(32, 16, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 32)
    (rnn): GRU(32, 16, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=16, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.2867, Val Loss: 2.1962, Val Accuracy: 0.0000
Epoch 2/10, Train Loss: 1.1967, Val Loss: 2.1953, Val Accuracy: 0.0000
Epoch 3/10, Train Loss: 1.1486, Val Loss: 2.1916, Val Accuracy: 0.0000
Epoch 4/10, Train Loss: 1.1280, Val Loss: 2.2249, Val Accuracy: 0.0000
Epoch 5/10, Train Loss: 1.1193, Val Loss: 2.2754, Val Accuracy: 0.0000
Epoch 6/10, Train Loss: 1.1133, Val Loss: 2.2965, Val Accuracy: 0.0244
Epoch 7/10, Train Loss: 1.1017, Val Loss: 2.3192, Val Accuracy: 0.0488
Epoch 8/10, Train Loss: 1.1304, Val Loss: 2.3400, Val Accuracy: 0.0244
Epoch 9/10, Train Loss: 1.0788, Val Loss

VBox(children=(Label(value='0.793 MB of 0.793 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▁▁▂▄▅▆▆█▇
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▅▃▃▃▂▂▃▁▁
val_accuracy,▁▁▁▁▁▂▄▂▄█

0,1
val_loss,2.35197
Epoch,9.0
train_loss,1.07361
val_accuracy,0.1221


[34m[1mwandb[0m: Agent Starting Run: 46j4f4tq with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): LSTM(128, 32, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): LSTM(128, 32, num_layers=2, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=32, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.3536, Val Loss: 2.0642, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.1767, Val Loss: 2.1055, Val Accuracy: 0.0244
Epoch 3/12, Train Loss: 1.1198, Val Loss: 2.1581, Val Accuracy: 0.0733
Epoch 4/12, Train Loss: 1.0724, Val Loss: 2.2008, Val Accuracy: 0.1465
Epoch 5/12, Train Loss: 1.0300, Val Loss: 2.2976, Val Accuracy: 0.0977
Epoch 6/12, Train Loss: 1.0003, Val Loss: 2.3348, Val Accuracy: 0.3907
Epoch 7/12, Train Loss: 0.9738, Val Loss: 2.3792, Val Accuracy: 0.4151
Epoch 8/12, Train Loss: 0.9481, Val Loss: 2.4507, Val Accuracy: 0.8547
Epoch 9/12, Train Lo

VBox(children=(Label(value='0.799 MB of 0.799 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▂▂▃▄▄▅▆▇▇▇█
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▅▅▄▃▃▂▂▂▁▁▁
val_accuracy,▁▁▁▂▁▃▃▅▇▆█▇

0,1
val_loss,2.61154
Epoch,11.0
train_loss,0.87888
val_accuracy,1.31868


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 81ofa6qr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 10


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): GRU(128, 32, num_layers=2, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): GRU(128, 32, num_layers=3, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=32, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.2868, Val Loss: 1.9995, Val Accuracy: 0.0000
Epoch 2/10, Train Loss: 1.2022, Val Loss: 2.0910, Val Accuracy: 0.0000
Epoch 3/10, Train Loss: 1.1711, Val Loss: 2.1605, Val Accuracy: 0.0244
Epoch 4/10, Train Loss: 1.1481, Val Loss: 2.1715, Val Accuracy: 0.0244
Epoch 5/10, Train Loss: 1.1316, Val Loss: 2.1817, Val Accuracy: 0.0000
Epoch 6/10, Train Loss: 1.1185, Val Loss: 2.2191, Val Accuracy: 0.0000
Epoch 7/10, Train Loss: 1.1137, Val Loss: 2.2045, Val Accuracy: 0.0000
Epoch 8/10, Train Loss: 1.0976, Val Loss: 2.3002, Val Accuracy: 0.0000
Epoch 9/

VBox(children=(Label(value='0.805 MB of 0.805 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▅▅▆▅▇██
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▁██▁▁▁▁▁▁

0,1
val_loss,2.30396
Epoch,9.0
train_loss,1.08492
val_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: irhupchb with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 16)
    (rnn): RNN(16, 128, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 16)
    (rnn): RNN(16, 128, num_layers=2, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=128, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.3505, Val Loss: 2.2415, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.2857, Val Loss: 2.2405, Val Accuracy: 0.0000
Epoch 3/12, Train Loss: 1.2832, Val Loss: 2.2743, Val Accuracy: 0.0000
Epoch 4/12, Train Loss: 1.2852, Val Loss: 2.2407, Val Accuracy: 0.0000
Epoch 5/12, Train Loss: 1.2813, Val Loss: 2.2375, Val Accuracy: 0.0000
Epoch 6/12, Train Loss: 1.2836, Val Loss: 2.3006, Val Accuracy: 0.0000
Epoch 7/12, Train Loss: 1.2860, Val Loss: 2.8853, Val Accuracy: 0.0000
Epoch 8/12, Train Loss: 1.2860, Val Loss: 2.2922, Val Accuracy: 0.0000
Epoch 9/12, Train Loss:

VBox(children=(Label(value='0.812 MB of 0.829 MB uploaded\r'), FloatProgress(value=0.9785890556829446, max=1.0…

0,1
val_loss,▁▁▁▁▁▂█▂▁▁▁▂
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▁▁▁▁▁▁▁▂▂▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁

0,1
val_loss,2.28682
Epoch,11.0
train_loss,1.28571
val_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 0cnghlr2 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): GRU(64, 64, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): GRU(64, 64, num_layers=3, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.3722, Val Loss: 2.1205, Val Accuracy: 0.0000
Epoch 2/10, Train Loss: 1.2655, Val Loss: 2.1420, Val Accuracy: 0.0000
Epoch 3/10, Train Loss: 1.2522, Val Loss: 2.1409, Val Accuracy: 0.0000
Epoch 4/10, Train Loss: 1.2430, Val Loss: 2.1437, Val Accuracy: 0.0000
Epoch 5/10, Train Loss: 1.2398, Val Loss: 2.1515, Val Accuracy: 0.0000
Epoch 6/10, Train Loss: 1.2323, Val Loss: 2.1512, Val Accuracy: 0.0000
Epoch 7/10, Train Loss: 1.2298, Val Loss: 2.1438, Val Accuracy: 0.0000
Epoch 8/10, Train Loss: 1.2283, Val Loss: 2.1731, Val Accuracy: 0.0000
Epoch 

VBox(children=(Label(value='0.818 MB of 0.818 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▄▄▄▅▅▄█▆▄
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▂▂▂▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
val_loss,2.1457
Epoch,9.0
train_loss,1.22472
val_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: dt43mc44 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 12


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): GRU(256, 256, num_layers=3, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): GRU(256, 256, num_layers=3, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.2982, Val Loss: 2.1700, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.2247, Val Loss: 2.2232, Val Accuracy: 0.0000
Epoch 3/12, Train Loss: 1.1907, Val Loss: 2.3473, Val Accuracy: 0.0000
Epoch 4/12, Train Loss: 1.1728, Val Loss: 2.3410, Val Accuracy: 0.0000
Epoch 5/12, Train Loss: 1.1685, Val Loss: 2.3875, Val Accuracy: 0.0000
Epoch 6/12, Train Loss: 1.1621, Val Loss: 2.3525, Val Accuracy: 0.0000
Epoch 7/12, Train Loss: 1.1660, Val Loss: 2.3968, Val Accuracy: 0.0000
Epoch 8/12, Train Loss: 1.1629, Val Loss: 2.3995, Val Accuracy: 0.0000
Epoch

VBox(children=(Label(value='0.824 MB of 0.824 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▂▆▅▇▆▇▇▆▆█▇
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▂▂▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁

0,1
val_loss,2.40315
Epoch,11.0
train_loss,1.16658
val_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: asdmho69 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): GRU(128, 64, num_layers=2, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): GRU(128, 64, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.2972, Val Loss: 2.2738, Val Accuracy: 0.0000
Epoch 2/10, Train Loss: 1.0501, Val Loss: 2.2231, Val Accuracy: 0.1954
Epoch 3/10, Train Loss: 0.9500, Val Loss: 2.3739, Val Accuracy: 0.9035
Epoch 4/10, Train Loss: 0.8662, Val Loss: 2.5150, Val Accuracy: 1.7338
Epoch 5/10, Train Loss: 0.8085, Val Loss: 2.6572, Val Accuracy: 2.7839
Epoch 6/10, Train Loss: 0.7691, Val Loss: 2.7619, Val Accuracy: 3.7363
Epoch 7/10, Train Loss: 0.7372, Val Loss: 2.8653, Val Accuracy: 4.7375
Epoch 8/10, Train Loss: 0.7113, Val Loss: 2.9701, Val Accuracy: 5.0794
Epoch 9/10, Train Loss

VBox(children=(Label(value='0.830 MB of 0.830 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▁▂▃▄▅▆▇▇█
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▅▄▃▃▂▂▁▁▁
val_accuracy,▁▁▂▃▄▆▇▇█▇

0,1
val_loss,3.14134
Epoch,9.0
train_loss,0.67206
val_accuracy,5.37241


[34m[1mwandb[0m: Agent Starting Run: sstchh5g with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 2
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): GRU(64, 64, num_layers=2, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): GRU(64, 64, num_layers=2, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.1772, Val Loss: 2.2020, Val Accuracy: 1.2698
Epoch 2/12, Train Loss: 0.8802, Val Loss: 2.5285, Val Accuracy: 4.3468
Epoch 3/12, Train Loss: 0.7803, Val Loss: 2.7333, Val Accuracy: 6.0562
Epoch 4/12, Train Loss: 0.7241, Val Loss: 2.8790, Val Accuracy: 7.8632
Epoch 5/12, Train Loss: 0.6942, Val Loss: 2.9990, Val Accuracy: 9.8657
Epoch 6/12, Train Loss: 0.6643, Val Loss: 3.0864, Val Accuracy: 11.8681
Epoch 7/12, Train Loss: 0.6446, Val Loss: 3.1709, Val Accuracy: 12.1856
Epoch 8/12, Train Loss: 0.6270, Val Loss: 3.2522, Val Accurac

VBox(children=(Label(value='0.836 MB of 0.836 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▅▆▆▇▇▇███
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▃▃▂▂▂▁▁▁▁▁
val_accuracy,▁▃▃▄▅▆▆▇▇▇██

0,1
val_loss,3.37028
Epoch,11.0
train_loss,0.58563
val_accuracy,14.74969


[34m[1mwandb[0m: Agent Starting Run: pc8gc4xx with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): LSTM(128, 128, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): LSTM(128, 128, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=128, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.1149, Val Loss: 2.2457, Val Accuracy: 0.0733
Epoch 2/12, Train Loss: 1.0134, Val Loss: 2.3359, Val Accuracy: 0.1221
Epoch 3/12, Train Loss: 0.9888, Val Loss: 2.4157, Val Accuracy: 0.2930
Epoch 4/12, Train Loss: 0.9597, Val Loss: 2.4847, Val Accuracy: 0.3663
Epoch 5/12, Train Loss: 0.9342, Val Loss: 2.5585, Val Accuracy: 0.5617
Epoch 6/12, Train Loss: 0.9227, Val Loss: 2.6177, Val Accuracy: 0.6349
Epoch 7/12, Train Loss: 0.9132, Val Loss: 2.6525, Val Accuracy: 0.7814
Epoch 8/12, Train Loss: 0.9064, Val Loss: 2.7008, Val Accuracy: 0.8303
Epoch 9/12, Train Loss: 0.8987,

VBox(children=(Label(value='0.843 MB of 0.843 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▂▃▄▄▅▅▆▆▇██
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▅▄▄▃▂▂▂▂▂▁▁
val_accuracy,▁▁▂▃▄▄▅▅▅▆▇█

0,1
val_loss,2.91007
Epoch,11.0
train_loss,0.87276
val_accuracy,1.36752


[34m[1mwandb[0m: Agent Starting Run: xoeywenu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): RNN(256, 64, num_layers=3, batch_first=True, dropout=0.5, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): RNN(256, 64, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.3421, Val Loss: 2.1818, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.1530, Val Loss: 2.1700, Val Accuracy: 0.0244
Epoch 3/12, Train Loss: 1.0996, Val Loss: 2.1504, Val Accuracy: 0.0733
Epoch 4/12, Train Loss: 1.0726, Val Loss: 2.1979, Val Accuracy: 0.0977
Epoch 5/12, Train Loss: 1.0484, Val Loss: 2.2140, Val Accuracy: 0.1221
Epoch 6/12, Train Loss: 1.0372, Val Loss: 2.2195, Val Accuracy: 0.0244
Epoch 7/12, Train Loss: 1.0267, Val Loss: 2.2392, Val Accuracy: 0.0733
Epoch 8/12, Train Loss: 1.0164, Val Loss: 2.2861, Val Accuracy: 0.0977
Ep

VBox(children=(Label(value='0.849 MB of 0.849 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▂▂▁▃▃▄▄▆▆▇▇█
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▃▃▂▂▂▂▁▁▁▁
val_accuracy,▁▂▃▃▄▂▃▃█▆▆▅

0,1
val_loss,2.33735
Epoch,11.0
train_loss,0.98771
val_accuracy,0.14652


[34m[1mwandb[0m: Agent Starting Run: pjp3qs2s with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 10


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): RNN(128, 256, num_layers=3, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): RNN(128, 256, num_layers=3, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.4051, Val Loss: 3.6789, Val Accuracy: 0.0000
Epoch 2/10, Train Loss: 1.6062, Val Loss: 2.5240, Val Accuracy: 0.0000
Epoch 3/10, Train Loss: 1.6119, Val Loss: 2.6150, Val Accuracy: 0.0000
Epoch 4/10, Train Loss: 1.4037, Val Loss: 3.3430, Val Accuracy: 0.0000
Epoch 5/10, Train Loss: 1.3765, Val Loss: 2.3411, Val Accuracy: 0.0000
Epoch 6/10, Train Loss: 1.3715, Val Loss: 2.4198, Val Accuracy: 0.0000
Epoch 7/10, Train Loss: 1.3774, Val Loss: 2.3923, Val Accuracy: 0.0000
Epoch 8/10, Train Loss: 1.3832, Val Loss: 2.3757, Val Accuracy: 0.0000
Epoch

VBox(children=(Label(value='0.855 MB of 0.855 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,█▂▂▆▁▁▁▁▁▁
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,▂██▂▁▁▁▁▂▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
val_loss,2.41927
Epoch,9.0
train_loss,1.38123
val_accuracy,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5s2h34by with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): GRU(64, 256, num_layers=3, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): GRU(64, 256, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 0.9592, Val Loss: 2.7679, Val Accuracy: 6.3980
Epoch 2/10, Train Loss: 0.6091, Val Loss: 3.2375, Val Accuracy: 14.2857
Epoch 3/10, Train Loss: 0.5236, Val Loss: 3.4637, Val Accuracy: 17.8022
Epoch 4/10, Train Loss: 0.4776, Val Loss: 3.6382, Val Accuracy: 18.3394
Epoch 5/10, Train Loss: 0.4545, Val Loss: 3.7780, Val Accuracy: 21.3675
Epoch 6/10, Train Loss: 0.4356, Val Loss: 3.8236, Val Accuracy: 22.5885
Epoch 7/10, Train Loss: 0.4203, Val Loss: 3.9039, Val Accuracy: 23.1746
Epoch 8/10, Train Loss: 0.4126, Val Loss: 3.9205, Val Accuracy: 24.3956
Epoch 9/10, Trai

VBox(children=(Label(value='0.862 MB of 0.862 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▄▅▆▇▇▇▇██
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▃▂▂▁▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇███

0,1
val_loss,4.04463
Epoch,9.0
train_loss,0.3958
val_accuracy,24.59096


[34m[1mwandb[0m: Agent Starting Run: 6y1gsv4k with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): LSTM(256, 32, num_layers=3, batch_first=True, dropout=0.2)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): LSTM(256, 32, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=32, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.3039, Val Loss: 2.1310, Val Accuracy: 0.0244
Epoch 2/10, Train Loss: 1.1503, Val Loss: 2.1463, Val Accuracy: 0.0244
Epoch 3/10, Train Loss: 1.1032, Val Loss: 2.1543, Val Accuracy: 0.0244
Epoch 4/10, Train Loss: 1.0667, Val Loss: 2.2036, Val Accuracy: 0.0000
Epoch 5/10, Train Loss: 1.0157, Val Loss: 2.2846, Val Accuracy: 0.0977
Epoch 6/10, Train Loss: 0.9728, Val Loss: 2.3431, Val Accuracy: 0.3663
Epoch 7/10, Train Loss: 0.9459, Val Loss: 2.3685, Val Accuracy: 0.2930
Epoch 8/10, Train Loss: 0.9272, Val Loss: 2.3982, Val Accuracy: 0.5861
Epoch 9/10, Train Lo

VBox(children=(Label(value='0.868 MB of 0.868 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▁▁▂▄▅▆▆▇█
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▅▅▄▃▂▂▂▁▁
val_accuracy,▁▁▁▁▂▃▃▄▆█

0,1
val_loss,2.47507
Epoch,9.0
train_loss,0.89086
val_accuracy,1.24542


[34m[1mwandb[0m: Agent Starting Run: 3anfrb8v with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 12


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): RNN(256, 16, num_layers=3, batch_first=True, dropout=0.5, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): RNN(256, 16, num_layers=3, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=16, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.3562, Val Loss: 2.1455, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.3001, Val Loss: 2.1480, Val Accuracy: 0.0000
Epoch 3/12, Train Loss: 1.2979, Val Loss: 2.1642, Val Accuracy: 0.0000
Epoch 4/12, Train Loss: 1.2931, Val Loss: 2.1649, Val Accuracy: 0.0000
Epoch 5/12, Train Loss: 1.2785, Val Loss: 2.1604, Val Accuracy: 0.0000
Epoch 6/12, Train Loss: 1.2784, Val Loss: 2.1709, Val Accuracy: 0.0000
Epoch 7/12, Train Loss: 1.2745, Val Loss: 2.1975, Val Accuracy: 0.0000
Epoch 8/12, Train Loss: 1.2758, Val Loss: 2.2047, Val Accur

VBox(children=(Label(value='0.874 MB of 0.874 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▁▃▃▃▄▇█▆▆█▆
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▃▃▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁

0,1
val_loss,2.18515
Epoch,11.0
train_loss,1.27551
val_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 5c19qjvh with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): RNN(256, 64, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): RNN(256, 64, num_layers=2, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.3024, Val Loss: 2.2045, Val Accuracy: 0.0000
Epoch 2/10, Train Loss: 1.2811, Val Loss: 2.1785, Val Accuracy: 0.0000
Epoch 3/10, Train Loss: 1.2750, Val Loss: 2.1641, Val Accuracy: 0.0000
Epoch 4/10, Train Loss: 1.2784, Val Loss: 2.2382, Val Accuracy: 0.0000
Epoch 5/10, Train Loss: 1.2801, Val Loss: 2.2202, Val Accuracy: 0.0000
Epoch 6/10, Train Loss: 1.2807, Val Loss: 2.2551, Val Accuracy: 0.0000
Epoch 7/10, Train Loss: 1.2786, Val Loss: 2.1877, Val Accuracy: 0.0000
Epoch 8/10, Train Loss: 1.2793, Val Loss: 2.2726, Val Accuracy: 0.0000
Ep

VBox(children=(Label(value='0.880 MB of 0.880 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▃▂▁▅▄▅▂▆█▆
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▁▂▂▂▂▂▂▂
val_accuracy,▁▁▁▁▁▁▁▁▁▁

0,1
val_loss,2.27066
Epoch,9.0
train_loss,1.28001
val_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: cq3t5lrr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 3
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 12


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): GRU(128, 32, num_layers=3, batch_first=True, dropout=0.5)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): GRU(128, 32, num_layers=3, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=32, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.3689, Val Loss: 2.1234, Val Accuracy: 0.0000
Epoch 2/12, Train Loss: 1.2878, Val Loss: 2.1308, Val Accuracy: 0.0000
Epoch 3/12, Train Loss: 1.2786, Val Loss: 2.1486, Val Accuracy: 0.0000
Epoch 4/12, Train Loss: 1.2764, Val Loss: 2.1409, Val Accuracy: 0.0000
Epoch 5/12, Train Loss: 1.2794, Val Loss: 2.1900, Val Accuracy: 0.0000
Epoch 6/12, Train Loss: 1.2787, Val Loss: 2.1480, Val Accuracy: 0.0000
Epoch 7/12, Train Loss: 1.2802, Val Loss: 2.1410, Val Accuracy: 0.0000
Epoch 8/12, Train Loss: 1.2791, Val Loss: 2.1624, Val Accuracy: 0.0000
Epoch 9/

VBox(children=(Label(value='0.887 MB of 0.887 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▂▄▃█▄▃▅▄▄▅▆
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁

0,1
val_loss,2.16863
Epoch,11.0
train_loss,1.27833
val_accuracy,0.0


[34m[1mwandb[0m: Agent Starting Run: 8bkfdje2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): GRU(64, 256, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): GRU(64, 256, num_layers=2, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 0.9086, Val Loss: 3.0545, Val Accuracy: 11.2576
Epoch 2/10, Train Loss: 0.5729, Val Loss: 3.4372, Val Accuracy: 18.5592
Epoch 3/10, Train Loss: 0.4914, Val Loss: 3.6830, Val Accuracy: 22.0024
Epoch 4/10, Train Loss: 0.4542, Val Loss: 3.8650, Val Accuracy: 25.2015
Epoch 5/10, Train Loss: 0.4272, Val Loss: 3.9449, Val Accuracy: 26.1783
Epoch 6/10, Train Loss: 0.4061, Val Loss: 4.0439, Val Accuracy: 27.5458
Epoch 7/10, Train Loss: 0.3937, Val Loss: 4.1027, Val Accuracy: 27.8144
Epoch 8/10, Train Loss: 0.3828, Val Loss: 4.1747, Val

VBox(children=(Label(value='0.893 MB of 0.893 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▅▆▆▇▇███
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▃▂▂▁▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇▇▇█

0,1
val_loss,4.2577
Epoch,9.0
train_loss,0.37025
val_accuracy,30.08547


[34m[1mwandb[0m: Agent Starting Run: c98qzap5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 32)
    (rnn): GRU(32, 128, num_layers=3, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 32)
    (rnn): GRU(32, 128, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=128, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.2488, Val Loss: 2.2899, Val Accuracy: 0.0244
Epoch 2/10, Train Loss: 0.9457, Val Loss: 2.4014, Val Accuracy: 1.1233
Epoch 3/10, Train Loss: 0.7878, Val Loss: 2.6752, Val Accuracy: 2.2466
Epoch 4/10, Train Loss: 0.7021, Val Loss: 2.8763, Val Accuracy: 3.3944
Epoch 5/10, Train Loss: 0.6459, Val Loss: 3.0313, Val Accuracy: 5.8608
Epoch 6/10, Train Loss: 0.6059, Val Loss: 3.1470, Val Accuracy: 7.5458
Epoch 7/10, Train Loss: 0.5777, Val Loss: 3.2715, Val Accuracy: 8.4493
Epoch 8/10, Train Loss: 0.5572, Val Loss: 3.3355, Val Accuracy: 11.3065
Epoch 9/10, Train Loss

VBox(children=(Label(value='0.899 MB of 0.899 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▂▃▄▅▆▇▇██
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▂▂▃▄▄▅▆▆█

0,1
val_loss,3.48391
Epoch,9.0
train_loss,0.52207
val_accuracy,15.09158


[34m[1mwandb[0m: Agent Starting Run: hdnlnz4f with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): GRU(128, 256, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): GRU(128, 256, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 0.7128, Val Loss: 3.4038, Val Accuracy: 13.9927
Epoch 2/10, Train Loss: 0.5098, Val Loss: 3.8036, Val Accuracy: 20.1709
Epoch 3/10, Train Loss: 0.4590, Val Loss: 3.9443, Val Accuracy: 22.4420
Epoch 4/10, Train Loss: 0.4347, Val Loss: 4.0673, Val Accuracy: 22.9792
Epoch 5/10, Train Loss: 0.4151, Val Loss: 4.1907, Val Accuracy: 23.2967
Epoch 6/10, Train Loss: 0.4024, Val Loss: 4.2723, Val Accuracy: 25.0794
Epoch 7/10, Train Loss: 0.3920, Val Loss: 4.2915, Val Accuracy: 25.0794
Epoch 8/10, Train Loss: 0.3855, Val Loss: 4.3946, Val Accuracy:

VBox(children=(Label(value='0.905 MB of 0.905 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▄▅▅▆▇▇███
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▅▆▆▇█████

0,1
val_loss,4.4576
Epoch,9.0
train_loss,0.37666
val_accuracy,25.56777


[34m[1mwandb[0m: Agent Starting Run: q0b1nd8n with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 2
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10


Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): RNN(64, 256, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): RNN(64, 256, num_layers=2, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)
Epoch 1/10, Train Loss: 1.0861, Val Loss: 2.4927, Val Accuracy: 1.5385
Epoch 2/10, Train Loss: 0.8644, Val Loss: 2.7366, Val Accuracy: 3.3211
Epoch 3/10, Train Loss: 0.7947, Val Loss: 2.8682, Val Accuracy: 5.2015
Epoch 4/10, Train Loss: 0.7590, Val Loss: 2.9422, Val Accuracy: 5.6410
Epoch 5/10, Train Loss: 0.7299, Val Loss: 2.9913, Val Accuracy: 6.1783
Epoch 6/10, Train Loss: 0.7165, Val Loss: 3.0366, Val Accuracy: 7.3016
Epoch 7/10, Train Loss: 0.7020, Val Loss: 3.1441, Val Accuracy: 7.4237
Epoch 8/10, Train Loss: 0.6887, Val Loss: 3.1632, Val Accura

VBox(children=(Label(value='0.911 MB of 0.911 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▅▅▆▆▇███
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▃▂▂▂▂▁▁▁
val_accuracy,▁▃▅▅▆▇▇███

0,1
val_loss,3.19667
Epoch,9.0
train_loss,0.67201
val_accuracy,8.10745


[34m[1mwandb[0m: Agent Starting Run: c0ytodsu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): GRU(64, 256, num_layers=3, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): GRU(64, 256, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 0.8199, Val Loss: 3.0875, Val Accuracy: 11.2332
Epoch 2/12, Train Loss: 0.5446, Val Loss: 3.5710, Val Accuracy: 18.8523
Epoch 3/12, Train Loss: 0.4758, Val Loss: 3.7806, Val Accuracy: 21.3675
Epoch 4/12, Train Loss: 0.4399, Val Loss: 3.9539, Val Accuracy: 23.0281
Epoch 5/12, Train Loss: 0.4177, Val Loss: 4.0976, Val Accuracy: 24.4444
Epoch 6/12, Train Loss: 0.3988, Val Loss: 4.1782, Val Accuracy: 26.1538
Epoch 7/12, Train Loss: 0.3837, Val Loss: 4.3144, Val Accuracy: 26.8864
Epoch 8/12, Train Loss: 0.3721, Val Loss: 4.3795, Val Accuracy: 26.

VBox(children=(Label(value='0.918 MB of 0.918 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▅▅▆▆▇▇▇██
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▃▂▂▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆▆▇▇▇████

0,1
val_loss,4.66313
Epoch,11.0
train_loss,0.33721
val_accuracy,28.57143


[34m[1mwandb[0m: Agent Starting Run: 0i106cmx with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 3
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): GRU(128, 256, num_layers=3, batch_first=True, dropout=0.3)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): GRU(128, 256, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)
Epoch 1/12, Train Loss: 1.0431, Val Loss: 2.6392, Val Accuracy: 1.4164
Epoch 2/12, Train Loss: 0.6819, Val Loss: 3.0230, Val Accuracy: 6.8620
Epoch 3/12, Train Loss: 0.5701, Val Loss: 3.3008, Val Accuracy: 12.1123
Epoch 4/12, Train Loss: 0.5111, Val Loss: 3.5039, Val Accuracy: 16.6056
Epoch 5/12, Train Loss: 0.4780, Val Loss: 3.6454, Val Accuracy: 18.6325
Epoch 6/12, Train Loss: 0.4503, Val Loss: 3.7440, Val Accuracy: 19.8291
Epoch 7/12, Train Loss: 0.4315, Val Loss: 3.8830, Val Accuracy: 20.8303
Epoch 8/12, Train Loss: 0.4230, Val Loss: 3.8875, Val Accuracy: 22.3687
Epoch 9/12, T

VBox(children=(Label(value='0.924 MB of 0.924 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▅▆▆▇▇▇███
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▃▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▆▆▇▇▇████

0,1
val_loss,4.12688
Epoch,11.0
train_loss,0.37914
val_accuracy,24.61538


# **Best model**

In [15]:
# Best hyperparameter configuration
input_size = 30  # Number of Latin characters
output_size = 70  # Number of Devanagari characters
embed_size = 128
hidden_size = 256
encoder_layers = 3
decoder_layers = 3
cell_type = 'lstm'
batch_size = 64
num_epochs = 11
drop_prob = 0.2
learning_rate = 0.001
bidirectional = True

# Assuming you have loaded your dataset into train_loader and val_loader

# Initialize the model, criterion, and optimizer
Best_model = Seq2Seq(input_size, output_size, hidden_size,embed_size, encoder_layers,decoder_layers,drop_prob, cell_type,bidirectional)
print(Best_model)

# model = Attention_model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Best_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(Best_model.parameters(), lr=learning_rate)

Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (decoder): Decoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): LSTM(128, 256, num_layers=3, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)


In [16]:
# Training loop
for epoch in range(num_epochs):
    trained_model, train_loss = train(Best_model, train_loader_ben, criterion, optimizer, device)
    model = trained_model

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}')
    

Epoch 1/11, Train Loss: 1.2733
Epoch 2/11, Train Loss: 0.9604
Epoch 3/11, Train Loss: 0.6558
Epoch 4/11, Train Loss: 0.5229
Epoch 5/11, Train Loss: 0.4556
Epoch 6/11, Train Loss: 0.4190
Epoch 7/11, Train Loss: 0.3916
Epoch 8/11, Train Loss: 0.3743
Epoch 9/11, Train Loss: 0.3589
Epoch 10/11, Train Loss: 0.3429
Epoch 11/11, Train Loss: 0.3324


In [17]:
path3 = '/kaggle/input/aksharantar/aksharantar_sampled/ben/ben_test.csv'
c, test_loader_ben, input_vocab, target_vocab, max_length, _ = load_data(path3, batch_size=64)  # Use correct path3

val_loss, val_accuracy = evaluate(trained_model, test_loader_ben, criterion, device)
print(f' Test Accuracy: {val_accuracy:.4f}')

 Test Accuracy: 30.0366


# **Prediction**

In [9]:
def decode_indices(indices, idx2token, target_vocab):
    valid_indices = []
    for idx in indices:
        if idx in idx2token and idx not in (target_vocab['<pad>'], target_vocab['<sos>'], target_vocab['<eos>']):
            valid_indices.append(idx)
#             print(valid_indices)
    decoded_text = ''
    for idx in valid_indices:
        decoded_text += idx2token[idx]
#         print(decoded_text)
    return decoded_text

In [10]:
def decode_indices_target(indices, idx2token, target_vocab):
    valid_indices = []
    for idx in indices:
        if idx in idx2token and idx not in (target_vocab['<pad>'], target_vocab['<sos>'], target_vocab['<eos>']):
            if idx < 10:
                valid_indices.append(idx)
            else:
                valid_indices.append(idx-3)
#             print(valid_indices)
    decoded_text = ''
    for idx in valid_indices:
        decoded_text += idx2token[idx]
#         print(decoded_text)
    return decoded_text

In [11]:
def pred(model, dataloader, device):
    model.eval()
    predictions = []
    actual = []
    with torch.no_grad():
        for latin, devanagari in dataloader:#, desc='Evaluating', unit='batch'):
            latin = latin.to(device)
            devanagari = devanagari.to(device)
            output = model(latin, devanagari,0)
            deb = devanagari.cpu().numpy()
            actual.append(deb)
            output = output.argmax(2)
            latin = latin.cpu().numpy()
            output = output.cpu().numpy()
            predictions.append((latin, output))
    return predictions, actual

path3 = '/kaggle/input/aksharantar/aksharantar_sampled/ben/ben_test.csv'
c, test_loader_ben, input_vocab, target_vocab, max_length, _ = load_data(path3, batch_size=64)  # Use correct path3

# Make sure to define the reverse dictionaries for converting indices back to text
latin_idx2token = {idx: char for char, idx in input_vocab.items()}
bangla_idx2token = {idx: char for char, idx in target_vocab.items()}

In [13]:
import csv
with open('seq2seq_results.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Input Text', 'Actual Target', 'Predicted Text'])
    writer.writerows(seq2seq_results)

In [14]:
df1 = pd.read_csv('seq2seq_results.csv')
df1

Unnamed: 0,Input Text,Actual Target,Predicted Text
0,kaarentabaahee,কারেন্টবাহী,কারেনতাহী
1,mashterpiece,মাস্টারপিস,মযাসটারিপে
2,cheeken,চিকেন,চিকেন
3,ekdaala,একডালা,একদালা
4,neerbachokra,নির্বাচকরা,নিরবচকরা
...,...,...,...
4090,samanjasyapurno,সামঞ্জস্যপূর্ণ,সমাজনযপূরণ
4091,fuds,ফুডস,ফুডস
4092,bannar,ব্যানার,বনননার
4093,songosthao,সংস্থাও,সংসথাও


In [12]:
test_predictions, actual = pred(model, test_loader_ben, device)
seq2seq_results = []
for (src_indices, output_indices),act_ind in zip(test_predictions,actual):
#     print(src_indices)
#     print('\njkl',output_indices)
    # Since our data loader might have batch size greater than 1, iterate through each example in the batch
    for i in range(src_indices.shape[0]):
        input_text = decode_indices(src_indices[i], latin_idx2token, input_vocab)
        actual_target_text = decode_indices(act_ind[i], bangla_idx2token, target_vocab)
        predicted_text = decode_indices_target(output_indices[i], bangla_idx2token, target_vocab)
        seq2seq_results.append([input_text, actual_target_text, predicted_text])
        print(f'SL. {i} Input Text: {input_text} -> Actual target: {actual_target_text} -> Predicted Text: {predicted_text}')

SL. 0 Input Text: kaarentabaahee -> Actual target: কারেন্টবাহী -> Predicted Text: কারেনতাহী
SL. 1 Input Text: mashterpiece -> Actual target: মাস্টারপিস -> Predicted Text: মযাসটারিপে
SL. 2 Input Text: cheeken -> Actual target: চিকেন -> Predicted Text: চিকেন
SL. 3 Input Text: ekdaala -> Actual target: একডালা -> Predicted Text: একদালা
SL. 4 Input Text: neerbachokra -> Actual target: নির্বাচকরা -> Predicted Text: নিরবচকরা
SL. 5 Input Text: neture -> Actual target: নেচার -> Predicted Text: নেতুরে
SL. 6 Input Text: michilkey -> Actual target: মিছিলকে -> Predicted Text: মিছিলকে
SL. 7 Input Text: chitfund -> Actual target: চিটফান্ড -> Predicted Text: চিটফুঁদ
SL. 8 Input Text: panchanan -> Actual target: পঞ্চানন -> Predicted Text: পঞচনন
SL. 9 Input Text: manna -> Actual target: মন্ন -> Predicted Text: মাননা
SL. 10 Input Text: portillo -> Actual target: পর্টিল্লো -> Predicted Text: পরতিলো
SL. 11 Input Text: quess -> Actual target: কুয়েস -> Predicted Text: কুয়েস
SL. 12 Input Text: budh -> Actua