In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import pandas as pd

# Function for load data
def load_data(path,batch_size = 32):
    df = pd.read_csv(path)
#     df = df.head(10)
    df.columns = ['input_word','target_word']
    
    # Define maximum sequence lengths for letters
    max_input_len = max(len(word) for word in df['input_word'])
    max_target_len = max(len(word) for word in df['target_word'])

    # Define vocabulary mappings for letters
    input_letter_vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}  # Add special tokens
    target_letter_vocab = {'<pad>': 0, '<sos>': 1, '<eos>': 2}  # Add special tokens
    letter_idx = 3

    input_str = ''
    target_str = ''
    # Preprocess the data and update vocabulary mappings for letters
    for input_word, target_word in zip(df['input_word'], df['target_word']):
        input_str += input_word
        target_str += target_word


    # Update vocabulary mappings for input letters
    for letter in sorted(set(input_str)):
        input_letter_vocab[letter] = letter_idx
        letter_idx += 1
    letter_idx = 3
    # Update vocabulary mappings for target letters
    for letter in sorted(set(target_str)):
        if letter not in target_letter_vocab:
            target_letter_vocab[letter] = letter_idx
            letter_idx += 1

    # Tokenize function at the letter level
    def tokenize_input_letters(word, vocab, max_len):
        token_ids = [vocab[char] for char in word if char in vocab]
        padded = token_ids[:max_len] + [vocab['<pad>']] * (max_len - len(token_ids))
        return torch.tensor(padded)

    def tokenize_target_letters(word, vocab, max_len):
        token_ids = [vocab[char] for char in word if char in vocab]
        padded =  [vocab['<pad>']]+ token_ids[:max_len] +[vocab['<pad>']] * (max_len - len(token_ids))
        return torch.tensor(padded)
    
    # Custom Dataset class for letter-level tokenization
    class CustomDataset(Dataset):
        def __init__(self, input_data, target_data, input_vocab, target_vocab, max_input_len, max_target_len):
            self.input_data = input_data
            self.target_data = target_data
            self.input_vocab = input_vocab
            self.target_vocab = target_vocab
            self.max_input_len = max_input_len
            self.max_target_len = max_target_len

        def __len__(self):
            return len(self.input_data)

        def __getitem__(self, idx):
            input_word = self.input_data[idx]
            target_word = self.target_data[idx]

            # Tokenize input and target words at the letter level
            input_letters = tokenize_input_letters(input_word, self.input_vocab, self.max_input_len)
            target_letters = tokenize_target_letters(target_word, self.target_vocab, self.max_target_len)

            return input_letters, target_letters

    # Create DataLoader
    custom_dataset = CustomDataset(df['input_word'], df['target_word'], input_letter_vocab, target_letter_vocab, max_input_len, max_target_len)
    data_loader1 = DataLoader(custom_dataset, batch_size=batch_size, shuffle = False )
    
    return custom_dataset,data_loader1, input_letter_vocab, target_letter_vocab, max_input_len, max_target_len



In [3]:
path1 = '/kaggle/input/aksharantar-sampled-dataset/aksharantar_sampled/ben/ben_train.csv'
custom_dataset1,train_loader_ben,a,b,_,_ = load_data(path1,batch_size = 64)
path2 = '/kaggle/input/aksharantar-sampled-dataset/aksharantar_sampled/ben/ben_valid.csv'
custom_dataset,val_loader_ben,_,_,_,_ = load_data(path2,batch_size = 64)
print(a,b)

{'<pad>': 0, '<sos>': 1, '<eos>': 2, 'a': 3, 'b': 4, 'c': 5, 'd': 6, 'e': 7, 'f': 8, 'g': 9, 'h': 10, 'i': 11, 'j': 12, 'k': 13, 'l': 14, 'm': 15, 'n': 16, 'o': 17, 'p': 18, 'q': 19, 'r': 20, 's': 21, 't': 22, 'u': 23, 'v': 24, 'w': 25, 'x': 26, 'y': 27, 'z': 28} {'<pad>': 0, '<sos>': 1, '<eos>': 2, 'ঁ': 3, 'ং': 4, 'ঃ': 5, 'অ': 6, 'আ': 7, 'ই': 8, 'ঈ': 9, 'উ': 10, 'ঊ': 11, 'ঋ': 12, 'এ': 13, 'ঐ': 14, 'ও': 15, 'ঔ': 16, 'ক': 17, 'খ': 18, 'গ': 19, 'ঘ': 20, 'ঙ': 21, 'চ': 22, 'ছ': 23, 'জ': 24, 'ঝ': 25, 'ঞ': 26, 'ট': 27, 'ঠ': 28, 'ড': 29, 'ঢ': 30, 'ণ': 31, 'ত': 32, 'থ': 33, 'দ': 34, 'ধ': 35, 'ন': 36, 'প': 37, 'ফ': 38, 'ব': 39, 'ভ': 40, 'ম': 41, 'য': 42, 'র': 43, 'ল': 44, 'শ': 45, 'ষ': 46, 'স': 47, 'হ': 48, '়': 49, 'া': 50, 'ি': 51, 'ী': 52, 'ু': 53, 'ূ': 54, 'ৃ': 55, 'ে': 56, 'ৈ': 57, 'ো': 58, 'ৌ': 59, '্': 60, 'ৎ': 61, '২': 62}


# **Attention Model**

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# Encoder class for attention model
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, embed_size, encoder_layers=1, drop_prob=0.5, cell_type='gru', bidirectional=False):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.encoder_layers = encoder_layers
        self.cell_type = cell_type
        self.bidirectional = bidirectional
        self.dropout = nn.Dropout(drop_prob)
        self.embedding = nn.Embedding(input_size, embed_size)
        
        if cell_type == 'lstm':
            self.rnn = nn.LSTM(embed_size, hidden_size, encoder_layers, dropout=drop_prob, bidirectional=bidirectional, batch_first=True)
        elif cell_type == 'gru':
            self.rnn = nn.GRU(embed_size, hidden_size, encoder_layers, dropout=drop_prob, bidirectional=bidirectional, batch_first=True)
        else:
            self.rnn = nn.RNN(embed_size, hidden_size, encoder_layers, dropout=drop_prob, bidirectional=bidirectional, batch_first=True)
    
    def forward(self, x):
        embedded = self.dropout(self.embedding(x))
        output, hidden = self.rnn(embedded)
        
        if self.cell_type == 'lstm':
            hidden_states, cell_states = hidden
            if self.bidirectional:
                hidden = (torch.cat([hidden_states[-2], hidden_states[-1]], dim=1).unsqueeze(0), 
                          torch.cat([cell_states[-2], cell_states[-1]], dim=1).unsqueeze(0))
            else:
                hidden = (hidden_states[-1].unsqueeze(0), cell_states[-1].unsqueeze(0))
        else:
            if self.bidirectional:
                hidden = torch.cat([hidden[-2], hidden[-1]], dim=1).unsqueeze(0)
            else:
                hidden = hidden[-1].unsqueeze(0)

        return output, hidden
    
# Attention class
class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.attention = nn.Linear(hidden_size * 2, hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))

    def forward(self, hidden, encoder_outputs):
        seq_len = encoder_outputs.shape[1]
        hidden = hidden.repeat(seq_len, 1, 1).transpose(0, 1)
        
        energy = torch.tanh(self.attention(torch.cat((hidden, encoder_outputs), dim=2)))
        energy = energy.transpose(1, 2)
        v = self.v.repeat(encoder_outputs.size(0), 1).unsqueeze(1)
        
        attention_weights = torch.bmm(v, energy)
        return torch.softmax(attention_weights.squeeze(1), dim=1)

# Decoder class with attention
class DecoderWithAttention(nn.Module):
    def __init__(self, hidden_size, embed_size, output_size, attention, decoder_layers=1, drop_prob=0.5, cell_type='gru'):
        super(DecoderWithAttention, self).__init__()
        self.hidden_size = hidden_size
        self.decoder_layers = decoder_layers
        self.cell_type = cell_type
        self.attention = attention
        self.dropout = nn.Dropout(drop_prob)
        self.embedding = nn.Embedding(output_size, embed_size)
        
        input_size = hidden_size + embed_size
        if cell_type == 'lstm':
            self.rnn = nn.LSTM(input_size, hidden_size, decoder_layers, dropout=drop_prob, batch_first=True)
        elif cell_type == 'gru':
            self.rnn = nn.GRU(input_size, hidden_size, decoder_layers, dropout=drop_prob, batch_first=True)
        else:
            self.rnn = nn.RNN(input_size, hidden_size, decoder_layers, dropout=drop_prob, batch_first=True)
        
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x, hidden, encoder_outputs):
        x = x.unsqueeze(1)
        embedded = self.dropout(self.embedding(x))
        
        attention_weights = self.attention(hidden[-1], encoder_outputs)
        context = attention_weights.unsqueeze(1).bmm(encoder_outputs)
        
        rnn_input = torch.cat((embedded, context), dim=2)
        
        if self.cell_type == 'lstm':
            output, (hidden, cell) = self.rnn(rnn_input, hidden)
        else:
            output, hidden = self.rnn(rnn_input, hidden)
        
        output = self.fc(torch.cat((output, context), dim=2).squeeze(1))
        
        if self.cell_type == 'lstm':
            return output, (hidden, cell)
        else:
            return output, hidden

# Seq2seq model
class Seq2Seq(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, embed_size, encoder_layers=1, decoder_layers=1, drop_prob=0.3, cell_type='gru', bidirectional=True):
        super(Seq2Seq, self).__init__()
        self.encoder = Encoder(input_size, hidden_size, embed_size, encoder_layers, drop_prob, cell_type, bidirectional)
        self.attention = Attention(hidden_size * 2 if bidirectional else hidden_size)
        self.decoder = DecoderWithAttention(hidden_size * 2 if bidirectional else hidden_size, embed_size, output_size, self.attention, decoder_layers, drop_prob, cell_type)

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        batch_size = source.size(0)
        target_len = target.size(1)
        output_vocab_size = self.decoder.embedding.num_embeddings

        outputs = torch.zeros(batch_size, target_len, output_vocab_size).to(source.device)

        encoder_outputs, encoder_hidden = self.encoder(source)
        decoder_hidden = self._init_decoder_hidden(encoder_hidden)
        decoder_input = target[:, 0]

        for t in range(1, target_len):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            outputs[:, t] = decoder_output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            t1 = decoder_output.argmax(1)
            decoder_input = target[:, t] if teacher_force else t1

        return outputs

    def _init_decoder_hidden(self, encoder_hidden):
        decoder_layers = self.decoder.decoder_layers
        if self.encoder.cell_type == 'lstm':
            encoder_hidden = (
                torch.cat([encoder_hidden[0][i] for i in range(encoder_hidden[0].shape[0])], dim=1).unsqueeze(0), 
                torch.cat([encoder_hidden[1][i] for i in range(encoder_hidden[1].shape[0])], dim=1).unsqueeze(0)
            )
            if encoder_hidden[0].shape[0] != decoder_layers:
                encoder_hidden = (
                    encoder_hidden[0][:decoder_layers],
                    encoder_hidden[1][:decoder_layers]
                )
        else:
            encoder_hidden = torch.cat([encoder_hidden[i] for i in range(encoder_hidden.shape[0])], dim=1).unsqueeze(0)
            if encoder_hidden.shape[0] != decoder_layers:
                encoder_hidden = encoder_hidden[:decoder_layers]

        return encoder_hidden


In [18]:
    
# # Training function
# def train(model, dataloader, criterion, optimizer, device):
#     model.train()
#     total_loss = 0
#     total_correct = 0
#     total_samples = 0
    
#     for latin, devanagari in tqdm(dataloader, desc='Training', unit='batch'):
#         latin = latin.to(device)
#         devanagari = devanagari.to(device)
        
#         optimizer.zero_grad()
        
#         output = model(latin, devanagari)
#         output_dim = output.shape[-1]
#         output1 = output.view(-1, output_dim)
#         devanagari1 = devanagari.view(-1)
        
#         loss = criterion(output1, devanagari1)
#         total_loss += loss.item()
        
#         loss.backward()
#         optimizer.step()
# #         break
        
#         max_values ,max_index = torch.max(output, 2) #output.argmax(dim=1)
#         mask = max_index > 9
#         max_index[mask] -= 2
# #             print(f"prediction:{max_index} '\n' actual:{devanagari}")
#         correct1=(max_index == devanagari).all(dim=1).sum().item()
# #             print(f"correct1:{correct1}")
# #         correct = (max_index == devanagari).sum().item()
# #             print(f"correct:{correct}")
#         total_correct += correct1
#         total_samples += devanagari.size(0)

#     accuracy = total_correct / total_samples
    
#     return model, total_loss / len(dataloader), accuracy

# # Evaluation function
# def evaluate(model, dataloader, criterion, device):
#     model.eval()
#     total_loss = 0
#     total_correct = 0
#     total_samples = 0
    
#     with torch.no_grad():
#         for latin, devanagari in tqdm(dataloader, desc='Evaluating', unit='batch'):
#             latin = latin.to(device)
#             devanagari = devanagari.to(device)
            
#             output = model(latin, devanagari)
# #                                             print("output:",output.shape)
#             output_dim = output.shape[-1]
# #                                                     print(f"output_dim {output_dim}")
# #             output = output.view(-1, output_dim)
# #             print("output.shape:",output.shape)
# #             print("output.:",output)
# #             print(f"devanagari: {devanagari}")
# #             devanagari = devanagari.view(-1)
# #             print(f"devanagari.view(-1): {devanagari}")
# #             
#             loss = criterion(output.view(-1, output_dim), devanagari.view(-1))
#             total_loss += loss.item()
            
#             # Calculate accuracy
#             max_values ,max_index = torch.max(output, 2) #output.argmax(dim=1)
#             mask = max_index > 9
#             max_index[mask] -= 2
# #             print(f"prediction:{max_index} '\n' actual:{devanagari}")
#             correct1=(max_index == devanagari).all(dim=1).sum().item()
# #             print(f"correct1:{correct1}")
#             correct = (max_index == devanagari).sum().item()
# #             print(f"correct:{correct}")
#             total_correct += correct1
#             total_samples += devanagari.size(0)
    
#     avg_loss = total_loss / len(dataloader)
#     accuracy = total_correct / total_samples
    
#     return avg_loss, accuracy*100

In [46]:
def train(model, dataloader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0
    
    for latin, devanagari in tqdm(dataloader, desc='Training', unit='batch'):
        latin = latin.to(device)
        devanagari = devanagari.to(device)
        
        optimizer.zero_grad()
        
        output = model(latin, devanagari)
        output_dim = output.shape[-1]
        output = output.view(-1, output_dim)
        devanagari = devanagari.view(-1)
        
        loss = criterion(output, devanagari)
        total_loss += loss.item()
        
        loss.backward()
        optimizer.step()
        
        # Reshape the output and target to their original shape
        output = output.view(latin.size(0), -1, output_dim)
        devanagari = devanagari.view(latin.size(0), -1)
        
        max_index = output.argmax(dim=2)
        # Calculate word-level accuracy
        correct = (max_index == devanagari).all(dim=1).sum().item()
        total_correct += correct
        total_samples += devanagari.size(0)
    
    accuracy = total_correct / total_samples
    return model, total_loss / len(dataloader), accuracy*100


def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0
    
    with torch.no_grad():
        for latin, devanagari in tqdm(dataloader, desc='Evaluating', unit='batch'):
            latin = latin.to(device)
            devanagari = devanagari.to(device)
            
            output = model(latin, devanagari, teacher_forcing_ratio=0.0)
            output_dim = output.shape[-1]
            output = output.view(-1, output_dim)
            devanagari = devanagari.view(-1)
            
            loss = criterion(output, devanagari)
            total_loss += loss.item()
            
            # Reshape the output and target to their original shape
            output = output.view(latin.size(0), -1, output_dim)
            devanagari = devanagari.view(latin.size(0), -1)
            
            max_index = output.argmax(dim=2)
            mask = max_index > 9
            max_index[mask] -= 2            
            correct = (max_index == devanagari).all(dim=1).sum().item()  #  Calculate word-level accuracy
            total_correct += correct
            total_samples += devanagari.size(0)
    
    avg_loss = total_loss / len(dataloader)
    accuracy = total_correct / total_samples
    return avg_loss, accuracy * 100


In [8]:
# Example usage
input_size = 30  # Number of Latin characters
output_size = 70  # Number of Devanagari characters
embed_size = 16
hidden_size = 32
encoder_layers = 1
decoder_layers = 1
cell_type = 'rnn'
batch_size = 64
num_epochs = 12
drop_prob = 0.3
learning_rate = 0.001

# Assuming you have loaded your dataset into train_loader and val_loader

# Initialize the model, criterion, and optimizer
model = Seq2Seq(input_size, output_size, hidden_size,embed_size, encoder_layers,decoder_layers,drop_prob, cell_type)
print(model)

# model = Attention_model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
ignore_index = 0
criterion = nn.CrossEntropyLoss(ignore_index = 0)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
# for epoch in range(num_epochs):
#     trained_model, train_loss, train_acc  = train(model, train_loader_ben, criterion, optimizer, device)
#     val_loss, val_accuracy = evaluate(trained_model, val_loader_ben, criterion, device)
#     model = trained_model
#     print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train_acc: {train_acc},  Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 16)
    (rnn): RNN(16, 32, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=128, out_features=64, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=128, out_features=64, bias=True)
    )
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 16)
    (rnn): RNN(80, 64, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=128, out_features=70, bias=True)
  )
)




In [41]:
# !pip install wandb
import wandb
import numpy as np
from types import SimpleNamespace
import random

In [42]:
wandb.login(key='bb3c7761be2856a8335d16d1483149380482ae9e')#bb3c7761be2856a8335d16d1483149380482ae9e


[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [9]:
sweep_config = {
    'method': 'bayes',
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'embedding_size':{
            'values': [16,32,64,128,256]
        },
        'dropout': {
            'values': [0.3, 0.2,0.5]
        },
        'encoder_layers': {
            'values': [1]
        },
        'decoder_layers':{
            'values': [1]
        },
        'hidden_layer_size':{
            'values': [16,32,64,128,256]
        },
        'cell_type': {
            'values': [ 'lstm','rnn', 'gru']
        },
        'bidirectional': {
            'values': [True, False]
        },
        'batch_size': {
            'values': [32,64]
        },
        'num_epochs': {
            'values': [10,12]
        },
        'learning_rate': {
            'values': [0.01,0.001]
        }
    }
}

sweep_id = wandb.sweep(sweep=sweep_config, project='DL_A3_Attention')


Create sweep with ID: vkbi461w
Sweep URL: https://wandb.ai/abanisingha1997/DL_A3_Attention/sweeps/vkbi461w


In [None]:
def main():
    '''
    WandB calls main function each time with differnet combination.

    We can retrive the same and use the same values for our hypermeters.

    '''

    with wandb.init() as run:
        run_name="ct-"+str(wandb.config.cell_type)+"_el-"+str(wandb.config.encoder_layers)+"_dl-"+str(wandb.config.decoder_layers)+"_drop-"+str(wandb.config.dropout)+"_es-"+str(wandb.config.embedding_size)+"_hs-"+str(wandb.config.hidden_layer_size)+"_bs-"+str(wandb.config.batch_size)+"_ep-"+str(wandb.config.num_epochs)+"lr"+str(wandb.config.learning_rate)
        wandb.run.name=run_name

        
        model = Seq2Seq(input_size=30, output_size=70, hidden_size=wandb.config.hidden_layer_size,embed_size=wandb.config.embedding_size,encoder_layers=wandb.config.encoder_layers,
                        decoder_layers=wandb.config.decoder_layers,drop_prob=wandb.config.dropout, cell_type=wandb.config.cell_type, bidirectional=wandb.config.bidirectional)
        print(model)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=wandb.config.learning_rate)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        path1 = '/kaggle/input/aksharantar-sampled-dataset/aksharantar_sampled/ben/ben_train.csv'
        custom_dataset1,train_loader_ben,a,b,_,_ = load_data(path1,batch_size = wandb.config.batch_size)
        path2 = '/kaggle/input/aksharantar-sampled-dataset/aksharantar_sampled/ben/ben_valid.csv'
        custom_dataset,val_loader_ben,_,_,_,_ = load_data(path2,batch_size = wandb.config.batch_size)

        # Training loop
        for epoch in range(wandb.config.num_epochs):
            trained_model, train_loss, train_acc = train(model, train_loader_ben, criterion, optimizer, device)
            val_loss, val_accuracy = evaluate(trained_model, val_loader_ben, criterion, device)
            model = trained_model
            wandb.log({'Epoch': epoch, 'train_loss': train_loss , ' val_loss': val_loss, 'val_accuracy':val_accuracy})
            print(f'Epoch {epoch+1}/{wandb.config.num_epochs}, Train Loss: {train_loss:.4f},Train_acc: {train_acc} Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        
#         model_train(model,train,validation)
        
wandb.agent(sweep_id, function= main,count= 30) # calls main function for count number of times.
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: p018rb83 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: Currently logged in as: [33mabanisingha1997[0m. Use [1m`wandb login --relogin`[0m to force relogin




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): GRU(64, 32, batch_first=True, dropout=0.3)
  )
  (attention): Attention(
    (attention): Linear(in_features=64, out_features=32, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=64, out_features=32, bias=True)
    )
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): GRU(96, 32, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:41<00:00, 19.16batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.40batch/s]


Epoch 1/10, Train Loss: 1.3245,Train_acc: 0.0 Val Loss: 2.0894, Val Accuracy: 0.0000


Training: 100%|██████████| 800/800 [00:40<00:00, 19.72batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.14batch/s]


Epoch 2/10, Train Loss: 0.9168,Train_acc: 0.0 Val Loss: 2.4424, Val Accuracy: 2.4908


Training: 100%|██████████| 800/800 [00:40<00:00, 19.66batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.79batch/s]


Epoch 3/10, Train Loss: 0.7103,Train_acc: 0.0 Val Loss: 2.7389, Val Accuracy: 5.9341


Training: 100%|██████████| 800/800 [00:40<00:00, 19.65batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.53batch/s]


Epoch 4/10, Train Loss: 0.6384,Train_acc: 0.0 Val Loss: 2.9046, Val Accuracy: 8.1319


Training: 100%|██████████| 800/800 [00:40<00:00, 19.69batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.99batch/s]


Epoch 5/10, Train Loss: 0.5992,Train_acc: 0.0 Val Loss: 3.0114, Val Accuracy: 10.1343


Training: 100%|██████████| 800/800 [00:40<00:00, 19.78batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.06batch/s]


Epoch 6/10, Train Loss: 0.5750,Train_acc: 0.0 Val Loss: 3.1156, Val Accuracy: 11.5507


Training: 100%|██████████| 800/800 [00:40<00:00, 19.63batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.98batch/s]


Epoch 7/10, Train Loss: 0.5533,Train_acc: 0.0 Val Loss: 3.1955, Val Accuracy: 12.2100


Training: 100%|██████████| 800/800 [00:40<00:00, 19.84batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.76batch/s]


Epoch 8/10, Train Loss: 0.5412,Train_acc: 0.0 Val Loss: 3.2315, Val Accuracy: 13.1868


Training: 100%|██████████| 800/800 [00:40<00:00, 19.77batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.88batch/s]


Epoch 9/10, Train Loss: 0.5332,Train_acc: 0.0 Val Loss: 3.2689, Val Accuracy: 14.4811


Training: 100%|██████████| 800/800 [00:40<00:00, 19.68batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.57batch/s]


Epoch 10/10, Train Loss: 0.5217,Train_acc: 0.0 Val Loss: 3.3293, Val Accuracy: 14.5543


VBox(children=(Label(value='0.099 MB of 0.099 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▅▆▆▇▇▇██
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▃▂▂▁▁▁▁▁
val_accuracy,▁▂▄▅▆▇▇▇██

0,1
val_loss,3.32932
Epoch,9.0
train_loss,0.52174
val_accuracy,14.55433


[34m[1mwandb[0m: Agent Starting Run: xiek6mpe with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 16)
    (rnn): RNN(16, 64, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=256, out_features=128, bias=True)
    )
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 16)
    (rnn): RNN(144, 128, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.73batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.69batch/s]


Epoch 1/12, Train Loss: 1.0070,Train_acc: 0.0 Val Loss: 2.7212, Val Accuracy: 9.7680


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.63batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.04batch/s]


Epoch 2/12, Train Loss: 0.6644,Train_acc: 0.0 Val Loss: 3.0830, Val Accuracy: 13.0159


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.66batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 70.09batch/s]


Epoch 3/12, Train Loss: 0.5914,Train_acc: 0.0 Val Loss: 3.2662, Val Accuracy: 14.1880


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.82batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 69.46batch/s]


Epoch 4/12, Train Loss: 0.5513,Train_acc: 0.0 Val Loss: 3.3730, Val Accuracy: 16.3370


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.90batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.41batch/s]


Epoch 5/12, Train Loss: 0.5281,Train_acc: 0.0 Val Loss: 3.4555, Val Accuracy: 16.8254


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.80batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 70.83batch/s]


Epoch 6/12, Train Loss: 0.5089,Train_acc: 0.0 Val Loss: 3.5082, Val Accuracy: 18.1929


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.69batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.93batch/s]


Epoch 7/12, Train Loss: 0.4951,Train_acc: 0.0 Val Loss: 3.5251, Val Accuracy: 18.4860


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.78batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 69.07batch/s]


Epoch 8/12, Train Loss: 0.4842,Train_acc: 0.0019531631477177286 Val Loss: 3.5246, Val Accuracy: 18.9988


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.77batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.80batch/s]


Epoch 9/12, Train Loss: 0.4728,Train_acc: 0.0 Val Loss: 3.5539, Val Accuracy: 18.9499


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.51batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.32batch/s]


Epoch 10/12, Train Loss: 0.4635,Train_acc: 0.0019531631477177286 Val Loss: 3.6396, Val Accuracy: 19.2674


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.36batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.01batch/s]


Epoch 11/12, Train Loss: 0.4577,Train_acc: 0.0 Val Loss: 3.5856, Val Accuracy: 19.9023


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.20batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.47batch/s]


Epoch 12/12, Train Loss: 0.4504,Train_acc: 0.0 Val Loss: 3.6355, Val Accuracy: 20.6593


VBox(children=(Label(value='0.110 MB of 0.110 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▄▅▆▇▇▇▇▇███
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▃▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇▇██

0,1
val_loss,3.63546
Epoch,11.0
train_loss,0.45038
val_accuracy,20.65934


[34m[1mwandb[0m: Agent Starting Run: g1hqquzz with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): GRU(64, 16, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=64, out_features=32, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=64, out_features=32, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): GRU(96, 32, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:41<00:00, 19.23batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.39batch/s]


Epoch 1/12, Train Loss: 1.3359,Train_acc: 0.0 Val Loss: 2.0150, Val Accuracy: 0.0977


Training: 100%|██████████| 800/800 [00:41<00:00, 19.38batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.16batch/s]


Epoch 2/12, Train Loss: 0.9459,Train_acc: 0.0 Val Loss: 2.4303, Val Accuracy: 3.5165


Training: 100%|██████████| 800/800 [00:41<00:00, 19.30batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.05batch/s]


Epoch 3/12, Train Loss: 0.6943,Train_acc: 0.0 Val Loss: 2.7780, Val Accuracy: 9.0354


Training: 100%|██████████| 800/800 [00:41<00:00, 19.28batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.83batch/s]


Epoch 4/12, Train Loss: 0.6152,Train_acc: 0.0 Val Loss: 2.9392, Val Accuracy: 10.9890


Training: 100%|██████████| 800/800 [00:39<00:00, 20.08batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.69batch/s]


Epoch 5/12, Train Loss: 0.5802,Train_acc: 0.0 Val Loss: 2.9991, Val Accuracy: 12.5275


Training: 100%|██████████| 800/800 [00:39<00:00, 20.03batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.97batch/s]


Epoch 6/12, Train Loss: 0.5536,Train_acc: 0.0 Val Loss: 3.0899, Val Accuracy: 12.9182


Training: 100%|██████████| 800/800 [00:40<00:00, 19.80batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.80batch/s]


Epoch 7/12, Train Loss: 0.5349,Train_acc: 0.0 Val Loss: 3.1396, Val Accuracy: 14.5788


Training: 100%|██████████| 800/800 [00:39<00:00, 20.06batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.69batch/s]


Epoch 8/12, Train Loss: 0.5214,Train_acc: 0.0 Val Loss: 3.1864, Val Accuracy: 14.3590


Training: 100%|██████████| 800/800 [00:39<00:00, 20.16batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.67batch/s]


Epoch 9/12, Train Loss: 0.5100,Train_acc: 0.0 Val Loss: 3.2246, Val Accuracy: 15.3114


Training: 100%|██████████| 800/800 [00:39<00:00, 20.11batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 56.56batch/s]


Epoch 10/12, Train Loss: 0.4945,Train_acc: 0.0 Val Loss: 3.2917, Val Accuracy: 16.3126


Training: 100%|██████████| 800/800 [00:39<00:00, 20.14batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.67batch/s]


Epoch 11/12, Train Loss: 0.4899,Train_acc: 0.0 Val Loss: 3.3200, Val Accuracy: 16.5079


Training: 100%|██████████| 800/800 [00:39<00:00, 20.06batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.64batch/s]


Epoch 12/12, Train Loss: 0.4805,Train_acc: 0.0 Val Loss: 3.3210, Val Accuracy: 16.9475


VBox(children=(Label(value='0.120 MB of 0.120 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▅▆▆▇▇▇▇███
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▅▃▂▂▂▁▁▁▁▁▁
val_accuracy,▁▂▅▆▆▆▇▇▇███

0,1
val_loss,3.32103
Epoch,11.0
train_loss,0.4805
val_accuracy,16.9475


[34m[1mwandb[0m: Agent Starting Run: j3xh5d52 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 32)
    (rnn): GRU(32, 64, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=256, out_features=128, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 32)
    (rnn): GRU(160, 128, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:40<00:00, 19.88batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.30batch/s]


Epoch 1/12, Train Loss: 1.0732,Train_acc: 0.0 Val Loss: 2.6706, Val Accuracy: 8.0098


Training: 100%|██████████| 800/800 [00:40<00:00, 19.75batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.84batch/s]


Epoch 2/12, Train Loss: 0.5682,Train_acc: 0.0 Val Loss: 3.1111, Val Accuracy: 16.3614


Training: 100%|██████████| 800/800 [00:40<00:00, 19.92batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.99batch/s]


Epoch 3/12, Train Loss: 0.4856,Train_acc: 0.0 Val Loss: 3.3111, Val Accuracy: 18.2418


Training: 100%|██████████| 800/800 [00:40<00:00, 19.91batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.89batch/s]


Epoch 4/12, Train Loss: 0.4460,Train_acc: 0.0 Val Loss: 3.3687, Val Accuracy: 19.6825


Training: 100%|██████████| 800/800 [00:40<00:00, 19.87batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.02batch/s]


Epoch 5/12, Train Loss: 0.4213,Train_acc: 0.0 Val Loss: 3.4720, Val Accuracy: 19.9023


Training: 100%|██████████| 800/800 [00:40<00:00, 19.88batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.06batch/s]


Epoch 6/12, Train Loss: 0.4075,Train_acc: 0.0 Val Loss: 3.5488, Val Accuracy: 22.6374


Training: 100%|██████████| 800/800 [00:40<00:00, 19.89batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.35batch/s]


Epoch 7/12, Train Loss: 0.3950,Train_acc: 0.0 Val Loss: 3.6047, Val Accuracy: 23.9072


Training: 100%|██████████| 800/800 [00:40<00:00, 19.83batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.55batch/s]


Epoch 8/12, Train Loss: 0.3821,Train_acc: 0.0019531631477177286 Val Loss: 3.6460, Val Accuracy: 25.5433


Training: 100%|██████████| 800/800 [00:40<00:00, 19.87batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.57batch/s]


Epoch 9/12, Train Loss: 0.3706,Train_acc: 0.0019531631477177286 Val Loss: 3.7029, Val Accuracy: 26.4713


Training: 100%|██████████| 800/800 [00:40<00:00, 19.99batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.49batch/s]


Epoch 10/12, Train Loss: 0.3647,Train_acc: 0.0 Val Loss: 3.7639, Val Accuracy: 27.5946


Training: 100%|██████████| 800/800 [00:40<00:00, 19.72batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.05batch/s]


Epoch 11/12, Train Loss: 0.3597,Train_acc: 0.0 Val Loss: 3.7321, Val Accuracy: 27.9365


Training: 100%|██████████| 800/800 [00:40<00:00, 19.89batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.67batch/s]


Epoch 12/12, Train Loss: 0.3538,Train_acc: 0.0 Val Loss: 3.7596, Val Accuracy: 27.5458


VBox(children=(Label(value='0.131 MB of 0.131 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▄▅▅▆▇▇▇████
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▄▅▅▅▆▇▇▇███

0,1
val_loss,3.75962
Epoch,11.0
train_loss,0.35385
val_accuracy,27.54579


[34m[1mwandb[0m: Agent Starting Run: szh53gum with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): LSTM(128, 128, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=512, out_features=256, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=512, out_features=256, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): LSTM(384, 256, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=512, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.27batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 61.86batch/s]


Epoch 1/12, Train Loss: 0.6462,Train_acc: 0.0 Val Loss: 3.1938, Val Accuracy: 17.8755


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.30batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.86batch/s]


Epoch 2/12, Train Loss: 0.4087,Train_acc: 0.0 Val Loss: 3.4964, Val Accuracy: 24.2735


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.20batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.18batch/s]


Epoch 3/12, Train Loss: 0.3664,Train_acc: 0.0 Val Loss: 3.6341, Val Accuracy: 26.5934


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.26batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.71batch/s]


Epoch 4/12, Train Loss: 0.3404,Train_acc: 0.0 Val Loss: 3.7901, Val Accuracy: 28.8645


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.21batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.58batch/s]


Epoch 5/12, Train Loss: 0.3267,Train_acc: 0.0019531631477177286 Val Loss: 3.8519, Val Accuracy: 30.6227


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.27batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.92batch/s]


Epoch 6/12, Train Loss: 0.3127,Train_acc: 0.0 Val Loss: 3.9411, Val Accuracy: 31.3553


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.21batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.25batch/s]


Epoch 7/12, Train Loss: 0.3041,Train_acc: 0.0019531631477177286 Val Loss: 4.0331, Val Accuracy: 32.4786


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.24batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.93batch/s]


Epoch 8/12, Train Loss: 0.2953,Train_acc: 0.0 Val Loss: 4.1387, Val Accuracy: 32.7228


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.34batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.51batch/s]


Epoch 9/12, Train Loss: 0.2903,Train_acc: 0.0019531631477177286 Val Loss: 4.1851, Val Accuracy: 33.4066


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.25batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.23batch/s]


Epoch 10/12, Train Loss: 0.2800,Train_acc: 0.0019531631477177286 Val Loss: 4.2762, Val Accuracy: 33.9683


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.24batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 59.02batch/s]


Epoch 11/12, Train Loss: 0.2763,Train_acc: 0.0019531631477177286 Val Loss: 4.3892, Val Accuracy: 33.6508


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.30batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.75batch/s]


Epoch 12/12, Train Loss: 0.2707,Train_acc: 0.0019531631477177286 Val Loss: 4.3228, Val Accuracy: 34.0415


VBox(children=(Label(value='0.142 MB of 0.142 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▄▅▅▆▇▇▇██
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▃▂▂▂▂▁▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇▇████

0,1
val_loss,4.32284
Epoch,11.0
train_loss,0.27068
val_accuracy,34.04151


[34m[1mwandb[0m: Agent Starting Run: otyfpwvj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 32)
    (rnn): RNN(32, 16, batch_first=True, dropout=0.5)
  )
  (attention): Attention(
    (attention): Linear(in_features=32, out_features=16, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=32, out_features=16, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 32)
    (rnn): RNN(48, 16, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=32, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:15<00:00, 21.18batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 69.25batch/s]


Epoch 1/10, Train Loss: 1.0797,Train_acc: 0.0 Val Loss: 2.8076, Val Accuracy: 2.0513


Training: 100%|██████████| 1600/1600 [01:15<00:00, 21.16batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 70.07batch/s]


Epoch 2/10, Train Loss: 0.7997,Train_acc: 0.0 Val Loss: 3.1175, Val Accuracy: 4.5910


Training: 100%|██████████| 1600/1600 [01:15<00:00, 21.13batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 71.25batch/s]


Epoch 3/10, Train Loss: 0.7456,Train_acc: 0.0 Val Loss: 3.2387, Val Accuracy: 4.8352


Training: 100%|██████████| 1600/1600 [01:15<00:00, 21.16batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 70.53batch/s]


Epoch 4/10, Train Loss: 0.7287,Train_acc: 0.0 Val Loss: 3.2781, Val Accuracy: 5.1038


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.59batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.79batch/s]


Epoch 5/10, Train Loss: 0.7190,Train_acc: 0.0 Val Loss: 3.2562, Val Accuracy: 6.2515


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.89batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 69.67batch/s]


Epoch 6/10, Train Loss: 0.7149,Train_acc: 0.0 Val Loss: 3.3596, Val Accuracy: 5.6899


Training: 100%|██████████| 1600/1600 [01:15<00:00, 21.15batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 71.10batch/s]


Epoch 7/10, Train Loss: 0.7224,Train_acc: 0.0 Val Loss: 3.3927, Val Accuracy: 5.0549


Training: 100%|██████████| 1600/1600 [01:15<00:00, 21.18batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 71.76batch/s]


Epoch 8/10, Train Loss: 0.7048,Train_acc: 0.0 Val Loss: 3.3890, Val Accuracy: 5.9585


Training: 100%|██████████| 1600/1600 [01:15<00:00, 21.21batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 71.76batch/s]


Epoch 9/10, Train Loss: 0.7114,Train_acc: 0.0 Val Loss: 3.4392, Val Accuracy: 5.0305


Training: 100%|██████████| 1600/1600 [01:15<00:00, 21.06batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.39batch/s]


Epoch 10/10, Train Loss: 0.7040,Train_acc: 0.0 Val Loss: 3.4352, Val Accuracy: 6.5201


VBox(children=(Label(value='0.152 MB of 0.152 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▄▆▆▆▇▇▇██
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▂▁▁▁▁▁▁▁
val_accuracy,▁▅▅▆█▇▆▇▆█

0,1
val_loss,3.43519
Epoch,9.0
train_loss,0.704
val_accuracy,6.52015


[34m[1mwandb[0m: Agent Starting Run: no4f80p8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): LSTM(64, 16, batch_first=True, dropout=0.5)
  )
  (attention): Attention(
    (attention): Linear(in_features=32, out_features=16, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=32, out_features=16, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): LSTM(80, 16, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=32, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:39<00:00, 20.31batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.03batch/s]


Epoch 1/10, Train Loss: 1.1562,Train_acc: 0.0 Val Loss: 2.4656, Val Accuracy: 0.0244


Training: 100%|██████████| 800/800 [00:39<00:00, 20.27batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.11batch/s]


Epoch 2/10, Train Loss: 0.8057,Train_acc: 0.0 Val Loss: 2.9909, Val Accuracy: 1.2698


Training: 100%|██████████| 800/800 [00:39<00:00, 20.35batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.59batch/s]


Epoch 3/10, Train Loss: 0.6884,Train_acc: 0.0 Val Loss: 3.1399, Val Accuracy: 5.5189


Training: 100%|██████████| 800/800 [00:39<00:00, 20.34batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.10batch/s]


Epoch 4/10, Train Loss: 0.6484,Train_acc: 0.0 Val Loss: 3.1485, Val Accuracy: 9.1331


Training: 100%|██████████| 800/800 [00:39<00:00, 20.41batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.91batch/s]


Epoch 5/10, Train Loss: 0.6290,Train_acc: 0.0 Val Loss: 3.2016, Val Accuracy: 8.6691


Training: 100%|██████████| 800/800 [00:39<00:00, 20.34batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.74batch/s]


Epoch 6/10, Train Loss: 0.6146,Train_acc: 0.0 Val Loss: 3.2502, Val Accuracy: 10.4274


Training: 100%|██████████| 800/800 [00:39<00:00, 20.24batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.97batch/s]


Epoch 7/10, Train Loss: 0.6017,Train_acc: 0.0 Val Loss: 3.2471, Val Accuracy: 10.4518


Training: 100%|██████████| 800/800 [00:39<00:00, 20.43batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.39batch/s]


Epoch 8/10, Train Loss: 0.5940,Train_acc: 0.0 Val Loss: 3.2796, Val Accuracy: 11.8193


Training: 100%|██████████| 800/800 [00:39<00:00, 20.30batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.55batch/s]


Epoch 9/10, Train Loss: 0.5843,Train_acc: 0.0 Val Loss: 3.3502, Val Accuracy: 11.5263


Training: 100%|██████████| 800/800 [00:39<00:00, 20.33batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.27batch/s]


Epoch 10/10, Train Loss: 0.5826,Train_acc: 0.0 Val Loss: 3.2843, Val Accuracy: 12.4542


VBox(children=(Label(value='0.162 MB of 0.162 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▅▆▆▇▇▇▇█▇
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▂▂▂▁▁▁▁▁
val_accuracy,▁▂▄▆▆▇▇█▇█

0,1
val_loss,3.28433
Epoch,9.0
train_loss,0.58256
val_accuracy,12.45421


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ujtxzvhb with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): RNN(128, 256, batch_first=True, dropout=0.5)
  )
  (attention): Attention(
    (attention): Linear(in_features=512, out_features=256, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=512, out_features=256, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): RNN(384, 256, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=512, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.31batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.52batch/s]


Epoch 1/12, Train Loss: 0.8167,Train_acc: 0.0 Val Loss: 3.1607, Val Accuracy: 12.8694


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.47batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.01batch/s]


Epoch 2/12, Train Loss: 0.5235,Train_acc: 0.0 Val Loss: 3.3792, Val Accuracy: 15.9951


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.52batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 67.34batch/s]


Epoch 3/12, Train Loss: 0.4752,Train_acc: 0.0 Val Loss: 3.4908, Val Accuracy: 18.4371


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.61batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 67.20batch/s]


Epoch 4/12, Train Loss: 0.4507,Train_acc: 0.0 Val Loss: 3.6327, Val Accuracy: 19.6825


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.58batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.88batch/s]


Epoch 5/12, Train Loss: 0.4338,Train_acc: 0.0 Val Loss: 3.6783, Val Accuracy: 19.8535


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.56batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.47batch/s]


Epoch 6/12, Train Loss: 0.4245,Train_acc: 0.0 Val Loss: 3.7109, Val Accuracy: 21.8315


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.60batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.50batch/s]


Epoch 7/12, Train Loss: 0.4171,Train_acc: 0.0019531631477177286 Val Loss: 3.7206, Val Accuracy: 21.9292


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.63batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 67.81batch/s]


Epoch 8/12, Train Loss: 0.4089,Train_acc: 0.0019531631477177286 Val Loss: 3.7709, Val Accuracy: 22.8327


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.58batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 67.90batch/s]


Epoch 9/12, Train Loss: 0.4024,Train_acc: 0.0 Val Loss: 3.8581, Val Accuracy: 22.8816


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.47batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.74batch/s]


Epoch 10/12, Train Loss: 0.4009,Train_acc: 0.0 Val Loss: 3.8237, Val Accuracy: 24.1758


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.63batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 67.81batch/s]


Epoch 11/12, Train Loss: 0.3950,Train_acc: 0.0 Val Loss: 3.8348, Val Accuracy: 22.9548


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.59batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.42batch/s]


Epoch 12/12, Train Loss: 0.3940,Train_acc: 0.0019531631477177286 Val Loss: 3.9054, Val Accuracy: 23.7118


VBox(children=(Label(value='0.173 MB of 0.173 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▅▆▆▆▇█▇▇█
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▅▅▇▇▇▇█▇█

0,1
val_loss,3.90543
Epoch,11.0
train_loss,0.39398
val_accuracy,23.71184


[34m[1mwandb[0m: Agent Starting Run: orofehm5 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113843699998445, max=1.0…



Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): LSTM(256, 128, batch_first=True, dropout=0.2)
  )
  (attention): Attention(
    (attention): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=256, out_features=128, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): LSTM(384, 128, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.63batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.13batch/s]


Epoch 1/12, Train Loss: 0.7076,Train_acc: 0.0 Val Loss: 3.1907, Val Accuracy: 1.6117


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.68batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 69.02batch/s]


Epoch 2/12, Train Loss: 0.4812,Train_acc: 0.0 Val Loss: 3.4001, Val Accuracy: 9.3529


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.74batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.61batch/s]


Epoch 3/12, Train Loss: 0.4334,Train_acc: 0.0 Val Loss: 3.5218, Val Accuracy: 8.3761


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.70batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.62batch/s]


Epoch 4/12, Train Loss: 0.4075,Train_acc: 0.0 Val Loss: 3.5938, Val Accuracy: 12.3565


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.79batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.70batch/s]


Epoch 5/12, Train Loss: 0.3892,Train_acc: 0.0 Val Loss: 3.7080, Val Accuracy: 11.7705


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.84batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 69.17batch/s]


Epoch 6/12, Train Loss: 0.3770,Train_acc: 0.0 Val Loss: 3.7576, Val Accuracy: 14.1148


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.81batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.99batch/s]


Epoch 7/12, Train Loss: 0.3671,Train_acc: 0.0 Val Loss: 3.7829, Val Accuracy: 18.2662


Training: 100%|██████████| 1600/1600 [01:18<00:00, 20.28batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.77batch/s]


Epoch 8/12, Train Loss: 0.3588,Train_acc: 0.0 Val Loss: 3.8767, Val Accuracy: 14.0415


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.10batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.68batch/s]


Epoch 9/12, Train Loss: 0.3505,Train_acc: 0.0 Val Loss: 3.9143, Val Accuracy: 18.0220


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.23batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.59batch/s]


Epoch 10/12, Train Loss: 0.3451,Train_acc: 0.0 Val Loss: 3.9205, Val Accuracy: 17.6801


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.15batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.50batch/s]


Epoch 11/12, Train Loss: 0.3396,Train_acc: 0.0 Val Loss: 4.0046, Val Accuracy: 19.2674


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.23batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.51batch/s]


Epoch 12/12, Train Loss: 0.3361,Train_acc: 0.0 Val Loss: 4.0188, Val Accuracy: 18.8523


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▄▅▆▆▇▇▇██
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▃▂▂▂▂▁▁▁▁▁
val_accuracy,▁▄▄▅▅▆█▆█▇██

0,1
val_loss,4.01875
Epoch,11.0
train_loss,0.33606
val_accuracy,18.85226


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: opovlz20 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): GRU(256, 128, batch_first=True, dropout=0.5, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=512, out_features=256, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=512, out_features=256, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): GRU(512, 256, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=512, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:22<00:00, 19.31batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.91batch/s]


Epoch 1/12, Train Loss: 0.8127,Train_acc: 0.0 Val Loss: 3.0474, Val Accuracy: 10.7937


Training: 100%|██████████| 1600/1600 [01:22<00:00, 19.42batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.83batch/s]


Epoch 2/12, Train Loss: 0.5141,Train_acc: 0.0 Val Loss: 3.5395, Val Accuracy: 19.9756


Training: 100%|██████████| 1600/1600 [01:22<00:00, 19.30batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.68batch/s]


Epoch 3/12, Train Loss: 0.4326,Train_acc: 0.0 Val Loss: 3.7000, Val Accuracy: 22.8571


Training: 100%|██████████| 1600/1600 [01:22<00:00, 19.40batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.75batch/s]


Epoch 4/12, Train Loss: 0.3999,Train_acc: 0.0019531631477177286 Val Loss: 3.7675, Val Accuracy: 23.8095


Training: 100%|██████████| 1600/1600 [01:22<00:00, 19.34batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.78batch/s]


Epoch 5/12, Train Loss: 0.3852,Train_acc: 0.0 Val Loss: 3.8231, Val Accuracy: 26.5201


Training: 100%|██████████| 1600/1600 [01:21<00:00, 19.66batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.56batch/s]


Epoch 6/12, Train Loss: 0.3739,Train_acc: 0.0019531631477177286 Val Loss: 3.9025, Val Accuracy: 27.3993


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.20batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.29batch/s]


Epoch 7/12, Train Loss: 0.3669,Train_acc: 0.0 Val Loss: 3.9549, Val Accuracy: 27.6679


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.12batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.23batch/s]


Epoch 8/12, Train Loss: 0.3593,Train_acc: 0.0 Val Loss: 3.9843, Val Accuracy: 28.4493


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.20batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.81batch/s]


Epoch 9/12, Train Loss: 0.3535,Train_acc: 0.0 Val Loss: 3.9838, Val Accuracy: 28.9866


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.23batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.13batch/s]


Epoch 10/12, Train Loss: 0.3503,Train_acc: 0.0 Val Loss: 4.0681, Val Accuracy: 28.9133


Training: 100%|██████████| 1600/1600 [01:22<00:00, 19.51batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.91batch/s]


Epoch 11/12, Train Loss: 0.3444,Train_acc: 0.0 Val Loss: 4.1180, Val Accuracy: 28.9621


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.86batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.42batch/s]


Epoch 12/12, Train Loss: 0.3422,Train_acc: 0.0019531631477177286 Val Loss: 4.1781, Val Accuracy: 29.1087


VBox(children=(Label(value='0.196 MB of 0.196 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▄▅▅▆▆▇▇▇▇██
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▅▆▆▇▇▇█████

0,1
val_loss,4.17813
Epoch,11.0
train_loss,0.34219
val_accuracy,29.10867


[34m[1mwandb[0m: Agent Starting Run: qfnph41t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): RNN(64, 128, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=512, out_features=256, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=512, out_features=256, bias=True)
    )
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): RNN(320, 256, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=512, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:40<00:00, 19.84batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.35batch/s]


Epoch 1/12, Train Loss: 0.9292,Train_acc: 0.0 Val Loss: 3.4315, Val Accuracy: 5.2747


Training: 100%|██████████| 800/800 [00:40<00:00, 19.81batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 59.85batch/s]


Epoch 2/12, Train Loss: 0.7856,Train_acc: 0.0 Val Loss: 3.6081, Val Accuracy: 3.3944


Training: 100%|██████████| 800/800 [00:40<00:00, 19.68batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 59.90batch/s]


Epoch 3/12, Train Loss: 0.8281,Train_acc: 0.0 Val Loss: 3.3906, Val Accuracy: 1.6606


Training: 100%|██████████| 800/800 [00:40<00:00, 19.77batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.19batch/s]


Epoch 4/12, Train Loss: 0.9221,Train_acc: 0.0 Val Loss: 4.0065, Val Accuracy: 0.3663


Training: 100%|██████████| 800/800 [00:40<00:00, 19.67batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.36batch/s]


Epoch 5/12, Train Loss: 0.9429,Train_acc: 0.0 Val Loss: 3.6747, Val Accuracy: 0.3175


Training: 100%|██████████| 800/800 [00:40<00:00, 19.76batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.48batch/s]


Epoch 6/12, Train Loss: 0.9613,Train_acc: 0.0 Val Loss: 3.5573, Val Accuracy: 0.2442


Training: 100%|██████████| 800/800 [00:40<00:00, 19.88batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.21batch/s]


Epoch 7/12, Train Loss: 0.9564,Train_acc: 0.0 Val Loss: 3.3934, Val Accuracy: 0.5861


Training: 100%|██████████| 800/800 [00:40<00:00, 19.84batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.64batch/s]


Epoch 8/12, Train Loss: 0.9283,Train_acc: 0.0 Val Loss: 3.5992, Val Accuracy: 0.5128


Training: 100%|██████████| 800/800 [00:40<00:00, 19.65batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.66batch/s]


Epoch 9/12, Train Loss: 0.9441,Train_acc: 0.0 Val Loss: 3.5130, Val Accuracy: 0.8791


Training: 100%|██████████| 800/800 [00:40<00:00, 19.81batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.45batch/s]


Epoch 10/12, Train Loss: 0.9258,Train_acc: 0.0 Val Loss: 3.4983, Val Accuracy: 1.0745


Training: 100%|██████████| 800/800 [00:40<00:00, 19.77batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.72batch/s]


Epoch 11/12, Train Loss: 0.9435,Train_acc: 0.0 Val Loss: 3.6463, Val Accuracy: 0.2686


Training: 100%|██████████| 800/800 [00:40<00:00, 19.69batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.94batch/s]


Epoch 12/12, Train Loss: 0.9517,Train_acc: 0.0 Val Loss: 3.5625, Val Accuracy: 0.3419


VBox(children=(Label(value='0.206 MB of 0.206 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▁█▄▃▁▃▂▂▄▃
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,▇▁▃▆▇██▇▇▇▇█
val_accuracy,█▅▃▁▁▁▁▁▂▂▁▁

0,1
val_loss,3.56252
Epoch,11.0
train_loss,0.95175
val_accuracy,0.34188


[34m[1mwandb[0m: Agent Starting Run: h6c3hxu4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 32
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 32)
    (rnn): RNN(32, 32, batch_first=True, dropout=0.2)
  )
  (attention): Attention(
    (attention): Linear(in_features=64, out_features=32, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=64, out_features=32, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 32)
    (rnn): RNN(64, 32, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:39<00:00, 20.16batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.57batch/s]


Epoch 1/12, Train Loss: 1.3901,Train_acc: 0.0 Val Loss: 2.0651, Val Accuracy: 0.0000


Training: 100%|██████████| 800/800 [00:39<00:00, 20.36batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.20batch/s]


Epoch 2/12, Train Loss: 1.0035,Train_acc: 0.0 Val Loss: 2.4463, Val Accuracy: 2.6618


Training: 100%|██████████| 800/800 [00:39<00:00, 20.41batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.48batch/s]


Epoch 3/12, Train Loss: 0.7801,Train_acc: 0.0 Val Loss: 2.6979, Val Accuracy: 7.4969


Training: 100%|██████████| 800/800 [00:39<00:00, 20.33batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.37batch/s]


Epoch 4/12, Train Loss: 0.7031,Train_acc: 0.0 Val Loss: 2.8743, Val Accuracy: 8.9377


Training: 100%|██████████| 800/800 [00:39<00:00, 20.28batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.27batch/s]


Epoch 5/12, Train Loss: 0.6615,Train_acc: 0.0 Val Loss: 2.9527, Val Accuracy: 9.5971


Training: 100%|██████████| 800/800 [00:39<00:00, 20.28batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.77batch/s]


Epoch 6/12, Train Loss: 0.6337,Train_acc: 0.0 Val Loss: 3.0361, Val Accuracy: 10.7692


Training: 100%|██████████| 800/800 [00:39<00:00, 20.33batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.06batch/s]


Epoch 7/12, Train Loss: 0.6156,Train_acc: 0.0 Val Loss: 3.1207, Val Accuracy: 10.8425


Training: 100%|██████████| 800/800 [00:39<00:00, 20.20batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.71batch/s]


Epoch 8/12, Train Loss: 0.5998,Train_acc: 0.0 Val Loss: 3.1221, Val Accuracy: 12.2100


Training: 100%|██████████| 800/800 [00:39<00:00, 20.35batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.35batch/s]


Epoch 9/12, Train Loss: 0.5881,Train_acc: 0.0 Val Loss: 3.1589, Val Accuracy: 11.8926


Training: 100%|██████████| 800/800 [00:39<00:00, 20.36batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.67batch/s]


Epoch 10/12, Train Loss: 0.5805,Train_acc: 0.0 Val Loss: 3.2632, Val Accuracy: 10.8669


Training: 100%|██████████| 800/800 [00:39<00:00, 20.32batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.13batch/s]


Epoch 11/12, Train Loss: 0.5728,Train_acc: 0.0 Val Loss: 3.2651, Val Accuracy: 12.4298


Training: 100%|██████████| 800/800 [00:39<00:00, 20.31batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.42batch/s]


Epoch 12/12, Train Loss: 0.5631,Train_acc: 0.0 Val Loss: 3.2775, Val Accuracy: 13.3333


VBox(children=(Label(value='0.217 MB of 0.217 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▅▆▆▇▇▇▇███
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▅▃▂▂▂▁▁▁▁▁▁
val_accuracy,▁▂▅▆▆▇▇▇▇▇██

0,1
val_loss,3.27746
Epoch,11.0
train_loss,0.56306
val_accuracy,13.33333


[34m[1mwandb[0m: Agent Starting Run: gig4vkqx with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 16
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 16)
    (rnn): GRU(16, 32, batch_first=True, dropout=0.5)
  )
  (attention): Attention(
    (attention): Linear(in_features=64, out_features=32, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=64, out_features=32, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 16)
    (rnn): GRU(48, 32, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=64, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:39<00:00, 20.29batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.01batch/s]


Epoch 1/12, Train Loss: 1.4235,Train_acc: 0.0 Val Loss: 2.1411, Val Accuracy: 0.0000


Training: 100%|██████████| 800/800 [00:39<00:00, 20.11batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.94batch/s]


Epoch 2/12, Train Loss: 1.1847,Train_acc: 0.0 Val Loss: 2.1919, Val Accuracy: 0.0000


Training: 100%|██████████| 800/800 [00:39<00:00, 20.30batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.42batch/s]


Epoch 3/12, Train Loss: 1.0588,Train_acc: 0.0 Val Loss: 2.3054, Val Accuracy: 0.0977


Training: 100%|██████████| 800/800 [00:39<00:00, 20.24batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.58batch/s]


Epoch 4/12, Train Loss: 0.9251,Train_acc: 0.0 Val Loss: 2.5615, Val Accuracy: 1.5629


Training: 100%|██████████| 800/800 [00:39<00:00, 20.21batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.38batch/s]


Epoch 5/12, Train Loss: 0.8351,Train_acc: 0.0 Val Loss: 2.7708, Val Accuracy: 3.4188


Training: 100%|██████████| 800/800 [00:39<00:00, 20.17batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.31batch/s]


Epoch 6/12, Train Loss: 0.7866,Train_acc: 0.0 Val Loss: 2.9578, Val Accuracy: 2.9548


Training: 100%|██████████| 800/800 [00:39<00:00, 20.19batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.17batch/s]


Epoch 7/12, Train Loss: 0.7527,Train_acc: 0.0 Val Loss: 3.0100, Val Accuracy: 6.0073


Training: 100%|██████████| 800/800 [00:39<00:00, 20.23batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.48batch/s]


Epoch 8/12, Train Loss: 0.7293,Train_acc: 0.0 Val Loss: 3.0755, Val Accuracy: 6.6178


Training: 100%|██████████| 800/800 [00:39<00:00, 20.10batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.41batch/s]


Epoch 9/12, Train Loss: 0.7056,Train_acc: 0.0 Val Loss: 3.1451, Val Accuracy: 7.3504


Training: 100%|██████████| 800/800 [00:39<00:00, 20.15batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.11batch/s]


Epoch 10/12, Train Loss: 0.6909,Train_acc: 0.0 Val Loss: 3.1803, Val Accuracy: 8.0098


Training: 100%|██████████| 800/800 [00:39<00:00, 20.06batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.71batch/s]


Epoch 11/12, Train Loss: 0.6789,Train_acc: 0.0 Val Loss: 3.1931, Val Accuracy: 9.2308


Training: 100%|██████████| 800/800 [00:39<00:00, 20.01batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.66batch/s]


Epoch 12/12, Train Loss: 0.6698,Train_acc: 0.0 Val Loss: 3.2037, Val Accuracy: 9.6215


VBox(children=(Label(value='0.228 MB of 0.228 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▁▂▄▅▆▇▇████
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▆▅▃▃▂▂▂▁▁▁▁
val_accuracy,▁▁▁▂▃▃▅▆▆▇██

0,1
val_loss,3.20367
Epoch,11.0
train_loss,0.66978
val_accuracy,9.62149


[34m[1mwandb[0m: Agent Starting Run: eaup44zs with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): RNN(256, 256, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=1024, out_features=512, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=1024, out_features=512, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): RNN(768, 512, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=1024, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.07batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.47batch/s]


Epoch 1/12, Train Loss: 0.6456,Train_acc: 0.0 Val Loss: 3.3222, Val Accuracy: 17.1429


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.07batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.94batch/s]


Epoch 2/12, Train Loss: 0.4641,Train_acc: 0.0019531631477177286 Val Loss: 3.4792, Val Accuracy: 19.3407


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.04batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.55batch/s]


Epoch 3/12, Train Loss: 0.4366,Train_acc: 0.0 Val Loss: 3.5899, Val Accuracy: 22.8327


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.14batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.03batch/s]


Epoch 4/12, Train Loss: 0.4092,Train_acc: 0.0 Val Loss: 3.8058, Val Accuracy: 23.9805


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.84batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.46batch/s]


Epoch 5/12, Train Loss: 0.4031,Train_acc: 0.0 Val Loss: 3.8657, Val Accuracy: 23.2723


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.84batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 62.87batch/s]


Epoch 6/12, Train Loss: 0.4017,Train_acc: 0.0 Val Loss: 3.8368, Val Accuracy: 23.8339


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.05batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.15batch/s]


Epoch 7/12, Train Loss: 0.3840,Train_acc: 0.0 Val Loss: 3.9402, Val Accuracy: 24.8840


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.12batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.63batch/s]


Epoch 8/12, Train Loss: 0.3792,Train_acc: 0.0 Val Loss: 3.9492, Val Accuracy: 24.0049


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.08batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 62.75batch/s]


Epoch 9/12, Train Loss: 0.3739,Train_acc: 0.0 Val Loss: 3.9869, Val Accuracy: 26.4469


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.05batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.53batch/s]


Epoch 10/12, Train Loss: 0.3765,Train_acc: 0.0019531631477177286 Val Loss: 4.0661, Val Accuracy: 25.8608


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.09batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 65.27batch/s]


Epoch 11/12, Train Loss: 0.3718,Train_acc: 0.0019531631477177286 Val Loss: 4.1146, Val Accuracy: 26.3248


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.88batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.51batch/s]


Epoch 12/12, Train Loss: 0.3678,Train_acc: 0.0 Val Loss: 4.0870, Val Accuracy: 26.8620


VBox(children=(Label(value='0.239 MB of 0.239 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▂▃▅▆▆▆▇▇███
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▃▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▅▆▅▆▇▆█▇██

0,1
val_loss,4.08699
Epoch,11.0
train_loss,0.36777
val_accuracy,26.86203


[34m[1mwandb[0m: Agent Starting Run: eloqbtht with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): RNN(256, 256, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=1024, out_features=512, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=1024, out_features=512, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): RNN(768, 512, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=1024, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:43<00:00, 18.19batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.71batch/s]


Epoch 1/12, Train Loss: 0.7015,Train_acc: 0.0 Val Loss: 3.1337, Val Accuracy: 18.9744


Training: 100%|██████████| 800/800 [00:43<00:00, 18.27batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.74batch/s]


Epoch 2/12, Train Loss: 0.4486,Train_acc: 0.0 Val Loss: 3.4200, Val Accuracy: 22.4664


Training: 100%|██████████| 800/800 [00:43<00:00, 18.22batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 59.14batch/s]


Epoch 3/12, Train Loss: 0.4071,Train_acc: 0.0019531631477177286 Val Loss: 3.5410, Val Accuracy: 24.9084


Training: 100%|██████████| 800/800 [00:43<00:00, 18.26batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.81batch/s]


Epoch 4/12, Train Loss: 0.3910,Train_acc: 0.0 Val Loss: 3.6980, Val Accuracy: 26.2759


Training: 100%|██████████| 800/800 [00:43<00:00, 18.22batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.73batch/s]


Epoch 5/12, Train Loss: 0.3774,Train_acc: 0.0 Val Loss: 3.6550, Val Accuracy: 27.2039


Training: 100%|██████████| 800/800 [00:44<00:00, 18.11batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.18batch/s]


Epoch 6/12, Train Loss: 0.3718,Train_acc: 0.0 Val Loss: 3.6953, Val Accuracy: 26.4957


Training: 100%|██████████| 800/800 [00:43<00:00, 18.29batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.85batch/s]


Epoch 7/12, Train Loss: 0.3658,Train_acc: 0.0 Val Loss: 3.8692, Val Accuracy: 28.6203


Training: 100%|██████████| 800/800 [00:43<00:00, 18.23batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 59.36batch/s]


Epoch 8/12, Train Loss: 0.3585,Train_acc: 0.0 Val Loss: 3.8244, Val Accuracy: 28.0098


Training: 100%|██████████| 800/800 [00:43<00:00, 18.47batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.98batch/s]


Epoch 9/12, Train Loss: 0.3778,Train_acc: 0.0 Val Loss: 3.8226, Val Accuracy: 26.3492


Training: 100%|██████████| 800/800 [00:44<00:00, 18.12batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 56.99batch/s]


Epoch 10/12, Train Loss: 0.3525,Train_acc: 0.0 Val Loss: 3.9611, Val Accuracy: 28.2295


Training: 100%|██████████| 800/800 [00:43<00:00, 18.22batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.73batch/s]


Epoch 11/12, Train Loss: 0.3470,Train_acc: 0.0 Val Loss: 3.9234, Val Accuracy: 26.8864


Training: 100%|██████████| 800/800 [00:43<00:00, 18.20batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 56.82batch/s]


Epoch 12/12, Train Loss: 0.3408,Train_acc: 0.0 Val Loss: 4.0519, Val Accuracy: 29.1087


VBox(children=(Label(value='0.250 MB of 0.250 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▅▅▅▇▆▆▇▇█
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▂▂▂▂▁▁▂▁▁▁
val_accuracy,▁▃▅▆▇▆█▇▆▇▆█

0,1
val_loss,4.05195
Epoch,11.0
train_loss,0.34077
val_accuracy,29.10867


[34m[1mwandb[0m: Agent Starting Run: t2fz9unt with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_epochs: 10




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): GRU(256, 128, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=512, out_features=256, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=512, out_features=256, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): GRU(512, 256, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=512, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:42<00:00, 18.67batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 56.60batch/s]


Epoch 1/10, Train Loss: 0.7658,Train_acc: 0.0 Val Loss: 3.7646, Val Accuracy: 9.4017


Training: 100%|██████████| 800/800 [00:42<00:00, 18.76batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.53batch/s]


Epoch 2/10, Train Loss: 0.6081,Train_acc: 0.0 Val Loss: 3.7917, Val Accuracy: 10.8913


Training: 100%|██████████| 800/800 [00:43<00:00, 18.34batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.51batch/s]


Epoch 3/10, Train Loss: 0.5922,Train_acc: 0.0 Val Loss: 4.0736, Val Accuracy: 10.4518


Training: 100%|██████████| 800/800 [00:43<00:00, 18.47batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.18batch/s]


Epoch 4/10, Train Loss: 0.5902,Train_acc: 0.0 Val Loss: 4.2483, Val Accuracy: 10.5495


Training: 100%|██████████| 800/800 [00:42<00:00, 18.61batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 54.50batch/s]


Epoch 5/10, Train Loss: 0.5861,Train_acc: 0.0 Val Loss: 4.3731, Val Accuracy: 10.4029


Training: 100%|██████████| 800/800 [00:42<00:00, 18.79batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 59.91batch/s]


Epoch 6/10, Train Loss: 0.5919,Train_acc: 0.0019531631477177286 Val Loss: 4.2490, Val Accuracy: 9.9634


Training: 100%|██████████| 800/800 [00:42<00:00, 18.73batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 59.20batch/s]


Epoch 7/10, Train Loss: 0.6043,Train_acc: 0.0 Val Loss: 4.3391, Val Accuracy: 7.9853


Training: 100%|██████████| 800/800 [00:43<00:00, 18.35batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.59batch/s]


Epoch 8/10, Train Loss: 0.6080,Train_acc: 0.0 Val Loss: 4.3056, Val Accuracy: 8.3761


Training: 100%|██████████| 800/800 [00:43<00:00, 18.47batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.36batch/s]


Epoch 9/10, Train Loss: 0.6244,Train_acc: 0.0019531631477177286 Val Loss: 4.5179, Val Accuracy: 10.1099


Training: 100%|██████████| 800/800 [00:43<00:00, 18.54batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.88batch/s]


Epoch 10/10, Train Loss: 0.6260,Train_acc: 0.0 Val Loss: 4.3801, Val Accuracy: 7.6435


VBox(children=(Label(value='0.260 MB of 0.260 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▁▄▅▇▆▆▆█▇
Epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▂▁▁▁▁▂▂▂▃
val_accuracy,▅█▇▇▇▆▂▃▆▁

0,1
val_loss,4.38007
Epoch,9.0
train_loss,0.62599
val_accuracy,7.64347


[34m[1mwandb[0m: Agent Starting Run: zghdg6jk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): LSTM(64, 256, batch_first=True, dropout=0.2)
  )
  (attention): Attention(
    (attention): Linear(in_features=512, out_features=256, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=512, out_features=256, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): LSTM(320, 256, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=512, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:22<00:00, 19.48batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 62.78batch/s]


Epoch 1/12, Train Loss: 0.7205,Train_acc: 0.0 Val Loss: 3.2049, Val Accuracy: 14.1392


Training: 100%|██████████| 1600/1600 [01:23<00:00, 19.24batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 61.98batch/s]


Epoch 2/12, Train Loss: 0.4398,Train_acc: 0.0 Val Loss: 3.4831, Val Accuracy: 19.8535


Training: 100%|██████████| 1600/1600 [01:21<00:00, 19.54batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.16batch/s]


Epoch 3/12, Train Loss: 0.3857,Train_acc: 0.0019531631477177286 Val Loss: 3.6714, Val Accuracy: 22.0269


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.07batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.18batch/s]


Epoch 4/12, Train Loss: 0.3622,Train_acc: 0.0019531631477177286 Val Loss: 3.7870, Val Accuracy: 26.6422


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.88batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 58.74batch/s]


Epoch 5/12, Train Loss: 0.3406,Train_acc: 0.0019531631477177286 Val Loss: 3.8845, Val Accuracy: 26.6667


Training: 100%|██████████| 1600/1600 [01:21<00:00, 19.57batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.04batch/s]


Epoch 6/12, Train Loss: 0.3279,Train_acc: 0.0019531631477177286 Val Loss: 3.9894, Val Accuracy: 27.8144


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.89batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.14batch/s]


Epoch 7/12, Train Loss: 0.3169,Train_acc: 0.0 Val Loss: 4.0208, Val Accuracy: 29.2308


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.04batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.78batch/s]


Epoch 8/12, Train Loss: 0.3050,Train_acc: 0.0019531631477177286 Val Loss: 4.1617, Val Accuracy: 29.7680


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.07batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 62.05batch/s]


Epoch 9/12, Train Loss: 0.2993,Train_acc: 0.0019531631477177286 Val Loss: 4.1675, Val Accuracy: 29.1331


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.14batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.83batch/s]


Epoch 10/12, Train Loss: 0.2912,Train_acc: 0.0019531631477177286 Val Loss: 4.2202, Val Accuracy: 30.1099


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.04batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.44batch/s]


Epoch 11/12, Train Loss: 0.2858,Train_acc: 0.0019531631477177286 Val Loss: 4.3267, Val Accuracy: 31.0134


Training: 100%|██████████| 1600/1600 [01:19<00:00, 20.06batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.01batch/s]


Epoch 12/12, Train Loss: 0.2804,Train_acc: 0.0019531631477177286 Val Loss: 4.4087, Val Accuracy: 31.7216


VBox(children=(Label(value='0.271 MB of 0.271 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▄▅▆▆▇▇▇██
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▄▃▂▂▂▂▁▁▁▁▁
val_accuracy,▁▃▄▆▆▆▇▇▇▇██

0,1
val_loss,4.40874
Epoch,11.0
train_loss,0.28037
val_accuracy,31.72161


[34m[1mwandb[0m: Agent Starting Run: bsv7bmcn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): GRU(256, 256, batch_first=True, dropout=0.5)
  )
  (attention): Attention(
    (attention): Linear(in_features=512, out_features=256, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=512, out_features=256, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): GRU(512, 256, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=512, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:42<00:00, 19.04batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 59.82batch/s]


Epoch 1/12, Train Loss: 0.7231,Train_acc: 0.0 Val Loss: 3.2463, Val Accuracy: 10.5983


Training: 100%|██████████| 800/800 [00:41<00:00, 19.13batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.70batch/s]


Epoch 2/12, Train Loss: 0.4724,Train_acc: 0.0 Val Loss: 3.4026, Val Accuracy: 19.2918


Training: 100%|██████████| 800/800 [00:41<00:00, 19.10batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.63batch/s]


Epoch 3/12, Train Loss: 0.4243,Train_acc: 0.0 Val Loss: 3.5177, Val Accuracy: 20.8303


Training: 100%|██████████| 800/800 [00:41<00:00, 19.06batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.43batch/s]


Epoch 4/12, Train Loss: 0.4013,Train_acc: 0.0 Val Loss: 3.6648, Val Accuracy: 22.4664


Training: 100%|██████████| 800/800 [00:41<00:00, 19.16batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.23batch/s]


Epoch 5/12, Train Loss: 0.3855,Train_acc: 0.0019531631477177286 Val Loss: 3.7208, Val Accuracy: 26.4957


Training: 100%|██████████| 800/800 [00:42<00:00, 18.98batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 56.33batch/s]


Epoch 6/12, Train Loss: 0.3729,Train_acc: 0.0019531631477177286 Val Loss: 3.7900, Val Accuracy: 26.3980


Training: 100%|██████████| 800/800 [00:41<00:00, 19.06batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.52batch/s]


Epoch 7/12, Train Loss: 0.3633,Train_acc: 0.0 Val Loss: 3.8574, Val Accuracy: 26.8376


Training: 100%|██████████| 800/800 [00:42<00:00, 19.01batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.93batch/s]


Epoch 8/12, Train Loss: 0.3563,Train_acc: 0.0 Val Loss: 3.9221, Val Accuracy: 27.9365


Training: 100%|██████████| 800/800 [00:42<00:00, 18.97batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 59.13batch/s]


Epoch 9/12, Train Loss: 0.3451,Train_acc: 0.0019531631477177286 Val Loss: 4.0265, Val Accuracy: 28.0830


Training: 100%|██████████| 800/800 [00:42<00:00, 19.02batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.40batch/s]


Epoch 10/12, Train Loss: 0.3433,Train_acc: 0.0019531631477177286 Val Loss: 4.0050, Val Accuracy: 28.2051


Training: 100%|██████████| 800/800 [00:41<00:00, 19.07batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 54.63batch/s]


Epoch 11/12, Train Loss: 0.3376,Train_acc: 0.0 Val Loss: 4.0602, Val Accuracy: 29.1575


Training: 100%|██████████| 800/800 [00:42<00:00, 19.04batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 59.23batch/s]


Epoch 12/12, Train Loss: 0.3358,Train_acc: 0.0019531631477177286 Val Loss: 4.0836, Val Accuracy: 29.4017


VBox(children=(Label(value='0.282 MB of 0.282 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▂▃▄▅▆▆▇█▇██
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▃▂▂▂▁▁▁▁▁▁
val_accuracy,▁▄▅▅▇▇▇▇████

0,1
val_loss,4.08363
Epoch,11.0
train_loss,0.33584
val_accuracy,29.40171


[34m[1mwandb[0m: Agent Starting Run: s5mc3k04 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): GRU(64, 256, batch_first=True, dropout=0.5)
  )
  (attention): Attention(
    (attention): Linear(in_features=512, out_features=256, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=512, out_features=256, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): GRU(320, 256, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=512, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:21<00:00, 19.72batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 61.10batch/s]


Epoch 1/12, Train Loss: 0.7597,Train_acc: 0.0 Val Loss: 3.3163, Val Accuracy: 15.9951


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.86batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.04batch/s]


Epoch 2/12, Train Loss: 0.4811,Train_acc: 0.0 Val Loss: 3.5759, Val Accuracy: 19.8779


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.88batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.52batch/s]


Epoch 3/12, Train Loss: 0.4328,Train_acc: 0.0 Val Loss: 3.6683, Val Accuracy: 22.0513


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.86batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.06batch/s]


Epoch 4/12, Train Loss: 0.4092,Train_acc: 0.0 Val Loss: 3.7916, Val Accuracy: 23.7851


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.85batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.57batch/s]


Epoch 5/12, Train Loss: 0.3929,Train_acc: 0.0 Val Loss: 3.8329, Val Accuracy: 25.8364


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.89batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.54batch/s]


Epoch 6/12, Train Loss: 0.3820,Train_acc: 0.0 Val Loss: 3.8951, Val Accuracy: 25.7143


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.89batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.25batch/s]


Epoch 7/12, Train Loss: 0.3731,Train_acc: 0.0 Val Loss: 3.9595, Val Accuracy: 26.7399


Training: 100%|██████████| 1600/1600 [01:21<00:00, 19.70batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 61.69batch/s]


Epoch 8/12, Train Loss: 0.3669,Train_acc: 0.0 Val Loss: 3.9958, Val Accuracy: 26.3492


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.76batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.75batch/s]


Epoch 9/12, Train Loss: 0.3582,Train_acc: 0.0 Val Loss: 4.0185, Val Accuracy: 27.0085


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.85batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.21batch/s]


Epoch 10/12, Train Loss: 0.3513,Train_acc: 0.0 Val Loss: 4.0735, Val Accuracy: 27.6435


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.85batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.20batch/s]


Epoch 11/12, Train Loss: 0.3475,Train_acc: 0.0 Val Loss: 4.0827, Val Accuracy: 28.4493


Training: 100%|██████████| 1600/1600 [01:20<00:00, 19.79batch/s]
Evaluating: 100%|██████████| 128/128 [00:02<00:00, 63.97batch/s]


Epoch 12/12, Train Loss: 0.3454,Train_acc: 0.0 Val Loss: 4.1201, Val Accuracy: 28.5470


VBox(children=(Label(value='0.293 MB of 0.293 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▅▅▆▇▇▇███
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▃▄▅▆▆▇▇▇▇██

0,1
val_loss,4.12005
Epoch,11.0
train_loss,0.34537
val_accuracy,28.54701


[34m[1mwandb[0m: Agent Starting Run: y7hkiknn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): RNN(128, 128, batch_first=True, dropout=0.5)
  )
  (attention): Attention(
    (attention): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=256, out_features=128, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): RNN(256, 128, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.75batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.90batch/s]


Epoch 1/12, Train Loss: 1.0035,Train_acc: 0.0 Val Loss: 2.8912, Val Accuracy: 11.8437


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.76batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 69.42batch/s]


Epoch 2/12, Train Loss: 0.5861,Train_acc: 0.0 Val Loss: 3.2117, Val Accuracy: 14.4322


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.73batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.99batch/s]


Epoch 3/12, Train Loss: 0.5282,Train_acc: 0.0 Val Loss: 3.3995, Val Accuracy: 16.2149


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.79batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.48batch/s]


Epoch 4/12, Train Loss: 0.4953,Train_acc: 0.0 Val Loss: 3.5229, Val Accuracy: 16.8010


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.85batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.46batch/s]


Epoch 5/12, Train Loss: 0.4782,Train_acc: 0.0 Val Loss: 3.5041, Val Accuracy: 17.5336


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.75batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 67.91batch/s]


Epoch 6/12, Train Loss: 0.4652,Train_acc: 0.0 Val Loss: 3.5313, Val Accuracy: 18.6325


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.77batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 69.89batch/s]


Epoch 7/12, Train Loss: 0.4551,Train_acc: 0.0 Val Loss: 3.5840, Val Accuracy: 19.2186


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.79batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.48batch/s]


Epoch 8/12, Train Loss: 0.4499,Train_acc: 0.0 Val Loss: 3.6240, Val Accuracy: 19.4872


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.81batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 67.74batch/s]


Epoch 9/12, Train Loss: 0.4402,Train_acc: 0.0 Val Loss: 3.6983, Val Accuracy: 19.6825


Training: 100%|██████████| 1600/1600 [01:16<00:00, 20.80batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 66.55batch/s]


Epoch 10/12, Train Loss: 0.4329,Train_acc: 0.0 Val Loss: 3.6982, Val Accuracy: 20.6105


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.72batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 64.93batch/s]


Epoch 11/12, Train Loss: 0.4308,Train_acc: 0.0 Val Loss: 3.6984, Val Accuracy: 20.8791


Training: 100%|██████████| 1600/1600 [01:17<00:00, 20.73batch/s]
Evaluating: 100%|██████████| 128/128 [00:01<00:00, 68.51batch/s]


Epoch 12/12, Train Loss: 0.4259,Train_acc: 0.0 Val Loss: 3.7337, Val Accuracy: 21.8559


VBox(children=(Label(value='0.304 MB of 0.304 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▄▅▆▆▆▇▇████
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▂▂▂▁▁▁▁▁▁▁
val_accuracy,▁▃▄▄▅▆▆▆▆▇▇█

0,1
val_loss,3.73371
Epoch,11.0
train_loss,0.42588
val_accuracy,21.85592


[34m[1mwandb[0m: Agent Starting Run: 5bz8bira with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	embedding_size: 128
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(30, 128)
    (rnn): LSTM(128, 64, batch_first=True, dropout=0.2, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=256, out_features=128, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=256, out_features=128, bias=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (embedding): Embedding(70, 128)
    (rnn): LSTM(256, 128, batch_first=True, dropout=0.2)
    (fc): Linear(in_features=256, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:40<00:00, 19.88batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.64batch/s]


Epoch 1/12, Train Loss: 0.9166,Train_acc: 0.0 Val Loss: 2.8336, Val Accuracy: 10.3297


Training: 100%|██████████| 800/800 [00:40<00:00, 19.79batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.31batch/s]


Epoch 2/12, Train Loss: 0.4996,Train_acc: 0.0 Val Loss: 3.2299, Val Accuracy: 11.3553


Training: 100%|██████████| 800/800 [00:40<00:00, 19.81batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.68batch/s]


Epoch 3/12, Train Loss: 0.4352,Train_acc: 0.0 Val Loss: 3.3938, Val Accuracy: 14.6032


Training: 100%|██████████| 800/800 [00:40<00:00, 19.85batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.64batch/s]


Epoch 4/12, Train Loss: 0.4066,Train_acc: 0.0 Val Loss: 3.5496, Val Accuracy: 18.3883


Training: 100%|██████████| 800/800 [00:40<00:00, 19.71batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.81batch/s]


Epoch 5/12, Train Loss: 0.3846,Train_acc: 0.0 Val Loss: 3.5735, Val Accuracy: 23.0281


Training: 100%|██████████| 800/800 [00:40<00:00, 19.93batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.98batch/s]


Epoch 6/12, Train Loss: 0.3711,Train_acc: 0.0 Val Loss: 3.6185, Val Accuracy: 21.2698


Training: 100%|██████████| 800/800 [00:40<00:00, 19.98batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 63.26batch/s]


Epoch 7/12, Train Loss: 0.3603,Train_acc: 0.0 Val Loss: 3.7644, Val Accuracy: 25.4212


Training: 100%|██████████| 800/800 [00:40<00:00, 19.82batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 60.88batch/s]


Epoch 8/12, Train Loss: 0.3515,Train_acc: 0.0 Val Loss: 3.7493, Val Accuracy: 25.4701


Training: 100%|██████████| 800/800 [00:40<00:00, 19.86batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 62.00batch/s]


Epoch 9/12, Train Loss: 0.3400,Train_acc: 0.0 Val Loss: 3.8348, Val Accuracy: 26.5690


Training: 100%|██████████| 800/800 [00:40<00:00, 19.91batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.79batch/s]


Epoch 10/12, Train Loss: 0.3377,Train_acc: 0.0 Val Loss: 3.9238, Val Accuracy: 27.5214


Training: 100%|██████████| 800/800 [00:40<00:00, 19.85batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.74batch/s]


Epoch 11/12, Train Loss: 0.3277,Train_acc: 0.0 Val Loss: 3.9514, Val Accuracy: 28.3761


Training: 100%|██████████| 800/800 [00:40<00:00, 19.77batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 61.49batch/s]


Epoch 12/12, Train Loss: 0.3237,Train_acc: 0.0019531631477177286 Val Loss: 3.9550, Val Accuracy: 28.4493


VBox(children=(Label(value='0.315 MB of 0.315 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▅▆▆▇▇▇███
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▁▃▄▆▅▇▇▇███

0,1
val_loss,3.95497
Epoch,11.0
train_loss,0.32368
val_accuracy,28.44933


[34m[1mwandb[0m: Agent Starting Run: 4bx1hge7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): LSTM(256, 256, batch_first=True, dropout=0.5, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=1024, out_features=512, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=1024, out_features=512, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): LSTM(768, 512, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=1024, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:49<00:00, 16.21batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.58batch/s]


Epoch 1/12, Train Loss: 0.7494,Train_acc: 0.0 Val Loss: 3.2941, Val Accuracy: 16.9475


Training: 100%|██████████| 800/800 [00:49<00:00, 16.28batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.61batch/s]


Epoch 2/12, Train Loss: 0.4146,Train_acc: 0.0 Val Loss: 3.6225, Val Accuracy: 25.2747


Training: 100%|██████████| 800/800 [00:49<00:00, 16.23batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.05batch/s]


Epoch 3/12, Train Loss: 0.3623,Train_acc: 0.0 Val Loss: 3.8275, Val Accuracy: 28.3516


Training: 100%|██████████| 800/800 [00:49<00:00, 16.24batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.83batch/s]


Epoch 4/12, Train Loss: 0.3402,Train_acc: 0.0 Val Loss: 3.9388, Val Accuracy: 30.6716


Training: 100%|██████████| 800/800 [00:49<00:00, 16.23batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.38batch/s]


Epoch 5/12, Train Loss: 0.3250,Train_acc: 0.0 Val Loss: 3.9913, Val Accuracy: 32.4542


Training: 100%|██████████| 800/800 [00:49<00:00, 16.24batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 55.42batch/s]


Epoch 6/12, Train Loss: 0.3123,Train_acc: 0.0 Val Loss: 4.1144, Val Accuracy: 33.5043


Training: 100%|██████████| 800/800 [00:49<00:00, 16.26batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 55.60batch/s]


Epoch 7/12, Train Loss: 0.3005,Train_acc: 0.0 Val Loss: 4.2334, Val Accuracy: 33.7973


Training: 100%|██████████| 800/800 [00:49<00:00, 16.24batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 56.73batch/s]


Epoch 8/12, Train Loss: 0.2945,Train_acc: 0.0 Val Loss: 4.3000, Val Accuracy: 33.8217


Training: 100%|██████████| 800/800 [00:49<00:00, 16.25batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 53.22batch/s]


Epoch 9/12, Train Loss: 0.2875,Train_acc: 0.0 Val Loss: 4.4040, Val Accuracy: 34.7497


Training: 100%|██████████| 800/800 [00:49<00:00, 16.23batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 58.05batch/s]


Epoch 10/12, Train Loss: 0.2782,Train_acc: 0.0019531631477177286 Val Loss: 4.3845, Val Accuracy: 33.8706


Training: 100%|██████████| 800/800 [00:49<00:00, 16.24batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.12batch/s]


Epoch 11/12, Train Loss: 0.2765,Train_acc: 0.0 Val Loss: 4.4592, Val Accuracy: 35.1404


Training: 100%|██████████| 800/800 [00:49<00:00, 16.24batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 57.54batch/s]


Epoch 12/12, Train Loss: 0.2671,Train_acc: 0.0019531631477177286 Val Loss: 4.6190, Val Accuracy: 34.5788


VBox(children=(Label(value='0.326 MB of 0.326 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
val_loss,▁▃▄▄▅▅▆▆▇▇▇█
Epoch,▁▂▂▃▄▄▅▅▆▇▇█
train_loss,█▃▂▂▂▂▁▁▁▁▁▁
val_accuracy,▁▄▅▆▇▇▇▇████

0,1
val_loss,4.61898
Epoch,11.0
train_loss,0.26714
val_accuracy,34.57875


[34m[1mwandb[0m: Agent Starting Run: zbbie0q4 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	embedding_size: 64
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 12




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(30, 64)
    (rnn): LSTM(64, 256, batch_first=True, dropout=0.5, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=1024, out_features=512, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=1024, out_features=512, bias=True)
    )
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(70, 64)
    (rnn): LSTM(576, 512, batch_first=True, dropout=0.5)
    (fc): Linear(in_features=1024, out_features=70, bias=True)
  )
)


Training:  35%|███▍      | 556/1600 [00:28<00:57, 18.15batch/s]

# **Best model**

In [32]:
# Best configutration
input_size = 30  # Number of Latin characters
output_size = 70  # Number of Devanagari characters
embed_size = 256
hidden_size = 256
encoder_layers = 1
decoder_layers = 1
cell_type = 'lstm'
batch_size = 64
num_epochs = 9
drop_prob = 0.3
learning_rate = 0.001
bidirectional=False

# Assuming you have loaded your dataset into train_loader and val_loader

# Initialize the model, criterion, and optimizer
Best_model = Seq2Seq(input_size, output_size, hidden_size,embed_size, encoder_layers,decoder_layers,drop_prob, cell_type, bidirectional)
print(Best_model)

# model = Attention_model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Best_model.to(device)
ignore_index = 0
criterion = nn.CrossEntropyLoss(ignore_index = 0)
optimizer = optim.Adam(Best_model.parameters(), lr=learning_rate)

Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): LSTM(256, 256, batch_first=True, dropout=0.3)
  )
  (attention): Attention(
    (attention): Linear(in_features=512, out_features=256, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=512, out_features=256, bias=True)
    )
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): LSTM(512, 256, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=512, out_features=70, bias=True)
  )
)




# **Prediction on test dataset for best model**

In [44]:
sweep_config = {
    'method': 'bayes',
    'metric': {
      'name': 'val_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'embedding_size':{
            'values': [256]
        },
        'dropout': {
            'values': [0.3]
        },
        'encoder_layers': {
            'values': [1]
        },
        'decoder_layers':{
            'values': [1]
        },
        'hidden_layer_size':{
            'values': [256]
        },
        'cell_type': {
            'values': ['lstm']
        },
        'bidirectional': {
            'values': [True]
        },
        'batch_size': {
            'values': [64]
        },
        'num_epochs': {
            'values': [9]
        },
        'learning_rate': {
            'values': [0.001]
        }
    }
}

sweep_id = wandb.sweep(sweep=sweep_config, project='DL_A3_Attention')


Create sweep with ID: c1d3h9dz
Sweep URL: https://wandb.ai/abanisingha1997/DL_A3_Attention/sweeps/c1d3h9dz


In [62]:
def main():
    '''
    WandB calls main function each time with differnet combination.

    We can retrive the same and use the same values for our hypermeters.

    '''

    with wandb.init() as run:
        run_name="ct-"+str(wandb.config.cell_type)+"_el-"+str(wandb.config.encoder_layers)+"_dl-"+str(wandb.config.decoder_layers)+"_drop-"+str(wandb.config.dropout)+"_es-"+str(wandb.config.embedding_size)+"_hs-"+str(wandb.config.hidden_layer_size)+"_bs-"+str(wandb.config.batch_size)+"_ep-"+str(wandb.config.num_epochs)+"lr"+str(wandb.config.learning_rate)
        wandb.run.name=run_name

        
        model = Seq2Seq(input_size=30, output_size=70, hidden_size=wandb.config.hidden_layer_size,embed_size=wandb.config.embedding_size,encoder_layers=wandb.config.encoder_layers,
                        decoder_layers=wandb.config.decoder_layers,drop_prob=wandb.config.dropout, cell_type=wandb.config.cell_type, bidirectional=wandb.config.bidirectional)
        print(model)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=wandb.config.learning_rate)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        path1 = '/kaggle/input/aksharantar-sampled-dataset/aksharantar_sampled/ben/ben_train.csv'
        custom_dataset1,train_loader_ben,a,b,_,_ = load_data(path1,batch_size = wandb.config.batch_size)
        
        path3 = '/kaggle/input/aksharantar-sampled-dataset/aksharantar_sampled/ben/ben_test.csv'
        c, test_loader_ben, input_vocab, target_vocab, max_length, _ = load_data(path3, batch_size=64)  # Use correct path3

        # Training loop
        for epoch in range(wandb.config.num_epochs):
            trained_model, train_loss, train_acc = train(model, train_loader_ben, criterion, optimizer, device)
            val_loss, val_accuracy = evaluate(trained_model, test_loader_ben, criterion, device)
            model = trained_model
            wandb.log({'Epoch': epoch, 'train_loss': train_loss , 'test_accuracy':val_accuracy})
            print(f'Epoch {epoch+1}/{wandb.config.num_epochs}, Train Loss: {train_loss:.4f}, test_accuracy: {val_accuracy:.4f}')

        
#         model_train(model,train,validation)
        
wandb.agent(sweep_id, function= main,count= 1) # calls main function for count number of times.
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: pyjc8y6p with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	decoder_layers: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	embedding_size: 256
[34m[1mwandb[0m: 	encoder_layers: 1
[34m[1mwandb[0m: 	hidden_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 9




Seq2Seq(
  (encoder): Encoder(
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(30, 256)
    (rnn): LSTM(256, 256, batch_first=True, dropout=0.3, bidirectional=True)
  )
  (attention): Attention(
    (attention): Linear(in_features=1024, out_features=512, bias=True)
  )
  (decoder): DecoderWithAttention(
    (attention): Attention(
      (attention): Linear(in_features=1024, out_features=512, bias=True)
    )
    (dropout): Dropout(p=0.3, inplace=False)
    (embedding): Embedding(70, 256)
    (rnn): LSTM(768, 512, batch_first=True, dropout=0.3)
    (fc): Linear(in_features=1024, out_features=70, bias=True)
  )
)


Training: 100%|██████████| 800/800 [00:49<00:00, 16.17batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 54.40batch/s]


Epoch 1/9, Train Loss: 0.6319, test_accuracy: 18.8034


Training: 100%|██████████| 800/800 [00:49<00:00, 16.25batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 53.98batch/s]


Epoch 2/9, Train Loss: 0.3938, test_accuracy: 25.2015


Training: 100%|██████████| 800/800 [00:49<00:00, 16.23batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 55.34batch/s]


Epoch 3/9, Train Loss: 0.3532, test_accuracy: 28.6935


Training: 100%|██████████| 800/800 [00:49<00:00, 16.24batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 55.29batch/s]


Epoch 4/9, Train Loss: 0.3247, test_accuracy: 28.0586


Training: 100%|██████████| 800/800 [00:49<00:00, 16.26batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 54.19batch/s]


Epoch 5/9, Train Loss: 0.3090, test_accuracy: 29.5971


Training: 100%|██████████| 800/800 [00:49<00:00, 16.17batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 54.46batch/s]


Epoch 6/9, Train Loss: 0.2984, test_accuracy: 30.4029


Training: 100%|██████████| 800/800 [00:49<00:00, 16.25batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 54.86batch/s]


Epoch 7/9, Train Loss: 0.2868, test_accuracy: 31.6728


Training: 100%|██████████| 800/800 [00:49<00:00, 16.28batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 54.03batch/s]


Epoch 8/9, Train Loss: 0.2752, test_accuracy: 32.8694


Training: 100%|██████████| 800/800 [00:49<00:00, 16.25batch/s]
Evaluating: 100%|██████████| 64/64 [00:01<00:00, 54.47batch/s]


Epoch 9/9, Train Loss: 0.2659, test_accuracy: 32.3077


VBox(children=(Label(value='1.012 MB of 1.012 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Epoch,▁▂▃▄▅▅▆▇█
test_accuracy,▁▄▆▆▆▇▇██
train_loss,█▃▃▂▂▂▁▁▁

0,1
Epoch,8.0
test_accuracy,32.30769
train_loss,0.26593


# **Prediction**

In [63]:
def decode_indices(indices, idx2token, target_vocab):
    valid_indices = []
    for idx in indices:
        if idx in idx2token and idx not in (target_vocab['<pad>'], target_vocab['<sos>'], target_vocab['<eos>']):
            valid_indices.append(idx)
#             print(valid_indices)
    decoded_text = ''
    for idx in valid_indices:
        decoded_text += idx2token[idx]
#         print(decoded_text)
    return decoded_text

def decode_indices_target(indices, idx2token, target_vocab):
    valid_indices = []
    for idx in indices:
        if idx in idx2token and idx not in (target_vocab['<pad>'], target_vocab['<sos>'], target_vocab['<eos>']):
            if idx < 10:
                valid_indices.append(idx)
            else:
                valid_indices.append(idx-3)
#             print(valid_indices)
    decoded_text = ''
    for idx in valid_indices:
        decoded_text += idx2token[idx]
#         print(decoded_text)
    return decoded_text

In [69]:
def decode_indices_target1(indices1,indices2, idx2token, target_vocab):
    valid_indices1 = []
    for idx in indices1:
        if idx in idx2token and idx not in (target_vocab['<pad>'], target_vocab['<sos>'], target_vocab['<eos>']):
            valid_indices1.append(idx)
#             print(valid_indices)
    valid_indices2 = []
    for idx in indices2:
        if idx in idx2token and idx not in (target_vocab['<pad>'], target_vocab['<sos>'], target_vocab['<eos>']):
            if idx < 10:
                valid_indices2.append(idx)
            else:
                valid_indices2.append(idx-3)
#             print(valid_indices)
    decoded_text1 = ''
    decoded_text2 = ''
    l1 = len(valid_indices1)
    val_ind2 = valid_indices2[:l1]
    for idx2 in val_ind2:
#         decoded_text1 += idx2token[idx1]
        decoded_text2 += idx2token[idx2]
    
    return decoded_text2

In [70]:
def pred(model, dataloader, device):
    model.eval()
    predictions = []
    actual = []
    with torch.no_grad():
        for latin, devanagari in dataloader:#, desc='Evaluating', unit='batch'):
            latin = latin.to(device)
            devanagari = devanagari.to(device)
            output = model(latin, devanagari,0)
            deb = devanagari.cpu().numpy()
            actual.append(deb)
            output = output.argmax(2)
            latin = latin.cpu().numpy()
            output = output.cpu().numpy()
            predictions.append((latin, output))
    return predictions, actual


# Make sure to define the reverse dictionaries for converting indices back to text
latin_idx2token = {idx: char for char, idx in input_vocab.items()}
bangla_idx2token = {idx: char for char, idx in target_vocab.items()}

In [71]:
test_predictions, actual = pred(trained_model, test_loader_ben, device)
results = []
for (src_indices, output_indices),act_ind in zip(test_predictions,actual):
#     print(src_indices)
#     print('\njkl',output_indices)
    # Since our data loader might have batch size greater than 1, iterate through each example in the batch
    for i in range(src_indices.shape[0]):
        input_text = decode_indices(src_indices[i], latin_idx2token, input_vocab)
        actual_target_text = decode_indices(act_ind[i], bangla_idx2token, target_vocab)
        predicted_text = decode_indices_target1(act_ind[i],output_indices[i], bangla_idx2token, target_vocab)#decode_indices_target(output_indices[i], bangla_idx2token, target_vocab) 
        results.append([input_text, actual_target_text, predicted_text])
        
        print(f'SL. {i} Input Text: {input_text} -> Actual target: {actual_target_text} -> Predicted Text: {predicted_text}')
#     break

SL. 0 Input Text: kaarentabaahee -> Actual target: কারেন্টবাহী -> Predicted Text: কােনতাবহীী়
SL. 1 Input Text: mashterpiece -> Actual target: মাস্টারপিস -> Predicted Text: মযাশটারপিয
SL. 2 Input Text: cheeken -> Actual target: চিকেন -> Predicted Text: চেকেন
SL. 3 Input Text: ekdaala -> Actual target: একডালা -> Predicted Text: একদালা
SL. 4 Input Text: neerbachokra -> Actual target: নির্বাচকরা -> Predicted Text: নিরবাচকরার
SL. 5 Input Text: neture -> Actual target: নেচার -> Predicted Text: নেতুর
SL. 6 Input Text: michilkey -> Actual target: মিছিলকে -> Predicted Text: মিচিলকে
SL. 7 Input Text: chitfund -> Actual target: চিটফান্ড -> Predicted Text: চিটফুনডড
SL. 8 Input Text: panchanan -> Actual target: পঞ্চানন -> Predicted Text: পঞচনননন
SL. 9 Input Text: manna -> Actual target: মন্ন -> Predicted Text: মননা
SL. 10 Input Text: portillo -> Actual target: পর্টিল্লো -> Predicted Text: পরিলোলোোল
SL. 11 Input Text: quess -> Actual target: কুয়েস -> Predicted Text: কুয়েস
SL. 12 Input Text: budh 

In [72]:
# Writing results to CSV
import csv
with open('results.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Input Text', 'Actual Target', 'Predicted Text'])
    writer.writerows(results)

In [73]:
df1 = pd.read_csv('results.csv')
df1

Unnamed: 0,Input Text,Actual Target,Predicted Text
0,kaarentabaahee,কারেন্টবাহী,কােনতাবহীী়
1,mashterpiece,মাস্টারপিস,মযাশটারপিয
2,cheeken,চিকেন,চেকেন
3,ekdaala,একডালা,একদালা
4,neerbachokra,নির্বাচকরা,নিরবাচকরার
...,...,...,...
4090,samanjasyapurno,সামঞ্জস্যপূর্ণ,সমঞজসযপূরণওণণর
4091,fuds,ফুডস,ফুডস
4092,bannar,ব্যানার,বাননারে
4093,songosthao,সংস্থাও,সংসথাও়
