In [1]:
from utils import *
import numpy as np
import pickle
import wandb

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file handle is closed as soon as the object has been read.
    """
    # NOTE: pickle.load can execute arbitrary code while deserialising;
    # only point this at files produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Lookup tables between characters and integer indices for both alphabets.
index_to_english_alphabet=_load_pickle('vocab_tools/index_to_english_alphabet.pickle')
index_to_hindi_alphabet=_load_pickle('vocab_tools/index_to_hindi_alphabet.pickle')

hindi_alphabet_to_index=_load_pickle('vocab_tools/hindi_alphabet_to_index.pickle')
english_alphabet_to_index=_load_pickle('vocab_tools/english_alphabet_to_index.pickle')

In [3]:
# Raw train/validation arrays loaded straight from disk as NumPy arrays.
# NOTE(review): no later cell visible here reads these four names (the
# Dataset class reloads the same files itself) — confirm they are still needed.
X_train=np.load('simple_data/X_train.npy')
X_valid=np.load('simple_data/X_val.npy')

y_train=np.load('simple_data/y_train.npy')
y_valid=np.load('simple_data/y_val.npy')

In [4]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
class Eng_Hind_Dataset(Dataset):
    """Paired input/output sequences loaded from two ``.npy`` files.

    Indexing returns a dict ``{'input': tensor, 'output': tensor}`` holding
    the ``idx``-th entry of each array, moved to the configured device.
    """

    def __init__(self, in_file, out_file, root_dir='simple_data',device='cuda'):
        """
        Args:
            in_file (string): File name of the ``.npy`` array with the inputs.
            out_file (string): File name of the ``.npy`` array with the outputs.
            root_dir (string): Directory containing both files.
            device (string or torch.device): Device every returned sample is
                moved to.

        Raises:
            ValueError: If the two arrays do not have the same length.
        """
        # Keep the requested device on the instance; __getitem__ must not
        # depend on a module-level `device` that may not exist yet.
        self.device = device
        self.input = torch.tensor(np.load(os.path.join(root_dir, in_file)))
        self.output = torch.tensor(np.load(os.path.join(root_dir, out_file)))

        # `assert(cond, msg)` asserts a non-empty tuple and can never fail,
        # so the length check is done explicitly.
        if len(self.input) != len(self.output):
            raise ValueError("Error: I/O Lengths must be same")

    def __len__(self):
        """Return the number of (input, output) pairs."""
        return len(self.input)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``{'input': ..., 'output': ...}`` on ``self.device``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        X = self.input[idx].to(self.device)
        y = self.output[idx].to(self.device)

        return {'input': X, 'output': y}

In [6]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
device

device(type='cuda')

In [8]:
# One dataset per split; file names are resolved inside the dataset's
# default root_dir ('simple_data').
training_data=Eng_Hind_Dataset("X_train.npy","y_train.npy",device=device)
val_data=Eng_Hind_Dataset("X_val.npy","y_val.npy",device=device)
test_data=Eng_Hind_Dataset("X_test.npy","y_test.npy",device=device)

In [9]:
# Shuffled mini-batches of 16 training pairs.
train_dataloader = DataLoader(training_data, batch_size=16,shuffle=True)

In [10]:
# Mini-batches of 16 validation pairs.
# NOTE(review): shuffle=True is not needed for evaluation — confirm it is intentional.
val_dataloader = DataLoader(val_data, batch_size=16,shuffle=True)

In [11]:
import torch.nn as nn

In [12]:
def cell_type(mode:str='rnn'):
    """Map a cell name to the matching ``torch.nn`` recurrent layer class.

    The lookup is case-insensitive. ``'rnn'`` gives ``nn.RNN``, ``'gru'``
    gives ``nn.GRU``; every other value gives ``nn.LSTM``.
    """
    named_cells = {'rnn': nn.RNN, 'gru': nn.GRU}
    # Anything that is not explicitly listed falls back to LSTM.
    return named_cells.get(mode.lower(), nn.LSTM)
        

In [13]:
class Encoder(nn.Module):
    """Embeds a batch of source sequences and runs them through an RNN/GRU/LSTM.

    The final state of the top recurrent layer (both directions concatenated
    when ``is_bi`` is true) is projected to ``dec_hid_size`` and squashed with
    ``tanh`` so it can seed the decoder.
    """

    def __init__(self, input_size, embed_size, enc_hid_size, dec_hid_size, num_layers, cell_mode, dropout, is_bi):
        """
        Args:
            input_size: Number of rows in the embedding table.
            embed_size: Embedding dimension.
            enc_hid_size: Hidden size of the recurrent layer (per direction).
            dec_hid_size: Size the final state is projected to.
            num_layers: Number of stacked recurrent layers.
            cell_mode: ``'rnn'``, ``'gru'`` or ``'lstm'`` (case-insensitive).
            dropout: Dropout passed to the recurrent layer.
            is_bi: Whether the recurrent layer is bidirectional.
        """
        super().__init__()

        # create embedding layer; the index of '.' is used as padding_idx
        self.embedding = nn.Embedding(input_size, embed_size,padding_idx=english_alphabet_to_index['.'])

        # creating LSTM/GRU/RNN cell
        cell=cell_type(cell_mode)
        self.cell=cell(embed_size,enc_hid_size,num_layers,dropout=dropout,bidirectional=is_bi,batch_first=True)

        # Stored lower-cased so every consumer of `cell_mode` (this class's
        # forward and any code comparing the attribute to 'lstm') sees the
        # same normalised value regardless of the caller's casing.
        self.cell_mode=cell_mode.lower()
        self.is_bi=is_bi

        # A bidirectional layer yields two final states that get concatenated.
        self.fc=nn.Linear(enc_hid_size*2 if is_bi else enc_hid_size, dec_hid_size)

    def _bridge(self, state):
        """Project the top layer's final state(s) to the decoder hidden size."""
        if self.is_bi:
            # The last two entries are the top layer's two directions.
            top = torch.cat((state[-2, :, :], state[-1, :, :]), dim=1)
        else:
            top = state[-1, :, :]
        return torch.tanh(self.fc(top))

    def forward(self, input_batch: torch.LongTensor):
        """Encode ``input_batch``.

        Returns:
            ``(outputs, hidden, cell)`` for an LSTM, ``(outputs, hidden)``
            otherwise. ``outputs`` is the recurrent layer's per-step output;
            ``hidden``/``cell`` are the projected final states.
        """
        # The recurrent layer is batch_first, so the batch dimension leads:
        # [batch size, sent len, emb dim]
        embedded = self.embedding(input_batch)

        if self.cell_mode=='lstm':
            outputs, (hidden, cell) = self.cell(embedded)
            # The same projection layer is applied to both LSTM states.
            return outputs, self._bridge(hidden), self._bridge(cell)

        outputs, hidden = self.cell(embedded)
        return outputs, self._bridge(hidden)

        


In [14]:
class Attention(nn.Module):
    """Additive attention over encoder outputs, conditioned on a decoder state."""

    def __init__(self, enc_hid_dim, dec_hid_dim, is_bi):
        """
        Args:
            enc_hid_dim: Encoder hidden size per direction.
            dec_hid_dim: Decoder hidden size.
            is_bi: Whether the encoder outputs carry two directions.
        """
        super().__init__()
        self.enc_hid_dim = enc_hid_dim
        self.dec_hid_dim = dec_hid_dim

        # Encoder features are twice as wide when the encoder is bidirectional.
        enc_feature_dim = enc_hid_dim * 2 if is_bi else enc_hid_dim
        self.fc1 = nn.Linear(enc_feature_dim + dec_hid_dim, dec_hid_dim)
        self.fc2 = nn.Linear(dec_hid_dim, 1, bias=False)

    def forward(self, encoder_outputs, hidden):
        """Return softmax-normalised weights over dimension 1 of ``encoder_outputs``.

        ``hidden`` is copied along a new dimension 1 so that it can be
        concatenated with every encoder position.
        """
        n_positions = encoder_outputs.shape[1]

        # One copy of the decoder state per encoder position.
        tiled_hidden = hidden.unsqueeze(1).repeat(1, n_positions, 1)

        scores = self.fc2(
            torch.tanh(self.fc1(torch.cat((tiled_hidden, encoder_outputs), dim=2)))
        )
        return torch.softmax(scores.squeeze(dim=2), dim=1)

In [15]:
class Decoder(nn.Module):
    """Single-step attention decoder.

    Every call consumes one target token per batch element together with the
    encoder outputs and the previous state, and yields vocabulary scores plus
    the updated state.
    """

    def __init__(self, output_size, embed_size, enc_hid_dim, dec_hid_dim, num_layers, cell_mode,\
                 dropout, attention, is_bi):
        """
        Args:
            output_size: Size of the target vocabulary (embedding rows and
                width of the prediction).
            embed_size: Target embedding dimension.
            enc_hid_dim: Encoder hidden size per direction.
            dec_hid_dim: Decoder hidden size.
            num_layers: Number of stacked recurrent layers.
            cell_mode: ``'rnn'``, ``'gru'`` or ``'lstm'`` (case-insensitive).
            dropout: Dropout passed to the recurrent layer.
            attention: Module called as ``attention(encoder_outputs, hidden)``.
            is_bi: Whether the encoder outputs carry two directions.
        """
        super().__init__()

        self.dropout = dropout
        self.attention = attention
        self.output_size = output_size

        # The index of '.' is used as padding_idx.
        self.embedding = nn.Embedding(output_size, embed_size,padding_idx=hindi_alphabet_to_index['.'])
        recurrent_cls = cell_type(cell_mode)
        self.cell_mode = cell_mode.lower()

        # The context vector is as wide as the encoder output: doubled for a
        # bidirectional encoder.
        context_dim = (enc_hid_dim * 2) if is_bi else enc_hid_dim
        self.cell = recurrent_cls(context_dim + embed_size, dec_hid_dim, num_layers,
                                  dropout=dropout, bidirectional=False, batch_first=False)

        self.out = nn.Linear(dec_hid_dim, output_size)

    def forward(self, trg, encoder_outputs, hidden,cell=None):
        """Run one decoding step.

        Returns:
            ``(prediction, hidden, cell)`` in LSTM mode, ``(prediction, hidden)``
            otherwise.
        """
        # Attention weights -> weighted sum of encoder outputs, permuted so
        # that the step dimension comes first.
        weights = self.attention(encoder_outputs, hidden).unsqueeze(1)
        context = torch.bmm(weights, encoder_outputs).permute(1, 0, 2)

        # token ids -> embedding with a leading step dimension of 1
        # [1, batch size, emb dim]
        embedded = self.embedding(trg.unsqueeze(0))
        step_input = torch.cat((embedded, context), dim=2)

        if self.cell_mode != 'lstm':
            outputs, hidden = self.cell(step_input, hidden.unsqueeze(0))
            return self.out(outputs.squeeze(0)), hidden.squeeze(0)

        # LSTM carries a cell state next to the hidden state.
        outputs, (hidden, cell) = self.cell(step_input, (hidden.unsqueeze(0), cell.unsqueeze(0)))
        return self.out(outputs.squeeze(0)), hidden.squeeze(0), cell.squeeze(0)

In [16]:
ss1=next(iter(train_dataloader))

In [17]:
class Seq2Seq(nn.Module):
    """Ties an encoder and a step-wise decoder together with teacher forcing."""

    def __init__(self, encoder, decoder, device):
        """
        Args:
            encoder: Module exposing ``cell_mode`` and returning
                ``(outputs, hidden, cell)`` in LSTM mode or ``(outputs, hidden)``
                otherwise.
            decoder: Module exposing ``output_size`` and decoding one step
                per call.
            device: Device on which the output buffer is allocated.
        """
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, source_batch, target_batch, teacher_forcing_ratio=0.5):
        """Decode ``target_batch.shape[1]`` steps and return the per-step scores.

        Args:
            source_batch: Batch passed unchanged to the encoder.
            target_batch: 2-D tensor ``[batch size, max len]`` of target ids;
                column 0 is fed as the first decoder input.
            teacher_forcing_ratio: Probability, per step, of feeding the
                ground-truth token instead of the model's own prediction.

        Returns:
            Tensor ``[max len, batch size, target vocab size]``. Row 0 is
            never written and stays all zeros.
        """
        batch_size, max_len = target_batch.shape
        target_vocab_size = self.decoder.output_size

        outputs = torch.zeros(max_len, batch_size, target_vocab_size, device=self.device)

        # Compare case-insensitively: the encoder's own forward lower-cases
        # the mode, so an encoder built with e.g. 'LSTM' returns three values.
        is_lstm = str(self.encoder.cell_mode).lower() == 'lstm'

        # last hidden (& cell) state of the encoder is used as the decoder's
        # initial state
        if is_lstm:
            e_outs, hidden, cell = self.encoder(source_batch)
        else:
            e_outs, hidden = self.encoder(source_batch)
            cell = None

        trg = target_batch[:, 0]
        for i in range(1, max_len):
            if is_lstm:
                prediction, hidden, cell = self.decoder(trg, e_outs, hidden, cell)
            else:
                prediction, hidden = self.decoder(trg, e_outs, hidden)
            outputs[i] = prediction

            # One draw from NumPy's global RNG per step decides whether the
            # next input is the ground truth or the model's own best guess.
            if np.random.random() < teacher_forcing_ratio:
                trg = target_batch[:, i]
            else:
                trg = prediction.argmax(1)

        return outputs


        
        

In [18]:
# Hand-built single-layer, unidirectional LSTM model used by the shape checks
# in the next cells: 30 encoder embedding rows, 68 decoder outputs, and 128
# for every embedding / hidden size.
State_bi=False
encoder = Encoder(30, 128, 128,128, 1, 'lstm', 0.2, State_bi).to(device)

attention = Attention(128, 128, State_bi).to(device)

decoder = Decoder(68, 128, 128, 128, 1,'lstm', 0.2, attention, State_bi).to(device)

In [19]:
o,h,c=encoder(ss1['input'])

In [20]:
ss1['output'].shape

torch.Size([16, 30])

In [21]:
decoder(ss1['output'][:,29],o,h,c)[0].shape

torch.Size([16, 68])

In [22]:
SS=Seq2Seq(encoder,decoder,device)

In [23]:
SS(ss1['input'],ss1['output']).shape

torch.Size([30, 16, 68])

In [24]:
def accuracy_calc(target_seq,seq2,mode='full',device=device):# predicted
    """Count exact-match sequences and matching characters up to the end marker.

    For every row of ``target_seq`` containing the '>' index, the row is
    compared with the same row of ``seq2`` over all positions before the
    first '>'. Rows without a '>' are skipped.

    Args:
        target_seq: 2-D tensor of reference indices.
        seq2: 2-D tensor of predicted indices, row-aligned with ``target_seq``.
        mode: Unused; kept for backward compatibility.
        device: Unused; kept for backward compatibility.

    Returns:
        Tuple of floats ``(fully correct rows, matching characters,
        compared characters)``.
    """
    end_marker = hindi_alphabet_to_index['>']

    correct = 0
    correct_chars = 0
    tot_chars = 0

    # nonzero() lists (row, column) pairs in row-major order, so the first
    # pair seen for a row is that row's first end marker. Using the row index
    # from the pair keeps prediction and reference aligned even when a row
    # has no end marker or has several.
    handled_rows = set()
    for row, end in (target_seq == end_marker).nonzero().tolist():
        if row in handled_rows:
            continue
        handled_rows.add(row)

        predicted = seq2[row][:end]
        reference = target_seq[row][:end]
        matches = int(torch.sum(predicted == reference).item())

        if matches == end:
            correct += 1
        correct_chars += matches
        tot_chars += end

    return float(correct), float(correct_chars), float(tot_chars)
            
            
        
    

In [25]:
accuracy_calc(ss1['output'],ss1['output'])

(16.0, 143.0, 143.0)

In [26]:
def train(seq2seq, iterator, optimizer, criterion):
    """Run one optimisation pass over ``iterator``.

    Args:
        seq2seq: Model called as ``seq2seq(batch['input'], batch['output'])``.
        iterator: Iterable of batches shaped like ``{'input': ..., 'output': ...}``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss applied to flattened scores and flattened targets.

    Returns:
        ``(mean loss per batch, exact-match accuracy per sample,
        per-character accuracy)``. All three are ``0.0`` when there is
        nothing to average.
    """
    seq2seq.train()

    epoch_loss = 0
    correct = 0
    correct_char = 0
    tot_char = 0
    n_batches = 0
    n_samples = 0

    for batch in iterator:
        optimizer.zero_grad()
        outputs = seq2seq(batch['input'], batch['output'])
        # Model scores are step-major, so the targets are transposed to match
        # before both are flattened.
        batch_label = batch['output'].transpose(0, 1)

        _, predicted = torch.max(outputs, dim=2)
        outputs_flatten = outputs.view(-1, outputs.shape[-1])
        trg_flatten = batch_label.reshape(-1)

        # NOTE(review): Seq2Seq.forward leaves row 0 of `outputs` as zeros, so
        # step 0 still enters the loss unless its target is the ignored
        # index — confirm this is intended.
        loss = criterion(outputs_flatten, trg_flatten)
        correct_temp, correct_chars_temp, tot_chars_temp = accuracy_calc(
            batch['output'], predicted.transpose(0, 1))

        correct += correct_temp
        correct_char += correct_chars_temp
        tot_char += tot_chars_temp

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        n_batches += 1
        # Count real samples instead of assuming every batch holds 16, so a
        # different batch size or a short final batch does not skew accuracy.
        n_samples += len(batch['output'])

    mean_loss = epoch_loss / n_batches if n_batches else 0.0
    word_acc = correct / n_samples if n_samples else 0.0
    char_acc = correct_char / tot_char if tot_char else 0.0
    return mean_loss, word_acc, char_acc

In [27]:
def evaluate(seq2seq, iterator, criterion):
    """Score ``seq2seq`` on ``iterator`` without updating any weights.

    The model is put in eval mode and called with ``teacher_forcing_ratio=0``,
    so the targets are never chosen as the next decoder input.

    Args:
        seq2seq: Model called as
            ``seq2seq(batch['input'], batch['output'], teacher_forcing_ratio=0)``.
        iterator: Iterable of batches shaped like ``{'input': ..., 'output': ...}``.
        criterion: Loss applied to flattened scores and flattened targets.

    Returns:
        ``(mean loss per batch, exact-match accuracy per sample,
        per-character accuracy)``. All three are ``0.0`` when there is
        nothing to average.
    """
    seq2seq.eval()

    epoch_loss = 0
    correct = 0
    correct_char = 0
    tot_char = 0
    n_batches = 0
    n_samples = 0

    with torch.no_grad():
        for batch in iterator:
            outputs = seq2seq(batch['input'], batch['output'], teacher_forcing_ratio=0)
            # Model scores are step-major, so the targets are transposed to
            # match before both are flattened.
            batch_label = batch['output'].transpose(0, 1)

            _, predicted = torch.max(outputs, dim=2)

            outputs_flatten = outputs.view(-1, outputs.shape[-1])
            trg_flatten = batch_label.reshape(-1)

            loss = criterion(outputs_flatten, trg_flatten)

            correct_temp, correct_chars_temp, tot_chars_temp = accuracy_calc(
                batch['output'], predicted.transpose(0, 1))

            correct += correct_temp
            correct_char += correct_chars_temp
            tot_char += tot_chars_temp

            epoch_loss += loss.item()
            n_batches += 1
            # Count real samples instead of assuming every batch holds 16.
            n_samples += len(batch['output'])

    mean_loss = epoch_loss / n_batches if n_batches else 0.0
    word_acc = correct / n_samples if n_samples else 0.0
    char_acc = correct_char / tot_char if tot_char else 0.0
    return mean_loss, word_acc, char_acc



In [28]:
def epoch_time(start_time, end_time):
    """Split the elapsed time between two timestamps into minutes and seconds.

    Returns a ``(minutes, seconds)`` tuple: the whole number of 60-second
    units in ``end_time - start_time`` and the remainder.
    """
    whole_minutes, leftover_seconds = divmod(end_time - start_time, 60)
    return whole_minutes, leftover_seconds

In [29]:
def count_params(model):
    """Return the total number of elements in ``model``'s trainable parameters.

    Parameters whose ``requires_grad`` is false are left out of the count.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [30]:
import time
import random

In [31]:
import torch.optim as optim
def make_model(train_iterator,valid_iterator, enc_embed_size, enc_hid_size, dec_embed_size, dec_hid_size,\
               num_layers, cell_mode,\
                 dropout, is_bi, epochs=20):
    """Build an attention seq2seq model, train it, and log every epoch to wandb.

    Args:
        train_iterator: Batches passed to ``train`` each epoch.
        valid_iterator: Batches passed to ``evaluate`` each epoch.
        enc_embed_size: Encoder embedding dimension.
        enc_hid_size: Encoder hidden size.
        dec_embed_size: Decoder embedding dimension.
        dec_hid_size: Decoder hidden size.
        num_layers: Number of encoder layers (the decoder always gets 1).
        cell_mode: Cell name forwarded to both encoder and decoder.
        dropout: Dropout forwarded to both encoder and decoder.
        is_bi: Whether the encoder is bidirectional.
        epochs: Number of training epochs.

    Returns:
        The model as it is after the last epoch. The weights with the lowest
        validation loss are only written to ``'model1.pt'``; they are not
        reloaded before returning.
    """
    
    
    # 30 is the encoder's embedding-table size (hard-coded).
    E = Encoder(30,  embed_size=enc_embed_size, enc_hid_size=enc_hid_size, dec_hid_size=dec_hid_size,\
                num_layers=num_layers, cell_mode=cell_mode, dropout=dropout, is_bi=is_bi).to(device)
    
    
    A = Attention(enc_hid_dim=enc_hid_size, dec_hid_dim=dec_hid_size, is_bi=is_bi).to(device)

    # 68 is the decoder's output size (hard-coded). The decoder is fixed at
    # one layer regardless of `num_layers`.
    D = Decoder(68, embed_size=dec_embed_size, enc_hid_dim=enc_hid_size, dec_hid_dim=dec_hid_size,\
                num_layers=1, cell_mode=cell_mode,\
                 dropout=dropout, attention=A, is_bi=is_bi).to(device)

    S=Seq2Seq(E,D,device)
    S.to(device)    
    print(f'The model has {count_params(S):,} trainable parameters')
    
    # Adam with its default settings; targets equal to the index of '.' are
    # excluded from the loss.
    optimizer = optim.Adam(S.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=hindi_alphabet_to_index['.'])
    criterion=criterion.to(device)
    
    best_valid_loss = float('inf')
#     return S

    for epoch in range(epochs):    
        start_time = time.time()
        # Each call returns (mean loss, exact-match accuracy, per-character accuracy).
        train_loss,train_acc,train_stuff = train(S, train_iterator, optimizer, criterion)
        valid_loss,valid_acc,val_stuff = evaluate(S, valid_iterator, criterion)
        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        # Checkpoint whenever validation loss improves; every run writes to
        # the same file name.
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(S.state_dict(), 'model1.pt')

        # Report loss, exact-match accuracy ("Acc") and per-character
        # accuracy ("Relaxed ... Acc") for both splits.
        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs:.2f}s')
        print(f'\t Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
        print(f'\t Relaxed Train. Acc: {train_stuff*100:.2f}% | Relaxed Val. Acc: {val_stuff*100:.2f}%')
        
        # Logged epoch is 0-based, unlike the 1-based number printed above.
        wandb.log({'epoch':epoch, 'train loss':train_loss, 'train acc':train_acc, 'valid loss': valid_loss,
          'valid acc': valid_acc, 'relxd train acc': train_stuff, 'relxd valid acc': val_stuff})
        
    return S

    

In [32]:
def sweeper():
    """Run one wandb sweep trial.

    Starts (or resumes) a wandb run, trains a model with the hyper-parameters
    found in ``wandb.config`` on the notebook's train/validation loaders, then
    renames the run after those hyper-parameters.
    """
    config_defaults=None

    # Initialize new wandb run
    wandb.init(config=config_defaults, resume=True)
    # current config
    config = wandb.config

    # Seeds NumPy's global RNG only, which is what the teacher-forcing draw
    # in Seq2Seq.forward uses.
    np.random.seed(0) #setting a seed to make better inference of use of params

    make_model(train_dataloader, val_dataloader, config.enc_embed_size, config.enc_hid_size,
               config.dec_embed_size, config.dec_hid_size, config.num_layers, config.cell_mode,
               config.dropout, config.is_bi, config.epochs)

    # Built inside parentheses so that every term is part of the expression;
    # a missing line continuation previously cut the name off after cell_mode.
    run_name = (
        'Run:'
        + ' enc_embed size: ' + str(config.enc_embed_size)
        + ' dec_embed size: ' + str(config.dec_embed_size)
        + ', enc_hid_size: ' + str(config.enc_hid_size)
        + ', dec_hid_size: ' + str(config.dec_hid_size)
        + ', num_layers: ' + str(config.num_layers)
        + ', cell_mode: ' + str(config.cell_mode)
        + ', dropout:' + str(config.dropout)
        + ', bidirect?:' + str(config.is_bi)
        + ', epochs:' + str(config.epochs)
        + ' Attn.'
    )
    print(run_name)
    wandb.run.name = run_name
    wandb.run.save()
#     run.finalize()

#     wandb.run.finish()
#     run.finish()
    

    
    

In [33]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mpragalbh[0m ([33mpragalbh-tushar[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [34]:
# Bayesian hyper-parameter search that maximises the logged "valid acc".
sweep_config = {
    "name": "Attn Sweep: Recurrence+Attention",
    "metric": {"name": "valid acc", "goal": "maximize"},
    "method": "bayes",
    "parameters": {
        # recurrent stack
        "num_layers": {"values": [1, 2, 3]},
        # embedding widths
        "enc_embed_size": {"values": [16, 32, 64, 128, 256, 512]},
        "dec_embed_size": {"values": [16, 32, 64, 128, 256, 512]},
        # hidden widths
        "enc_hid_size": {"values": [16, 32, 64, 128, 256, 512]},
        "dec_hid_size": {"values": [16, 32, 64, 128, 256, 512]},
        # architecture switches
        "cell_mode": {"values": ["rnn", "gru", "lstm"]},
        "dropout": {"values": [0.2, 0.25, 0.3]},
        "is_bi": {"values": [True, False]},
        # training length
        "epochs": {"values": [10, 20, 30, 40]},
    },
}



In [35]:
# sweep_id = wandb.sweep(sweep_config,  entity="pragalbh", project="DL_Assign3")

In [36]:
# run = wandb.init(entity="pragalbh", project="DL_Assign3",id='gyqh6j5c', resume='must')

In [None]:
# Attach an agent to the existing sweep 'mzhrnfy5' and let it call `sweeper`
# for up to 150 trials.
wandb.agent('mzhrnfy5',project="DL_Assign3",entity="pragalbh", function=sweeper, count=150)

[34m[1mwandb[0m: Agent Starting Run: su09hbha with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: Currently logged in as: [33mpragalbh[0m. Use [1m`wandb login --relogin`[0m to force relogin


The model has 6,643,844 trainable parameters
Epoch: 01 | Time: 1.0m 8.77s
	 Train Loss: 1.641 | Train Acc: 13.85%
	 Val. Loss: 1.479 |  Val. Acc: 27.32%
	 Relaxed Train. Acc: 62.57% | Relaxed Val. Acc: 69.56%
Epoch: 02 | Time: 1.0m 9.66s
	 Train Loss: 1.079 | Train Acc: 27.45%
	 Val. Loss: 1.401 |  Val. Acc: 32.15%
	 Relaxed Train. Acc: 78.57% | Relaxed Val. Acc: 72.21%
Epoch: 03 | Time: 1.0m 9.66s
	 Train Loss: 0.984 | Train Acc: 32.61%
	 Val. Loss: 1.347 |  Val. Acc: 35.79%
	 Relaxed Train. Acc: 81.62% | Relaxed Val. Acc: 74.17%
Epoch: 04 | Time: 1.0m 9.62s
	 Train Loss: 0.935 | Train Acc: 35.86%
	 Val. Loss: 1.352 |  Val. Acc: 37.96%
	 Relaxed Train. Acc: 83.22% | Relaxed Val. Acc: 75.21%
Epoch: 05 | Time: 1.0m 9.57s
	 Train Loss: 0.897 | Train Acc: 38.83%
	 Val. Loss: 1.344 |  Val. Acc: 38.31%
	 Relaxed Train. Acc: 84.52% | Relaxed Val. Acc: 75.72%
Epoch: 06 | Time: 1.0m 9.52s
	 Train Loss: 0.870 | Train Acc: 41.24%
	 Val. Loss: 1.331 |  Val. Acc: 38.82%
	 Relaxed Train. Acc: 85.44



Epoch: 10 | Time: 1.0m 9.42s
	 Train Loss: 0.782 | Train Acc: 48.56%
	 Val. Loss: 1.354 |  Val. Acc: 39.84%
	 Relaxed Train. Acc: 88.26% | Relaxed Val. Acc: 76.63%
Run: enc_embed size: 256 dec_embed size: 16, enc_hid_size: 512, dec_hid_size: 512, num_layers: 2, cell_modelstm


0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▇▇▇▇███
relxd valid acc,▁▃▅▆▇▇▇███
train acc,▁▄▅▅▆▇▇▇██
train loss,█▃▃▂▂▂▁▁▁▁
valid acc,▁▄▆▇▇▇████
valid loss,█▄▂▂▂▁▁▂▂▂

0,1
epoch,9.0
relxd train acc,0.88262
relxd valid acc,0.76633
train acc,0.48559
train loss,0.78232
valid acc,0.39844
valid loss,1.3538


[34m[1mwandb[0m: Agent Starting Run: gq8vgfla with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 128
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 1,556,868 trainable parameters
Epoch: 01 | Time: 0.0m 47.28s
	 Train Loss: 1.531 | Train Acc: 14.84%
	 Val. Loss: 1.481 |  Val. Acc: 31.40%
	 Relaxed Train. Acc: 65.53% | Relaxed Val. Acc: 70.18%
Epoch: 02 | Time: 0.0m 47.15s
	 Train Loss: 1.088 | Train Acc: 27.94%
	 Val. Loss: 1.384 |  Val. Acc: 34.16%
	 Relaxed Train. Acc: 78.49% | Relaxed Val. Acc: 73.46%
Epoch: 03 | Time: 0.0m 47.14s
	 Train Loss: 0.996 | Train Acc: 33.59%
	 Val. Loss: 1.350 |  Val. Acc: 38.28%
	 Relaxed Train. Acc: 81.59% | Relaxed Val. Acc: 74.69%
Epoch: 04 | Time: 0.0m 47.10s
	 Train Loss: 0.948 | Train Acc: 36.51%
	 Val. Loss: 1.368 |  Val. Acc: 37.08%
	 Relaxed Train. Acc: 83.10% | Relaxed Val. Acc: 74.51%
Epoch: 05 | Time: 0.0m 47.07s
	 Train Loss: 0.913 | Train Acc: 39.24%
	 Val. Loss: 1.356 |  Val. Acc: 39.92%
	 Relaxed Train. Acc: 84.28% | Relaxed Val. Acc: 75.74%
Epoch: 06 | Time: 0.0m 47.04s
	 Train Loss: 0.886 | Train Acc: 41.38%
	 Val. Loss: 1.320 |  Val. Acc: 40.11%
	 Relaxed Train. Acc:

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
relxd train acc,▁▅▆▆▆▇▇▇▇▇▇▇████████
relxd valid acc,▁▄▅▅▆▇▇▆▇▇█▇█▇█████▇
train acc,▁▃▄▅▅▅▆▆▆▆▇▇▇▇▇█████
train loss,█▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
valid acc,▁▃▅▄▆▆▇▇▇▇▇█▇▇███▇█▇
valid loss,█▄▂▃▃▁▂▃▂▃▂▃▃▄▄▄▄▄▅▇

0,1
epoch,19.0
relxd train acc,0.90506
relxd valid acc,0.76481
train acc,0.5699
train loss,0.72118
valid acc,0.41064
valid loss,1.45204


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: siif3wik with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 128
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 3


The model has 6,015,748 trainable parameters
Epoch: 01 | Time: 0.0m 53.77s
	 Train Loss: 2.521 | Train Acc: 2.88%
	 Val. Loss: 1.669 |  Val. Acc: 21.36%
	 Relaxed Train. Acc: 39.15% | Relaxed Val. Acc: 64.79%
Epoch: 02 | Time: 0.0m 53.65s
	 Train Loss: 1.250 | Train Acc: 21.25%
	 Val. Loss: 1.435 |  Val. Acc: 30.74%
	 Relaxed Train. Acc: 73.95% | Relaxed Val. Acc: 71.76%
Epoch: 03 | Time: 0.0m 53.54s
	 Train Loss: 1.076 | Train Acc: 28.53%
	 Val. Loss: 1.393 |  Val. Acc: 33.86%
	 Relaxed Train. Acc: 79.04% | Relaxed Val. Acc: 72.81%
Epoch: 04 | Time: 0.0m 53.53s
	 Train Loss: 0.996 | Train Acc: 32.79%
	 Val. Loss: 1.382 |  Val. Acc: 35.57%
	 Relaxed Train. Acc: 81.57% | Relaxed Val. Acc: 73.85%
Epoch: 05 | Time: 0.0m 53.51s
	 Train Loss: 0.954 | Train Acc: 35.80%
	 Val. Loss: 1.378 |  Val. Acc: 38.21%
	 Relaxed Train. Acc: 82.92% | Relaxed Val. Acc: 74.47%
Epoch: 06 | Time: 0.0m 53.42s
	 Train Loss: 0.918 | Train Acc: 38.38%
	 Val. Loss: 1.327 |  Val. Acc: 39.75%
	 Relaxed Train. Acc: 

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▆▇▇▇█████
relxd valid acc,▁▅▆▆▇█▇███
train acc,▁▄▅▆▆▇▇▇██
train loss,█▃▂▂▂▁▁▁▁▁
valid acc,▁▄▅▆▇▇▇█▇█
valid loss,█▃▂▂▂▁▂▂▁▂

0,1
epoch,9.0
relxd train acc,0.86986
relxd valid acc,0.76798
train acc,0.45553
train loss,0.82837
valid acc,0.41162
valid loss,1.35739


[34m[1mwandb[0m: Agent Starting Run: 7ekezsj7 with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 64
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 4,651,844 trainable parameters
Epoch: 01 | Time: 1.0m 4.24s
	 Train Loss: 1.335 | Train Acc: 18.62%
	 Val. Loss: 1.472 |  Val. Acc: 31.45%
	 Relaxed Train. Acc: 70.97% | Relaxed Val. Acc: 71.61%
Epoch: 02 | Time: 1.0m 4.27s
	 Train Loss: 1.047 | Train Acc: 29.87%
	 Val. Loss: 1.407 |  Val. Acc: 34.52%
	 Relaxed Train. Acc: 79.91% | Relaxed Val. Acc: 72.69%
Epoch: 03 | Time: 1.0m 4.23s
	 Train Loss: 0.977 | Train Acc: 33.76%
	 Val. Loss: 1.368 |  Val. Acc: 36.77%
	 Relaxed Train. Acc: 82.14% | Relaxed Val. Acc: 74.48%
Epoch: 04 | Time: 1.0m 4.19s
	 Train Loss: 0.936 | Train Acc: 36.79%
	 Val. Loss: 1.377 |  Val. Acc: 36.25%
	 Relaxed Train. Acc: 83.48% | Relaxed Val. Acc: 74.27%
Epoch: 05 | Time: 1.0m 4.15s
	 Train Loss: 0.910 | Train Acc: 39.25%
	 Val. Loss: 1.384 |  Val. Acc: 39.31%
	 Relaxed Train. Acc: 84.42% | Relaxed Val. Acc: 75.55%
Epoch: 06 | Time: 1.0m 4.09s
	 Train Loss: 0.893 | Train Acc: 40.75%
	 Val. Loss: 1.362 |  Val. Acc: 39.50%
	 Relaxed Train. Acc: 84.92

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇███
relxd valid acc,▁▃▆▅▇▇▇▆█▇
train acc,▁▄▅▅▆▆▇▇██
train loss,█▄▃▃▂▂▂▁▁▁
valid acc,▁▄▆▅██▇▇█▇
valid loss,█▄▁▂▂▁▃▆▃▅

0,1
epoch,9.0
relxd train acc,0.87475
relxd valid acc,0.752
train acc,0.47201
train loss,0.81583
valid acc,0.38013
valid loss,1.41796


[34m[1mwandb[0m: Agent Starting Run: cbh6wlim with config:
[34m[1mwandb[0m: 	cell_mode: gru
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 128
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 2


The model has 2,976,260 trainable parameters
Epoch: 01 | Time: 0.0m 47.85s
	 Train Loss: 1.577 | Train Acc: 13.39%
	 Val. Loss: 1.539 |  Val. Acc: 26.05%
	 Relaxed Train. Acc: 63.79% | Relaxed Val. Acc: 67.34%
Epoch: 02 | Time: 0.0m 47.74s
	 Train Loss: 1.185 | Train Acc: 23.26%
	 Val. Loss: 1.452 |  Val. Acc: 30.15%
	 Relaxed Train. Acc: 75.31% | Relaxed Val. Acc: 70.98%
Epoch: 03 | Time: 0.0m 47.75s
	 Train Loss: 1.105 | Train Acc: 26.38%
	 Val. Loss: 1.459 |  Val. Acc: 30.57%
	 Relaxed Train. Acc: 77.88% | Relaxed Val. Acc: 71.45%
Epoch: 04 | Time: 0.0m 47.70s
	 Train Loss: 1.073 | Train Acc: 27.78%
	 Val. Loss: 1.432 |  Val. Acc: 31.25%
	 Relaxed Train. Acc: 78.97% | Relaxed Val. Acc: 71.41%
Epoch: 05 | Time: 0.0m 47.70s
	 Train Loss: 1.059 | Train Acc: 28.91%
	 Val. Loss: 1.454 |  Val. Acc: 31.93%
	 Relaxed Train. Acc: 79.39% | Relaxed Val. Acc: 71.82%
Epoch: 06 | Time: 0.0m 48.18s
	 Train Loss: 1.041 | Train Acc: 29.73%
	 Val. Loss: 1.408 |  Val. Acc: 32.81%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▆▇▇▇█████
relxd valid acc,▁▆▆▆▇█▇▇█▇
train acc,▁▅▆▇▇▇████
train loss,█▃▂▂▂▁▁▁▁▁
valid acc,▁▅▅▆▆▇▇██▇
valid loss,█▃▄▂▃▁▃▃▂▄

0,1
epoch,9.0
relxd train acc,0.8086
relxd valid acc,0.7198
train acc,0.31125
train loss,1.01486
valid acc,0.32446
valid loss,1.45537


[34m[1mwandb[0m: Agent Starting Run: 8zrosdbb with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 64
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 3


The model has 7,523,396 trainable parameters
Epoch: 01 | Time: 0.0m 56.05s
	 Train Loss: 1.825 | Train Acc: 12.04%
	 Val. Loss: 1.496 |  Val. Acc: 29.47%
	 Relaxed Train. Acc: 58.24% | Relaxed Val. Acc: 69.31%
Epoch: 02 | Time: 0.0m 55.87s
	 Train Loss: 1.100 | Train Acc: 27.01%
	 Val. Loss: 1.376 |  Val. Acc: 34.20%
	 Relaxed Train. Acc: 78.28% | Relaxed Val. Acc: 73.44%
Epoch: 03 | Time: 0.0m 55.82s
	 Train Loss: 1.000 | Train Acc: 32.15%
	 Val. Loss: 1.369 |  Val. Acc: 36.08%
	 Relaxed Train. Acc: 81.34% | Relaxed Val. Acc: 73.82%
Epoch: 04 | Time: 0.0m 55.80s
	 Train Loss: 0.949 | Train Acc: 35.39%
	 Val. Loss: 1.357 |  Val. Acc: 37.62%
	 Relaxed Train. Acc: 82.96% | Relaxed Val. Acc: 75.01%
Epoch: 05 | Time: 0.0m 55.74s
	 Train Loss: 0.920 | Train Acc: 37.56%
	 Val. Loss: 1.339 |  Val. Acc: 39.55%
	 Relaxed Train. Acc: 83.91% | Relaxed Val. Acc: 75.85%
Epoch: 06 | Time: 0.0m 55.68s
	 Train Loss: 0.892 | Train Acc: 39.70%
	 Val. Loss: 1.355 |  Val. Acc: 40.14%
	 Relaxed Train. Acc:

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
relxd train acc,▁▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇██████████████
relxd valid acc,▁▄▅▆▇▆▇▇█▇██▇██████████▇█▇███▇
train acc,▁▃▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇███████
train loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▄▅▅▆▇▇▇█▇█▇▇█▇██▇██▇█▇▇█▇█▇▇▇
valid loss,█▃▂▂▁▂▁▂▁▂▁▂▂▂▂▃▃▃▄▄▅▅▆▆▆█▆▇▇█

0,1
epoch,29.0
relxd train acc,0.92214
relxd valid acc,0.76018
train acc,0.62668
train loss,0.6686
valid acc,0.40527
valid loss,1.50831


[34m[1mwandb[0m: Agent Starting Run: 5gjt3915 with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 128
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 256
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 3


The model has 4,265,028 trainable parameters
Epoch: 01 | Time: 1.0m 1.49s
	 Train Loss: 2.013 | Train Acc: 7.74%
	 Val. Loss: 1.572 |  Val. Acc: 26.22%
	 Relaxed Train. Acc: 52.76% | Relaxed Val. Acc: 68.11%
Epoch: 02 | Time: 1.0m 1.46s
	 Train Loss: 1.125 | Train Acc: 25.16%
	 Val. Loss: 1.464 |  Val. Acc: 31.79%
	 Relaxed Train. Acc: 77.51% | Relaxed Val. Acc: 71.72%
Epoch: 03 | Time: 1.0m 1.42s
	 Train Loss: 1.017 | Train Acc: 30.57%
	 Val. Loss: 1.413 |  Val. Acc: 33.86%
	 Relaxed Train. Acc: 80.79% | Relaxed Val. Acc: 72.47%
Epoch: 04 | Time: 1.0m 1.45s
	 Train Loss: 0.964 | Train Acc: 34.02%
	 Val. Loss: 1.398 |  Val. Acc: 35.72%
	 Relaxed Train. Acc: 82.55% | Relaxed Val. Acc: 73.91%
Epoch: 05 | Time: 1.0m 1.32s
	 Train Loss: 0.930 | Train Acc: 36.60%
	 Val. Loss: 1.406 |  Val. Acc: 36.13%
	 Relaxed Train. Acc: 83.62% | Relaxed Val. Acc: 74.15%
Epoch: 06 | Time: 1.0m 1.30s
	 Train Loss: 0.901 | Train Acc: 38.70%
	 Val. Loss: 1.371 |  Val. Acc: 38.13%
	 Relaxed Train. Acc: 84.52%

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
relxd train acc,▁▆▆▆▇▇▇▇▇▇▇█████████
relxd valid acc,▁▄▅▆▆▇▇▇████▇███████
train acc,▁▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇████
train loss,█▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
valid acc,▁▄▅▆▆▇▆▇███▇▇▇▇▇▇▇▇▇
valid loss,█▄▃▂▂▁▁▂▁▂▂▂▃▄▂▃▄▄▅▅

0,1
epoch,19.0
relxd train acc,0.90831
relxd valid acc,0.76206
train acc,0.58018
train loss,0.70917
valid acc,0.3916
valid loss,1.48499


[34m[1mwandb[0m: Agent Starting Run: bc28yqir with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 3


The model has 18,755,460 trainable parameters
Epoch: 01 | Time: 1.0m 22.55s
	 Train Loss: 2.376 | Train Acc: 5.32%
	 Val. Loss: 1.519 |  Val. Acc: 24.58%
	 Relaxed Train. Acc: 42.63% | Relaxed Val. Acc: 68.38%
Epoch: 02 | Time: 1.0m 23.02s
	 Train Loss: 1.163 | Train Acc: 24.73%
	 Val. Loss: 1.399 |  Val. Acc: 32.59%
	 Relaxed Train. Acc: 76.11% | Relaxed Val. Acc: 72.33%
Epoch: 03 | Time: 1.0m 22.43s
	 Train Loss: 1.027 | Train Acc: 31.67%
	 Val. Loss: 1.333 |  Val. Acc: 36.38%
	 Relaxed Train. Acc: 80.52% | Relaxed Val. Acc: 74.28%
Epoch: 04 | Time: 1.0m 22.55s
	 Train Loss: 0.964 | Train Acc: 35.64%
	 Val. Loss: 1.335 |  Val. Acc: 38.13%
	 Relaxed Train. Acc: 82.58% | Relaxed Val. Acc: 75.12%
Epoch: 05 | Time: 1.0m 22.49s
	 Train Loss: 0.928 | Train Acc: 38.04%
	 Val. Loss: 1.326 |  Val. Acc: 39.84%
	 Relaxed Train. Acc: 83.72% | Relaxed Val. Acc: 75.70%
Epoch: 06 | Time: 1.0m 22.82s
	 Train Loss: 0.893 | Train Acc: 40.50%
	 Val. Loss: 1.345 |  Val. Acc: 39.55%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▆▇▇▇█████
relxd valid acc,▁▄▆▆▇▆▇▇██
train acc,▁▄▅▆▆▇▇▇██
train loss,█▃▂▂▂▁▁▁▁▁
valid acc,▁▄▅▆▇▇▇▇██
valid loss,█▄▁▁▁▂▁▂▂▂

0,1
epoch,9.0
relxd train acc,0.87668
relxd valid acc,0.77387
train acc,0.47893
train loss,0.81022
valid acc,0.43433
valid loss,1.33823


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: n5j2csre with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 256
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 3


The model has 5,898,116 trainable parameters
Epoch: 01 | Time: 0.0m 55.57s
	 Train Loss: 2.327 | Train Acc: 5.28%
	 Val. Loss: 1.539 |  Val. Acc: 24.61%
	 Relaxed Train. Acc: 44.06% | Relaxed Val. Acc: 68.69%
Epoch: 02 | Time: 0.0m 55.32s
	 Train Loss: 1.180 | Train Acc: 23.72%
	 Val. Loss: 1.414 |  Val. Acc: 32.10%
	 Relaxed Train. Acc: 75.74% | Relaxed Val. Acc: 71.68%
Epoch: 03 | Time: 0.0m 55.23s
	 Train Loss: 1.040 | Train Acc: 30.41%
	 Val. Loss: 1.364 |  Val. Acc: 36.77%
	 Relaxed Train. Acc: 80.05% | Relaxed Val. Acc: 74.03%
Epoch: 04 | Time: 0.0m 55.23s
	 Train Loss: 0.974 | Train Acc: 34.72%
	 Val. Loss: 1.357 |  Val. Acc: 38.82%
	 Relaxed Train. Acc: 82.22% | Relaxed Val. Acc: 74.88%
Epoch: 05 | Time: 0.0m 55.23s
	 Train Loss: 0.932 | Train Acc: 37.83%
	 Val. Loss: 1.340 |  Val. Acc: 39.97%
	 Relaxed Train. Acc: 83.62% | Relaxed Val. Acc: 75.89%
Epoch: 06 | Time: 0.0m 55.36s
	 Train Loss: 0.898 | Train Acc: 40.21%
	 Val. Loss: 1.348 |  Val. Acc: 40.38%
	 Relaxed Train. Acc: 

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
relxd train acc,▁▆▆▇▇▇▇▇▇▇██████████
relxd valid acc,▁▃▅▆▇▆▇▇█▇█████▇███▇
train acc,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇█████
train loss,█▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▄▅▆▇▇▇▇▇▇█████▇█▇█▇
valid loss,█▄▃▂▂▂▂▃▁▂▂▂▃▃▂▄▄▄▄▄

0,1
epoch,19.0
relxd train acc,0.91115
relxd valid acc,0.76782
train acc,0.58883
train loss,0.70387
valid acc,0.41943
valid loss,1.42534


[34m[1mwandb[0m: Agent Starting Run: y0xspyza with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 3,239,556 trainable parameters
Epoch: 01 | Time: 0.0m 56.90s
	 Train Loss: 1.427 | Train Acc: 16.61%
	 Val. Loss: 1.425 |  Val. Acc: 32.71%
	 Relaxed Train. Acc: 68.27% | Relaxed Val. Acc: 71.73%
Epoch: 02 | Time: 0.0m 56.98s
	 Train Loss: 1.058 | Train Acc: 29.32%
	 Val. Loss: 1.358 |  Val. Acc: 36.11%
	 Relaxed Train. Acc: 79.40% | Relaxed Val. Acc: 75.11%
Epoch: 03 | Time: 0.0m 56.86s
	 Train Loss: 0.968 | Train Acc: 34.85%
	 Val. Loss: 1.370 |  Val. Acc: 36.84%
	 Relaxed Train. Acc: 82.36% | Relaxed Val. Acc: 74.41%
Epoch: 04 | Time: 0.0m 56.87s
	 Train Loss: 0.919 | Train Acc: 38.55%
	 Val. Loss: 1.331 |  Val. Acc: 39.26%
	 Relaxed Train. Acc: 84.04% | Relaxed Val. Acc: 75.96%
Epoch: 05 | Time: 0.0m 56.81s
	 Train Loss: 0.885 | Train Acc: 41.35%
	 Val. Loss: 1.348 |  Val. Acc: 40.45%
	 Relaxed Train. Acc: 85.18% | Relaxed Val. Acc: 76.15%
Epoch: 06 | Time: 0.0m 56.81s
	 Train Loss: 0.854 | Train Acc: 43.90%
	 Val. Loss: 1.332 |  Val. Acc: 41.09%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇███
relxd valid acc,▁▅▄▆▇▇████
train acc,▁▃▅▅▆▆▇▇██
train loss,█▄▃▃▂▂▂▁▁▁
valid acc,▁▄▄▆▇▇▇██▇
valid loss,█▃▄▁▂▁▃▄▂▅

0,1
epoch,9.0
relxd train acc,0.89074
relxd valid acc,0.76866
train acc,0.52307
train loss,0.76813
valid acc,0.4082
valid loss,1.38009


[34m[1mwandb[0m: Agent Starting Run: tq548sh9 with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 5,099,908 trainable parameters
Epoch: 01 | Time: 0.0m 58.47s
	 Train Loss: 1.410 | Train Acc: 17.74%
	 Val. Loss: 1.415 |  Val. Acc: 31.45%
	 Relaxed Train. Acc: 68.54% | Relaxed Val. Acc: 71.57%
Epoch: 02 | Time: 0.0m 58.49s
	 Train Loss: 1.061 | Train Acc: 29.70%
	 Val. Loss: 1.380 |  Val. Acc: 35.64%
	 Relaxed Train. Acc: 79.33% | Relaxed Val. Acc: 73.45%
Epoch: 03 | Time: 0.0m 58.43s
	 Train Loss: 0.973 | Train Acc: 35.12%
	 Val. Loss: 1.340 |  Val. Acc: 38.26%
	 Relaxed Train. Acc: 82.34% | Relaxed Val. Acc: 75.61%
Epoch: 04 | Time: 0.0m 58.42s
	 Train Loss: 0.915 | Train Acc: 39.03%
	 Val. Loss: 1.353 |  Val. Acc: 37.99%
	 Relaxed Train. Acc: 84.27% | Relaxed Val. Acc: 75.68%
Epoch: 05 | Time: 0.0m 58.37s
	 Train Loss: 0.887 | Train Acc: 41.77%
	 Val. Loss: 1.341 |  Val. Acc: 39.92%
	 Relaxed Train. Acc: 85.24% | Relaxed Val. Acc: 76.29%
Epoch: 06 | Time: 0.0m 58.32s
	 Train Loss: 0.850 | Train Acc: 44.36%
	 Val. Loss: 1.364 |  Val. Acc: 40.31%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇▇██
relxd valid acc,▁▃▆▆▇▇▇███
train acc,▁▃▄▅▆▆▇▇▇█
train loss,█▄▃▃▂▂▂▂▁▁
valid acc,▁▄▆▆▇▇████
valid loss,█▅▁▂▁▃▁▂▃▃

0,1
epoch,9.0
relxd train acc,0.89702
relxd valid acc,0.76921
train acc,0.53961
train loss,0.75215
valid acc,0.41089
valid loss,1.36538


[34m[1mwandb[0m: Agent Starting Run: of9drytg with config:
[34m[1mwandb[0m: 	cell_mode: gru
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 8,717,252 trainable parameters
Epoch: 01 | Time: 1.0m 3.47s
	 Train Loss: 1.387 | Train Acc: 17.37%
	 Val. Loss: 1.462 |  Val. Acc: 29.81%
	 Relaxed Train. Acc: 68.97% | Relaxed Val. Acc: 70.17%
Epoch: 02 | Time: 1.0m 3.58s
	 Train Loss: 1.097 | Train Acc: 27.03%
	 Val. Loss: 1.445 |  Val. Acc: 31.10%
	 Relaxed Train. Acc: 78.19% | Relaxed Val. Acc: 71.07%
Epoch: 03 | Time: 1.0m 3.55s
	 Train Loss: 1.044 | Train Acc: 29.72%
	 Val. Loss: 1.432 |  Val. Acc: 32.50%
	 Relaxed Train. Acc: 79.83% | Relaxed Val. Acc: 71.95%
Epoch: 04 | Time: 1.0m 3.54s
	 Train Loss: 1.022 | Train Acc: 30.84%
	 Val. Loss: 1.455 |  Val. Acc: 33.79%
	 Relaxed Train. Acc: 80.62% | Relaxed Val. Acc: 71.66%
Epoch: 05 | Time: 1.0m 3.53s
	 Train Loss: 1.029 | Train Acc: 30.71%
	 Val. Loss: 1.436 |  Val. Acc: 33.03%
	 Relaxed Train. Acc: 80.45% | Relaxed Val. Acc: 72.41%
Epoch: 06 | Time: 1.0m 3.52s
	 Train Loss: 1.018 | Train Acc: 31.17%
	 Val. Loss: 1.423 |  Val. Acc: 34.40%
	 Relaxed Train. Acc: 80.82

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▆▇███████
relxd valid acc,▁▃▅▅▇▇█▄▄▆
train acc,▁▆▇███████
train loss,█▃▂▁▁▁▁▁▁▁
valid acc,▁▃▅▇▆█▇▆▃█
valid loss,▆▅▃▅▄▂▁█▅▅

0,1
epoch,9.0
relxd train acc,0.80848
relxd valid acc,0.72259
train acc,0.31348
train loss,1.01467
valid acc,0.34399
valid loss,1.45227


[34m[1mwandb[0m: Agent Starting Run: 41z44yry with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 128
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 3


The model has 6,015,748 trainable parameters
Epoch: 01 | Time: 0.0m 53.92s
	 Train Loss: 2.642 | Train Acc: 2.81%
	 Val. Loss: 1.703 |  Val. Acc: 20.68%
	 Relaxed Train. Acc: 35.84% | Relaxed Val. Acc: 63.49%
Epoch: 02 | Time: 0.0m 53.76s
	 Train Loss: 1.271 | Train Acc: 20.67%
	 Val. Loss: 1.479 |  Val. Acc: 29.42%
	 Relaxed Train. Acc: 73.11% | Relaxed Val. Acc: 69.70%
Epoch: 03 | Time: 0.0m 53.69s
	 Train Loss: 1.099 | Train Acc: 26.86%
	 Val. Loss: 1.395 |  Val. Acc: 31.96%
	 Relaxed Train. Acc: 78.21% | Relaxed Val. Acc: 72.41%
Epoch: 04 | Time: 0.0m 53.62s
	 Train Loss: 1.023 | Train Acc: 30.85%
	 Val. Loss: 1.378 |  Val. Acc: 36.01%
	 Relaxed Train. Acc: 80.54% | Relaxed Val. Acc: 73.45%
Epoch: 05 | Time: 0.0m 53.61s
	 Train Loss: 0.980 | Train Acc: 33.34%
	 Val. Loss: 1.354 |  Val. Acc: 37.50%
	 Relaxed Train. Acc: 81.91% | Relaxed Val. Acc: 75.09%
Epoch: 06 | Time: 0.0m 53.60s
	 Train Loss: 0.945 | Train Acc: 35.42%
	 Val. Loss: 1.327 |  Val. Acc: 37.99%
	 Relaxed Train. Acc: 

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
relxd train acc,▁▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████████
relxd valid acc,▁▄▅▆▇▇▇▇██████████████████████
train acc,▁▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇██████
train loss,█▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▄▅▆▆▇▇▇▇█████████████████████
valid loss,█▄▃▂▂▁▂▂▁▂▂▂▂▂▂▂▂▂▃▃▂▃▃▃▄▄▄▄▅▅

0,1
epoch,29.0
relxd train acc,0.92724
relxd valid acc,0.77135
train acc,0.64371
train loss,0.65417
valid acc,0.41699
valid loss,1.5069


[34m[1mwandb[0m: Agent Starting Run: gmv4eqxa with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 12,455,812 trainable parameters
Epoch: 01 | Time: 1.0m 10.98s
	 Train Loss: 1.733 | Train Acc: 12.86%
	 Val. Loss: 1.457 |  Val. Acc: 28.74%
	 Relaxed Train. Acc: 59.68% | Relaxed Val. Acc: 69.83%
Epoch: 02 | Time: 1.0m 11.13s
	 Train Loss: 1.103 | Train Acc: 27.48%
	 Val. Loss: 1.389 |  Val. Acc: 33.20%
	 Relaxed Train. Acc: 77.88% | Relaxed Val. Acc: 72.79%
Epoch: 03 | Time: 1.0m 11.10s
	 Train Loss: 1.000 | Train Acc: 32.94%
	 Val. Loss: 1.347 |  Val. Acc: 37.26%
	 Relaxed Train. Acc: 81.36% | Relaxed Val. Acc: 74.44%
Epoch: 04 | Time: 1.0m 11.07s
	 Train Loss: 0.946 | Train Acc: 36.65%
	 Val. Loss: 1.364 |  Val. Acc: 36.47%
	 Relaxed Train. Acc: 83.20% | Relaxed Val. Acc: 74.67%
Epoch: 05 | Time: 1.0m 11.01s
	 Train Loss: 0.915 | Train Acc: 39.06%
	 Val. Loss: 1.325 |  Val. Acc: 40.58%
	 Relaxed Train. Acc: 84.17% | Relaxed Val. Acc: 76.04%
Epoch: 06 | Time: 1.0m 10.98s
	 Train Loss: 0.885 | Train Acc: 41.38%
	 Val. Loss: 1.335 |  Val. Acc: 40.45%
	 Relaxed Train. Acc

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
relxd train acc,▁▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇██████████████
relxd valid acc,▁▄▅▅▇▇▇▇█▇▇▇▇▇▇██▇██▇██▇▇▇▇▇▇▇
train acc,▁▃▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇████████
train loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▃▅▅▇▇█▇▇██▇█▇▇████████▇██▇▇▇█
valid loss,▆▄▂▃▁▁▁▃▂▂▃▂▃▄▄▄▄▅▅▄▅▅▅▆▇▆▇▇▇█

0,1
epoch,29.0
relxd train acc,0.92403
relxd valid acc,0.76953
train acc,0.63801
train loss,0.6679
valid acc,0.41479
valid loss,1.49887


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: uodtumxc with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 64
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 3


The model has 7,523,396 trainable parameters
Epoch: 01 | Time: 0.0m 56.08s
	 Train Loss: 1.750 | Train Acc: 13.04%
	 Val. Loss: 1.514 |  Val. Acc: 30.10%
	 Relaxed Train. Acc: 60.29% | Relaxed Val. Acc: 69.36%
Epoch: 02 | Time: 0.0m 55.97s
	 Train Loss: 1.093 | Train Acc: 27.19%
	 Val. Loss: 1.394 |  Val. Acc: 34.28%
	 Relaxed Train. Acc: 78.42% | Relaxed Val. Acc: 73.37%
Epoch: 03 | Time: 0.0m 55.83s
	 Train Loss: 1.002 | Train Acc: 31.68%
	 Val. Loss: 1.368 |  Val. Acc: 34.18%
	 Relaxed Train. Acc: 81.24% | Relaxed Val. Acc: 74.04%
Epoch: 04 | Time: 0.0m 55.89s
	 Train Loss: 0.952 | Train Acc: 34.89%
	 Val. Loss: 1.384 |  Val. Acc: 37.87%
	 Relaxed Train. Acc: 82.83% | Relaxed Val. Acc: 74.42%
Epoch: 05 | Time: 0.0m 55.85s
	 Train Loss: 0.919 | Train Acc: 37.44%
	 Val. Loss: 1.372 |  Val. Acc: 38.11%
	 Relaxed Train. Acc: 83.94% | Relaxed Val. Acc: 74.92%
Epoch: 06 | Time: 0.0m 55.75s
	 Train Loss: 0.895 | Train Acc: 39.09%
	 Val. Loss: 1.353 |  Val. Acc: 39.33%
	 Relaxed Train. Acc:

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
relxd train acc,▁▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████████████
relxd valid acc,▁▄▅▅▆▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇██▇██▇█▇▇
train acc,▁▃▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇███████
train loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▃▃▅▅▆▇▇█▇▇▆▇▇▇▇▇▇█▇██▇█▇▇▇▇▆▇
valid loss,█▃▂▃▂▂▁▁▁▁▂▂▂▂▂▃▃▃▄▅▄▄▄▅▄▅▆▆█▆

0,1
epoch,29.0
relxd train acc,0.92156
relxd valid acc,0.77274
train acc,0.62625
train loss,0.67091
valid acc,0.4082
valid loss,1.45835


[34m[1mwandb[0m: Agent Starting Run: 36dp5iyw with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 64
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 833,924 trainable parameters
Epoch: 01 | Time: 0.0m 47.03s
	 Train Loss: 1.631 | Train Acc: 11.47%
	 Val. Loss: 1.541 |  Val. Acc: 26.49%
	 Relaxed Train. Acc: 62.53% | Relaxed Val. Acc: 67.76%
Epoch: 02 | Time: 0.0m 46.82s
	 Train Loss: 1.152 | Train Acc: 24.22%
	 Val. Loss: 1.416 |  Val. Acc: 32.45%
	 Relaxed Train. Acc: 76.48% | Relaxed Val. Acc: 72.24%
Epoch: 03 | Time: 0.0m 46.79s
	 Train Loss: 1.045 | Train Acc: 29.82%
	 Val. Loss: 1.363 |  Val. Acc: 35.38%
	 Relaxed Train. Acc: 79.98% | Relaxed Val. Acc: 74.00%
Epoch: 04 | Time: 0.0m 46.73s
	 Train Loss: 0.990 | Train Acc: 33.27%
	 Val. Loss: 1.360 |  Val. Acc: 35.57%
	 Relaxed Train. Acc: 81.77% | Relaxed Val. Acc: 73.67%
Epoch: 05 | Time: 0.0m 46.72s
	 Train Loss: 0.955 | Train Acc: 35.88%
	 Val. Loss: 1.361 |  Val. Acc: 38.13%
	 Relaxed Train. Acc: 82.82% | Relaxed Val. Acc: 75.13%
Epoch: 06 | Time: 0.0m 46.66s
	 Train Loss: 0.923 | Train Acc: 38.09%
	 Val. Loss: 1.359 |  Val. Acc: 38.01%
	 Relaxed Train. Acc: 8

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
relxd train acc,▁▅▆▆▆▇▇▇▇▇▇▇▇███████
relxd valid acc,▁▄▆▅▆▇▇▇▇████▇▇█▇█▇█
train acc,▁▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇████
train loss,█▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
valid acc,▁▄▅▅▇▇▇▇▇▇████▇█▇▇▇█
valid loss,█▄▂▂▂▂▂▂▁▁▁▁▂▃▃▃▄▂▄▄

0,1
epoch,19.0
relxd train acc,0.89579
relxd valid acc,0.76795
train acc,0.53799
train loss,0.75194
valid acc,0.40601
valid loss,1.4188


[34m[1mwandb[0m: Agent Starting Run: fxpv9bbx with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 256
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 5,629,124 trainable parameters
Epoch: 01 | Time: 1.0m 6.64s
	 Train Loss: 1.367 | Train Acc: 18.81%
	 Val. Loss: 1.447 |  Val. Acc: 31.05%
	 Relaxed Train. Acc: 70.16% | Relaxed Val. Acc: 71.68%
Epoch: 02 | Time: 1.0m 6.71s
	 Train Loss: 1.024 | Train Acc: 31.70%
	 Val. Loss: 1.374 |  Val. Acc: 37.67%
	 Relaxed Train. Acc: 80.65% | Relaxed Val. Acc: 74.70%
Epoch: 03 | Time: 1.0m 6.70s
	 Train Loss: 0.938 | Train Acc: 36.94%
	 Val. Loss: 1.338 |  Val. Acc: 39.21%
	 Relaxed Train. Acc: 83.41% | Relaxed Val. Acc: 75.92%
Epoch: 04 | Time: 1.0m 6.62s
	 Train Loss: 0.897 | Train Acc: 40.60%
	 Val. Loss: 1.355 |  Val. Acc: 38.92%
	 Relaxed Train. Acc: 84.86% | Relaxed Val. Acc: 75.38%
Epoch: 05 | Time: 1.0m 6.58s
	 Train Loss: 0.858 | Train Acc: 43.68%
	 Val. Loss: 1.348 |  Val. Acc: 41.58%
	 Relaxed Train. Acc: 86.10% | Relaxed Val. Acc: 76.27%
Epoch: 06 | Time: 1.0m 6.52s
	 Train Loss: 0.831 | Train Acc: 46.16%
	 Val. Loss: 1.351 |  Val. Acc: 41.75%
	 Relaxed Train. Acc: 87.04

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
relxd train acc,▁▄▅▆▆▆▇▇▇▇▇▇████████
relxd valid acc,▁▅▆▅▆▇▇▇██▇█▇▇█▇▇▇▇█
train acc,▁▃▄▄▅▅▆▆▆▇▇▇▇▇▇█████
train loss,█▅▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁
valid acc,▁▅▆▆▇▇▇█████▇▇█▇▇▇▇▇
valid loss,▆▃▁▂▁▂▂▃▂▂▄▄▆▇▅▇▇███

0,1
epoch,19.0
relxd train acc,0.92263
relxd valid acc,0.77167
train acc,0.63326
train loss,0.67132
valid acc,0.41064
valid loss,1.49111


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: y48iqpfm with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 128
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 256
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 3


The model has 2,163,780 trainable parameters
Epoch: 01 | Time: 0.0m 48.54s
	 Train Loss: 2.653 | Train Acc: 0.77%
	 Val. Loss: 1.858 |  Val. Acc: 12.65%
	 Relaxed Train. Acc: 34.79% | Relaxed Val. Acc: 59.72%
Epoch: 02 | Time: 0.0m 48.30s
	 Train Loss: 1.361 | Train Acc: 16.59%
	 Val. Loss: 1.530 |  Val. Acc: 27.61%
	 Relaxed Train. Acc: 70.98% | Relaxed Val. Acc: 68.88%
Epoch: 03 | Time: 0.0m 48.23s
	 Train Loss: 1.131 | Train Acc: 25.37%
	 Val. Loss: 1.423 |  Val. Acc: 32.28%
	 Relaxed Train. Acc: 77.54% | Relaxed Val. Acc: 71.96%
Epoch: 04 | Time: 0.0m 48.18s
	 Train Loss: 1.044 | Train Acc: 29.67%
	 Val. Loss: 1.407 |  Val. Acc: 34.25%
	 Relaxed Train. Acc: 80.17% | Relaxed Val. Acc: 72.91%
Epoch: 05 | Time: 0.0m 48.16s
	 Train Loss: 0.999 | Train Acc: 32.07%
	 Val. Loss: 1.378 |  Val. Acc: 35.89%
	 Relaxed Train. Acc: 81.45% | Relaxed Val. Acc: 73.80%
Epoch: 06 | Time: 0.0m 48.14s
	 Train Loss: 0.962 | Train Acc: 34.55%
	 Val. Loss: 1.391 |  Val. Acc: 37.11%
	 Relaxed Train. Acc: 

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
relxd train acc,▁▆▆▇▇▇▇▇▇▇▇███████████████████
relxd valid acc,▁▅▆▆▆▇▇▇▇▇▇███▇█▇█████████████
train acc,▁▃▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇████████
train loss,█▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▅▆▆▆▇▇▇█▇████████████████████
valid loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▂▂▁▂▁▁▂▂▂▂▂▂▂▂

0,1
epoch,29.0
relxd train acc,0.90095
relxd valid acc,0.77044
train acc,0.5451
train loss,0.73154
valid acc,0.41602
valid loss,1.41468


[34m[1mwandb[0m: Agent Starting Run: d9zpcolw with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 64
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 8,850,244 trainable parameters
Epoch: 01 | Time: 1.0m 24.76s
	 Train Loss: 1.333 | Train Acc: 19.00%
	 Val. Loss: 1.461 |  Val. Acc: 30.32%
	 Relaxed Train. Acc: 70.89% | Relaxed Val. Acc: 71.16%
Epoch: 02 | Time: 1.0m 24.93s
	 Train Loss: 1.046 | Train Acc: 30.23%
	 Val. Loss: 1.412 |  Val. Acc: 34.47%
	 Relaxed Train. Acc: 79.93% | Relaxed Val. Acc: 73.18%
Epoch: 03 | Time: 1.0m 24.98s
	 Train Loss: 0.978 | Train Acc: 33.82%
	 Val. Loss: 1.373 |  Val. Acc: 36.04%
	 Relaxed Train. Acc: 82.07% | Relaxed Val. Acc: 74.45%
Epoch: 04 | Time: 1.0m 24.84s
	 Train Loss: 0.941 | Train Acc: 36.98%
	 Val. Loss: 1.363 |  Val. Acc: 36.94%
	 Relaxed Train. Acc: 83.32% | Relaxed Val. Acc: 74.80%
Epoch: 05 | Time: 1.0m 24.80s
	 Train Loss: 0.914 | Train Acc: 38.76%
	 Val. Loss: 1.390 |  Val. Acc: 38.38%
	 Relaxed Train. Acc: 84.22% | Relaxed Val. Acc: 74.88%
Epoch: 06 | Time: 1.0m 24.77s
	 Train Loss: 0.890 | Train Acc: 40.65%
	 Val. Loss: 1.360 |  Val. Acc: 39.43%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇███
relxd valid acc,▁▄▆▆▆█▇███
train acc,▁▄▅▅▆▆▇▇██
train loss,█▄▃▃▂▂▂▁▁▁
valid acc,▁▄▅▅▆▇███▇
valid loss,█▅▂▁▃▁▂▄▂▄

0,1
epoch,9.0
relxd train acc,0.87403
relxd valid acc,0.76035
train acc,0.47166
train loss,0.81838
valid acc,0.39478
valid loss,1.39879


[34m[1mwandb[0m: Agent Starting Run: 46luix9n with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	enc_hid_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 1,915,332 trainable parameters
Epoch: 01 | Time: 0.0m 51.23s
	 Train Loss: 1.427 | Train Acc: 16.24%
	 Val. Loss: 1.427 |  Val. Acc: 31.40%
	 Relaxed Train. Acc: 68.32% | Relaxed Val. Acc: 71.80%
Epoch: 02 | Time: 0.0m 51.33s
	 Train Loss: 1.046 | Train Acc: 29.61%
	 Val. Loss: 1.352 |  Val. Acc: 36.23%
	 Relaxed Train. Acc: 79.84% | Relaxed Val. Acc: 74.75%
Epoch: 03 | Time: 0.0m 51.31s
	 Train Loss: 0.957 | Train Acc: 35.41%
	 Val. Loss: 1.377 |  Val. Acc: 37.06%
	 Relaxed Train. Acc: 82.78% | Relaxed Val. Acc: 74.50%
Epoch: 04 | Time: 0.0m 51.27s
	 Train Loss: 0.903 | Train Acc: 39.62%
	 Val. Loss: 1.378 |  Val. Acc: 38.40%
	 Relaxed Train. Acc: 84.59% | Relaxed Val. Acc: 74.89%
Epoch: 05 | Time: 0.0m 51.17s
	 Train Loss: 0.868 | Train Acc: 42.92%
	 Val. Loss: 1.341 |  Val. Acc: 39.62%
	 Relaxed Train. Acc: 85.71% | Relaxed Val. Acc: 75.91%
Epoch: 06 | Time: 0.0m 51.18s
	 Train Loss: 0.833 | Train Acc: 45.69%
	 Val. Loss: 1.354 |  Val. Acc: 39.11%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇███
relxd valid acc,▁▅▅▅▇▇██▇▇
train acc,▁▃▄▅▆▆▇▇██
train loss,█▄▃▃▂▂▂▁▁▁
valid acc,▁▅▅▆▇▇█▇▇▇
valid loss,▇▂▄▄▁▂▂▅▆█

0,1
epoch,9.0
relxd train acc,0.90063
relxd valid acc,0.76216
train acc,0.55465
train loss,0.73608
valid acc,0.38843
valid loss,1.44072


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d7q0g17b with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 11,417,028 trainable parameters
Epoch: 01 | Time: 1.0m 9.97s
	 Train Loss: 1.687 | Train Acc: 14.67%
	 Val. Loss: 1.459 |  Val. Acc: 30.37%
	 Relaxed Train. Acc: 61.49% | Relaxed Val. Acc: 71.22%
Epoch: 02 | Time: 1.0m 10.29s
	 Train Loss: 1.066 | Train Acc: 30.26%
	 Val. Loss: 1.363 |  Val. Acc: 35.55%
	 Relaxed Train. Acc: 79.35% | Relaxed Val. Acc: 73.67%
Epoch: 03 | Time: 1.0m 10.24s
	 Train Loss: 0.968 | Train Acc: 35.75%
	 Val. Loss: 1.349 |  Val. Acc: 36.72%
	 Relaxed Train. Acc: 82.59% | Relaxed Val. Acc: 74.38%
Epoch: 04 | Time: 1.0m 10.20s
	 Train Loss: 0.916 | Train Acc: 39.22%
	 Val. Loss: 1.321 |  Val. Acc: 40.92%
	 Relaxed Train. Acc: 84.25% | Relaxed Val. Acc: 75.97%
Epoch: 05 | Time: 1.0m 10.15s
	 Train Loss: 0.880 | Train Acc: 42.07%
	 Val. Loss: 1.342 |  Val. Acc: 40.43%
	 Relaxed Train. Acc: 85.41% | Relaxed Val. Acc: 76.15%
Epoch: 06 | Time: 1.0m 10.11s
	 Train Loss: 0.854 | Train Acc: 44.34%
	 Val. Loss: 1.342 |  Val. Acc: 42.26%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▆▆▇▇▇████
relxd valid acc,▁▄▅▇▇█████
train acc,▁▄▅▆▆▇▇▇██
train loss,█▃▃▂▂▂▁▁▁▁
valid acc,▁▄▅▇▇████▇
valid loss,█▃▂▁▂▂▂▂▂▄

0,1
epoch,9.0
relxd train acc,0.89041
relxd valid acc,0.76795
train acc,0.519
train loss,0.76836
valid acc,0.41406
valid loss,1.38302


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9tflx63k with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 2,574,468 trainable parameters
Epoch: 01 | Time: 0.0m 55.04s
	 Train Loss: 1.399 | Train Acc: 17.18%
	 Val. Loss: 1.448 |  Val. Acc: 30.76%
	 Relaxed Train. Acc: 68.78% | Relaxed Val. Acc: 71.70%
Epoch: 02 | Time: 0.0m 55.23s
	 Train Loss: 1.050 | Train Acc: 30.15%
	 Val. Loss: 1.372 |  Val. Acc: 36.57%
	 Relaxed Train. Acc: 79.68% | Relaxed Val. Acc: 74.09%
Epoch: 03 | Time: 0.0m 55.17s
	 Train Loss: 0.960 | Train Acc: 35.45%
	 Val. Loss: 1.340 |  Val. Acc: 37.18%
	 Relaxed Train. Acc: 82.69% | Relaxed Val. Acc: 75.12%
Epoch: 04 | Time: 0.0m 55.19s
	 Train Loss: 0.912 | Train Acc: 39.21%
	 Val. Loss: 1.344 |  Val. Acc: 38.11%
	 Relaxed Train. Acc: 84.19% | Relaxed Val. Acc: 74.97%
Epoch: 05 | Time: 0.0m 55.13s
	 Train Loss: 0.876 | Train Acc: 42.27%
	 Val. Loss: 1.352 |  Val. Acc: 40.04%
	 Relaxed Train. Acc: 85.43% | Relaxed Val. Acc: 76.12%
Epoch: 06 | Time: 0.0m 55.07s
	 Train Loss: 0.848 | Train Acc: 44.55%
	 Val. Loss: 1.361 |  Val. Acc: 39.97%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇███
relxd valid acc,▁▅▆▆█▇▇▇██
train acc,▁▃▄▅▆▆▇▇██
train loss,█▄▃▃▂▂▂▁▁▁
valid acc,▁▅▆▆██▆███
valid loss,█▃▁▁▂▂▄▄▆▆

0,1
epoch,9.0
relxd train acc,0.89457
relxd valid acc,0.76206
train acc,0.53799
train loss,0.75365
valid acc,0.40137
valid loss,1.41798


[34m[1mwandb[0m: Agent Starting Run: 5n03bw4r with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 128
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 3,911,492 trainable parameters
Epoch: 01 | Time: 0.0m 51.76s
	 Train Loss: 1.563 | Train Acc: 15.59%
	 Val. Loss: 1.480 |  Val. Acc: 31.74%
	 Relaxed Train. Acc: 65.34% | Relaxed Val. Acc: 71.02%
Epoch: 02 | Time: 0.0m 51.56s
	 Train Loss: 1.064 | Train Acc: 29.28%
	 Val. Loss: 1.385 |  Val. Acc: 35.30%
	 Relaxed Train. Acc: 79.52% | Relaxed Val. Acc: 73.77%
Epoch: 03 | Time: 0.0m 51.53s
	 Train Loss: 0.977 | Train Acc: 34.61%
	 Val. Loss: 1.372 |  Val. Acc: 37.06%
	 Relaxed Train. Acc: 82.37% | Relaxed Val. Acc: 74.23%
Epoch: 04 | Time: 0.0m 51.52s
	 Train Loss: 0.935 | Train Acc: 37.82%
	 Val. Loss: 1.389 |  Val. Acc: 38.55%
	 Relaxed Train. Acc: 83.70% | Relaxed Val. Acc: 75.01%
Epoch: 05 | Time: 0.0m 51.42s
	 Train Loss: 0.908 | Train Acc: 39.94%
	 Val. Loss: 1.365 |  Val. Acc: 39.14%
	 Relaxed Train. Acc: 84.61% | Relaxed Val. Acc: 75.31%
Epoch: 06 | Time: 0.0m 51.43s
	 Train Loss: 0.879 | Train Acc: 42.13%
	 Val. Loss: 1.345 |  Val. Acc: 40.97%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▇▇▇▇███
relxd valid acc,▁▄▅▅▆▇▆▇██
train acc,▁▄▅▆▆▇▇▇██
train loss,█▃▃▂▂▂▁▁▁▁
valid acc,▁▃▄▅▆▇▇▇██
valid loss,█▄▃▄▃▂▂▂▁▂

0,1
epoch,9.0
relxd train acc,0.88021
relxd valid acc,0.77044
train acc,0.48855
train loss,0.80358
valid acc,0.4231
valid loss,1.35182


[34m[1mwandb[0m: Agent Starting Run: ailaaet8 with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 64
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 2,945,860 trainable parameters
Epoch: 01 | Time: 0.0m 55.94s
	 Train Loss: 1.372 | Train Acc: 17.18%
	 Val. Loss: 1.482 |  Val. Acc: 31.84%
	 Relaxed Train. Acc: 69.93% | Relaxed Val. Acc: 70.70%
Epoch: 02 | Time: 0.0m 55.98s
	 Train Loss: 1.055 | Train Acc: 28.95%
	 Val. Loss: 1.403 |  Val. Acc: 35.03%
	 Relaxed Train. Acc: 79.61% | Relaxed Val. Acc: 73.36%
Epoch: 03 | Time: 0.0m 55.94s
	 Train Loss: 0.978 | Train Acc: 33.82%
	 Val. Loss: 1.379 |  Val. Acc: 36.45%
	 Relaxed Train. Acc: 82.12% | Relaxed Val. Acc: 73.82%
Epoch: 04 | Time: 0.0m 55.90s
	 Train Loss: 0.932 | Train Acc: 37.02%
	 Val. Loss: 1.382 |  Val. Acc: 36.57%
	 Relaxed Train. Acc: 83.66% | Relaxed Val. Acc: 74.38%
Epoch: 05 | Time: 0.0m 55.83s
	 Train Loss: 0.902 | Train Acc: 39.85%
	 Val. Loss: 1.397 |  Val. Acc: 37.21%
	 Relaxed Train. Acc: 84.66% | Relaxed Val. Acc: 74.69%
Epoch: 06 | Time: 0.0m 55.80s
	 Train Loss: 0.877 | Train Acc: 41.87%
	 Val. Loss: 1.390 |  Val. Acc: 38.40%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇███
relxd valid acc,▁▅▅▆▇█████
train acc,▁▄▅▅▆▆▇▇██
train loss,█▄▃▃▂▂▂▁▁▁
valid acc,▁▄▅▆▆▇▇█▆█
valid loss,█▃▁▁▂▂▂▂▃▄

0,1
epoch,9.0
relxd train acc,0.88255
relxd valid acc,0.75753
train acc,0.49686
train loss,0.79254
valid acc,0.39087
valid loss,1.42403


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 89s5b56k with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	enc_hid_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 2,868,676 trainable parameters
Epoch: 01 | Time: 0.0m 56.58s
	 Train Loss: 1.416 | Train Acc: 17.11%
	 Val. Loss: 1.468 |  Val. Acc: 32.13%
	 Relaxed Train. Acc: 68.93% | Relaxed Val. Acc: 71.04%
Epoch: 02 | Time: 0.0m 56.75s
	 Train Loss: 1.040 | Train Acc: 30.71%
	 Val. Loss: 1.391 |  Val. Acc: 35.42%
	 Relaxed Train. Acc: 80.15% | Relaxed Val. Acc: 74.17%
Epoch: 03 | Time: 0.0m 56.68s
	 Train Loss: 0.950 | Train Acc: 35.98%
	 Val. Loss: 1.355 |  Val. Acc: 38.31%
	 Relaxed Train. Acc: 83.12% | Relaxed Val. Acc: 75.06%
Epoch: 04 | Time: 0.0m 56.66s
	 Train Loss: 0.903 | Train Acc: 39.73%
	 Val. Loss: 1.349 |  Val. Acc: 40.33%
	 Relaxed Train. Acc: 84.61% | Relaxed Val. Acc: 76.33%
Epoch: 05 | Time: 0.0m 56.69s
	 Train Loss: 0.876 | Train Acc: 42.72%
	 Val. Loss: 1.339 |  Val. Acc: 41.04%
	 Relaxed Train. Acc: 85.58% | Relaxed Val. Acc: 76.24%
Epoch: 06 | Time: 0.0m 56.55s
	 Train Loss: 0.838 | Train Acc: 45.35%
	 Val. Loss: 1.354 |  Val. Acc: 41.87%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇███
relxd valid acc,▁▄▅▇▇▇█▇██
train acc,▁▄▅▅▆▆▇▇██
train loss,█▄▃▃▂▂▂▁▁▁
valid acc,▁▃▅▆▇▇▇▇▇█
valid loss,█▄▂▂▁▂▂▄▃▃

0,1
epoch,9.0
relxd train acc,0.89496
relxd valid acc,0.7751
train acc,0.53961
train loss,0.75436
valid acc,0.43091
valid loss,1.37635


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 84pipxd4 with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	enc_hid_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 3,786,180 trainable parameters
Epoch: 01 | Time: 1.0m 2.92s
	 Train Loss: 1.348 | Train Acc: 19.79%
	 Val. Loss: 1.437 |  Val. Acc: 33.18%
	 Relaxed Train. Acc: 70.75% | Relaxed Val. Acc: 71.76%
Epoch: 02 | Time: 1.0m 3.12s
	 Train Loss: 1.009 | Train Acc: 32.73%
	 Val. Loss: 1.346 |  Val. Acc: 37.06%
	 Relaxed Train. Acc: 81.11% | Relaxed Val. Acc: 74.43%
Epoch: 03 | Time: 1.0m 3.15s
	 Train Loss: 0.924 | Train Acc: 38.36%
	 Val. Loss: 1.373 |  Val. Acc: 37.23%
	 Relaxed Train. Acc: 83.95% | Relaxed Val. Acc: 74.99%
Epoch: 04 | Time: 1.0m 3.08s
	 Train Loss: 0.875 | Train Acc: 42.27%
	 Val. Loss: 1.333 |  Val. Acc: 40.53%
	 Relaxed Train. Acc: 85.57% | Relaxed Val. Acc: 76.48%
Epoch: 05 | Time: 1.0m 3.07s
	 Train Loss: 0.840 | Train Acc: 45.71%
	 Val. Loss: 1.341 |  Val. Acc: 40.82%
	 Relaxed Train. Acc: 86.71% | Relaxed Val. Acc: 76.57%
Epoch: 06 | Time: 1.0m 3.02s
	 Train Loss: 0.804 | Train Acc: 48.73%
	 Val. Loss: 1.364 |  Val. Acc: 41.85%
	 Relaxed Train. Acc: 87.89

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▆▇▇▇██
relxd valid acc,▁▄▅▇▇▇█▇██
train acc,▁▃▄▅▆▆▇▇██
train loss,█▄▃▃▂▂▂▁▁▁
valid acc,▁▄▄▇▇█████
valid loss,█▂▄▁▂▃▂▆▅▆

0,1
epoch,9.0
relxd train acc,0.91172
relxd valid acc,0.77041
train acc,0.59529
train loss,0.70523
valid acc,0.41602
valid loss,1.41145


[34m[1mwandb[0m: Agent Starting Run: 5wmocix2 with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 64
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 2


The model has 2,356,036 trainable parameters
Epoch: 01 | Time: 0.0m 52.46s
	 Train Loss: 1.562 | Train Acc: 13.21%
	 Val. Loss: 1.530 |  Val. Acc: 28.54%
	 Relaxed Train. Acc: 64.89% | Relaxed Val. Acc: 69.48%
Epoch: 02 | Time: 0.0m 52.43s
	 Train Loss: 1.106 | Train Acc: 25.54%
	 Val. Loss: 1.456 |  Val. Acc: 32.76%
	 Relaxed Train. Acc: 78.04% | Relaxed Val. Acc: 71.83%
Epoch: 03 | Time: 0.0m 52.37s
	 Train Loss: 1.008 | Train Acc: 31.32%
	 Val. Loss: 1.409 |  Val. Acc: 35.57%
	 Relaxed Train. Acc: 81.14% | Relaxed Val. Acc: 73.47%
Epoch: 04 | Time: 0.0m 52.34s
	 Train Loss: 0.956 | Train Acc: 34.66%
	 Val. Loss: 1.355 |  Val. Acc: 38.13%
	 Relaxed Train. Acc: 82.78% | Relaxed Val. Acc: 75.33%
Epoch: 05 | Time: 0.0m 52.35s
	 Train Loss: 0.918 | Train Acc: 37.22%
	 Val. Loss: 1.360 |  Val. Acc: 38.45%
	 Relaxed Train. Acc: 83.97% | Relaxed Val. Acc: 75.64%
Epoch: 06 | Time: 0.0m 52.27s
	 Train Loss: 0.885 | Train Acc: 40.04%
	 Val. Loss: 1.373 |  Val. Acc: 38.89%
	 Relaxed Train. Acc:

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇███
relxd valid acc,▁▃▅▇▇▇▇▇▇█
train acc,▁▃▅▅▆▆▇▇██
train loss,█▄▃▂▂▂▂▁▁▁
valid acc,▁▄▅▇▇▇▇▇▇█
valid loss,█▅▃▁▁▂▂▃▃▂

0,1
epoch,9.0
relxd train acc,0.8799
relxd valid acc,0.76387
train acc,0.48332
train loss,0.79711
valid acc,0.39697
valid loss,1.39076


[34m[1mwandb[0m: Agent Starting Run: ym4cw5ve with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 64
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 13,233,732 trainable parameters
Epoch: 01 | Time: 1.0m 34.61s
	 Train Loss: 1.513 | Train Acc: 18.40%
	 Val. Loss: 1.400 |  Val. Acc: 33.84%
	 Relaxed Train. Acc: 66.56% | Relaxed Val. Acc: 72.74%
Epoch: 02 | Time: 1.0m 34.86s
	 Train Loss: 0.995 | Train Acc: 34.02%
	 Val. Loss: 1.366 |  Val. Acc: 38.84%
	 Relaxed Train. Acc: 81.71% | Relaxed Val. Acc: 75.13%
Epoch: 03 | Time: 1.0m 34.79s
	 Train Loss: 0.903 | Train Acc: 40.19%
	 Val. Loss: 1.329 |  Val. Acc: 39.99%
	 Relaxed Train. Acc: 84.68% | Relaxed Val. Acc: 76.12%
Epoch: 04 | Time: 1.0m 34.72s
	 Train Loss: 0.849 | Train Acc: 44.56%
	 Val. Loss: 1.345 |  Val. Acc: 40.94%
	 Relaxed Train. Acc: 86.47% | Relaxed Val. Acc: 76.35%
Epoch: 05 | Time: 1.0m 34.66s
	 Train Loss: 0.811 | Train Acc: 48.43%
	 Val. Loss: 1.354 |  Val. Acc: 41.19%
	 Relaxed Train. Acc: 87.73% | Relaxed Val. Acc: 76.51%
Epoch: 06 | Time: 1.0m 34.62s
	 Train Loss: 0.770 | Train Acc: 52.12%
	 Val. Loss: 1.335 |  Val. Acc: 44.26%
	 Relaxed Train. Acc

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▆▇▇▇███
relxd valid acc,▁▄▆▆▆█▇█▇▇
train acc,▁▃▄▅▆▆▇▇██
train loss,█▄▃▂▂▂▂▁▁▁
valid acc,▁▄▅▆▆█▇█▇▇
valid loss,▅▃▁▂▂▁▃▄▇█

0,1
epoch,9.0
relxd train acc,0.9218
relxd valid acc,0.77526
train acc,0.63195
train loss,0.67613
valid acc,0.42627
valid loss,1.44494


[34m[1mwandb[0m: Agent Starting Run: bj6z3fop with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 512
[34m[1mwandb[0m: 	enc_hid_size: 512
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 3


The model has 7,488,452 trainable parameters
Epoch: 01 | Time: 0.0m 55.95s
	 Train Loss: 2.175 | Train Acc: 6.83%
	 Val. Loss: 1.533 |  Val. Acc: 26.49%
	 Relaxed Train. Acc: 48.38% | Relaxed Val. Acc: 68.75%
Epoch: 02 | Time: 0.0m 55.85s
	 Train Loss: 1.145 | Train Acc: 24.72%
	 Val. Loss: 1.411 |  Val. Acc: 31.81%
	 Relaxed Train. Acc: 76.76% | Relaxed Val. Acc: 72.27%
Epoch: 03 | Time: 0.0m 55.71s
	 Train Loss: 1.022 | Train Acc: 30.60%
	 Val. Loss: 1.382 |  Val. Acc: 35.69%
	 Relaxed Train. Acc: 80.59% | Relaxed Val. Acc: 74.48%
Epoch: 04 | Time: 0.0m 55.67s
	 Train Loss: 0.966 | Train Acc: 34.28%
	 Val. Loss: 1.331 |  Val. Acc: 38.16%
	 Relaxed Train. Acc: 82.41% | Relaxed Val. Acc: 75.03%
Epoch: 05 | Time: 0.0m 55.73s
	 Train Loss: 0.929 | Train Acc: 36.39%
	 Val. Loss: 1.330 |  Val. Acc: 38.38%
	 Relaxed Train. Acc: 83.51% | Relaxed Val. Acc: 75.99%
Epoch: 06 | Time: 0.0m 55.73s
	 Train Loss: 0.900 | Train Acc: 38.94%
	 Val. Loss: 1.339 |  Val. Acc: 38.65%
	 Relaxed Train. Acc: 

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
relxd train acc,▁▅▆▆▇▇▇▇▇▇▇▇▇▇▇███████████████
relxd valid acc,▁▄▅▆▇▇▇▇▇▇▇█▇▇▇▇▇██▇▇████▇██▇█
train acc,▁▃▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇███████
train loss,█▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▃▅▆▆▆▇██▇▇██▇▇█▇█▇▇▇▇███▇█▇▇█
valid loss,█▄▃▂▂▂▁▂▁▃▂▂▃▃▃▄▄▄▄▆▅▅▅▆▆▆▆▆██

0,1
epoch,29.0
relxd train acc,0.92761
relxd valid acc,0.77448
train acc,0.64717
train loss,0.65031
valid acc,0.41138
valid loss,1.51897


[34m[1mwandb[0m: Agent Starting Run: wi43bmjd with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dec_hid_size: 512
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	enc_hid_size: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 5,629,124 trainable parameters
Epoch: 01 | Time: 1.0m 7.71s
	 Train Loss: 1.375 | Train Acc: 18.89%
	 Val. Loss: 1.421 |  Val. Acc: 32.30%
	 Relaxed Train. Acc: 69.98% | Relaxed Val. Acc: 72.32%
Epoch: 02 | Time: 1.0m 7.91s
	 Train Loss: 1.020 | Train Acc: 31.85%
	 Val. Loss: 1.390 |  Val. Acc: 36.79%
	 Relaxed Train. Acc: 80.79% | Relaxed Val. Acc: 74.36%
Epoch: 03 | Time: 1.0m 7.83s
	 Train Loss: 0.943 | Train Acc: 36.81%
	 Val. Loss: 1.349 |  Val. Acc: 38.82%
	 Relaxed Train. Acc: 83.33% | Relaxed Val. Acc: 75.98%


In [None]:
# Re-attach to the existing W&B run with id 'gyqh6j5c'. resume='must' asks wandb to
# resume that run rather than create a new one (see the wandb.init documentation).
run=wandb.init(id='gyqh6j5c', resume='must')

In [None]:
# Pull a single batch from train_dataloader; the cells below inspect it.
a=next(iter(train_dataloader))

In [None]:
# Call SS on the batch's 'input' and 'output' entries and keep the result.
# NOTE(review): SS is defined outside this part of the notebook — presumably the
# trained seq2seq model; confirm before relying on this cell.
bya=SS(a['input'],a['output'])

In [None]:
# Shape of the batch's 'output' entry.
a['output'].shape

In [None]:
# Shape of the value returned by SS for this batch.
bya.shape

In [None]:
import math

In [None]:
def word_from_torchies(torchie1,index_toalp):
    """Decode a torch tensor through ``word_from_vecs``.

    The tensor is moved to the CPU, converted to a NumPy array and forwarded to
    ``word_from_vecs`` (defined outside this cell) together with ``index_toalp``
    and a third positional argument of ``False``.

    Args:
        torchie1: torch tensor to decode (presumably a sequence of character
            indices — verify against ``word_from_vecs``).
        index_toalp: lookup object passed through unchanged to ``word_from_vecs``.

    Returns:
        Whatever ``word_from_vecs`` returns for these arguments.
    """
    return word_from_vecs(torchie1.cpu().numpy(), index_toalp, False)

In [None]:
def word_from_batch(batch):
    """Decode every row of ``batch`` with ``word_from_torchies``.

    Rows are accessed by position (``batch[0]`` .. ``batch[len(batch) - 1]``) and
    each one is decoded using the notebook-level ``index_to_hindi_alphabet``.

    Args:
        batch: indexable container of rows accepted by ``word_from_torchies``.

    Returns:
        list: one decoded result per row, in row order.
    """
    return [
        word_from_torchies(batch[row], index_to_hindi_alphabet)
        for row in range(len(batch))
    ]
        

In [None]:
# Swap the first two axes of bya, take the argmax over axis 2, and decode row 0
# with the Hindi index-to-alphabet map.
word_from_torchies(bya.transpose(0,1).argmax(2)[0],index_to_hindi_alphabet)

In [None]:
# Look up the entry stored under index 65 in index_to_hindi_alphabet.
index_to_hindi_alphabet[65]

In [None]:
def make_wrd(stuff):
    """Concatenate the ``index_to_hindi_alphabet`` entries for each element of ``stuff``.

    Args:
        stuff: torch tensor whose elements are used as keys into the
            notebook-level ``index_to_hindi_alphabet`` (moved to CPU and converted
            to NumPy before iterating).

    Returns:
        str: the looked-up entries joined with no separator.
    """
    return "".join(index_to_hindi_alphabet[k] for k in stuff.cpu().numpy())
    

In [None]:
# Decode row 10 of the argmax-ed, axis-swapped bya with make_wrd.
make_wrd(bya.transpose(0,1).argmax(2)[10])

In [None]:
# Raw argmax indices for row 10 (the same row decoded by the make_wrd call).
bya.transpose(0,1).argmax(2)[10]

In [None]:
# For rows 0..15: print a separator, the row number, then the decoded
# a['output'][i] next to the argmax decoding of bya for the same row.
for i in range(16):
    print('................')
    print(i)
    print(
        word_from_torchies(a['output'][i], index_to_hindi_alphabet),
        '---',
        make_wrd(bya.transpose(0, 1).argmax(2)[i]),
    )


In [None]:
def word(self, source_batch,target_batch):
    """Greedily decode ``source_batch`` and return the prediction as a string.

    This is written as a model method pasted into a cell: ``self`` must provide
    ``encoder``, ``decoder`` and ``device``. It also reads the notebook-level
    ``hindi_alphabet_to_index`` and ``index_to_hindi_alphabet`` tables.

    Args:
        self: object exposing ``encoder(source_batch) -> (hidden, cell)``,
            ``decoder(trg, hidden, cell) -> (prediction, hidden, cell)`` and
            ``device``.
        source_batch: input handed unchanged to ``self.encoder``.
        target_batch: 2-D tensor; only its shape ``(max_len, batch_size)`` is read.

    Returns:
        str: the start symbol ``'<'`` followed by ``max_len - 1`` greedily chosen
        characters. When the batch holds several sequences, only the first one
        is turned into text.
    """
    max_len, batch_size = target_batch.shape

    # last hidden & cell state of the encoder is used as the decoder's initial hidden state
    hidden, cell = self.encoder(source_batch)

    # Every sequence starts with '<'. The start token is given shape (batch_size,)
    # so it matches what prediction.argmax(1) feeds the decoder on later steps.
    trg = torch.full(
        (batch_size,),
        int(hindi_alphabet_to_index['<']),
        dtype=torch.long,
        device=self.device,
    )

    # index_to_hindi_alphabet is subscripted (not called), as in the other cells.
    wordet = [index_to_hindi_alphabet[trg[0].item()]]
    for _ in range(1, max_len):
        prediction, hidden, cell = self.decoder(trg, hidden, cell)
        # Greedy choice: the most likely symbol becomes the next decoder input.
        trg = prediction.argmax(1)
        wordet.append(index_to_hindi_alphabet[trg[0].item()])

    return ''.join(wordet)

In [None]:
# Decode the first validation input with the English index-to-alphabet map.
word_from_vecs(X_valid[0],index_to_english_alphabet,False)

In [None]:
# Decode the first validation target with the Hindi index-to-alphabet map.
word_from_vecs(y_valid[0],index_to_hindi_alphabet,False)

In [None]:
# Display one batch from train_dataloader (a fresh iterator is created each run).
next(iter(train_dataloader))

In [None]:
            # Fragment pasted from a larger routine (note the leading indentation).
            # For each index j along axis 1 of `predicted`, take column j of
            # `predicted` and of `target_seq` — presumably one sequence per
            # column; TODO confirm against the code this was copied from.
            for j in range(predicted.shape[1]):
                predicted_seq = predicted[:, j]
                targets_seq = target_seq[:, j]

                # Find the index of the first EOS token in the sequence
                # (EOS is whatever hin_token_map maps "\n" to; the search runs on the target).
                eos_idx = (targets_seq == hin_token_map["\n"]).nonzero()
                if eos_idx.numel() > 0:
                    # Keep only the positions before the first EOS, for both slices.
                    eos_idx = eos_idx[0][0]
                    predicted_seq = predicted_seq[:eos_idx]
                    targets_seq = targets_seq[:eos_idx]
                # NOTE(review): the truncated slices are not used after this point in the cell.

In [None]:
# NOTE(review): bare name left in the final cell — looks like a stray keystroke.
# It raises NameError unless `s` is defined elsewhere; consider deleting the cell.
s