In [1]:
from utils import *
import numpy as np
import pickle
import wandb

In [2]:
index_to_english_alphabet=pickle.load(open('vocab_tools/index_to_english_alphabet.pickle', 'rb'))
index_to_hindi_alphabet=pickle.load(open('vocab_tools/index_to_hindi_alphabet.pickle', 'rb'))

In [3]:
hindi_alphabet_to_index=pickle.load(open('vocab_tools/hindi_alphabet_to_index.pickle', 'rb')) 
english_alphabet_to_index=pickle.load(open('vocab_tools/english_alphabet_to_index.pickle', 'rb')) 

In [4]:
X_train=np.load('simple_data/X_train.npy')
X_valid=np.load('simple_data/X_val.npy')

y_train=np.load('simple_data/y_train.npy')
y_valid=np.load('simple_data/y_val.npy')

In [5]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
class Eng_Hind_Dataset(Dataset):

    def __init__(self, in_file, out_file, root_dir='simple_data',device='cuda'):

        self.input = torch.tensor(np.load(root_dir+'/'+in_file))
        self.output = torch.tensor(np.load(root_dir+'/'+out_file))
        
        assert(len(self.input)==len(self.output),"Error: I/O Lengths must be same")
        

    def __len__(self):
        return len(self.input)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        X=self.input[idx]
        X=X.to(device)
        y=self.output[idx]
        y=y.to(device)
        


        sample = {'input': X, 'output': y}

        return sample

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
device

device(type='cuda')

In [9]:
training_data=Eng_Hind_Dataset("X_train.npy","y_train.npy",device=device)
val_data=Eng_Hind_Dataset("X_val.npy","y_val.npy",device=device)
test_data=Eng_Hind_Dataset("X_test.npy","y_test.npy",device=device)

In [10]:
train_dataloader = DataLoader(training_data, batch_size=16,shuffle=True)

In [11]:
val_dataloader = DataLoader(val_data, batch_size=16,shuffle=True)

In [12]:
import torch.nn as nn

In [13]:
def cell_type(mode:str='rnn'):
    mode=mode.lower()
    if mode == 'rnn':
        return nn.RNN
    elif mode =='gru':
        return nn.GRU
    else:
        return nn.LSTM
        

In [14]:
class Encoder(nn.Module):
    """
    Input :
        - source batch
    Layer : 
        source batch -> Embedding -> LSTM
    Output :
        - LSTM hidden state
        - LSTM cell state

    Parmeters
    ---------
    input_dim : int
        Input dimension, should equal to the source vocab size.
    
    emb_dim : int
        Embedding layer's dimension.
        
    hid_dim : int
        LSTM Hidden/Cell state's dimension.
        
    n_layers : int
        Number of LSTM layers.
        
    dropout : float
        Dropout for the LSTM layer.
    """

    def __init__(self, input_size, enc_embed_size, hid_size, num_layers, cell_mode, dropout, is_bi):
        super().__init__()
        
        # create embedding layer
        self.embedding = nn.Embedding(input_size, enc_embed_size,padding_idx=english_alphabet_to_index['.'])

        #creating LSTM/GRU/RNN cell
        cell=cell_type(cell_mode)
        
        self.cell=cell(enc_embed_size,hid_size,num_layers,dropout=dropout,bidirectional=is_bi,batch_first=True)
        self.cell_mode=cell_mode
        
        

    def forward(self, input_batch: torch.LongTensor):
        """

        Parameters
        ----------
        src_batch : 2d torch.LongTensor
            Batched tokenized source sentence of shape [sent len, batch size].

        Returns
        -------
        hidden, cell : 3d torch.LongTensor
            Hidden and cell state of the LSTM layer. Each state's shape
            [n layers * n directions, batch size, hidden dim]
        """
        embedded = self.embedding(input_batch) # [sent len, batch size, emb dim]
        #print('encoder embd',embedded.shape)
        
        if self.cell_mode.lower()=='lstm':
            outputs, (hidden, cell) = self.cell(embedded)
            
            
        else:
            outputs, hidden = self.cell(embedded)
            cell=outputs
        # outputs -> [sent len, batch size, hidden dim * n directions]
        return hidden, cell

In [15]:
class Decoder(nn.Module):
    """
    Input :
        - first token in the target batch
        - LSTM hidden state from the encoder
        - LSTM cell state from the encoder
    Layer :
        target batch -> Embedding -- 
                                   |
        encoder hidden state ------|--> LSTM -> Linear
                                   |
        encoder cell state   -------
        
    Output :
        - prediction
        - LSTM hidden state
        - LSTM cell state

    Parmeters
    ---------
    output : int
        Output dimension, should equal to the target vocab size.
    
    emb_dim : int
        Embedding layer's dimension.
        
    hid_dim : int
        LSTM Hidden/Cell state's dimension.
        
    n_layers : int
        Number of LSTM layers.
        
    dropout : float
        Dropout for the LSTM layer.
    """

    
    def __init__(self, output_size, dec_embed_size, hid_size, num_layers, cell_mode, dropout, is_bi):
        super().__init__()


        self.embedding = nn.Embedding(output_size, dec_embed_size,padding_idx=hindi_alphabet_to_index['.'])
        
        cell=cell_type(cell_mode)
        
        self.cell=cell(dec_embed_size,hid_size,num_layers,dropout=dropout,bidirectional=is_bi,batch_first=True)
        if is_bi:
            self.out = nn.Linear(hid_size*2, output_size)
        else:
             self.out = nn.Linear(hid_size, output_size)
        
        self.output_size=output_size
        self.cell_mode=cell_mode
        

    def forward(self, trg: torch.LongTensor, hidden: torch.FloatTensor, cell: torch.FloatTensor):
        """

        Parameters
        ----------
        trg : 1d torch.LongTensor
            Batched tokenized source sentence of shape [batch size].
            
        hidden, cell : 3d torch.FloatTensor
            Hidden and cell state of the LSTM layer. Each state's shape
            [n layers * n directions, batch size, hidden dim]

        Returns
        -------
        prediction : 2d torch.LongTensor
            For each token in the batch, the predicted target vobulary.
            Shape [batch size, output dim]

        hidden, cell : 3d torch.FloatTensor
            Hidden and cell state of the LSTM layer. Each state's shape
            [n layers * n directions, batch size, hidden dim]
        """
        # [1, batch size, emb dim], the 1 serves as sent len
        embedded = self.embedding(trg.unsqueeze(1))
        if self.cell_mode.lower()=='lstm':
            #print('decoder embed',embedded.shape)
            outputs, (hidden, cell) = self.cell(embedded, (hidden, cell))
        else:
            outputs, hidden = self.cell(embedded, hidden)
            cell=hidden
        prediction = self.out(outputs.squeeze(1))
        return prediction, hidden, cell

In [16]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device


    def forward(self, source_batch, target_batch, teacher_forcing_ratio=0.5):

        batch_size,max_len  = target_batch.shape
        #print(max_len,batch_size)
        target_vocab_size = self.decoder.output_size
        #print(target_vocab_size)

        # tensor to store decoder's output
        outputs = torch.zeros(max_len, batch_size, target_vocab_size).to(self.device)

        # last hidden & cell state of the encoder is used as the decoder's initial hidden state
        hidden, cell = self.encoder(source_batch)       

        trg = target_batch[:,0]
        for i in range(1, max_len):
            prediction, hidden, cell = self.decoder(trg, hidden, cell)
            outputs[i] = prediction

            if np.random.random() < teacher_forcing_ratio:
                trg = target_batch[:,i]
            else:
                trg = prediction.argmax(1)

        return outputs


        
        

In [17]:
hindi_alphabet_to_index['>']

1

In [18]:
ss1=next(iter(train_dataloader))

In [19]:
# E=Encoder(30, embed_size, hid_size, num_layers, cell_mode, dropout, is_bi)
# E=E.to(device)

# D=Decoder(68, embed_size, hid_size, num_layers, cell_mode, dropout, is_bi)

# D=D.to(device)
# S=Seq2Seq(E,D,device)
# S.to(device)    
# print(f'The model has {count_params(S):,} trainable parameters')

In [20]:
E=Encoder(30, 64, 256, 1, 'lstm', 0.1, True)
E=E.to(device)

D=Decoder(68, 128, 256, 1, 'lstm', 0.1, True)

D=D.to(device)
S=Seq2Seq(E,D,device)
S.to(device)    


Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(30, 64, padding_idx=2)
    (cell): LSTM(64, 256, batch_first=True, dropout=0.1, bidirectional=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(68, 128, padding_idx=2)
    (cell): LSTM(128, 256, batch_first=True, dropout=0.1, bidirectional=True)
    (out): Linear(in_features=512, out_features=68, bias=True)
  )
)

In [21]:
S(ss1['input'],ss1['output'])

tensor([[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         ...,
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           0.0000e+00,  0.0000e+00]],

        [[ 7.1363e-02,  1.9440e-02, -3.3042e-02,  ..., -2.0240e-02,
          -1.6442e-03,  3.8942e-03],
         [ 8.3536e-02,  2.1729e-03,  1.3374e-03,  ..., -8.2703e-03,
          -1.7474e-02,  4.9452e-03],
         [ 7.9245e-02,  2.0575e-03, -8.8284e-03,  ..., -1.2259e-02,
          -2.4090e-02,  1.9068e-02],
         ...,
         [ 8.3514e-02, -9

In [22]:
def accuracy_calc(target_seq,seq2,mode='full',device=device):# predicted
    eos_index=(target_seq==hindi_alphabet_to_index['>']).nonzero()
    eos_idx=eos_index[:,1]
    
    correct=torch.Tensor([0]).to(device)
    correct_chars=torch.Tensor([0]).to(device)
    tot_chars=torch.Tensor([0]).to(device)
    for iterate,idx in enumerate(eos_idx):
        inputter=seq2[iterate][:idx]
        outputter=target_seq[iterate][:idx]
        if torch.all(torch.eq(inputter,outputter)):
            correct+=1
            correct_chars+=idx
            tot_chars+=idx
        else:
            correct_chars+=torch.sum(inputter == outputter).item()
            tot_chars+=idx
            
#         print(correct,correct_chars,tot_chars)
        
    return correct.item(),correct_chars.item(),tot_chars.item()
            
            
        
    

In [23]:
accuracy_calc(ss1['output'],ss1['output'])

(16.0, 137.0, 137.0)

In [24]:
def train(seq2seq, iterator, optimizer, criterion):
    
    
    seq2seq.train()
    
    epoch_loss = 0
    correct=0
    correct_char=0
    tot_char=0
    
    relax_acc=0
    
    
    for batch in iterator:
        optimizer.zero_grad()
        outputs = seq2seq(batch['input'], batch['output'])
        batch_label=batch['output'].transpose(0,1)
        batch_size=len(batch['output'])
        
        _, predicted = torch.max(outputs, dim=2)
        outputs_flatten = outputs.view(-1, outputs.shape[-1])
        trg_flatten = batch_label.reshape(-1)
        

        trg_flatten.requires_grad=False
        loss = criterion(outputs_flatten, trg_flatten)
        correct_temp,correct_chars_temp,tot_chars_temp=accuracy_calc(batch['output'],predicted.transpose(0,1))
        
        #___________
        
        correct+=correct_temp
        correct_char+=correct_chars_temp
        tot_char+=tot_chars_temp
        
        
        #_______________
        

        loss.backward()
        optimizer.step()
        

        epoch_loss += loss.item()
        


    return epoch_loss / len(iterator), correct/(len(iterator)*16),correct_char/tot_char

In [25]:
def evaluate(seq2seq, iterator, criterion):
    seq2seq.eval()

    epoch_loss = 0
    correct=0
    correct_char=0
    tot_char=0
    
    relax_acc=0
    
    with torch.no_grad():
        for batch in iterator:
            outputs = seq2seq(batch['input'], batch['output'],teacher_forcing_ratio=0)
            batch_label=batch['output'].transpose(0,1)
            batch_size=len(batch['output'])


            _, predicted = torch.max(outputs, dim=2)
            #print('wow_preds',predicted.shape)

            outputs_flatten = outputs.view(-1, outputs.shape[-1])
            trg_flatten = batch_label.reshape(-1)

            loss = criterion(outputs_flatten, trg_flatten)
            
            correct_temp,correct_chars_temp,tot_chars_temp=accuracy_calc(batch['output'],predicted.transpose(0,1))
        
            #___________

            correct+=correct_temp
            correct_char+=correct_chars_temp
            tot_char+=tot_chars_temp

            #_______________       
            
            epoch_loss += loss.item()
            

    return epoch_loss / len(iterator), correct/(len(iterator)*16),correct_char/tot_char



In [26]:
def epoch_time(start_time, end_time):
    e_time = end_time - start_time
    mins = e_time // 60
    secs = e_time%60
    return mins, secs,

In [27]:
def count_params(model):
    return sum(param.numel() for param in model.parameters() if param.requires_grad)

In [28]:
import time
import random

In [29]:
import torch.optim as optim
def make_model(train_iterator,valid_iterator, enc_embed_size,dec_embed_size,
               hid_size, num_layers, cell_mode, dropout, is_bi, epochs=20):
    E=Encoder(30, enc_embed_size, hid_size, num_layers, cell_mode, dropout, is_bi)
    E=E.to(device)
    
    D=Decoder(68, dec_embed_size, hid_size, num_layers, cell_mode, dropout, is_bi)
    
    D=D.to(device)
    S=Seq2Seq(E,D,device)
    S.to(device)    
    print(f'The model has {count_params(S):,} trainable parameters')
    
    optimizer = optim.Adam(S.parameters())
    criterion = nn.CrossEntropyLoss(ignore_index=hindi_alphabet_to_index['.'])
    criterion=criterion.to(device)
    
    best_valid_loss = float('inf')

    for epoch in range(epochs):    
        start_time = time.time()
        train_loss,train_acc,train_stuff = train(S, train_iterator, optimizer, criterion)
        valid_loss,valid_acc,val_stuff = evaluate(S, valid_iterator, criterion)
        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(S.state_dict(), 'model1.pt')

        # it's easier to see a change in perplexity between epoch as it's an exponential
        # of the loss, hence the scale of the measure is much bigger
        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs:.2f}s')
        print(f'\t Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
        print(f'\t Relaxed Train. Acc: {train_stuff*100:.2f}% | Relaxed Val. Acc: {val_stuff*100:.2f}%')
        wandb.log({'epoch':epoch, 'train loss':train_loss, 'train acc':train_acc, 'valid loss': valid_loss,
                  'valid acc': valid_acc, 'relxd train acc': train_stuff, 'relxd valid acc': val_stuff})
        
    return S

    

In [30]:
# train_iterator=train_dataloader
# valid_iterator=val_dataloader
# SS=make_model(train_iterator,valid_iterator,epochs=15)

In [31]:
def sweeper():
    
    config_defaults=None
    
    
    # Initialize new wandb run
    run=wandb.init(config=config_defaults,resume=True)
    #current config
    
    config = wandb.config
    
    np.random.seed(0) #setting a seed to make better inference of use of params
    
    train_iterator=train_dataloader
    valid_iterator=val_dataloader
    SS=make_model(train_iterator,valid_iterator, config.enc_embed_size, config.dec_embed_size,
               config.hid_size, config.num_layers, config.cell_mode, config.dropout, config.is_bi, config.epochs)
    
    run_name='Run:' +\
    ' enc_embed size: ' + str(config.enc_embed_size) +\
    ' dec_embed size: ' + str(config.dec_embed_size) +\
    ', hid_size: ' + str(config.hid_size) +\
    ', num_layers: ' + str(config.num_layers)+\
    ', cell_mode'+ str(config.cell_mode)
    ', dropout:' + str(config.dropout)+\
    ', bidirect?:' + str(config.is_bi)+\
    ', epochs:'+str(config.epochs)+\
    ' simple.'
    print(run_name)
    wandb.run.name = run_name
    wandb.run.save()
#     run.finalize()

#     wandb.run.finish()
#     run.finish()
    

    
    

In [32]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mpragalbh[0m ([33mpragalbh-tushar[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [33]:
sweep_config = {
  "name": "Simple sweep: No Attn",
  "metric": {
      "name":"valid acc",
      "goal": "maximize"
  },
  "method": "bayes",
  "parameters": {
              'num_layers':{
                  "values":[1,2,3]
              },
        'enc_embed_size':{
                "values":  [16,32, 64, 128, 256, 512]
                  
              },
      'dec_embed_size':{
                "values":  [16,32, 64, 128, 256, 512]
                  
              },
        'hid_size':{
            "values": [16,32, 64, 128, 256, 512]
        },
      
        'cell_mode':{
            'values': ['rnn','gru','lstm']
        },
        "dropout": {
            "values": [0.2,0.25,0.3]
        },
        'is_bi': {
            "values": [True, False]
        },
        'epochs': {
            "values": [10, 20, 30, 40]
        } ,
        
    }
}      



In [34]:
# sweep_id = wandb.sweep(sweep_config,  entity="pragalbh", project="DL_Assign3")

In [35]:
wandb.agent('1pfav8pe',project="DL_Assign3",entity="pragalbh", function=sweeper, count=150)

[34m[1mwandb[0m: Agent Starting Run: rqo2cy6g with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 512
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 256
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hid_size: 128
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: Currently logged in as: [33mpragalbh[0m. Use [1m`wandb login --relogin`[0m to force relogin


The model has 841,796 trainable parameters
Epoch: 01 | Time: 0.0m 35.76s
	 Train Loss: 2.734 | Train Acc: 0.21%
	 Val. Loss: 2.161 |  Val. Acc: 5.42%
	 Relaxed Train. Acc: 32.43% | Relaxed Val. Acc: 50.55%
Epoch: 02 | Time: 0.0m 35.60s
	 Train Loss: 1.758 | Train Acc: 5.26%
	 Val. Loss: 1.708 |  Val. Acc: 16.06%
	 Relaxed Train. Acc: 57.43% | Relaxed Val. Acc: 61.46%
Epoch: 03 | Time: 0.0m 35.57s
	 Train Loss: 1.483 | Train Acc: 10.91%
	 Val. Loss: 1.609 |  Val. Acc: 20.02%
	 Relaxed Train. Acc: 65.53% | Relaxed Val. Acc: 64.50%
Epoch: 04 | Time: 0.0m 35.68s
	 Train Loss: 1.371 | Train Acc: 14.29%
	 Val. Loss: 1.544 |  Val. Acc: 24.95%
	 Relaxed Train. Acc: 68.95% | Relaxed Val. Acc: 67.93%
Epoch: 05 | Time: 0.0m 35.67s
	 Train Loss: 1.302 | Train Acc: 16.74%
	 Val. Loss: 1.521 |  Val. Acc: 25.76%
	 Relaxed Train. Acc: 71.16% | Relaxed Val. Acc: 68.30%
Epoch: 06 | Time: 0.0m 35.65s
	 Train Loss: 1.258 | Train Acc: 18.45%
	 Val. Loss: 1.517 |  Val. Acc: 27.98%
	 Relaxed Train. Acc: 72.5



Epoch: 40 | Time: 0.0m 35.64s
	 Train Loss: 0.931 | Train Acc: 37.13%
	 Val. Loss: 1.442 |  Val. Acc: 34.20%
	 Relaxed Train. Acc: 83.37% | Relaxed Val. Acc: 73.91%
Run: enc_embed size: 256 dec_embed size: 512, hid_size: 128, num_layers: 2, cell_modelstm


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
relxd train acc,▁▄▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇██████████████████████
relxd valid acc,▁▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████████████████████
train acc,▁▂▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████
train loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▃▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██▇█████▇█████████
valid loss,█▄▃▂▂▂▂▂▁▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
relxd train acc,0.83375
relxd valid acc,0.73906
train acc,0.37129
train loss,0.93054
valid acc,0.34204
valid loss,1.44243


[34m[1mwandb[0m: Agent Starting Run: mj7ldib8 with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 256
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hid_size: 128
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 3


The model has 2,279,236 trainable parameters
Epoch: 01 | Time: 1.0m 21.51s
	 Train Loss: 1.817 | Train Acc: 7.31%
	 Val. Loss: 1.574 |  Val. Acc: 23.41%
	 Relaxed Train. Acc: 56.56% | Relaxed Val. Acc: 66.67%
Epoch: 02 | Time: 1.0m 21.53s
	 Train Loss: 1.242 | Train Acc: 18.90%
	 Val. Loss: 1.464 |  Val. Acc: 30.86%
	 Relaxed Train. Acc: 73.02% | Relaxed Val. Acc: 70.24%
Epoch: 03 | Time: 1.0m 21.40s
	 Train Loss: 1.121 | Train Acc: 24.73%
	 Val. Loss: 1.410 |  Val. Acc: 34.33%
	 Relaxed Train. Acc: 77.01% | Relaxed Val. Acc: 73.18%
Epoch: 04 | Time: 1.0m 21.36s
	 Train Loss: 1.057 | Train Acc: 28.64%
	 Val. Loss: 1.398 |  Val. Acc: 35.03%
	 Relaxed Train. Acc: 79.26% | Relaxed Val. Acc: 73.17%
Epoch: 05 | Time: 1.0m 21.34s
	 Train Loss: 1.015 | Train Acc: 30.82%
	 Val. Loss: 1.390 |  Val. Acc: 36.35%
	 Relaxed Train. Acc: 80.64% | Relaxed Val. Acc: 73.81%
Epoch: 06 | Time: 1.0m 21.32s
	 Train Loss: 0.983 | Train Acc: 33.23%
	 Val. Loss: 1.361 |  Val. Acc: 38.01%
	 Relaxed Train. Acc: 

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
relxd train acc,▁▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
relxd valid acc,▁▃▅▅▆▆▇▇▇▇▇▇▇██▇▇██▇████████▇███▇█▇██▇▇▇
train acc,▁▃▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████
train loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▄▅▅▆▆▇▇▇▇▇▇▇██▇███████████████████████▇
valid loss,█▄▃▂▂▁▁▂▂▂▂▂▂▂▁▂▃▂▃▃▃▃▃▃▃▃▄▄▅▄▄▄▅▅▆▅▅▅▅▅

0,1
epoch,39.0
relxd train acc,0.89898
relxd valid acc,0.76381
train acc,0.54607
train loss,0.73829
valid acc,0.40869
valid loss,1.48754


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b11o7b6v with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hid_size: 512
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 3


The model has 30,073,732 trainable parameters
Epoch: 01 | Time: 2.0m 37.85s
	 Train Loss: 1.471 | Train Acc: 14.62%
	 Val. Loss: 1.424 |  Val. Acc: 31.84%
	 Relaxed Train. Acc: 66.31% | Relaxed Val. Acc: 71.78%
Epoch: 02 | Time: 2.0m 38.07s
	 Train Loss: 1.055 | Train Acc: 28.43%
	 Val. Loss: 1.359 |  Val. Acc: 36.11%
	 Relaxed Train. Acc: 79.24% | Relaxed Val. Acc: 73.85%
Epoch: 03 | Time: 2.0m 38.00s
	 Train Loss: 0.958 | Train Acc: 34.64%
	 Val. Loss: 1.351 |  Val. Acc: 37.23%
	 Relaxed Train. Acc: 82.49% | Relaxed Val. Acc: 74.81%
Epoch: 04 | Time: 2.0m 37.99s
	 Train Loss: 0.903 | Train Acc: 39.00%
	 Val. Loss: 1.358 |  Val. Acc: 39.92%
	 Relaxed Train. Acc: 84.38% | Relaxed Val. Acc: 76.07%
Epoch: 05 | Time: 2.0m 37.93s
	 Train Loss: 0.859 | Train Acc: 42.59%
	 Val. Loss: 1.358 |  Val. Acc: 38.31%
	 Relaxed Train. Acc: 85.81% | Relaxed Val. Acc: 75.83%
Epoch: 06 | Time: 2.0m 37.87s
	 Train Loss: 0.826 | Train Acc: 45.79%
	 Val. Loss: 1.370 |  Val. Acc: 40.58%
	 Relaxed Train. Acc

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
relxd train acc,▁▅▆▆▇▇▇▇▇███████████
relxd valid acc,▁▄▅▆▆▇▇█▇▇█▇▇▇▇▇▇▇▇▇
train acc,▁▃▄▅▅▆▆▇▇▇▇▇████████
train loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▄▅▇▆▇▇█▇▇▇▇██▇▆▇▇██
valid loss,▄▁▁▁▁▂▄▃▄▄▄▆▆▇▇█▇▇▇█

0,1
epoch,19.0
relxd train acc,0.90869
relxd valid acc,0.76863
train acc,0.58227
train loss,0.70588
valid acc,0.41162
valid loss,1.51732


[34m[1mwandb[0m: Agent Starting Run: dfx1r7ay with config:
[34m[1mwandb[0m: 	cell_mode: gru
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hid_size: 256
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 1,079,236 trainable parameters
Epoch: 01 | Time: 0.0m 31.26s
	 Train Loss: 1.651 | Train Acc: 9.53%
	 Val. Loss: 1.550 |  Val. Acc: 24.15%
	 Relaxed Train. Acc: 60.42% | Relaxed Val. Acc: 67.17%
Epoch: 02 | Time: 0.0m 31.14s
	 Train Loss: 1.218 | Train Acc: 21.10%
	 Val. Loss: 1.533 |  Val. Acc: 27.03%
	 Relaxed Train. Acc: 73.80% | Relaxed Val. Acc: 69.07%
Epoch: 03 | Time: 0.0m 31.07s
	 Train Loss: 1.104 | Train Acc: 26.58%
	 Val. Loss: 1.491 |  Val. Acc: 29.96%
	 Relaxed Train. Acc: 77.67% | Relaxed Val. Acc: 69.97%
Epoch: 04 | Time: 0.0m 31.03s
	 Train Loss: 1.046 | Train Acc: 30.38%
	 Val. Loss: 1.476 |  Val. Acc: 31.74%
	 Relaxed Train. Acc: 79.72% | Relaxed Val. Acc: 71.54%
Epoch: 05 | Time: 0.0m 30.99s
	 Train Loss: 1.001 | Train Acc: 33.07%
	 Val. Loss: 1.454 |  Val. Acc: 32.84%
	 Relaxed Train. Acc: 81.19% | Relaxed Val. Acc: 72.95%
Epoch: 06 | Time: 0.0m 30.97s
	 Train Loss: 0.960 | Train Acc: 36.20%
	 Val. Loss: 1.493 |  Val. Acc: 33.64%
	 Relaxed Train. Acc: 

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
relxd train acc,▁▄▅▅▆▆▆▇▇▇▇▇▇▇██████████████████████████
relxd valid acc,▁▃▄▆█▇▇▇██▇████▇▇▆▇▇▇▇▆▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▇▆
train acc,▁▃▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇████████████████████
train loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▃▅▆▇▇▇▇█▇▇█▇█▇▇▆▆▆▆▆▇▆▆▆▇▆▆▆▆▆▆▆▅▅▆▆▆▇▆
valid loss,▃▃▂▁▁▂▂▃▂▃▄▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇█▇▇▇██▇▇

0,1
epoch,39.0
relxd train acc,0.90405
relxd valid acc,0.7154
train acc,0.58305
train loss,0.73221
valid acc,0.31934
valid loss,1.76905


[34m[1mwandb[0m: Agent Starting Run: r21kns0f with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 64
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 64
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hid_size: 32
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 60,868 trainable parameters
Epoch: 01 | Time: 0.0m 26.85s
	 Train Loss: 2.398 | Train Acc: 0.66%
	 Val. Loss: 2.161 |  Val. Acc: 5.42%
	 Relaxed Train. Acc: 40.55% | Relaxed Val. Acc: 48.47%
Epoch: 02 | Time: 0.0m 26.83s
	 Train Loss: 1.892 | Train Acc: 3.21%
	 Val. Loss: 1.953 |  Val. Acc: 8.37%
	 Relaxed Train. Acc: 53.11% | Relaxed Val. Acc: 54.12%
Epoch: 03 | Time: 0.0m 26.85s
	 Train Loss: 1.724 | Train Acc: 5.49%
	 Val. Loss: 1.844 |  Val. Acc: 11.43%
	 Relaxed Train. Acc: 58.02% | Relaxed Val. Acc: 56.64%
Epoch: 04 | Time: 0.0m 26.80s
	 Train Loss: 1.632 | Train Acc: 7.51%
	 Val. Loss: 1.804 |  Val. Acc: 13.38%
	 Relaxed Train. Acc: 60.77% | Relaxed Val. Acc: 58.77%
Epoch: 05 | Time: 0.0m 26.77s
	 Train Loss: 1.576 | Train Acc: 8.98%
	 Val. Loss: 1.762 |  Val. Acc: 15.75%
	 Relaxed Train. Acc: 62.52% | Relaxed Val. Acc: 60.36%
Epoch: 06 | Time: 0.0m 26.73s
	 Train Loss: 1.532 | Train Acc: 10.19%
	 Val. Loss: 1.732 |  Val. Acc: 16.50%
	 Relaxed Train. Acc: 63.95% | 

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
relxd train acc,▁▄▅▆▆▆▇▇▇▇▇▇████████
relxd valid acc,▁▃▄▅▆▆▇▇▇▇▇▇▇███████
train acc,▁▂▃▄▅▅▆▆▆▆▇▇▇▇▇█████
train loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
valid acc,▁▂▃▄▅▅▅▆▆▆▇▇▇▇▇▇▇███
valid loss,█▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
relxd train acc,0.70655
relxd valid acc,0.64979
train acc,0.16824
train loss,1.3295
valid acc,0.22339
valid loss,1.63023


[34m[1mwandb[0m: Agent Starting Run: s1huhmdf with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 128
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hid_size: 16
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 3


The model has 60,068 trainable parameters
Epoch: 01 | Time: 0.0m 42.21s
	 Train Loss: 3.026 | Train Acc: 0.00%
	 Val. Loss: 2.791 |  Val. Acc: 0.12%
	 Relaxed Train. Acc: 24.92% | Relaxed Val. Acc: 33.08%
Epoch: 02 | Time: 0.0m 42.25s
	 Train Loss: 2.536 | Train Acc: 0.06%
	 Val. Loss: 2.387 |  Val. Acc: 1.17%
	 Relaxed Train. Acc: 35.87% | Relaxed Val. Acc: 42.83%
Epoch: 03 | Time: 0.0m 42.24s
	 Train Loss: 2.288 | Train Acc: 0.27%
	 Val. Loss: 2.203 |  Val. Acc: 2.69%
	 Relaxed Train. Acc: 41.98% | Relaxed Val. Acc: 47.70%
Epoch: 04 | Time: 0.0m 42.25s
	 Train Loss: 2.141 | Train Acc: 0.57%
	 Val. Loss: 2.081 |  Val. Acc: 4.57%
	 Relaxed Train. Acc: 46.05% | Relaxed Val. Acc: 51.31%
Epoch: 05 | Time: 0.0m 42.25s
	 Train Loss: 2.040 | Train Acc: 1.10%
	 Val. Loss: 1.987 |  Val. Acc: 6.25%
	 Relaxed Train. Acc: 48.83% | Relaxed Val. Acc: 53.55%
Epoch: 06 | Time: 0.0m 42.19s
	 Train Loss: 1.965 | Train Acc: 1.78%
	 Val. Loss: 1.932 |  Val. Acc: 7.84%
	 Relaxed Train. Acc: 50.90% | Relax

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▃▅▆▆▇▇███
relxd valid acc,▁▄▅▆▇▇▇███
train acc,▁▁▁▂▃▄▅▆▇█
train loss,█▅▄▃▂▂▂▁▁▁
valid acc,▁▂▂▃▄▅▆▇▇█
valid loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,9.0
relxd train acc,0.55751
relxd valid acc,0.58576
train acc,0.03795
train loss,1.80175
valid acc,0.12891
valid loss,1.82106


[34m[1mwandb[0m: Agent Starting Run: 51kp3rdm with config:
[34m[1mwandb[0m: 	cell_mode: rnn
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hid_size: 32
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 3


The model has 41,988 trainable parameters
Epoch: 01 | Time: 0.0m 40.48s
	 Train Loss: 2.784 | Train Acc: 0.04%
	 Val. Loss: 2.530 |  Val. Acc: 0.83%
	 Relaxed Train. Acc: 33.57% | Relaxed Val. Acc: 42.64%
Epoch: 02 | Time: 0.0m 40.44s
	 Train Loss: 2.430 | Train Acc: 0.21%
	 Val. Loss: 2.327 |  Val. Acc: 1.86%
	 Relaxed Train. Acc: 41.35% | Relaxed Val. Acc: 46.09%
Epoch: 03 | Time: 0.0m 40.44s
	 Train Loss: 2.275 | Train Acc: 0.55%
	 Val. Loss: 2.213 |  Val. Acc: 3.83%
	 Relaxed Train. Acc: 44.38% | Relaxed Val. Acc: 48.77%
Epoch: 04 | Time: 0.0m 40.51s
	 Train Loss: 2.176 | Train Acc: 0.96%
	 Val. Loss: 2.120 |  Val. Acc: 4.49%
	 Relaxed Train. Acc: 46.66% | Relaxed Val. Acc: 50.65%
Epoch: 05 | Time: 0.0m 40.41s
	 Train Loss: 2.094 | Train Acc: 1.46%
	 Val. Loss: 2.076 |  Val. Acc: 5.13%
	 Relaxed Train. Acc: 48.54% | Relaxed Val. Acc: 51.86%
Epoch: 06 | Time: 0.0m 40.43s
	 Train Loss: 2.028 | Train Acc: 2.01%
	 Val. Loss: 2.034 |  Val. Acc: 7.01%
	 Relaxed Train. Acc: 50.33% | Relax

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▄▅▅▆▇▇▇██
relxd valid acc,▁▃▄▅▆▆▇▇▇█
train acc,▁▁▂▃▄▅▆▆▇█
train loss,█▅▄▃▃▂▂▂▁▁
valid acc,▁▂▃▄▄▆▆▇▆█
valid loss,█▆▄▃▃▂▂▂▂▁

0,1
epoch,9.0
relxd train acc,0.54305
relxd valid acc,0.56803
train acc,0.03535
train loss,1.87341
valid acc,0.0979
valid loss,1.90214


[34m[1mwandb[0m: Agent Starting Run: so718mic with config:
[34m[1mwandb[0m: 	cell_mode: rnn
[34m[1mwandb[0m: 	dec_embed_size: 512
[34m[1mwandb[0m: 	dropout: 0.25
[34m[1mwandb[0m: 	enc_embed_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hid_size: 256
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 630,724 trainable parameters
Epoch: 01 | Time: 0.0m 32.33s
	 Train Loss: 2.502 | Train Acc: 0.30%
	 Val. Loss: 2.443 |  Val. Acc: 2.27%
	 Relaxed Train. Acc: 40.77% | Relaxed Val. Acc: 46.03%
Epoch: 02 | Time: 0.0m 32.32s
	 Train Loss: 2.259 | Train Acc: 0.68%
	 Val. Loss: 2.366 |  Val. Acc: 2.93%
	 Relaxed Train. Acc: 45.93% | Relaxed Val. Acc: 47.15%
Epoch: 03 | Time: 0.0m 32.38s
	 Train Loss: 2.202 | Train Acc: 0.91%
	 Val. Loss: 2.318 |  Val. Acc: 2.76%
	 Relaxed Train. Acc: 47.28% | Relaxed Val. Acc: 47.76%
Epoch: 04 | Time: 0.0m 32.39s
	 Train Loss: 2.177 | Train Acc: 0.95%
	 Val. Loss: 2.311 |  Val. Acc: 4.03%
	 Relaxed Train. Acc: 47.74% | Relaxed Val. Acc: 48.60%
Epoch: 05 | Time: 0.0m 32.39s
	 Train Loss: 2.164 | Train Acc: 1.01%
	 Val. Loss: 2.317 |  Val. Acc: 3.49%
	 Relaxed Train. Acc: 48.11% | Relaxed Val. Acc: 48.46%
Epoch: 06 | Time: 0.0m 32.39s
	 Train Loss: 2.158 | Train Acc: 1.12%
	 Val. Loss: 2.294 |  Val. Acc: 4.08%
	 Relaxed Train. Acc: 48.36% | Rela

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▆▇▇██████
relxd valid acc,▁▄▅▇▇█▇█▇▆
train acc,▁▄▆▇▇█▇██▆
train loss,█▃▂▂▁▁▁▁▁▁
valid acc,▁▄▃█▆█▆▇█▄
valid loss,█▄▂▂▂▁▃▃▂▃

0,1
epoch,9.0
relxd train acc,0.48623
relxd valid acc,0.48135
train acc,0.00937
train loss,2.15008
valid acc,0.03101
valid loss,2.32841


[34m[1mwandb[0m: Agent Starting Run: refwd3nq with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 32
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 16
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hid_size: 16
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 3


The model has 41,252 trainable parameters
Epoch: 01 | Time: 0.0m 41.95s
	 Train Loss: 2.982 | Train Acc: 0.00%
	 Val. Loss: 2.711 |  Val. Acc: 0.20%
	 Relaxed Train. Acc: 26.28% | Relaxed Val. Acc: 34.88%
Epoch: 02 | Time: 0.0m 41.94s
	 Train Loss: 2.457 | Train Acc: 0.13%
	 Val. Loss: 2.318 |  Val. Acc: 1.78%
	 Relaxed Train. Acc: 38.21% | Relaxed Val. Acc: 44.22%
Epoch: 03 | Time: 0.0m 41.92s
	 Train Loss: 2.207 | Train Acc: 0.51%
	 Val. Loss: 2.128 |  Val. Acc: 4.22%
	 Relaxed Train. Acc: 44.78% | Relaxed Val. Acc: 49.39%
Epoch: 04 | Time: 0.0m 41.95s
	 Train Loss: 2.072 | Train Acc: 1.07%
	 Val. Loss: 2.047 |  Val. Acc: 5.83%
	 Relaxed Train. Acc: 48.28% | Relaxed Val. Acc: 52.30%
Epoch: 05 | Time: 0.0m 41.90s
	 Train Loss: 1.984 | Train Acc: 1.64%
	 Val. Loss: 1.975 |  Val. Acc: 7.20%
	 Relaxed Train. Acc: 50.68% | Relaxed Val. Acc: 54.42%
Epoch: 06 | Time: 0.0m 41.92s
	 Train Loss: 1.921 | Train Acc: 2.27%
	 Val. Loss: 1.920 |  Val. Acc: 8.40%
	 Relaxed Train. Acc: 52.27% | Relax

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
relxd train acc,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇████████████
relxd valid acc,▁▃▄▅▆▆▆▆▆▆▇▇▇▇▇▇▇█▇███████████
train acc,▁▁▁▂▂▃▃▃▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇██████
train loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▂▂▃▃▄▄▅▅▅▅▆▆▆▆▆▇▇▇█▇█████████
valid loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,29.0
relxd train acc,0.6472
relxd valid acc,0.64879
train acc,0.10119
train loss,1.51423
valid acc,0.19824
valid loss,1.62


[34m[1mwandb[0m: Agent Starting Run: 3p7jeokx with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 128
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	enc_embed_size: 32
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hid_size: 16
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 36,996 trainable parameters
Epoch: 01 | Time: 0.0m 26.60s
	 Train Loss: 2.810 | Train Acc: 0.03%
	 Val. Loss: 2.701 |  Val. Acc: 0.54%
	 Relaxed Train. Acc: 30.79% | Relaxed Val. Acc: 35.31%
Epoch: 02 | Time: 0.0m 26.60s
	 Train Loss: 2.419 | Train Acc: 0.16%
	 Val. Loss: 2.499 |  Val. Acc: 0.78%
	 Relaxed Train. Acc: 38.62% | Relaxed Val. Acc: 38.14%
Epoch: 03 | Time: 0.0m 26.60s
	 Train Loss: 2.271 | Train Acc: 0.46%
	 Val. Loss: 2.391 |  Val. Acc: 1.73%
	 Relaxed Train. Acc: 42.29% | Relaxed Val. Acc: 41.72%
Epoch: 04 | Time: 0.0m 26.58s
	 Train Loss: 2.168 | Train Acc: 0.81%
	 Val. Loss: 2.319 |  Val. Acc: 2.81%
	 Relaxed Train. Acc: 45.12% | Relaxed Val. Acc: 43.88%
Epoch: 05 | Time: 0.0m 26.60s
	 Train Loss: 2.106 | Train Acc: 1.12%
	 Val. Loss: 2.252 |  Val. Acc: 3.44%
	 Relaxed Train. Acc: 46.83% | Relaxed Val. Acc: 45.67%
Epoch: 06 | Time: 0.0m 26.59s
	 Train Loss: 2.045 | Train Acc: 1.52%
	 Val. Loss: 2.185 |  Val. Acc: 4.13%
	 Relaxed Train. Acc: 48.56% | Relax

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
relxd train acc,▁▃▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████
relxd valid acc,▁▂▃▄▄▅▅▆▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
train acc,▁▁▁▂▂▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇█████
train loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▁▂▂▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▆▇▇▇▇▇▇▇████▇
valid loss,█▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
relxd train acc,0.59368
relxd valid acc,0.56489
train acc,0.06717
train loss,1.68957
valid acc,0.11621
valid loss,1.90259


[34m[1mwandb[0m: Agent Starting Run: bx5ks9i5 with config:
[34m[1mwandb[0m: 	cell_mode: gru
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 32
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	hid_size: 512
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 1


The model has 3,377,220 trainable parameters
Epoch: 01 | Time: 0.0m 35.71s
	 Train Loss: 1.613 | Train Acc: 10.81%
	 Val. Loss: 1.555 |  Val. Acc: 25.20%
	 Relaxed Train. Acc: 61.98% | Relaxed Val. Acc: 67.22%
Epoch: 02 | Time: 0.0m 35.71s
	 Train Loss: 1.165 | Train Acc: 23.43%
	 Val. Loss: 1.453 |  Val. Acc: 29.98%
	 Relaxed Train. Acc: 75.61% | Relaxed Val. Acc: 71.01%
Epoch: 03 | Time: 0.0m 35.63s
	 Train Loss: 1.044 | Train Acc: 30.18%
	 Val. Loss: 1.441 |  Val. Acc: 32.15%
	 Relaxed Train. Acc: 79.72% | Relaxed Val. Acc: 72.34%
Epoch: 04 | Time: 0.0m 35.56s
	 Train Loss: 0.973 | Train Acc: 34.87%
	 Val. Loss: 1.417 |  Val. Acc: 34.23%
	 Relaxed Train. Acc: 82.06% | Relaxed Val. Acc: 73.28%
Epoch: 05 | Time: 0.0m 35.53s
	 Train Loss: 0.919 | Train Acc: 39.37%
	 Val. Loss: 1.483 |  Val. Acc: 31.86%
	 Relaxed Train. Acc: 84.01% | Relaxed Val. Acc: 72.29%
Epoch: 06 | Time: 0.0m 35.48s
	 Train Loss: 0.879 | Train Acc: 42.44%
	 Val. Loss: 1.522 |  Val. Acc: 33.74%
	 Relaxed Train. Acc:

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
relxd train acc,▁▄▅▆▆▇▇▇▇███████████████████████████████
relxd valid acc,▁▅▇█▇▇▇▇▇▇█▇▇▇▇▇█▇▇▆▆▇▇▆▇▇▇▇▆▆▆▆▆▇▆▆▇▅▆▆
train acc,▁▃▄▄▅▆▆▆▇▇▇▇████████████████████████████
train loss,█▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
valid acc,▁▄▆█▆▇█▇▇▇█▆▇▇▆▆▆▇▆▆▆▆▆▅▆▅▆▆▅▅▅▅▅▆▄▅▆▅▅▄
valid loss,▄▂▁▁▂▃▃▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█▇▇██▇█▇▇█▇▇

0,1
epoch,39.0
relxd train acc,0.8972
relxd valid acc,0.71511
train acc,0.56543
train loss,0.74784
valid acc,0.29541
valid loss,1.72394


[34m[1mwandb[0m: Agent Starting Run: 4g6oidjq with config:
[34m[1mwandb[0m: 	cell_mode: lstm
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hid_size: 512
[34m[1mwandb[0m: 	is_bi: False
[34m[1mwandb[0m: 	num_layers: 3


The model has 10,845,060 trainable parameters
Epoch: 01 | Time: 1.0m 15.99s
	 Train Loss: 2.805 | Train Acc: 0.14%
	 Val. Loss: 2.100 |  Val. Acc: 5.57%
	 Relaxed Train. Acc: 30.55% | Relaxed Val. Acc: 51.95%
Epoch: 02 | Time: 1.0m 16.27s
	 Train Loss: 1.528 | Train Acc: 10.30%
	 Val. Loss: 1.526 |  Val. Acc: 25.37%
	 Relaxed Train. Acc: 64.95% | Relaxed Val. Acc: 68.46%
Epoch: 03 | Time: 1.0m 16.21s
	 Train Loss: 1.199 | Train Acc: 20.51%
	 Val. Loss: 1.441 |  Val. Acc: 31.49%
	 Relaxed Train. Acc: 74.57% | Relaxed Val. Acc: 71.63%
Epoch: 04 | Time: 1.0m 16.20s
	 Train Loss: 1.077 | Train Acc: 26.84%
	 Val. Loss: 1.407 |  Val. Acc: 34.96%
	 Relaxed Train. Acc: 78.61% | Relaxed Val. Acc: 73.70%
Epoch: 05 | Time: 1.0m 16.12s
	 Train Loss: 1.011 | Train Acc: 31.34%
	 Val. Loss: 1.389 |  Val. Acc: 35.42%
	 Relaxed Train. Acc: 80.78% | Relaxed Val. Acc: 73.81%
Epoch: 06 | Time: 1.0m 16.08s
	 Train Loss: 0.957 | Train Acc: 34.91%
	 Val. Loss: 1.382 |  Val. Acc: 37.82%
	 Relaxed Train. Acc: 

0,1
epoch,▁▂▃▃▄▅▆▆▇█
relxd train acc,▁▅▆▇▇▇████
relxd valid acc,▁▆▇▇▇█████
train acc,▁▃▄▅▆▆▇▇██
train loss,█▃▂▂▂▁▁▁▁▁
valid acc,▁▅▆▇▇█████
valid loss,█▂▂▁▁▁▁▁▁▁

0,1
epoch,9.0
relxd train acc,0.8703
relxd valid acc,0.75637
train acc,0.4602
train loss,0.82199
valid acc,0.38477
valid loss,1.39481


[34m[1mwandb[0m: Agent Starting Run: yw20aj7u with config:
[34m[1mwandb[0m: 	cell_mode: rnn
[34m[1mwandb[0m: 	dec_embed_size: 16
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	enc_embed_size: 128
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hid_size: 512
[34m[1mwandb[0m: 	is_bi: True
[34m[1mwandb[0m: 	num_layers: 2


The model has 4,424,580 trainable parameters


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
sweeper()

In [None]:
a=next(iter(train_dataloader))

In [None]:
bya=SS(a['input'],a['output'])

In [None]:
a['output'].shape

In [None]:
bya.shape

In [None]:
import math

In [None]:
def word_from_torchies(torchie1,index_toalp):
    torchie=torchie1.cpu().numpy()
    return word_from_vecs(torchie,index_toalp,False)

In [None]:
def word_from_batch(batch):
    wordlet=[]
    for i in range(len(batch)):
        wordlet.append(word_from_torchies(batch[i],index_to_hindi_alphabet))
    return wordlet
        

In [None]:
word_from_torchies(bya.transpose(0,1).argmax(2)[0],index_to_hindi_alphabet)

In [None]:
index_to_hindi_alphabet[65]

In [None]:
def make_wrd(stuff):
    asa=[]
    for k in stuff.cpu().numpy():
        asa.append(index_to_hindi_alphabet[k])
    return "".join(asa)
    

In [None]:
make_wrd(bya.transpose(0,1).argmax(2)[10])

In [None]:
bya.transpose(0,1).argmax(2)[10]

In [None]:
for i in range(16):
    print('................')
    print(i)
    print(word_from_torchies(a['output'][i],index_to_hindi_alphabet),\
         '---',\
         make_wrd(bya.transpose(0,1).argmax(2)[i])\
         )


In [None]:
word (self, source_batch,target_batch):
    max_len, batch_size = target_batch.shape
    outputs = torch.zeros(max_len, batch_size, target_vocab_size).to(self.device)


            # last hidden & cell state of the encoder is used as the decoder's initial hidden state
    hidden, cell = self.encoder(source_batch)

    wordet=[]


    trg = torch.tensor(hindi_alphabet_to_index['<'])
    trg=trg.to(device)
    wordet.append(index_to_hindi_alphabet(trg.cpu().numpy()))
    for i in range(1, max_len):
        prediction, hidden, cell = self.decoder(trg, hidden, cell)
        outputs[i] = prediction
        trg = prediction.argmax(1)
        wordet.append(index_to_hindi_alphabet(trg.cpu().numpy()))


    return ''.join(wordet)

In [None]:
word_from_vecs(X_valid[0],index_to_english_alphabet,False)

In [None]:
word_from_vecs(y_valid[0],index_to_hindi_alphabet,False)

In [None]:
next(iter(train_dataloader))

In [None]:
            for j in range(predicted.shape[1]):
                predicted_seq = predicted[:, j]
                targets_seq = target_seq[:, j]

                # Find the index of the first EOS token in the sequence
                eos_idx = (targets_seq == hin_token_map["\n"]).nonzero()
                if eos_idx.numel() > 0:
                    eos_idx = eos_idx[0][0]
                    predicted_seq = predicted_seq[:eos_idx]
                    targets_seq = targets_seq[:eos_idx]

In [None]:
s